Diffstat (limited to 'drivers/gpu/drm/i915/gt')
76 files changed, 9009 insertions, 2765 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 09c68dda2098..55317081d48b 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -120,7 +120,6 @@ __dma_fence_signal__notify(struct dma_fence *fence, struct dma_fence_cb *cur, *tmp; lockdep_assert_held(fence->lock); - lockdep_assert_irqs_disabled(); list_for_each_entry_safe(cur, tmp, list, node) { INIT_LIST_HEAD(&cur->node); @@ -134,9 +133,10 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) const ktime_t timestamp = ktime_get(); struct intel_context *ce, *cn; struct list_head *pos, *next; + unsigned long flags; LIST_HEAD(signal); - spin_lock(&b->irq_lock); + spin_lock_irqsave(&b->irq_lock, flags); if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); @@ -182,30 +182,23 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) } } - spin_unlock(&b->irq_lock); + spin_unlock_irqrestore(&b->irq_lock, flags); list_for_each_safe(pos, next, &signal) { struct i915_request *rq = list_entry(pos, typeof(*rq), signal_link); struct list_head cb_list; - spin_lock(&rq->lock); + spin_lock_irqsave(&rq->lock, flags); list_replace(&rq->fence.cb_list, &cb_list); __dma_fence_signal__timestamp(&rq->fence, timestamp); __dma_fence_signal__notify(&rq->fence, &cb_list); - spin_unlock(&rq->lock); + spin_unlock_irqrestore(&rq->lock, flags); i915_request_put(rq); } } -void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) -{ - local_irq_disable(); - intel_engine_breadcrumbs_irq(engine); - local_irq_enable(); -} - static void signal_irq_work(struct irq_work *work) { struct intel_engine_cs *engine = @@ -275,7 +268,6 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) bool i915_request_enable_breadcrumb(struct i915_request *rq) { lockdep_assert_held(&rq->lock); - lockdep_assert_irqs_disabled(); if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; @@ -325,7 +317,6 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; lockdep_assert_held(&rq->lock); - lockdep_assert_irqs_disabled(); /* * We must wait for b->irq_lock so that we know the interrupt handler diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index f55691d151ae..ee9d2bcd2c13 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -13,6 +13,7 @@ #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" +#include "intel_ring.h" static struct i915_global_context { struct i915_global base; @@ -62,7 +63,7 @@ int __intel_context_do_pin(struct intel_context *ce) } err = 0; - with_intel_runtime_pm(&ce->engine->i915->runtime_pm, wakeref) + with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref) err = ce->ops->pin(ce); if (err) goto err; @@ -134,10 +135,11 @@ static int __context_pin_state(struct i915_vma *vma) static void __context_unpin_state(struct i915_vma *vma) { - __i915_vma_unpin(vma); i915_vma_make_shrinkable(vma); + __i915_vma_unpin(vma); } +__i915_active_call static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); @@ -150,6 +152,7 @@ static void __intel_context_retire(struct i915_active *active) intel_timeline_unpin(ce->timeline); intel_ring_unpin(ce->ring); + intel_context_put(ce); } @@ -219,12 
+222,20 @@ intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine) { + struct i915_address_space *vm; + GEM_BUG_ON(!engine->cops); kref_init(&ce->ref); ce->gem_context = ctx; - ce->vm = i915_vm_get(ctx->vm ?: &engine->gt->ggtt->vm); + rcu_read_lock(); + vm = rcu_dereference(ctx->vm); + if (vm) + ce->vm = i915_vm_get(vm); + else + ce->vm = i915_vm_get(&engine->gt->ggtt->vm); + rcu_read_unlock(); if (ctx->timeline) ce->timeline = intel_timeline_get(ctx->timeline); @@ -238,7 +249,7 @@ intel_context_init(struct intel_context *ce, mutex_init(&ce->pin_mutex); - i915_active_init(ctx->i915, &ce->active, + i915_active_init(&ce->active, __intel_context_active, __intel_context_retire); } @@ -298,14 +309,14 @@ int intel_context_prepare_remote_request(struct intel_context *ce, /* Only suitable for use in remotely modifying this context */ GEM_BUG_ON(rq->hw_context == ce); - if (rq->timeline != tl) { /* beware timeline sharing */ + if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ err = mutex_lock_interruptible_nested(&tl->mutex, SINGLE_DEPTH_NESTING); if (err) return err; /* Queue this switch after current activity by this context. */ - err = i915_active_request_set(&tl->last_request, rq); + err = i915_active_fence_set(&tl->last_request, rq); mutex_unlock(&tl->mutex); if (err) return err; @@ -319,7 +330,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce, * words transfer the pinned ce object to tracked active request. */ GEM_BUG_ON(i915_active_is_idle(&ce->active)); - return i915_active_ref(&ce->active, rq->timeline, rq); + return i915_active_add_request(&ce->active, rq); } struct i915_request *intel_context_create_request(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index dd742ac2fbdb..68b3d317d959 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -12,6 +12,7 @@ #include "i915_active.h" #include "intel_context_types.h" #include "intel_engine_types.h" +#include "intel_ring_types.h" #include "intel_timeline_types.h" void intel_context_init(struct intel_context *ce, diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index bf9cedfccbf0..6959b05ae5f8 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -58,6 +58,7 @@ struct intel_context { u32 *lrc_reg_state; u64 lrc_desc; + u32 tag; /* cookie passed to HW to track this context on submission */ unsigned int active_count; /* protected by timeline->mutex */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 22aab8593abf..bc3b72bfa9e3 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -19,6 +19,7 @@ #include "intel_workarounds.h" struct drm_printer; +struct intel_gt; /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, * but keeps the logic simple. Indeed, the whole purpose of this macro is just @@ -89,38 +90,6 @@ struct drm_printer; /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. 
*/ -enum intel_engine_hangcheck_action { - ENGINE_IDLE = 0, - ENGINE_WAIT, - ENGINE_ACTIVE_SEQNO, - ENGINE_ACTIVE_HEAD, - ENGINE_ACTIVE_SUBUNITS, - ENGINE_WAIT_KICK, - ENGINE_DEAD, -}; - -static inline const char * -hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) -{ - switch (a) { - case ENGINE_IDLE: - return "idle"; - case ENGINE_WAIT: - return "wait"; - case ENGINE_ACTIVE_SEQNO: - return "active seqno"; - case ENGINE_ACTIVE_HEAD: - return "active head"; - case ENGINE_ACTIVE_SUBUNITS: - return "active subunits"; - case ENGINE_WAIT_KICK: - return "wait kick"; - case ENGINE_DEAD: - return "dead"; - } - - return "unknown"; -} static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) @@ -206,126 +175,13 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_HWS_CSB_WRITE_INDEX 0x1f #define CNL_HWS_CSB_WRITE_INDEX 0x2f -struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size); -int intel_ring_pin(struct intel_ring *ring); -void intel_ring_reset(struct intel_ring *ring, u32 tail); -unsigned int intel_ring_update_space(struct intel_ring *ring); -void intel_ring_unpin(struct intel_ring *ring); -void intel_ring_free(struct kref *ref); - -static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) -{ - kref_get(&ring->ref); - return ring; -} - -static inline void intel_ring_put(struct intel_ring *ring) -{ - kref_put(&ring->ref, intel_ring_free); -} - void intel_engine_stop(struct intel_engine_cs *engine); void intel_engine_cleanup(struct intel_engine_cs *engine); -int __must_check intel_ring_cacheline_align(struct i915_request *rq); - -u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); - -static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) -{ - /* Dummy function. - * - * This serves as a placeholder in the code so that the reader - * can compare against the preceding intel_ring_begin() and - * check that the number of dwords emitted matches the space - * reserved for the command packet (i.e. the value passed to - * intel_ring_begin()). - */ - GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); -} - -static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) -{ - return pos & (ring->size - 1); -} - -static inline bool -intel_ring_offset_valid(const struct intel_ring *ring, - unsigned int pos) -{ - if (pos & -ring->size) /* must be strictly within the ring */ - return false; - - if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ - return false; - - return true; -} - -static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) -{ - /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ - u32 offset = addr - rq->ring->vaddr; - GEM_BUG_ON(offset > rq->ring->size); - return intel_ring_wrap(rq->ring, offset); -} - -static inline void -assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) -{ - GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); - - /* - * "Ring Buffer Use" - * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 - * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 - * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the - * same cacheline, the Head Pointer must not be greater than the Tail - * Pointer." 
- * - * We use ring->head as the last known location of the actual RING_HEAD, - * it may have advanced but in the worst case it is equally the same - * as ring->head and so we should never program RING_TAIL to advance - * into the same cacheline as ring->head. - */ -#define cacheline(a) round_down(a, CACHELINE_BYTES) - GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && - tail < ring->head); -#undef cacheline -} - -static inline unsigned int -intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) -{ - /* Whilst writes to the tail are strictly order, there is no - * serialisation between readers and the writers. The tail may be - * read by i915_request_retire() just as it is being updated - * by execlists, as although the breadcrumb is complete, the context - * switch hasn't been seen. - */ - assert_ring_tail_valid(ring, tail); - ring->tail = tail; - return tail; -} - -static inline unsigned int -__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) -{ - /* - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the - * same cacheline, the Head Pointer must not be greater than the Tail - * Pointer." - */ - GEM_BUG_ON(!is_power_of_2(size)); - return (head - tail - CACHELINE_BYTES) & (size - 1); -} - -int intel_engines_init_mmio(struct drm_i915_private *i915); -int intel_engines_setup(struct drm_i915_private *i915); -int intel_engines_init(struct drm_i915_private *i915); -void intel_engines_cleanup(struct drm_i915_private *i915); +int intel_engines_init_mmio(struct intel_gt *gt); +int intel_engines_setup(struct intel_gt *gt); +int intel_engines_init(struct intel_gt *gt); +void intel_engines_cleanup(struct intel_gt *gt); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); @@ -349,7 +205,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine); void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); static inline void @@ -422,8 +277,9 @@ static inline void __intel_engine_reset(struct intel_engine_cs *engine, engine->serial++; /* contexts lost */ } -bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct intel_gt *gt); +bool intel_engine_is_idle(struct intel_engine_cs *engine); +void intel_engine_flush_submission(struct intel_engine_cs *engine); void intel_engines_reset_default_submission(struct intel_gt *gt); @@ -434,61 +290,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); -static inline void intel_engine_context_in(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (READ_ONCE(engine->stats.enabled) == 0) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - if (engine->stats.active++ == 0) - engine->stats.start = ktime_get(); - GEM_BUG_ON(engine->stats.active == 0); - } - - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - -static inline void intel_engine_context_out(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (READ_ONCE(engine->stats.enabled) == 0) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - ktime_t last; - - if (engine->stats.active && --engine->stats.active == 0) { - /* - * Decrement the active 
context count and in case GPU - * is now idle add up to the running total. - */ - last = ktime_sub(ktime_get(), engine->stats.start); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } else if (engine->stats.active == 0) { - /* - * After turning on engine stats, context out might be - * the first event in which case we account from the - * time stats gathering was turned on. - */ - last = ktime_sub(ktime_get(), engine->stats.enabled_at); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } - } - - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - int intel_enable_engine_stats(struct intel_engine_cs *engine); void intel_disable_engine_stats(struct intel_engine_cs *engine); @@ -525,4 +326,22 @@ void intel_engine_init_active(struct intel_engine_cs *engine, #define ENGINE_MOCK 1 #define ENGINE_VIRTUAL 2 +static inline bool +intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return false; + + return intel_engine_has_preemption(engine); +} + +static inline bool +intel_engine_has_timeslices(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return false; + + return intel_engine_has_semaphores(engine); +} + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 4ce8626b140e..5ca3ec911e50 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -37,6 +37,7 @@ #include "intel_context.h" #include "intel_lrc.h" #include "intel_reset.h" +#include "intel_ring.h" /* Haswell does have the CXT_SIZE register however it does not appear to be * valid. Now, docs explain in dwords what is in the context object. The full @@ -277,6 +278,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); + if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine))) + return -EINVAL; + if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS)) return -EINVAL; @@ -293,6 +297,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); engine->id = id; + engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); engine->i915 = gt->i915; engine->gt = gt; @@ -304,6 +309,15 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->instance = info->instance; __sprint_engine_name(engine); + engine->props.heartbeat_interval_ms = + CONFIG_DRM_I915_HEARTBEAT_INTERVAL; + engine->props.preempt_timeout_ms = + CONFIG_DRM_I915_PREEMPT_TIMEOUT; + engine->props.stop_timeout_ms = + CONFIG_DRM_I915_STOP_TIMEOUT; + engine->props.timeslice_duration_ms = + CONFIG_DRM_I915_TIMESLICE_DURATION; + /* * To be overridden by the backend on setup. However to facilitate * cleanup on error during setup, we always provide the destroy vfunc. 
@@ -328,6 +342,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) intel_engine_sanitize_mmio(engine); gt->engine_class[info->class][info->instance] = engine; + gt->engine[id] = engine; intel_engine_add_user(engine); gt->i915->engine[id] = engine; @@ -365,38 +380,40 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) } } -static void intel_setup_engine_capabilities(struct drm_i915_private *i915) +static void intel_setup_engine_capabilities(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) __setup_engine_capabilities(engine); } /** * intel_engines_cleanup() - free the resources allocated for Command Streamers - * @i915: the i915 devic + * @gt: pointer to struct intel_gt */ -void intel_engines_cleanup(struct drm_i915_private *i915) +void intel_engines_cleanup(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { engine->destroy(engine); - i915->engine[id] = NULL; + gt->engine[id] = NULL; + gt->i915->engine[id] = NULL; } } /** * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers - * @i915: the i915 device + * @gt: pointer to struct intel_gt * * Return: non-zero if the initialization failed. */ -int intel_engines_init_mmio(struct drm_i915_private *i915) +int intel_engines_init_mmio(struct intel_gt *gt) { + struct drm_i915_private *i915 = gt->i915; struct intel_device_info *device_info = mkwrite_device_info(i915); const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask; unsigned int mask = 0; @@ -414,7 +431,7 @@ int intel_engines_init_mmio(struct drm_i915_private *i915) if (!HAS_ENGINE(i915, i)) continue; - err = intel_engine_setup(&i915->gt, i); + err = intel_engine_setup(gt, i); if (err) goto cleanup; @@ -431,36 +448,36 @@ int intel_engines_init_mmio(struct drm_i915_private *i915) RUNTIME_INFO(i915)->num_engines = hweight32(mask); - intel_gt_check_and_clear_faults(&i915->gt); + intel_gt_check_and_clear_faults(gt); - intel_setup_engine_capabilities(i915); + intel_setup_engine_capabilities(gt); return 0; cleanup: - intel_engines_cleanup(i915); + intel_engines_cleanup(gt); return err; } /** * intel_engines_init() - init the Engine Command Streamers - * @i915: i915 device private + * @gt: pointer to struct intel_gt * * Return: non-zero if the initialization failed. */ -int intel_engines_init(struct drm_i915_private *i915) +int intel_engines_init(struct intel_gt *gt) { int (*init)(struct intel_engine_cs *engine); struct intel_engine_cs *engine; enum intel_engine_id id; int err; - if (HAS_EXECLISTS(i915)) + if (HAS_EXECLISTS(gt->i915)) init = intel_execlists_submission_init; else init = intel_ring_submission_init; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { err = init(engine); if (err) goto cleanup; @@ -469,7 +486,7 @@ int intel_engines_init(struct drm_i915_private *i915) return 0; cleanup: - intel_engines_cleanup(i915); + intel_engines_cleanup(gt); return err; } @@ -513,7 +530,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, unsigned int flags; flags = PIN_GLOBAL; - if (!HAS_LLC(engine->i915)) + if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) /* * On g33, we cannot place HWS above 256MiB, so * restrict its pinning to the low mappable arena. 
@@ -597,7 +614,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); - intel_engine_init_hangcheck(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); @@ -616,26 +632,26 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) /** * intel_engines_setup- setup engine state not requiring hw access - * @i915: Device to setup. + * @gt: pointer to struct intel_gt * * Initializes engine structure members shared between legacy and execlists * submission modes which do not require hardware access. * * Typically done early in the submission mode specific engine setup stage. */ -int intel_engines_setup(struct drm_i915_private *i915) +int intel_engines_setup(struct intel_gt *gt) { int (*setup)(struct intel_engine_cs *engine); struct intel_engine_cs *engine; enum intel_engine_id id; int err; - if (HAS_EXECLISTS(i915)) + if (HAS_EXECLISTS(gt->i915)) setup = intel_execlists_submission_setup; else setup = intel_ring_submission_setup; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { err = intel_engine_setup_common(engine); if (err) goto cleanup; @@ -653,7 +669,7 @@ int intel_engines_setup(struct drm_i915_private *i915) return 0; cleanup: - intel_engines_cleanup(i915); + intel_engines_cleanup(gt); return err; } @@ -680,6 +696,8 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) engine->status_page.vma)) goto out_frame; + mutex_lock(&frame->timeline.mutex); + frame->ring.vaddr = frame->cs; frame->ring.size = sizeof(frame->cs); frame->ring.effective_size = frame->ring.size; @@ -688,18 +706,22 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) frame->rq.i915 = engine->i915; frame->rq.engine = engine; frame->rq.ring = &frame->ring; - frame->rq.timeline = &frame->timeline; + rcu_assign_pointer(frame->rq.timeline, &frame->timeline); dw = intel_timeline_pin(&frame->timeline); if (dw < 0) goto out_timeline; + spin_lock_irq(&engine->active.lock); dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; + spin_unlock_irq(&engine->active.lock); + GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ intel_timeline_unpin(&frame->timeline); out_timeline: + mutex_unlock(&frame->timeline.mutex); intel_timeline_fini(&frame->timeline); out_frame: kfree(frame); @@ -730,6 +752,7 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) static struct intel_context * create_kernel_context(struct intel_engine_cs *engine) { + static struct lock_class_key kernel; struct intel_context *ce; int err; @@ -745,6 +768,14 @@ create_kernel_context(struct intel_engine_cs *engine) return ERR_PTR(err); } + /* + * Give our perma-pinned kernel timelines a separate lockdep class, + * so that we can use them from within the normal user timelines + * should we need to inject GPU operations during their request + * construction. 
+ */ + lockdep_set_class(&ce->timeline->mutex, &kernel); + return ce; } @@ -814,8 +845,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) if (engine->default_state) i915_gem_object_put(engine->default_state); - intel_context_unpin(engine->kernel_context); - intel_context_put(engine->kernel_context); + if (engine->kernel_context) { + intel_context_unpin(engine->kernel_context); + intel_context_put(engine->kernel_context); + } GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); intel_wa_list_free(&engine->ctx_wa_list); @@ -851,6 +884,21 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) return bbaddr; } +static unsigned long stop_timeout(const struct intel_engine_cs *engine) +{ + if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */ + return 0; + + /* + * If we are doing a normal GPU reset, we can take our time and allow + * the engine to quiesce. We've stopped submission to the engine, and + * if we wait long enough an innocent context should complete and + * leave the engine idle. So they should not be caught unaware by + * the forthcoming GPU reset (which usually follows the stop_cs)! + */ + return READ_ONCE(engine->props.stop_timeout_ms); +} + int intel_engine_stop_cs(struct intel_engine_cs *engine) { struct intel_uncore *uncore = engine->uncore; @@ -868,7 +916,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) err = 0; if (__intel_wait_for_register_fw(uncore, mode, MODE_IDLE, MODE_IDLE, - 1000, 0, + 1000, stop_timeout(engine), NULL)) { GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name); err = -ETIMEDOUT; @@ -948,6 +996,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone) { struct drm_i915_private *i915 = engine->i915; + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; struct intel_uncore *uncore = engine->uncore; u32 mmio_base = engine->mmio_base; int slice; @@ -965,7 +1014,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); - for_each_instdone_slice_subslice(i915, slice, subslice) { + for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { instdone->sampler[slice][subslice] = read_subslice_reg(engine, slice, subslice, GEN7_SAMPLER_INSTDONE); @@ -1031,6 +1080,25 @@ static bool ring_is_idle(struct intel_engine_cs *engine) return idle; } +void intel_engine_flush_submission(struct intel_engine_cs *engine) +{ + struct tasklet_struct *t = &engine->execlists.tasklet; + + if (__tasklet_is_scheduled(t)) { + local_bh_disable(); + if (tasklet_trylock(t)) { + /* Must wait for any GPU reset in progress. */ + if (__tasklet_is_enabled(t)) + t->func(t->data); + tasklet_unlock(t); + } + local_bh_enable(); + } + + /* Otherwise flush the tasklet if it was running on another cpu */ + tasklet_unlock_wait(t); +} + /** * intel_engine_is_idle() - Report if the engine has finished process all work * @engine: the intel_engine_cs @@ -1049,21 +1117,9 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) /* Waiting to drain ELSP? */ if (execlists_active(&engine->execlists)) { - struct tasklet_struct *t = &engine->execlists.tasklet; - synchronize_hardirq(engine->i915->drm.pdev->irq); - local_bh_disable(); - if (tasklet_trylock(t)) { - /* Must wait for any GPU reset in progress. 
*/ - if (__tasklet_is_enabled(t)) - t->func(t->data); - tasklet_unlock(t); - } - local_bh_enable(); - - /* Otherwise flush the tasklet if it was on another cpu */ - tasklet_unlock_wait(t); + intel_engine_flush_submission(engine); if (execlists_active(&engine->execlists)) return false; @@ -1093,7 +1149,7 @@ bool intel_engines_are_idle(struct intel_gt *gt) if (!READ_ONCE(gt->awake)) return true; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (!intel_engine_is_idle(engine)) return false; } @@ -1106,7 +1162,7 @@ void intel_engines_reset_default_submission(struct intel_gt *gt) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) engine->set_default_submission(engine); } @@ -1118,6 +1174,8 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) case 3: /* maybe only uses physical not virtual addresses */ return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915)); + case 4: + return !IS_I965G(engine->i915); /* who knows! */ case 6: return engine->class != VIDEO_DECODE_CLASS; /* b0rked */ default: @@ -1193,6 +1251,38 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len) } } +static struct intel_timeline *get_timeline(struct i915_request *rq) +{ + struct intel_timeline *tl; + + /* + * Even though we are holding the engine->active.lock here, there + * is no control over the submission queue per-se and we are + * inspecting the active state at a random point in time, with an + * unknown queue. Play safe and make sure the timeline remains valid. + * (Only being used for pretty printing, one extra kref shouldn't + * cause a camel stampede!) + */ + rcu_read_lock(); + tl = rcu_dereference(rq->timeline); + if (!kref_get_unless_zero(&tl->kref)) + tl = NULL; + rcu_read_unlock(); + + return tl; +} + +static const char *repr_timer(const struct timer_list *t) +{ + if (!READ_ONCE(t->expires)) + return "inactive"; + + if (timer_pending(t)) + return "active"; + + return "expired"; +} + static void intel_engine_print_registers(struct intel_engine_cs *engine, struct drm_printer *m) { @@ -1254,19 +1344,21 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, unsigned int idx; u8 read, write; - drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n", - ENGINE_READ(engine, RING_EXECLIST_STATUS_LO), - ENGINE_READ(engine, RING_EXECLIST_STATUS_HI), - num_entries); + drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n", + yesno(test_bit(TASKLET_STATE_SCHED, + &engine->execlists.tasklet.state)), + enableddisabled(!atomic_read(&engine->execlists.tasklet.count)), + repr_timer(&engine->execlists.preempt), + repr_timer(&engine->execlists.timer)); read = execlists->csb_head; write = READ_ONCE(*execlists->csb_write); - drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? 
%s (%s)\n", - read, write, - yesno(test_bit(TASKLET_STATE_SCHED, - &engine->execlists.tasklet.state)), - enableddisabled(!atomic_read(&engine->execlists.tasklet.count))); + drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_LO), + ENGINE_READ(engine, RING_EXECLIST_STATUS_HI), + read, write, num_entries); + if (read >= num_entries) read = 0; if (write >= num_entries) @@ -1280,33 +1372,45 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } execlists_active_lock_bh(execlists); + rcu_read_lock(); for (port = execlists->active; (rq = *port); port++) { char hdr[80]; int len; len = snprintf(hdr, sizeof(hdr), - "\t\tActive[%d: ", + "\t\tActive[%d]: ", (int)(port - execlists->active)); - if (!i915_request_signaled(rq)) + if (!i915_request_signaled(rq)) { + struct intel_timeline *tl = get_timeline(rq); + len += snprintf(hdr + len, sizeof(hdr) - len, "ring:{start:%08x, hwsp:%08x, seqno:%08x}, ", i915_ggtt_offset(rq->ring->vma), - rq->timeline->hwsp_offset, + tl ? tl->hwsp_offset : 0, hwsp_seqno(rq)); + + if (tl) + intel_timeline_put(tl); + } snprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); } for (port = execlists->pending; (rq = *port); port++) { + struct intel_timeline *tl = get_timeline(rq); char hdr[80]; snprintf(hdr, sizeof(hdr), "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", (int)(port - execlists->pending), i915_ggtt_offset(rq->ring->vma), - rq->timeline->hwsp_offset, + tl ? tl->hwsp_offset : 0, hwsp_seqno(rq)); print_request(m, rq, hdr); + + if (tl) + intel_timeline_put(tl); } + rcu_read_unlock(); execlists_active_unlock_bh(execlists); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", @@ -1372,8 +1476,13 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "*** WEDGED ***\n"); drm_printf(m, "\tAwake? 
%d\n", atomic_read(&engine->wakeref.count)); - drm_printf(m, "\tHangcheck: %d ms ago\n", - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); + + rcu_read_lock(); + rq = READ_ONCE(engine->heartbeat.systole); + if (rq) + drm_printf(m, "\tHeartbeat: %d ms ago\n", + jiffies_to_msecs(jiffies - rq->emitted_jiffies)); + rcu_read_unlock(); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); @@ -1383,6 +1492,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_lock_irqsave(&engine->active.lock, flags); rq = intel_engine_find_active_request(engine); if (rq) { + struct intel_timeline *tl = get_timeline(rq); + print_request(m, rq, "\t\tactive "); drm_printf(m, "\t\tring->start: 0x%08x\n", @@ -1395,18 +1506,27 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->ring->emit); drm_printf(m, "\t\tring->space: 0x%08x\n", rq->ring->space); - drm_printf(m, "\t\tring->hwsp: 0x%08x\n", - rq->timeline->hwsp_offset); + + if (tl) { + drm_printf(m, "\t\tring->hwsp: 0x%08x\n", + tl->hwsp_offset); + intel_timeline_put(tl); + } print_request_ring(m, rq); + + if (rq->hw_context->lrc_reg_state) { + drm_printf(m, "Logical Ring Context:\n"); + hexdump(m, rq->hw_context->lrc_reg_state, PAGE_SIZE); + } } spin_unlock_irqrestore(&engine->active.lock, flags); drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); - wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm); if (wakeref) { intel_engine_print_registers(engine, m); - intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref); + intel_runtime_pm_put(engine->uncore->rpm, wakeref); } else { drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c new file mode 100644 index 000000000000..06aa14c7aa8c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -0,0 +1,234 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_request.h" + +#include "intel_context.h" +#include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" +#include "intel_engine.h" +#include "intel_gt.h" +#include "intel_reset.h" + +/* + * While the engine is active, we send a periodic pulse along the engine + * to check on its health and to flush any idle-barriers. If that request + * is stuck, and we fail to preempt it, we declare the engine hung and + * issue a reset -- in the hope that restores progress. 
+ */ + +static bool next_heartbeat(struct intel_engine_cs *engine) +{ + long delay; + + delay = READ_ONCE(engine->props.heartbeat_interval_ms); + if (!delay) + return false; + + delay = msecs_to_jiffies_timeout(delay); + if (delay >= HZ) + delay = round_jiffies_up_relative(delay); + schedule_delayed_work(&engine->heartbeat.work, delay); + + return true; +} + +static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq) +{ + engine->wakeref_serial = READ_ONCE(engine->serial) + 1; + i915_request_add_active_barriers(rq); +} + +static void show_heartbeat(const struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct drm_printer p = drm_debug_printer("heartbeat"); + + intel_engine_dump(engine, &p, + "%s heartbeat {prio:%d} not ticking\n", + engine->name, + rq->sched.attr.priority); +} + +static void heartbeat(struct work_struct *wrk) +{ + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN), + }; + struct intel_engine_cs *engine = + container_of(wrk, typeof(*engine), heartbeat.work.work); + struct intel_context *ce = engine->kernel_context; + struct i915_request *rq; + + if (!intel_engine_pm_get_if_awake(engine)) + return; + + rq = engine->heartbeat.systole; + if (rq && i915_request_completed(rq)) { + i915_request_put(rq); + engine->heartbeat.systole = NULL; + } + + if (intel_gt_is_wedged(engine->gt)) + goto out; + + if (engine->heartbeat.systole) { + if (engine->schedule && + rq->sched.attr.priority < I915_PRIORITY_BARRIER) { + /* + * Gradually raise the priority of the heartbeat to + * give high priority work [which presumably desires + * low latency and no jitter] the chance to naturally + * complete before being preempted. + */ + attr.priority = I915_PRIORITY_MASK; + if (rq->sched.attr.priority >= attr.priority) + attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT); + if (rq->sched.attr.priority >= attr.priority) + attr.priority = I915_PRIORITY_BARRIER; + + local_bh_disable(); + engine->schedule(rq, &attr); + local_bh_enable(); + } else { + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + show_heartbeat(rq, engine); + + intel_gt_handle_error(engine->gt, engine->mask, + I915_ERROR_CAPTURE, + "stopped heartbeat on %s", + engine->name); + } + goto out; + } + + if (engine->wakeref_serial == engine->serial) + goto out; + + mutex_lock(&ce->timeline->mutex); + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); + intel_context_exit(ce); + if (IS_ERR(rq)) + goto unlock; + + idle_pulse(engine, rq); + if (i915_modparams.enable_hangcheck) + engine->heartbeat.systole = i915_request_get(rq); + + __i915_request_commit(rq); + __i915_request_queue(rq, &attr); + +unlock: + mutex_unlock(&ce->timeline->mutex); +out: + if (!next_heartbeat(engine)) + i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); + intel_engine_pm_put(engine); +} + +void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + return; + + next_heartbeat(engine); +} + +void intel_engine_park_heartbeat(struct intel_engine_cs *engine) +{ + if (cancel_delayed_work(&engine->heartbeat.work)) + i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); +} + +void intel_engine_init_heartbeat(struct intel_engine_cs *engine) +{ + INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat); +} + +int intel_engine_set_heartbeat(struct intel_engine_cs *engine, + unsigned long delay) +{ + int err; + + /* Send one last pulse before to cleanup persistent hogs */ + if (!delay && 
IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) { + err = intel_engine_pulse(engine); + if (err) + return err; + } + + WRITE_ONCE(engine->props.heartbeat_interval_ms, delay); + + if (intel_engine_pm_get_if_awake(engine)) { + if (delay) + intel_engine_unpark_heartbeat(engine); + else + intel_engine_park_heartbeat(engine); + intel_engine_pm_put(engine); + } + + return 0; +} + +int intel_engine_pulse(struct intel_engine_cs *engine) +{ + struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER }; + struct intel_context *ce = engine->kernel_context; + struct i915_request *rq; + int err = 0; + + if (!intel_engine_has_preemption(engine)) + return -ENODEV; + + if (!intel_engine_pm_get_if_awake(engine)) + return 0; + + if (mutex_lock_interruptible(&ce->timeline->mutex)) + goto out_rpm; + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); + intel_context_exit(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unlock; + } + + rq->flags |= I915_REQUEST_SENTINEL; + idle_pulse(engine, rq); + + __i915_request_commit(rq); + __i915_request_queue(rq, &attr); + +out_unlock: + mutex_unlock(&ce->timeline->mutex); +out_rpm: + intel_engine_pm_put(engine); + return err; +} + +int intel_engine_flush_barriers(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + if (llist_empty(&engine->barrier_tasks)) + return 0; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + idle_pulse(engine, rq); + i915_request_add(rq); + + return 0; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_engine_heartbeat.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h new file mode 100644 index 000000000000..a7b8c0f9e005 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_ENGINE_HEARTBEAT_H +#define INTEL_ENGINE_HEARTBEAT_H + +struct intel_engine_cs; + +void intel_engine_init_heartbeat(struct intel_engine_cs *engine); + +int intel_engine_set_heartbeat(struct intel_engine_cs *engine, + unsigned long delay); + +void intel_engine_park_heartbeat(struct intel_engine_cs *engine); +void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine); + +int intel_engine_pulse(struct intel_engine_cs *engine); +int intel_engine_flush_barriers(struct intel_engine_cs *engine); + +#endif /* INTEL_ENGINE_HEARTBEAT_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 7f647243b3b9..874d82677179 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -7,10 +7,13 @@ #include "i915_drv.h" #include "intel_engine.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "intel_engine_pool.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_rc6.h" +#include "intel_ring.h" static int __engine_unpark(struct intel_wakeref *wf) { @@ -33,7 +36,7 @@ static int __engine_unpark(struct intel_wakeref *wf) if (engine->unpark) engine->unpark(engine); - intel_engine_init_hangcheck(engine); + intel_engine_unpark_heartbeat(engine); return 0; } @@ -103,14 +106,14 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) /* Context switch failed, hope for the best! Maybe reset? 
*/ goto out_unlock; - intel_timeline_enter(rq->timeline); + intel_timeline_enter(i915_request_timeline(rq)); /* Check again on the next retirement. */ engine->wakeref_serial = engine->serial + 1; i915_request_add_active_barriers(rq); /* Install ourselves as a preemption barrier */ - rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE; + rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_commit(rq); /* Release our exclusive hold on the engine */ @@ -123,6 +126,19 @@ out_unlock: return result; } +static void call_idle_barriers(struct intel_engine_cs *engine) +{ + struct llist_node *node, *next; + + llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { + struct dma_fence_cb *cb = + container_of((struct list_head *)node, + typeof(*cb), node); + + cb->func(NULL, cb); + } +} + static int __engine_park(struct intel_wakeref *wf) { struct intel_engine_cs *engine = @@ -142,6 +158,9 @@ static int __engine_park(struct intel_wakeref *wf) GEM_TRACE("%s\n", engine->name); + call_idle_barriers(engine); /* cleanup after wedging */ + + intel_engine_park_heartbeat(engine); intel_engine_disarm_breadcrumbs(engine); intel_engine_pool_park(&engine->pool); @@ -169,9 +188,10 @@ static const struct intel_wakeref_ops wf_ops = { void intel_engine_init__pm(struct intel_engine_cs *engine) { - struct intel_runtime_pm *rpm = &engine->i915->runtime_pm; + struct intel_runtime_pm *rpm = engine->uncore->rpm; intel_wakeref_init(&engine->wakeref, rpm, &wf_ops); + intel_engine_init_heartbeat(engine); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c index 379a91780bd4..397186818305 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c @@ -61,6 +61,7 @@ static int pool_active(struct i915_active *ref) return 0; } +__i915_active_call static void pool_retire(struct i915_active *ref) { struct intel_engine_pool_node *node = @@ -94,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz) return ERR_PTR(-ENOMEM); node->pool = pool; - i915_active_init(engine->i915, &node->active, pool_active, pool_retire); + i915_active_init(&node->active, pool_active, pool_retire); obj = i915_gem_object_create_internal(engine->i915, sz); if (IS_ERR(obj)) { @@ -109,9 +110,19 @@ node_create(struct intel_engine_pool *pool, size_t sz) return node; } +static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine) +{ + if (intel_engine_is_virtual(engine)) + engine = intel_virtual_engine_get_sibling(engine, 0); + + GEM_BUG_ON(!engine); + return &engine->pool; +} + struct intel_engine_pool_node * -intel_engine_pool_get(struct intel_engine_pool *pool, size_t size) +intel_engine_get_pool(struct intel_engine_cs *engine, size_t size) { + struct intel_engine_pool *pool = lookup_pool(engine); struct intel_engine_pool_node *node; struct list_head *list; unsigned long flags; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h index 8d069efd9457..1bd89cadc3b7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h @@ -12,13 +12,13 @@ #include "i915_request.h" struct intel_engine_pool_node * -intel_engine_pool_get(struct intel_engine_pool *pool, size_t size); +intel_engine_get_pool(struct intel_engine_cs *engine, size_t size); static inline int intel_engine_pool_mark_active(struct intel_engine_pool_node *node, struct i915_request *rq) { - return 
i915_active_ref(&node->active, rq->timeline, rq); + return i915_active_add_request(&node->active, rq); } static inline void diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 9dd8c299cb2d..758f0e8ec672 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -15,6 +15,7 @@ #include <linux/rbtree.h> #include <linux/timer.h> #include <linux/types.h> +#include <linux/workqueue.h> #include "i915_gem.h" #include "i915_pmu.h" @@ -58,6 +59,7 @@ struct i915_gem_context; struct i915_request; struct i915_sched_attr; struct intel_gt; +struct intel_ring; struct intel_uncore; typedef u8 intel_engine_mask_t; @@ -76,40 +78,6 @@ struct intel_instdone { u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; }; -struct intel_engine_hangcheck { - u64 acthd; - u32 last_ring; - u32 last_head; - unsigned long action_timestamp; - struct intel_instdone instdone; -}; - -struct intel_ring { - struct kref ref; - struct i915_vma *vma; - void *vaddr; - - /* - * As we have two types of rings, one global to the engine used - * by ringbuffer submission and those that are exclusive to a - * context used by execlists, we have to play safe and allow - * atomic updates to the pin_count. However, the actual pinning - * of the context is either done during initialisation for - * ringbuffer submission or serialised as part of the context - * pinning for execlists, and so we do not need a mutex ourselves - * to serialise intel_ring_pin/intel_ring_unpin. - */ - atomic_t pin_count; - - u32 head; - u32 tail; - u32 emit; - - u32 space; - u32 size; - u32 effective_size; -}; - /* * we use a single page to load ctx workarounds so all of these * values are referred in terms of dwords @@ -148,6 +116,7 @@ enum intel_engine_id { VECS1, #define _VECS(n) (VECS0 + (n)) I915_NUM_ENGINES +#define INVALID_ENGINE ((enum intel_engine_id)-1) }; struct st_preempt_hang { @@ -174,6 +143,11 @@ struct intel_engine_execlists { struct timer_list timer; /** + * @preempt: reset the current context if it fails to give way + */ + struct timer_list preempt; + + /** * @default_priolist: priority list for I915_PRIORITY_NORMAL */ struct i915_priolist default_priolist; @@ -303,10 +277,12 @@ struct intel_engine_cs { u8 uabi_class; u8 uabi_instance; + u32 uabi_capabilities; u32 context_size; u32 mmio_base; - u32 uabi_capabilities; + unsigned int context_tag; +#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS) struct rb_node uabi_node; @@ -323,6 +299,11 @@ struct intel_engine_cs { intel_engine_mask_t saturated; /* submitting semaphores too late? 
*/ + struct { + struct delayed_work work; + struct i915_request *systole; + } heartbeat; + unsigned long serial; unsigned long wakeref_serial; @@ -473,14 +454,13 @@ struct intel_engine_cs { /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; - struct intel_engine_hangcheck hangcheck; - #define I915_ENGINE_USING_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) #define I915_ENGINE_IS_VIRTUAL BIT(5) +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) unsigned int flags; @@ -539,6 +519,13 @@ struct intel_engine_cs { */ ktime_t total; } stats; + + struct { + unsigned long heartbeat_interval_ms; + unsigned long preempt_timeout_ms; + unsigned long stop_timeout_ms; + unsigned long timeslice_duration_ms; + } props; }; static inline bool @@ -583,20 +570,24 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_IS_VIRTUAL; } -#define instdone_slice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) +static inline bool +intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) +{ + return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO; +} -#define instdone_subslice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0]) +#define instdone_has_slice(dev_priv___, sseu___, slice___) \ + ((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) -#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ - for ((slice__) = 0, (subslice__) = 0; \ - (slice__) < I915_MAX_SLICES; \ - (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \ - (slice__) += ((subslice__) == 0)) \ - for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ - (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) +#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \ + (IS_GEN(dev_priv__, 7) ? 
(1 & BIT(subslice__)) : \ + intel_sseu_has_subslice(sseu__, 0, subslice__)) +#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \ + for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \ + (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \ + (slice_) += ((subslice_) == 0)) \ + for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ + (instdone_has_subslice(dev_priv_, sseu_, slice_, \ + subslice_))) #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 77cd5de83930..7f7150a733f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -160,10 +160,10 @@ static int legacy_ring_idx(const struct legacy_ring *ring) }; if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map))) - return -1; + return INVALID_ENGINE; if (GEM_DEBUG_WARN_ON(ring->instance >= map[ring->class].max)) - return -1; + return INVALID_ENGINE; return map[ring->class].base + ring->instance; } @@ -171,23 +171,15 @@ static int legacy_ring_idx(const struct legacy_ring *ring) static void add_legacy_ring(struct legacy_ring *ring, struct intel_engine_cs *engine) { - int idx; - if (engine->gt != ring->gt || engine->class != ring->class) { ring->gt = engine->gt; ring->class = engine->class; ring->instance = 0; } - idx = legacy_ring_idx(ring); - if (unlikely(idx == -1)) - return; - - GEM_BUG_ON(idx >= ARRAY_SIZE(ring->gt->engine)); - ring->gt->engine[idx] = engine; - ring->instance++; - - engine->legacy_idx = idx; + engine->legacy_idx = legacy_ring_idx(ring); + if (engine->legacy_idx != INVALID_ENGINE) + ring->instance++; } void intel_engines_driver_register(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 86e00a2db8a4..4294f146f13c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -112,6 +112,7 @@ #define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ +#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */ #define MI_SEMAPHORE_POLL (1 << 15) #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) @@ -119,6 +120,8 @@ #define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12) #define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12) #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) +#define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5) +#define MI_SEMAPHORE_TOKEN_SHIFT 5 #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ @@ -132,7 +135,10 @@ * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! */ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) +/* Gen11+. addr = base + (ctx_restore ? 
offset & GENMASK(12,2) : offset) */ +#define MI_LRI_CS_MMIO (1<<19) #define MI_LRI_FORCE_POSTED (1<<12) +#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) #define MI_SRM_LRM_GLOBAL_GTT (1<<22) @@ -147,6 +153,7 @@ #define MI_FLUSH_DW_USE_PPGTT (0<<2) #define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) #define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) +#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1) #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) #define MI_BATCH_NON_SECURE (1) /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ @@ -156,7 +163,8 @@ #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) -#define MI_BATCH_RESOURCE_STREAMER (1<<10) +#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10) +#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/ /* * 3D instructions used by the kernel @@ -217,6 +225,7 @@ #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) #define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) +#define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14) #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) @@ -224,7 +233,9 @@ #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ #define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ +#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) +#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */ #define PIPE_CONTROL_NOTIFY (1<<8) #define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ #define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) @@ -235,6 +246,29 @@ #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) #define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ +#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1) +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2)) +/* Opcodes for MI_MATH_INSTR */ +#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0) +#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2) +#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2) +#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1) +#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1) +#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0) +#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0) +#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0) +#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0) +#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0) +#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2) +#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2) +/* Registers used as operands in MI_MATH_INSTR */ +#define MI_MATH_REG(x) (x) +#define MI_MATH_REG_SRCA 0x20 +#define MI_MATH_REG_SRCB 0x21 +#define MI_MATH_REG_ACCU 0x31 +#define MI_MATH_REG_ZF 0x32 +#define MI_MATH_REG_CF 0x33 + /* * Commands used only by the command parser */ @@ -251,7 +285,6 @@ #define MI_CLFLUSH MI_INSTR(0x27, 0) #define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0) #define MI_REPORT_PERF_COUNT_GGTT (1<<0) -#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0) #define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0) #define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) #define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 
d48ec9a76ed1..4c26daf7ee46 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -6,7 +6,12 @@ #include "i915_drv.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" +#include "intel_mocs.h" +#include "intel_rc6.h" +#include "intel_rps.h" #include "intel_uncore.h" +#include "intel_pm.h" void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { @@ -18,15 +23,108 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); - intel_gt_init_hangcheck(gt); intel_gt_init_reset(gt); + intel_gt_init_requests(gt); intel_gt_pm_init_early(gt); + + intel_rps_init_early(>->rps); intel_uc_init_early(>->uc); } -void intel_gt_init_hw(struct drm_i915_private *i915) +void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) +{ + gt->ggtt = ggtt; + + intel_gt_sanitize(gt, false); +} + +static void init_unused_ring(struct intel_gt *gt, u32 base) { - i915->gt.ggtt = &i915->ggtt; + struct intel_uncore *uncore = gt->uncore; + + intel_uncore_write(uncore, RING_CTL(base), 0); + intel_uncore_write(uncore, RING_HEAD(base), 0); + intel_uncore_write(uncore, RING_TAIL(base), 0); + intel_uncore_write(uncore, RING_START(base), 0); +} + +static void init_unused_rings(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + if (IS_I830(i915)) { + init_unused_ring(gt, PRB1_BASE); + init_unused_ring(gt, SRB0_BASE); + init_unused_ring(gt, SRB1_BASE); + init_unused_ring(gt, SRB2_BASE); + init_unused_ring(gt, SRB3_BASE); + } else if (IS_GEN(i915, 2)) { + init_unused_ring(gt, SRB0_BASE); + init_unused_ring(gt, SRB1_BASE); + } else if (IS_GEN(i915, 3)) { + init_unused_ring(gt, PRB1_BASE); + init_unused_ring(gt, PRB2_BASE); + } +} + +int intel_gt_init_hw(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + int ret; + + BUG_ON(!i915->kernel_context); + ret = intel_gt_terminally_wedged(gt); + if (ret) + return ret; + + gt->last_init_time = ktime_get(); + + /* Double layer security blanket, see i915_gem_init() */ + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9) + intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf)); + + if (IS_HASWELL(i915)) + intel_uncore_write(uncore, + MI_PREDICATE_RESULT_2, + IS_HSW_GT3(i915) ? + LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); + + /* Apply the GT workarounds... */ + intel_gt_apply_workarounds(gt); + /* ...and determine whether they are sticking. */ + intel_gt_verify_workarounds(gt, "init"); + + intel_gt_init_swizzling(gt); + + /* + * At least 830 can leave some of the unused rings + * "active" (ie. head != tail) after resume which + * will prevent c3 entry. Makes sure all unused rings + * are totally idle. 
+ */ + init_unused_rings(gt); + + ret = i915_ppgtt_init_hw(gt); + if (ret) { + DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); + goto out; + } + + /* We can't enable contexts until all firmware is loaded */ + ret = intel_uc_init_hw(>->uc); + if (ret) { + i915_probe_error(i915, "Enabling uc failed (%d)\n", ret); + goto out; + } + + intel_mocs_init(gt); + +out: + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + return ret; } static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) @@ -89,7 +187,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt, struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine_masked(engine, i915, engine_mask, id) + for_each_engine_masked(engine, gt, engine_mask, id) gen8_clear_engine_error_register(engine); } } @@ -100,7 +198,7 @@ static void gen6_check_faults(struct intel_gt *gt) enum intel_engine_id id; u32 fault; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { fault = GEN6_RING_FAULT_REG_READ(engine); if (fault & RING_FAULT_VALID) { DRM_DEBUG_DRIVER("Unexpected fault\n" @@ -176,7 +274,7 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt) void intel_gt_flush_ggtt_writes(struct intel_gt *gt) { - struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; intel_wakeref_t wakeref; /* @@ -200,18 +298,18 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) wmb(); - if (INTEL_INFO(i915)->has_coherent_ggtt) + if (INTEL_INFO(gt->i915)->has_coherent_ggtt) return; intel_gt_chipset_flush(gt); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - struct intel_uncore *uncore = gt->uncore; + with_intel_runtime_pm(uncore->rpm, wakeref) { + unsigned long flags; - spin_lock_irq(&uncore->lock); + spin_lock_irqsave(&uncore->lock, flags); intel_uncore_posting_read_fw(uncore, RING_HEAD(RENDER_RING_BASE)); - spin_unlock_irq(&uncore->lock); + spin_unlock_irqrestore(&uncore->lock, flags); } } @@ -222,7 +320,12 @@ void intel_gt_chipset_flush(struct intel_gt *gt) intel_gtt_chipset_flush(); } -int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) +void intel_gt_driver_register(struct intel_gt *gt) +{ + intel_rps_driver_register(>->rps); +} + +static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) { struct drm_i915_private *i915 = gt->i915; struct drm_i915_gem_object *obj; @@ -230,7 +333,7 @@ int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) int ret; obj = i915_gem_object_create_stolen(i915, size); - if (!obj) + if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate scratch page\n"); @@ -256,11 +359,40 @@ err_unref: return ret; } -void intel_gt_fini_scratch(struct intel_gt *gt) +static void intel_gt_fini_scratch(struct intel_gt *gt) { i915_vma_unpin_and_release(>->scratch, 0); } +int intel_gt_init(struct intel_gt *gt) +{ + int err; + + err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? 
SZ_256K : SZ_4K); + if (err) + return err; + + intel_gt_pm_init(gt); + + return 0; +} + +void intel_gt_driver_remove(struct intel_gt *gt) +{ + GEM_BUG_ON(gt->awake); +} + +void intel_gt_driver_unregister(struct intel_gt *gt) +{ + intel_rps_driver_unregister(>->rps); +} + +void intel_gt_driver_release(struct intel_gt *gt) +{ + intel_gt_pm_fini(gt); + intel_gt_fini_scratch(gt); +} + void intel_gt_driver_late_release(struct intel_gt *gt) { intel_uc_driver_late_release(>->uc); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 4920cb351f10..5436f8c30708 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -28,7 +28,14 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) } void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); -void intel_gt_init_hw(struct drm_i915_private *i915); +void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt); +int __must_check intel_gt_init_hw(struct intel_gt *gt); +int intel_gt_init(struct intel_gt *gt); +void intel_gt_driver_register(struct intel_gt *gt); + +void intel_gt_driver_unregister(struct intel_gt *gt); +void intel_gt_driver_remove(struct intel_gt *gt); +void intel_gt_driver_release(struct intel_gt *gt); void intel_gt_driver_late_release(struct intel_gt *gt); @@ -39,11 +46,6 @@ void intel_gt_clear_error_registers(struct intel_gt *gt, void intel_gt_flush_ggtt_writes(struct intel_gt *gt); void intel_gt_chipset_flush(struct intel_gt *gt); -void intel_gt_init_hangcheck(struct intel_gt *gt); - -int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size); -void intel_gt_fini_scratch(struct intel_gt *gt); - static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt, enum intel_gt_scratch_field field) { @@ -55,6 +57,4 @@ static inline bool intel_gt_is_wedged(struct intel_gt *gt) return __intel_reset_failed(>->reset); } -void intel_gt_queue_hangcheck(struct intel_gt *gt); - #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 34a4fb624bf7..973ee7eded64 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -11,6 +11,7 @@ #include "intel_gt.h" #include "intel_gt_irq.h" #include "intel_uncore.h" +#include "intel_rps.h" static void guc_irq_handler(struct intel_guc *guc, u16 iir) { @@ -77,7 +78,7 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, return guc_irq_handler(>->uc.guc, iir); if (instance == OTHER_GTPM_INSTANCE) - return gen11_rps_irq_handler(gt, iir); + return gen11_rps_irq_handler(>->rps, iir); WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", instance, iir); @@ -336,7 +337,7 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]) } if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { - gen6_rps_irq_handler(gt->i915, gt_iir[2]); + gen6_rps_irq_handler(>->rps, gt_iir[2]); guc_irq_handler(>->uc.guc, gt_iir[2] >> 16); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index fac75afed35b..6187cdd06646 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -4,17 +4,38 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/suspend.h> + #include "i915_drv.h" +#include "i915_globals.h" #include "i915_params.h" +#include "intel_context.h" #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" +#include 
"intel_llc.h" #include "intel_pm.h" +#include "intel_rc6.h" +#include "intel_rps.h" #include "intel_wakeref.h" -static void pm_notify(struct drm_i915_private *i915, int state) +static void user_forcewake(struct intel_gt *gt, bool suspend) { - blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915); + int count = atomic_read(>->user_wakeref); + + /* Inside suspend/resume so single threaded, no races to worry about. */ + if (likely(!count)) + return; + + intel_gt_pm_get(gt); + if (suspend) { + GEM_BUG_ON(count > atomic_read(>->wakeref.count)); + atomic_sub(count, >->wakeref.count); + } else { + atomic_add(count, >->wakeref.count); + } + intel_gt_pm_put(gt); } static int __gt_unpark(struct intel_wakeref *wf) @@ -24,6 +45,8 @@ static int __gt_unpark(struct intel_wakeref *wf) GEM_TRACE("\n"); + i915_globals_unpark(); + /* * It seems that the DMC likes to transition between the DC states a lot * when there are no connected displays (no active power domains) during @@ -41,46 +64,41 @@ static int __gt_unpark(struct intel_wakeref *wf) if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - intel_enable_gt_powersave(i915); - - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - + intel_rps_unpark(>->rps); i915_pmu_gt_unparked(i915); - intel_gt_queue_hangcheck(gt); - - pm_notify(i915, INTEL_GT_UNPARK); + intel_gt_unpark_requests(gt); return 0; } static int __gt_park(struct intel_wakeref *wf) { - struct drm_i915_private *i915 = - container_of(wf, typeof(*i915), gt.wakeref); - intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake); + struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); + intel_wakeref_t wakeref = fetch_and_zero(>->awake); + struct drm_i915_private *i915 = gt->i915; GEM_TRACE("\n"); - pm_notify(i915, INTEL_GT_PARK); + intel_gt_park_requests(gt); + i915_vma_parked(gt); i915_pmu_gt_parked(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_idle(i915); + intel_rps_park(>->rps); + + /* Everything switched off, flush any residual interrupt just in case */ + intel_synchronize_irq(i915); if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) { - i915_rc6_ctx_wa_check(i915); + intel_rc6_ctx_wa_check(&i915->gt.rc6); intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); } - /* Everything switched off, flush any residual interrupt just in case */ - intel_synchronize_irq(i915); - GEM_BUG_ON(!wakeref); intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); + i915_globals_park(); + return 0; } @@ -92,9 +110,18 @@ static const struct intel_wakeref_ops wf_ops = { void intel_gt_pm_init_early(struct intel_gt *gt) { - intel_wakeref_init(>->wakeref, >->i915->runtime_pm, &wf_ops); + intel_wakeref_init(>->wakeref, gt->uncore->rpm, &wf_ops); +} - BLOCKING_INIT_NOTIFIER_HEAD(>->pm_notifications); +void intel_gt_pm_init(struct intel_gt *gt) +{ + /* + * Enabling power-management should be "self-healing". If we cannot + * enable a feature, simply leave it disabled with a notice to the + * user. 
+ */ + intel_rc6_init(>->rc6); + intel_rps_init(>->rps); } static bool reset_engines(struct intel_gt *gt) @@ -119,16 +146,47 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) { struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; - GEM_TRACE("\n"); + GEM_TRACE("force:%s\n", yesno(force)); + + /* Use a raw wakeref to avoid calling intel_display_power_get early */ + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + + /* + * As we have just resumed the machine and woken the device up from + * deep PCI sleep (presumably D3_cold), assume the HW has been reset + * back to defaults, recovering from whatever wedged state we left it + * in and so worth trying to use the device once more. + */ + if (intel_gt_is_wedged(gt)) + intel_gt_unset_wedged(gt); intel_uc_sanitize(>->uc); - if (!reset_engines(gt) && !force) - return; + for_each_engine(engine, gt, id) + if (engine->reset.prepare) + engine->reset.prepare(engine); - for_each_engine(engine, gt->i915, id) - __intel_engine_reset(engine, false); + intel_uc_reset_prepare(>->uc); + + if (reset_engines(gt) || force) { + for_each_engine(engine, gt, id) + __intel_engine_reset(engine, false); + } + + for_each_engine(engine, gt, id) + if (engine->reset.finish) + engine->reset.finish(engine); + + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); +} + +void intel_gt_pm_fini(struct intel_gt *gt) +{ + intel_rc6_fini(>->rc6); } int intel_gt_resume(struct intel_gt *gt) @@ -137,6 +195,8 @@ int intel_gt_resume(struct intel_gt *gt) enum intel_engine_id id; int err = 0; + GEM_TRACE("\n"); + /* * After resume, we may need to poke into the pinned kernel * contexts to paper over any damage caused by the sudden suspend. @@ -144,14 +204,23 @@ int intel_gt_resume(struct intel_gt *gt) * allowing us to fixup the user contexts on their first pin. */ intel_gt_pm_get(gt); - for_each_engine(engine, gt->i915, id) { + + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + intel_rc6_sanitize(>->rc6); + + intel_rps_enable(>->rps); + intel_llc_enable(>->llc); + + for_each_engine(engine, gt, id) { struct intel_context *ce; intel_engine_pm_get(engine); ce = engine->kernel_context; - if (ce) + if (ce) { + GEM_BUG_ON(!intel_context_is_pinned(ce)); ce->ops->reset(ce); + } engine->serial++; /* kernel context lost */ err = engine->resume(engine); @@ -164,19 +233,99 @@ int intel_gt_resume(struct intel_gt *gt) break; } } + + intel_rc6_enable(>->rc6); + + intel_uc_resume(>->uc); + + user_forcewake(gt, false); + + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_gt_pm_put(gt); return err; } +static void wait_for_suspend(struct intel_gt *gt) +{ + if (!intel_gt_pm_is_awake(gt)) + return; + + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. 
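[Editor's note] __gt_unpark() and __gt_park() above are only reached through the GT wakeref: the first reference unparks the GT (globals, RPS, PMU, retire worker) and dropping the last reference parks it again. A hedged sketch of the calling convention; do_hw_access() is a hypothetical placeholder, not something from this patch.

/* Sketch: bracket HW access with the GT wakeref so park/unpark fire. */
static void with_gt_awake(struct intel_gt *gt)
{
	intel_gt_pm_get(gt);	/* first ref invokes __gt_unpark() */

	do_hw_access(gt);	/* hypothetical work requiring the GT awake */

	intel_gt_pm_put(gt);	/* last ref lets __gt_park() run */
}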
+ */ + intel_gt_set_wedged(gt); + } + + intel_gt_pm_wait_for_idle(gt); +} + +void intel_gt_suspend_prepare(struct intel_gt *gt) +{ + user_forcewake(gt, true); + wait_for_suspend(gt); + + intel_uc_suspend(>->uc); +} + +static suspend_state_t pm_suspend_target(void) +{ +#if IS_ENABLED(CONFIG_PM_SLEEP) + return pm_suspend_target_state; +#else + return PM_SUSPEND_TO_IDLE; +#endif +} + +void intel_gt_suspend_late(struct intel_gt *gt) +{ + intel_wakeref_t wakeref; + + /* We expect to be idle already; but also want to be independent */ + wait_for_suspend(gt); + + /* + * On disabling the device, we want to turn off HW access to memory + * that we no longer own. + * + * However, not all suspend-states disable the device. S0 (s2idle) + * is effectively runtime-suspend, the device is left powered on + * but needs to be put into a low power state. We need to keep + * powermanagement enabled, but we also retain system state and so + * it remains safe to keep on using our allocated memory. + */ + if (pm_suspend_target() == PM_SUSPEND_TO_IDLE) + return; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) { + intel_rps_disable(>->rps); + intel_rc6_disable(>->rc6); + intel_llc_disable(>->llc); + } + + intel_gt_sanitize(gt, false); + + GEM_TRACE("\n"); +} + void intel_gt_runtime_suspend(struct intel_gt *gt) { intel_uc_runtime_suspend(>->uc); + + GEM_TRACE("\n"); } int intel_gt_runtime_resume(struct intel_gt *gt) { + GEM_TRACE("\n"); + intel_gt_init_swizzling(gt); return intel_uc_runtime_resume(>->uc); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_gt_pm.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index fb39d99cd6ee..b3e17399be9b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -12,11 +12,6 @@ #include "intel_gt_types.h" #include "intel_wakeref.h" -enum { - INTEL_GT_UNPARK, - INTEL_GT_PARK, -}; - static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt) { return intel_wakeref_is_active(>->wakeref); @@ -43,10 +38,21 @@ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) } void intel_gt_pm_init_early(struct intel_gt *gt); +void intel_gt_pm_init(struct intel_gt *gt); +void intel_gt_pm_fini(struct intel_gt *gt); void intel_gt_sanitize(struct intel_gt *gt, bool force); + +void intel_gt_suspend_prepare(struct intel_gt *gt); +void intel_gt_suspend_late(struct intel_gt *gt); int intel_gt_resume(struct intel_gt *gt); + void intel_gt_runtime_suspend(struct intel_gt *gt); int intel_gt_runtime_resume(struct intel_gt *gt); +static inline bool is_mock_gt(const struct intel_gt *gt) +{ + return I915_SELFTEST_ONLY(gt->awake == -ENODEV); +} + #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c new file mode 100644 index 000000000000..353809ac2754 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -0,0 +1,137 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" /* for_each_engine() */ +#include "i915_request.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_gt_requests.h" +#include "intel_timeline.h" + +static void retire_requests(struct intel_timeline *tl) +{ + struct i915_request *rq, *rn; + + list_for_each_entry_safe(rq, rn, &tl->requests, link) + if (!i915_request_retire(rq)) + break; +} + +static void flush_submission(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + 
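[Editor's note] Suspend is now split into a prepare step (quiesce while the GPU can still be talked to) and a late step (power down once the target suspend state is known, skipping s2idle). A hedged sketch of the expected ordering from a system suspend/resume path; my_suspend()/my_resume() and the elided state-save step are placeholders, not the driver's actual hooks.

/* Illustrative ordering only. */
static void my_suspend(struct intel_gt *gt)
{
	intel_gt_suspend_prepare(gt);	/* idle the GPU, park user forcewake, suspend uC */
	/* ... save GGTT/display state here ... */
	intel_gt_suspend_late(gt);	/* rps/rc6/llc off unless target is s2idle */
}

static int my_resume(struct intel_gt *gt)
{
	/* Re-enables rps/rc6/llc and resets the pinned kernel contexts. */
	return intel_gt_resume(gt);
}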
for_each_engine(engine, gt, id) + intel_engine_flush_submission(engine); +} + +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) +{ + struct intel_gt_timelines *timelines = >->timelines; + struct intel_timeline *tl, *tn; + unsigned long active_count = 0; + unsigned long flags; + bool interruptible; + LIST_HEAD(free); + + interruptible = true; + if (unlikely(timeout < 0)) + timeout = -timeout, interruptible = false; + + flush_submission(gt); /* kick the ksoftirqd tasklets */ + + spin_lock_irqsave(&timelines->lock, flags); + list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { + if (!mutex_trylock(&tl->mutex)) { + active_count++; /* report busy to caller, try again? */ + continue; + } + + intel_timeline_get(tl); + GEM_BUG_ON(!tl->active_count); + tl->active_count++; /* pin the list element */ + spin_unlock_irqrestore(&timelines->lock, flags); + + if (timeout > 0) { + struct dma_fence *fence; + + fence = i915_active_fence_get(&tl->last_request); + if (fence) { + timeout = dma_fence_wait_timeout(fence, + interruptible, + timeout); + dma_fence_put(fence); + } + } + + retire_requests(tl); + + spin_lock_irqsave(&timelines->lock, flags); + + /* Resume iteration after dropping lock */ + list_safe_reset_next(tl, tn, link); + if (!--tl->active_count) + list_del(&tl->link); + else + active_count += !!rcu_access_pointer(tl->last_request.fence); + + mutex_unlock(&tl->mutex); + + /* Defer the final release to after the spinlock */ + if (refcount_dec_and_test(&tl->kref.refcount)) { + GEM_BUG_ON(tl->active_count); + list_add(&tl->link, &free); + } + } + spin_unlock_irqrestore(&timelines->lock, flags); + + list_for_each_entry_safe(tl, tn, &free, link) + __intel_timeline_free(&tl->kref); + + return active_count ? timeout : 0; +} + +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) +{ + /* If the device is asleep, we have no requests outstanding */ + if (!intel_gt_pm_is_awake(gt)) + return 0; + + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + } + + return timeout; +} + +static void retire_work_handler(struct work_struct *work) +{ + struct intel_gt *gt = + container_of(work, typeof(*gt), requests.retire_work.work); + + intel_gt_retire_requests(gt); + schedule_delayed_work(>->requests.retire_work, + round_jiffies_up_relative(HZ)); +} + +void intel_gt_init_requests(struct intel_gt *gt) +{ + INIT_DELAYED_WORK(>->requests.retire_work, retire_work_handler); +} + +void intel_gt_park_requests(struct intel_gt *gt) +{ + cancel_delayed_work(>->requests.retire_work); +} + +void intel_gt_unpark_requests(struct intel_gt *gt) +{ + schedule_delayed_work(>->requests.retire_work, + round_jiffies_up_relative(HZ)); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h new file mode 100644 index 000000000000..bd31cbce47e0 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_REQUESTS_H +#define INTEL_GT_REQUESTS_H + +struct intel_gt; + +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); +static inline void intel_gt_retire_requests(struct intel_gt *gt) +{ + intel_gt_retire_requests_timeout(gt, 0); +} + +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); + +void intel_gt_init_requests(struct intel_gt *gt); +void intel_gt_park_requests(struct intel_gt *gt); +void 
intel_gt_unpark_requests(struct intel_gt *gt); + +#endif /* INTEL_GT_REQUESTS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index dc295c196d11..d4e14dbd172e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -17,7 +17,10 @@ #include "i915_vma.h" #include "intel_engine_types.h" +#include "intel_llc_types.h" #include "intel_reset_types.h" +#include "intel_rc6_types.h" +#include "intel_rps_types.h" #include "intel_wakeref.h" struct drm_i915_private; @@ -25,14 +28,6 @@ struct i915_ggtt; struct intel_engine_cs; struct intel_uncore; -struct intel_hangcheck { - /* For hangcheck timer */ -#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ -#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) - - struct delayed_work work; -}; - struct intel_gt { struct drm_i915_private *i915; struct intel_uncore *uncore; @@ -49,12 +44,23 @@ struct intel_gt { struct list_head hwsp_free_list; } timelines; + struct intel_gt_requests { + /** + * We leave the user IRQ off as much as possible, + * but this means that requests will finish and never + * be retired once the system goes idle. Set a timer to + * fire periodically while the ring is running. When it + * fires, go retire requests. + */ + struct delayed_work retire_work; + } requests; + struct intel_wakeref wakeref; + atomic_t user_wakeref; struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ - struct intel_hangcheck hangcheck; struct intel_reset reset; /** @@ -66,7 +72,9 @@ struct intel_gt { */ intel_wakeref_t awake; - struct blocking_notifier_head pm_notifications; + struct intel_llc llc; + struct intel_rc6 rc6; + struct intel_rps rps; ktime_t last_init_time; @@ -89,14 +97,16 @@ enum intel_gt_scratch_field { INTEL_GT_SCRATCH_FIELD_DEFAULT = 0, /* 8 bytes */ - INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA = 128, - - /* 8 bytes */ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128, /* 8 bytes */ INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256, + /* 6 * 8 bytes */ + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048, + + /* 4 bytes */ + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096, }; #endif /* __INTEL_GT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c deleted file mode 100644 index 05d042cdefe2..000000000000 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
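[Editor's note] The retire_work comment in intel_gt_types.h above is backed by intel_gt_retire_requests_timeout(), where the sign of the timeout selects interruptible (positive) versus uninterruptible (negative) waiting and zero simply reaps completed requests. A hedged usage sketch; example_retire_paths() is illustrative, while MAX_SCHEDULE_TIMEOUT and HZ are the usual kernel constants.

/* Sketch of typical callers of the new request-retirement API. */
static void example_retire_paths(struct intel_gt *gt)
{
	/* Opportunistic, non-blocking retire (what retire_work does). */
	intel_gt_retire_requests(gt);

	/* Block until idle, allowing signals to interrupt the wait. */
	if (intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT) == -EINTR)
		return;	/* caller saw a signal */

	/* A negative timeout requests an uninterruptible, bounded wait. */
	intel_gt_retire_requests_timeout(gt, -HZ);
}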
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "i915_drv.h" -#include "intel_engine.h" -#include "intel_gt.h" -#include "intel_reset.h" - -struct hangcheck { - u64 acthd; - u32 ring; - u32 head; - enum intel_engine_hangcheck_action action; - unsigned long action_timestamp; - int deadlock; - struct intel_instdone instdone; - bool wedged:1; - bool stalled:1; -}; - -static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) -{ - u32 tmp = current_instdone | *old_instdone; - bool unchanged; - - unchanged = tmp == *old_instdone; - *old_instdone |= tmp; - - return unchanged; -} - -static bool subunits_stuck(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct intel_instdone instdone; - struct intel_instdone *accu_instdone = &engine->hangcheck.instdone; - bool stuck; - int slice; - int subslice; - - intel_engine_get_instdone(engine, &instdone); - - /* There might be unstable subunit states even when - * actual head is not moving. Filter out the unstable ones by - * accumulating the undone -> done transitions and only - * consider those as progress. - */ - stuck = instdone_unchanged(instdone.instdone, - &accu_instdone->instdone); - stuck &= instdone_unchanged(instdone.slice_common, - &accu_instdone->slice_common); - - for_each_instdone_slice_subslice(dev_priv, slice, subslice) { - stuck &= instdone_unchanged(instdone.sampler[slice][subslice], - &accu_instdone->sampler[slice][subslice]); - stuck &= instdone_unchanged(instdone.row[slice][subslice], - &accu_instdone->row[slice][subslice]); - } - - return stuck; -} - -static enum intel_engine_hangcheck_action -head_stuck(struct intel_engine_cs *engine, u64 acthd) -{ - if (acthd != engine->hangcheck.acthd) { - - /* Clear subunit states on head movement */ - memset(&engine->hangcheck.instdone, 0, - sizeof(engine->hangcheck.instdone)); - - return ENGINE_ACTIVE_HEAD; - } - - if (!subunits_stuck(engine)) - return ENGINE_ACTIVE_SUBUNITS; - - return ENGINE_DEAD; -} - -static enum intel_engine_hangcheck_action -engine_stuck(struct intel_engine_cs *engine, u64 acthd) -{ - enum intel_engine_hangcheck_action ha; - u32 tmp; - - ha = head_stuck(engine, acthd); - if (ha != ENGINE_DEAD) - return ha; - - if (IS_GEN(engine->i915, 2)) - return ENGINE_DEAD; - - /* Is the chip hanging on a WAIT_FOR_EVENT? - * If so we can simply poke the RB_WAIT bit - * and break the hang. This should work on - * all but the second generation chipsets. 
- */ - tmp = ENGINE_READ(engine, RING_CTL); - if (tmp & RING_WAIT) { - intel_gt_handle_error(engine->gt, engine->mask, 0, - "stuck wait on %s", engine->name); - ENGINE_WRITE(engine, RING_CTL, tmp); - return ENGINE_WAIT_KICK; - } - - return ENGINE_DEAD; -} - -static void hangcheck_load_sample(struct intel_engine_cs *engine, - struct hangcheck *hc) -{ - hc->acthd = intel_engine_get_active_head(engine); - hc->ring = ENGINE_READ(engine, RING_START); - hc->head = ENGINE_READ(engine, RING_HEAD); -} - -static void hangcheck_store_sample(struct intel_engine_cs *engine, - const struct hangcheck *hc) -{ - engine->hangcheck.acthd = hc->acthd; - engine->hangcheck.last_ring = hc->ring; - engine->hangcheck.last_head = hc->head; -} - -static enum intel_engine_hangcheck_action -hangcheck_get_action(struct intel_engine_cs *engine, - const struct hangcheck *hc) -{ - if (intel_engine_is_idle(engine)) - return ENGINE_IDLE; - - if (engine->hangcheck.last_ring != hc->ring) - return ENGINE_ACTIVE_SEQNO; - - if (engine->hangcheck.last_head != hc->head) - return ENGINE_ACTIVE_SEQNO; - - return engine_stuck(engine, hc->acthd); -} - -static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, - struct hangcheck *hc) -{ - unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; - - hc->action = hangcheck_get_action(engine, hc); - - /* We always increment the progress - * if the engine is busy and still processing - * the same request, so that no single request - * can run indefinitely (such as a chain of - * batches). The only time we do not increment - * the hangcheck score on this ring, if this - * engine is in a legitimate wait for another - * engine. In that case the waiting engine is a - * victim and we want to be sure we catch the - * right culprit. Then every time we do kick - * the ring, make it as a progress as the seqno - * advancement might ensure and if not, it - * will catch the hanging engine. - */ - - switch (hc->action) { - case ENGINE_IDLE: - case ENGINE_ACTIVE_SEQNO: - /* Clear head and subunit states on seqno movement */ - hc->acthd = 0; - - memset(&engine->hangcheck.instdone, 0, - sizeof(engine->hangcheck.instdone)); - - /* Intentional fall through */ - case ENGINE_WAIT_KICK: - case ENGINE_WAIT: - engine->hangcheck.action_timestamp = jiffies; - break; - - case ENGINE_ACTIVE_HEAD: - case ENGINE_ACTIVE_SUBUNITS: - /* - * Seqno stuck with still active engine gets leeway, - * in hopes that it is just a long shader. - */ - timeout = I915_SEQNO_DEAD_TIMEOUT; - break; - - case ENGINE_DEAD: - break; - - default: - MISSING_CASE(hc->action); - } - - hc->stalled = time_after(jiffies, - engine->hangcheck.action_timestamp + timeout); - hc->wedged = time_after(jiffies, - engine->hangcheck.action_timestamp + - I915_ENGINE_WEDGED_TIMEOUT); -} - -static void hangcheck_declare_hang(struct intel_gt *gt, - intel_engine_mask_t hung, - intel_engine_mask_t stuck) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - char msg[80]; - int len; - - /* If some rings hung but others were still busy, only - * blame the hanging rings in the synopsis. - */ - if (stuck != hung) - hung &= ~stuck; - len = scnprintf(msg, sizeof(msg), - "%s on ", stuck == hung ? "no progress" : "hang"); - for_each_engine_masked(engine, gt->i915, hung, tmp) - len += scnprintf(msg + len, sizeof(msg) - len, - "%s, ", engine->name); - msg[len-2] = '\0'; - - return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s", msg); -} - -/* - * This is called when the chip hasn't reported back with completed - * batchbuffers in a long time. 
We keep track per ring seqno progress and - * if there are no progress, hangcheck score for that ring is increased. - * Further, acthd is inspected to see if the ring is stuck. On stuck case - * we kick the ring. If we see no progress on three subsequent calls - * we assume chip is wedged and try to fix it by resetting the chip. - */ -static void hangcheck_elapsed(struct work_struct *work) -{ - struct intel_gt *gt = - container_of(work, typeof(*gt), hangcheck.work.work); - intel_engine_mask_t hung = 0, stuck = 0, wedged = 0; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - - if (!i915_modparams.enable_hangcheck) - return; - - if (!READ_ONCE(gt->awake)) - return; - - if (intel_gt_is_wedged(gt)) - return; - - wakeref = intel_runtime_pm_get_if_in_use(>->i915->runtime_pm); - if (!wakeref) - return; - - /* As enabling the GPU requires fairly extensive mmio access, - * periodically arm the mmio checker to see if we are triggering - * any invalid access. - */ - intel_uncore_arm_unclaimed_mmio_detection(gt->uncore); - - for_each_engine(engine, gt->i915, id) { - struct hangcheck hc; - - intel_engine_signal_breadcrumbs(engine); - - hangcheck_load_sample(engine, &hc); - hangcheck_accumulate_sample(engine, &hc); - hangcheck_store_sample(engine, &hc); - - if (hc.stalled) { - hung |= engine->mask; - if (hc.action != ENGINE_DEAD) - stuck |= engine->mask; - } - - if (hc.wedged) - wedged |= engine->mask; - } - - if (GEM_SHOW_DEBUG() && (hung | stuck)) { - struct drm_printer p = drm_debug_printer("hangcheck"); - - for_each_engine(engine, gt->i915, id) { - if (intel_engine_is_idle(engine)) - continue; - - intel_engine_dump(engine, &p, "%s\n", engine->name); - } - } - - if (wedged) { - dev_err(gt->i915->drm.dev, - "GPU recovery timed out," - " cancelling all in-flight rendering.\n"); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); - } - - if (hung) - hangcheck_declare_hang(gt, hung, stuck); - - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); - - /* Reset timer in case GPU hangs without another request being added */ - intel_gt_queue_hangcheck(gt); -} - -void intel_gt_queue_hangcheck(struct intel_gt *gt) -{ - unsigned long delay; - - if (unlikely(!i915_modparams.enable_hangcheck)) - return; - - /* - * Don't continually defer the hangcheck so that it is always run at - * least once after work has been scheduled on any ring. Otherwise, - * we will ignore a hung ring if a second ring is kept busy. 
- */ - - delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); - queue_delayed_work(system_long_wq, >->hangcheck.work, delay); -} - -void intel_engine_init_hangcheck(struct intel_engine_cs *engine) -{ - memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); - engine->hangcheck.action_timestamp = jiffies; -} - -void intel_gt_init_hangcheck(struct intel_gt *gt) -{ - INIT_DELAYED_WORK(>->hangcheck.work, hangcheck_elapsed); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftest_hangcheck.c" -#endif diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c new file mode 100644 index 000000000000..ceb785b75c25 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -0,0 +1,161 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/cpufreq.h> + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_llc.h" +#include "intel_sideband.h" + +struct ia_constants { + unsigned int min_gpu_freq; + unsigned int max_gpu_freq; + + unsigned int min_ring_freq; + unsigned int max_ia_freq; +}; + +static struct intel_gt *llc_to_gt(struct intel_llc *llc) +{ + return container_of(llc, struct intel_gt, llc); +} + +static unsigned int cpu_max_MHz(void) +{ + struct cpufreq_policy *policy; + unsigned int max_khz; + + policy = cpufreq_cpu_get(0); + if (policy) { + max_khz = policy->cpuinfo.max_freq; + cpufreq_cpu_put(policy); + } else { + /* + * Default to measured freq if none found, PCU will ensure we + * don't go over + */ + max_khz = tsc_khz; + } + + return max_khz / 1000; +} + +static bool get_ia_constants(struct intel_llc *llc, + struct ia_constants *consts) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + struct intel_rps *rps = &llc_to_gt(llc)->rps; + + if (rps->max_freq <= rps->min_freq) + return false; + + consts->max_ia_freq = cpu_max_MHz(); + + consts->min_ring_freq = + intel_uncore_read(llc_to_gt(llc)->uncore, DCLK) & 0xf; + /* convert DDR frequency from units of 266.6MHz to bandwidth */ + consts->min_ring_freq = mult_frac(consts->min_ring_freq, 8, 3); + + consts->min_gpu_freq = rps->min_freq; + consts->max_gpu_freq = rps->max_freq; + if (INTEL_GEN(i915) >= 9) { + /* Convert GT frequency to 50 HZ units */ + consts->min_gpu_freq /= GEN9_FREQ_SCALER; + consts->max_gpu_freq /= GEN9_FREQ_SCALER; + } + + return true; +} + +static void calc_ia_freq(struct intel_llc *llc, + unsigned int gpu_freq, + const struct ia_constants *consts, + unsigned int *out_ia_freq, + unsigned int *out_ring_freq) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + const int diff = consts->max_gpu_freq - gpu_freq; + unsigned int ia_freq = 0, ring_freq = 0; + + if (INTEL_GEN(i915) >= 9) { + /* + * ring_freq = 2 * GT. ring_freq is in 100MHz units + * No floor required for ring frequency on SKL. + */ + ring_freq = gpu_freq; + } else if (INTEL_GEN(i915) >= 8) { + /* max(2 * GT, DDR). NB: GT is 50MHz units */ + ring_freq = max(consts->min_ring_freq, gpu_freq); + } else if (IS_HASWELL(i915)) { + ring_freq = mult_frac(gpu_freq, 5, 4); + ring_freq = max(consts->min_ring_freq, ring_freq); + /* leave ia_freq as the default, chosen by cpufreq */ + } else { + const int min_freq = 15; + const int scale = 180; + + /* + * On older processors, there is no separate ring + * clock domain, so in order to boost the bandwidth + * of the ring, we need to upclock the CPU (ia_freq). + * + * For GPU frequencies less than 750MHz, + * just use the lowest ring freq. 
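[Editor's note] calc_ia_freq() (its tail continues just below) derives, for each GPU frequency step, the IA and ring frequencies that gen6_update_ring_freq() programs into the PCU table. The standalone sketch below replays only the legacy pre-Haswell branch with the constants from the code (min_freq = 15, scale = 180); max_ia_freq = 3400 MHz and the GPU frequency range are assumed example values.

/* Userspace illustration of the legacy ia_freq formula; not kernel code. */
#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

int main(void)
{
	const int min_freq = 15, scale = 180;
	const unsigned int max_ia_freq = 3400;	/* assumed CPU max, MHz */
	const unsigned int max_gpu_freq = 22, min_gpu_freq = 11;

	for (unsigned int gpu_freq = max_gpu_freq;
	     gpu_freq >= min_gpu_freq; gpu_freq--) {
		const int diff = max_gpu_freq - gpu_freq;
		unsigned int ia_freq;

		if (gpu_freq < min_freq)
			ia_freq = 800;	/* below ~750MHz: just upclock to a floor */
		else
			ia_freq = max_ia_freq - diff * scale / 2;
		ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);

		printf("gpu_freq %2u -> ia_freq field %u (100MHz units)\n",
		       gpu_freq, ia_freq);
	}

	return 0;
}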
+ */ + if (gpu_freq < min_freq) + ia_freq = 800; + else + ia_freq = consts->max_ia_freq - diff * scale / 2; + ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); + } + + *out_ia_freq = ia_freq; + *out_ring_freq = ring_freq; +} + +static void gen6_update_ring_freq(struct intel_llc *llc) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + struct ia_constants consts; + unsigned int gpu_freq; + + if (!get_ia_constants(llc, &consts)) + return; + + /* + * For each potential GPU frequency, load a ring frequency we'd like + * to use for memory access. We do this by specifying the IA frequency + * the PCU should use as a reference to determine the ring frequency. + */ + for (gpu_freq = consts.max_gpu_freq; + gpu_freq >= consts.min_gpu_freq; + gpu_freq--) { + unsigned int ia_freq, ring_freq; + + calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq); + sandybridge_pcode_write(i915, + GEN6_PCODE_WRITE_MIN_FREQ_TABLE, + ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | + ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | + gpu_freq); + } +} + +void intel_llc_enable(struct intel_llc *llc) +{ + if (HAS_LLC(llc_to_gt(llc)->i915)) + gen6_update_ring_freq(llc); +} + +void intel_llc_disable(struct intel_llc *llc) +{ + /* Currently there is no HW configuration to be done to disable. */ +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_llc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_llc.h b/drivers/gpu/drm/i915/gt/intel_llc.h new file mode 100644 index 000000000000..ef09a890d2b7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_llc.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_LLC_H +#define INTEL_LLC_H + +struct intel_llc; + +void intel_llc_enable(struct intel_llc *llc); +void intel_llc_disable(struct intel_llc *llc); + +#endif /* INTEL_LLC_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_llc_types.h b/drivers/gpu/drm/i915/gt/intel_llc_types.h new file mode 100644 index 000000000000..ecad4687b930 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_llc_types.h @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_LLC_TYPES_H +#define INTEL_LLC_TYPES_H + +struct intel_llc { +}; + +#endif /* INTEL_LLC_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 06a506c29463..0ac3b26674ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -145,6 +145,7 @@ #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" +#include "intel_ring.h" #include "intel_workarounds.h" #define RING_EXECLIST_QFULL (1 << 0x2) @@ -230,17 +231,42 @@ static int __execlists_context_alloc(struct intel_context *ce, struct intel_engine_cs *engine); static void execlists_init_reg_state(u32 *reg_state, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring); + const struct intel_context *ce, + const struct intel_engine_cs *engine, + const struct intel_ring *ring, + bool close); +static void +__execlists_update_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine); static void mark_eio(struct i915_request *rq) { - if (!i915_request_signaled(rq)) - dma_fence_set_error(&rq->fence, -EIO); + if (i915_request_completed(rq)) + return; + + GEM_BUG_ON(i915_request_signaled(rq)); + + dma_fence_set_error(&rq->fence, -EIO); i915_request_mark_complete(rq); } +static struct i915_request * +active_request(const struct intel_timeline * 
const tl, struct i915_request *rq) +{ + struct i915_request *active = rq; + + rcu_read_lock(); + list_for_each_entry_continue_reverse(rq, &tl->requests, link) { + if (i915_request_completed(rq)) + break; + + active = rq; + } + rcu_read_unlock(); + + return active; +} + static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + @@ -337,10 +363,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * However, the priority hint is a mere hint that we may need to * preempt. If that hint is stale or we may be trying to preempt * ourselves, ignore the request. + * + * More naturally we would write + * prio >= max(0, last); + * except that we wish to prevent triggering preemption at the same + * priority level: the task that is running should remain running + * to preserve FIFO ordering of dependencies. */ - last_prio = effective_prio(rq); - if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint, - last_prio)) + last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); + if (engine->execlists.queue_priority_hint <= last_prio) return false; /* @@ -429,12 +460,8 @@ assert_priority_queue(const struct i915_request *prev, static u64 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) { - struct i915_gem_context *ctx = ce->gem_context; u64 desc; - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); - desc = INTEL_LEGACY_32B_CONTEXT; if (i915_vm_is_4lvl(ce->vm)) desc = INTEL_LEGACY_64B_CONTEXT; @@ -444,33 +471,379 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_GEN(engine->i915, 8)) desc |= GEN8_CTX_L3LLC_COHERENT; - desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; - /* bits 12-31 */ + desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */ /* * The following 32bits are copied into the OA reports (dword 2). * Consider updating oa_get_render_ctx_id in i915_perf.c when changing * anything below. 
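[Editor's note] The clamp in need_preempt() above keeps equal-priority work in FIFO order while still letting normal work preempt below-normal work. A standalone sketch of just that comparison, assuming I915_PRIORITY_NORMAL is 0 as the "max(0, last)" form in the comment implies.

/* Illustration of the need_preempt() priority clamp only. */
#include <stdio.h>

#define I915_PRIORITY_NORMAL 0	/* assumed, per the max(0, last) comment */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

static int would_preempt(int hint, int running_prio)
{
	int last_prio = MAX(running_prio, I915_PRIORITY_NORMAL - 1);

	return hint > last_prio;
}

int main(void)
{
	printf("%d\n", would_preempt(0, -2));	/* 1: normal preempts below-normal */
	printf("%d\n", would_preempt(3, 3));	/* 0: equal priority stays FIFO */
	printf("%d\n", would_preempt(4, 3));	/* 1: strictly higher priority wins */
	return 0;
}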
*/ if (INTEL_GEN(engine->i915) >= 11) { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; - /* bits 37-47 */ - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; /* bits 48-53 */ - /* TODO: decide what to do with SW counter (bits 55-60) */ - desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; /* bits 61-63 */ - } else { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ } return desc; } +static u32 *set_offsets(u32 *regs, + const u8 *data, + const struct intel_engine_cs *engine) +#define NOP(x) (BIT(7) | (x)) +#define LRI(count, flags) ((flags) << 6 | (count)) +#define POSTED BIT(0) +#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) +#define REG16(x) \ + (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ + (((x) >> 2) & 0x7f) +#define END() 0 +{ + const u32 base = engine->mmio_base; + + while (*data) { + u8 count, flags; + + if (*data & BIT(7)) { /* skip */ + regs += *data++ & ~BIT(7); + continue; + } + + count = *data & 0x3f; + flags = *data >> 6; + data++; + + *regs = MI_LOAD_REGISTER_IMM(count); + if (flags & POSTED) + *regs |= MI_LRI_FORCE_POSTED; + if (INTEL_GEN(engine->i915) >= 11) + *regs |= MI_LRI_CS_MMIO; + regs++; + + GEM_BUG_ON(!count); + do { + u32 offset = 0; + u8 v; + + do { + v = *data++; + offset <<= 7; + offset |= v & ~BIT(7); + } while (v & BIT(7)); + + *regs = base + (offset << 2); + regs += 2; + } while (--count); + } + + return regs; +} + +static const u8 gen8_xcs_offsets[] = { + NOP(1), + LRI(11, 0), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + + NOP(9), + LRI(9, 0), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(2, 0), + REG16(0x200), + REG(0x028), + + END(), +}; + +static const u8 gen9_xcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, POSTED), + REG16(0x200), + + NOP(13), + LRI(44, POSTED), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + + END(), +}; + +static const u8 gen12_xcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + 
REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END(), +}; + +static const u8 gen8_rcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, 0), + REG(0x0c8), + + END(), +}; + +static const u8 gen11_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(1, POSTED), + REG(0x1b0), + + NOP(10), + LRI(1, 0), + REG(0x0c8), + + END(), +}; + +static const u8 gen12_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END(), +}; + +#undef END +#undef REG16 +#undef REG +#undef LRI +#undef NOP + +static const u8 *reg_offsets(const struct intel_engine_cs *engine) +{ + /* + * The gen12+ lists only have the registers we program in the basic + * default state. We rely on the context image using relative + * addressing to automatic fixup the register state between the + * physical engines for virtual engine. + */ + GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 && + !intel_engine_has_relative_mmio(engine)); + + if (engine->class == RENDER_CLASS) { + if (INTEL_GEN(engine->i915) >= 12) + return gen12_rcs_offsets; + else if (INTEL_GEN(engine->i915) >= 11) + return gen11_rcs_offsets; + else + return gen8_rcs_offsets; + } else { + if (INTEL_GEN(engine->i915) >= 12) + return gen12_xcs_offsets; + else if (INTEL_GEN(engine->i915) >= 9) + return gen9_xcs_offsets; + else + return gen8_xcs_offsets; + } +} + static void unwind_wa_tail(struct i915_request *rq) { rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); @@ -489,7 +862,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->active.requests, sched.link) { - struct intel_engine_cs *owner; if (i915_request_completed(rq)) continue; /* XXX */ @@ -504,8 +876,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) * engine so that it can be moved across onto another physical * engine as load dictates. 
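[Editor's note] set_offsets() and the per-gen tables above use a compact byte encoding: NOP skips dwords, LRI starts an MI_LOAD_REGISTER_IMM block, and REG/REG16 carry register offsets in 7-bit chunks with a continuation bit. The standalone sketch below decodes a two-entry table the same way; MI_INSTR()/MI_LOAD_REGISTER_IMM() are reproduced locally for illustration, the compile-time range checks and the gen11+ CS_MMIO bit are omitted, and base = 0x2000 is just an example mmio_base.

/* Userspace sketch of how set_offsets() expands the compact tables. */
#include <stdio.h>
#include <stdint.h>

#define BIT(n)			(1u << (n))
#define MI_INSTR(op, flags)	(((op) << 23) | (flags))
#define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2 * (x) - 1)
#define MI_LRI_FORCE_POSTED	BIT(12)

#define NOP(x)		(BIT(7) | (x))
#define LRI(count, f)	((f) << 6 | (count))
#define POSTED		BIT(0)
#define REG(x)		((x) >> 2)
#define REG16(x)	(((x) >> 9) | BIT(7)), (((x) >> 2) & 0x7f)
#define END()		0

static const uint8_t table[] = {
	NOP(1),			/* skip the leading MI_NOOP slot */
	LRI(2, POSTED),		/* one LRI block with two registers */
	REG16(0x244),		/* e.g. RING_CONTEXT_CONTROL */
	REG(0x034),		/* e.g. RING_HEAD */
	END(),
};

int main(void)
{
	const uint32_t base = 0x2000;	/* example mmio_base */
	uint32_t regs[16] = { 0 }, *r = regs;
	const uint8_t *data = table;

	while (*data) {
		if (*data & BIT(7)) {		/* NOP: skip dwords */
			r += *data++ & ~BIT(7);
			continue;
		}

		uint8_t count = *data & 0x3f, flags = *data >> 6;
		data++;

		*r = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*r |= MI_LRI_FORCE_POSTED;
		r++;

		do {
			uint32_t offset = 0;
			uint8_t v;

			do {		/* 7-bit chunks, BIT(7) = continue */
				v = *data++;
				offset = offset << 7 | (v & ~BIT(7));
			} while (v & BIT(7));

			*r = base + (offset << 2);
			r += 2;		/* leave the value dword untouched */
		} while (--count);
	}

	for (unsigned int i = 0; i < 6; i++)
		printf("dw%u: 0x%08x\n", i, (unsigned int)regs[i]);
	return 0;
}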
*/ - owner = rq->hw_context->engine; - if (likely(owner == engine)) { + if (likely(rq->execution_mask == engine->mask)) { GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != prio) { prio = rq_prio(rq); @@ -516,6 +887,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); active = rq; } else { + struct intel_engine_cs *owner = rq->hw_context->engine; + /* * Decouple the virtual breadcrumb before moving it * back to the virtual engine -- we don't want the @@ -525,7 +898,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) */ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) { - spin_lock(&rq->lock); + spin_lock_nested(&rq->lock, + SINGLE_DEPTH_NESTING); i915_request_cancel_breadcrumb(rq); spin_unlock(&rq->lock); } @@ -561,6 +935,114 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } +static void intel_engine_context_in(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + if (engine->stats.active++ == 0) + engine->stats.start = ktime_get(); + GEM_BUG_ON(engine->stats.active == 0); + } + + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +static void intel_engine_context_out(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + ktime_t last; + + if (engine->stats.active && --engine->stats.active == 0) { + /* + * Decrement the active context count and in case GPU + * is now idle add up to the running total. + */ + last = ktime_sub(ktime_get(), engine->stats.start); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } else if (engine->stats.active == 0) { + /* + * After turning on engine stats, context out might be + * the first event in which case we account from the + * time stats gathering was turned on. + */ + last = ktime_sub(ktime_get(), engine->stats.enabled_at); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } + } + + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +static void restore_default_state(struct intel_context *ce, + struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + if (engine->pinned_default_state) + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + + execlists_init_reg_state(regs, ce, engine, ce->ring, false); +} + +static void reset_active(struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct intel_context * const ce = rq->hw_context; + u32 head; + + /* + * The executing context has been cancelled. We want to prevent + * further execution along this context and propagate the error on + * to anything depending on its results. + * + * In __i915_request_submit(), we apply the -EIO and remove the + * requests' payloads for any banned requests. But first, we must + * rewind the context back to the start of the incomplete request so + * that we do not jump back into the middle of the batch. + * + * We preserve the breadcrumbs and semaphores of the incomplete + * requests so that inter-timeline dependencies (i.e other timelines) + * remain correctly ordered. 
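[Editor's note] intel_engine_context_in()/out() above account engine busy time under a seqlock writer. A hedged sketch of how a consistent snapshot could be taken on the read side; this is a simplified paraphrase (it ignores the stats.enabled gating), not the driver's actual accessor.

/* Sketch of a seqlock read-side snapshot of the busy-time accounting. */
static ktime_t read_busy_time(struct intel_engine_cs *engine)
{
	unsigned int seq;
	ktime_t total;

	do {
		seq = read_seqbegin(&engine->stats.lock);

		total = engine->stats.total;
		if (engine->stats.active)	/* a context is still running */
			total = ktime_add(total,
					  ktime_sub(ktime_get(),
						    engine->stats.start));
	} while (read_seqretry(&engine->stats.lock, seq));

	return total;
}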
And we defer to __i915_request_submit() + * so that all asynchronous waits are correctly handled. + */ + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", + __func__, engine->name, rq->fence.context, rq->fence.seqno); + + /* On resubmission of the active request, payload will be scrubbed */ + if (i915_request_completed(rq)) + head = rq->tail; + else + head = active_request(ce->timeline, rq)->head; + ce->ring->head = intel_ring_wrap(ce->ring, head); + intel_ring_update_space(ce->ring); + + /* Scrub the context image to prevent replaying the previous batch */ + restore_default_state(ce, engine); + __execlists_update_reg_state(ce, engine); + + /* We've switched away, so this should be a no-op, but intent matters */ + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; +} + static inline struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { @@ -569,6 +1051,21 @@ __execlists_schedule_in(struct i915_request *rq) intel_context_get(ce); + if (unlikely(i915_gem_context_is_banned(ce->gem_context))) + reset_active(rq, engine); + + if (ce->tag) { + /* Use a fixed tag for OA and friends */ + ce->lrc_desc |= (u64)ce->tag << 32; + } else { + /* We don't need a strict matching tag, just different values */ + ce->lrc_desc &= ~GENMASK_ULL(47, 37); + ce->lrc_desc |= + (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) << + GEN11_SW_CTX_ID_SHIFT; + BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); + } + intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -612,6 +1109,12 @@ __execlists_schedule_out(struct i915_request *rq, { struct intel_context * const ce = rq->hw_context; + /* + * NB process_csb() is not under the engine->active.lock and hence + * schedule_out can race with schedule_in meaning that we should + * refrain from doing non-trivial work here. 
+ */ + intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); intel_gt_pm_put(engine->gt); @@ -654,7 +1157,7 @@ static u64 execlists_update_context(const struct i915_request *rq) struct intel_context *ce = rq->hw_context; u64 desc; - ce->lrc_reg_state[CTX_RING_TAIL + 1] = + ce->lrc_reg_state[CTX_RING_TAIL] = intel_ring_set_tail(rq->ring, rq->tail); /* @@ -677,6 +1180,10 @@ static u64 execlists_update_context(const struct i915_request *rq) desc = ce->lrc_desc; ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + /* Wa_1607138340:tgl */ + if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0)) + desc |= CTX_DESC_FORCE_RESTORE; + return desc; } @@ -699,6 +1206,9 @@ trace_ports(const struct intel_engine_execlists *execlists, const struct intel_engine_cs *engine = container_of(execlists, typeof(*engine), execlists); + if (!ports[0]) + return; + GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n", engine->name, msg, ports[0]->fence.context, @@ -719,25 +1229,45 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, trace_ports(execlists, msg, execlists->pending); - if (!execlists->pending[0]) + if (!execlists->pending[0]) { + GEM_TRACE_ERR("Nothing pending for promotion!\n"); return false; + } - if (execlists->pending[execlists_num_ports(execlists)]) + if (execlists->pending[execlists_num_ports(execlists)]) { + GEM_TRACE_ERR("Excess pending[%d] for promotion!\n", + execlists_num_ports(execlists)); return false; + } for (port = execlists->pending; (rq = *port); port++) { - if (ce == rq->hw_context) + if (ce == rq->hw_context) { + GEM_TRACE_ERR("Duplicate context in pending[%zd]\n", + port - execlists->pending); return false; + } ce = rq->hw_context; if (i915_request_completed(rq)) continue; - if (i915_active_is_idle(&ce->active)) + if (i915_active_is_idle(&ce->active)) { + GEM_TRACE_ERR("Inactive context in pending[%zd]\n", + port - execlists->pending); + return false; + } + + if (!i915_vma_is_pinned(ce->state)) { + GEM_TRACE_ERR("Unpinned context in pending[%zd]\n", + port - execlists->pending); return false; + } - if (!i915_vma_is_pinned(ce->state)) + if (!i915_vma_is_pinned(ce->ring->vma)) { + GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n", + port - execlists->pending); return false; + } } return ce; @@ -814,6 +1344,10 @@ static bool can_merge_rq(const struct i915_request *prev, if (i915_request_completed(next)) return true; + if (unlikely((prev->flags ^ next->flags) & + (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL))) + return false; + if (!can_merge_ctx(prev->hw_context, next->hw_context)) return false; @@ -823,47 +1357,7 @@ static bool can_merge_rq(const struct i915_request *prev, static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { - u32 base = engine->mmio_base; - - /* Must match execlists_init_reg_state()! 
*/ - - regs[CTX_CONTEXT_CONTROL] = - i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base)); - regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base)); - regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base)); - regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base)); - regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base)); - - regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base)); - regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base)); - regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base)); - regs[CTX_SECOND_BB_HEAD_U] = - i915_mmio_reg_offset(RING_SBBADDR_UDW(base)); - regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base)); - regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base)); - - regs[CTX_CTX_TIMESTAMP] = - i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base)); - regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3)); - regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3)); - regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2)); - regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2)); - regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1)); - regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1)); - regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); - regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); - - if (engine->class == RENDER_CLASS) { - regs[CTX_RCS_INDIRECT_CTX] = - i915_mmio_reg_offset(RING_INDIRECT_CTX(base)); - regs[CTX_RCS_INDIRECT_CTX_OFFSET] = - i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base)); - regs[CTX_BB_PER_CTX_PTR] = - i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base)); - - regs[CTX_R_PWR_CLK_STATE] = - i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); - } + set_offsets(regs, reg_offsets(engine), engine); } static bool virtual_matches(const struct virtual_engine *ve, @@ -978,7 +1472,7 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) { int hint; - if (!intel_engine_has_semaphores(engine)) + if (!intel_engine_has_timeslices(engine)) return false; if (list_is_last(&rq->sched.link, &engine->active.requests)) @@ -999,15 +1493,32 @@ switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) return rq_prio(list_next_entry(rq, sched.link)); } -static bool -enable_timeslice(const struct intel_engine_execlists *execlists) +static inline unsigned long +timeslice(const struct intel_engine_cs *engine) +{ + return READ_ONCE(engine->props.timeslice_duration_ms); +} + +static unsigned long +active_timeslice(const struct intel_engine_cs *engine) { - const struct i915_request *rq = *execlists->active; + const struct i915_request *rq = *engine->execlists.active; if (i915_request_completed(rq)) - return false; + return 0; + + if (engine->execlists.switch_priority_hint < effective_prio(rq)) + return 0; + + return timeslice(engine); +} + +static void set_timeslice(struct intel_engine_cs *engine) +{ + if (!intel_engine_has_timeslices(engine)) + return; - return execlists->switch_priority_hint >= effective_prio(rq); + set_timer_ms(&engine->execlists.timer, active_timeslice(engine)); } static void record_preemption(struct intel_engine_execlists *execlists) @@ -1015,6 +1526,30 @@ static void record_preemption(struct intel_engine_execlists *execlists) (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); } +static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = 
last_active(&engine->execlists); + if (!rq) + return 0; + + /* Force a fast reset for terminated contexts (ignoring sysfs!) */ + if (unlikely(i915_gem_context_is_banned(rq->gem_context))) + return 1; + + return READ_ONCE(engine->props.preempt_timeout_ms); +} + +static void set_preempt_timeout(struct intel_engine_cs *engine) +{ + if (!intel_engine_has_preempt_reset(engine)) + return; + + set_timer_ms(&engine->execlists.preempt, + active_preempt_timeout(engine)); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1111,7 +1646,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE; last = NULL; } else if (need_timeslice(engine, last) && - !timer_pending(&engine->execlists.timer)) { + timer_expired(&engine->execlists.timer)) { GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n", engine->name, last->fence.context, @@ -1147,8 +1682,18 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * submission. */ if (!list_is_last(&last->sched.link, - &engine->active.requests)) + &engine->active.requests)) { + /* + * Even if ELSP[1] is occupied and not worthy + * of timeslices, our queue might be. + */ + if (!execlists->timer.expires && + need_timeslice(engine, last)) + set_timer_ms(&execlists->timer, + timeslice(engine)); + return; + } /* * WaIdleLiteRestore:bdw,skl @@ -1216,7 +1761,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) unsigned int n; GEM_BUG_ON(READ_ONCE(ve->context.inflight)); - virtual_update_register_offsets(regs, engine); + + if (!intel_engine_has_relative_mmio(engine)) + virtual_update_register_offsets(regs, + engine); if (!list_empty(&ve->context.signals)) virtual_xfer_breadcrumbs(ve, engine); @@ -1299,6 +1847,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last->hw_context == rq->hw_context) goto done; + if (i915_request_has_sentinel(last)) + goto done; + /* * If GVT overrides us we only ever submit * port[0], leaving port[1] empty. Note that we @@ -1357,11 +1908,28 @@ done: if (submit) { *port = execlists_schedule_in(last, port - execlists->pending); - memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists->switch_priority_hint = switch_prio(engine, *execlists->pending); + + /* + * Skip if we ended up with exactly the same set of requests, + * e.g. 
trying to timeslice a pair of ordered contexts + */ + if (!memcmp(execlists->active, execlists->pending, + (port - execlists->pending + 1) * sizeof(*port))) { + do + execlists_schedule_out(fetch_and_zero(port)); + while (port-- != execlists->pending); + + goto skip_submit; + } + + memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists_submit_ports(engine); + + set_preempt_timeout(engine); } else { +skip_submit: ring_set_paused(engine, 0); } } @@ -1394,13 +1962,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists) return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); } -enum csb_step { - CSB_NOP, - CSB_PROMOTE, - CSB_PREEMPT, - CSB_COMPLETE, -}; - /* * Starting with Gen12, the status has a new format: * @@ -1427,7 +1988,7 @@ enum csb_step { * bits 47-57: sw context id of the lrc the GT switched away from * bits 58-63: sw counter of the lrc the GT switched away from */ -static inline enum csb_step +static inline bool gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) { u32 lower_dw = csb[0]; @@ -1436,9 +1997,6 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw); bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; - if (!ctx_away_valid && ctx_to_valid) - return CSB_PROMOTE; - /* * The context switch detail is not guaranteed to be 5 when a preemption * occurs, so we can't just check for that. The check below works for @@ -1446,8 +2004,10 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) * instructions and lite-restore. Preempt-to-idle via the CTRL register * would require some extra handling, but we don't support that. */ - if (new_queue && ctx_away_valid) - return CSB_PREEMPT; + if (!ctx_away_valid || new_queue) { + GEM_BUG_ON(!ctx_to_valid); + return true; + } /* * switch detail = 5 is covered by the case above and we do not expect a @@ -1455,30 +2015,13 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) * use polling mode. */ GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw)); - - if (*execlists->active) { - GEM_BUG_ON(!ctx_away_valid); - return CSB_COMPLETE; - } - - return CSB_NOP; + return false; } -static inline enum csb_step +static inline bool gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) { - unsigned int status = *csb; - - if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) - return CSB_PROMOTE; - - if (status & GEN8_CTX_STATUS_PREEMPTED) - return CSB_PREEMPT; - - if (*execlists->active) - return CSB_COMPLETE; - - return CSB_NOP; + return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); } static void process_csb(struct intel_engine_cs *engine) @@ -1488,7 +2031,14 @@ static void process_csb(struct intel_engine_cs *engine) const u8 num_entries = execlists->csb_size; u8 head, tail; - GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915)); + /* + * As we modify our execlists state tracking we require exclusive + * access. Either we are inside the tasklet, or the tasklet is disabled + * and we assume that is only inside the reset paths and so serialised. + */ + GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && + !reset_in_progress(execlists)); + GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine)); /* * Note that csb_write, csb_status may be either in HWSP or mmio. 
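/*
 * Illustrative sketch (not part of the patch): the rework above collapses
 * the old four-way csb_step enum into a single "promote?" decision.  A CSB
 * event either promotes execlists->pending[] into execlists->active[] (the
 * hardware switched into a new set of contexts, via idle->active or
 * preemption) or it retires the head of execlists->active[].  The mask names
 * below are stand-ins for the real GEN8_CTX_STATUS_* definitions in the
 * i915 headers.
 */
#include <stdbool.h>
#include <stdint.h>

#define CSB_IDLE_TO_ACTIVE	(1u << 0)	/* stand-in for GEN8_CTX_STATUS_IDLE_ACTIVE */
#define CSB_PREEMPTED		(1u << 1)	/* stand-in for GEN8_CTX_STATUS_PREEMPTED */

static bool csb_event_promotes(uint32_t status)
{
	/* Any switch into a new context set means "promote pending". */
	return status & (CSB_IDLE_TO_ACTIVE | CSB_PREEMPTED);
}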
@@ -1517,7 +2067,7 @@ static void process_csb(struct intel_engine_cs *engine) rmb(); do { - enum csb_step csb_step; + bool promote; if (++head == num_entries) head = 0; @@ -1545,20 +2095,19 @@ static void process_csb(struct intel_engine_cs *engine) buf[2 * head + 0], buf[2 * head + 1]); if (INTEL_GEN(engine->i915) >= 12) - csb_step = gen12_csb_parse(execlists, buf + 2 * head); + promote = gen12_csb_parse(execlists, buf + 2 * head); else - csb_step = gen8_csb_parse(execlists, buf + 2 * head); + promote = gen8_csb_parse(execlists, buf + 2 * head); + if (promote) { + if (!inject_preempt_hang(execlists)) + ring_set_paused(engine, 0); - switch (csb_step) { - case CSB_PREEMPT: /* cancel old inflight, prepare for switch */ + /* cancel old inflight, prepare for switch */ trace_ports(execlists, "preempted", execlists->active); - while (*execlists->active) execlists_schedule_out(*execlists->active++); - /* fallthrough */ - case CSB_PROMOTE: /* switch pending to inflight */ - GEM_BUG_ON(*execlists->active); + /* switch pending to inflight */ GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); execlists->active = memcpy(execlists->inflight, @@ -1566,16 +2115,13 @@ static void process_csb(struct intel_engine_cs *engine) execlists_num_ports(execlists) * sizeof(*execlists->pending)); - if (enable_timeslice(execlists)) - mod_timer(&execlists->timer, jiffies + 1); - - if (!inject_preempt_hang(execlists)) - ring_set_paused(engine, 0); + set_timeslice(engine); WRITE_ONCE(execlists->pending[0], NULL); - break; + } else { + GEM_BUG_ON(!*execlists->active); - case CSB_COMPLETE: /* port0 completed, advanced to port1 */ + /* port0 completed, advanced to port1 */ trace_ports(execlists, "completed", execlists->active); /* @@ -1590,10 +2136,6 @@ static void process_csb(struct intel_engine_cs *engine) GEM_BUG_ON(execlists->active - execlists->inflight > execlists_num_ports(execlists)); - break; - - case CSB_NOP: - break; } } while (head != tail); @@ -1623,6 +2165,43 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) } } +static noinline void preempt_reset(struct intel_engine_cs *engine) +{ + const unsigned int bit = I915_RESET_ENGINE + engine->id; + unsigned long *lock = &engine->gt->reset.flags; + + if (i915_modparams.reset < 3) + return; + + if (test_and_set_bit(bit, lock)) + return; + + /* Mark this tasklet as disabled to avoid waiting for it to complete */ + tasklet_disable_nosync(&engine->execlists.tasklet); + + GEM_TRACE("%s: preempt timeout %lu+%ums\n", + engine->name, + READ_ONCE(engine->props.preempt_timeout_ms), + jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); + intel_engine_reset(engine, "preemption time out"); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(bit, lock); +} + +static bool preempt_timeout(const struct intel_engine_cs *const engine) +{ + const struct timer_list *t = &engine->execlists.preempt; + + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) + return false; + + if (!timer_expired(t)) + return false; + + return READ_ONCE(engine->execlists.pending[0]); +} + /* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. 
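/*
 * Illustrative sketch (not part of the patch): a preemption timeout is only
 * acted upon when the preempt timer has expired *and* a request is still
 * sitting in pending[0], i.e. the hardware never completed the preemption
 * that was requested.  The struct and field names are stand-ins; the real
 * code also bails out early when CONFIG_DRM_I915_PREEMPT_TIMEOUT is not set.
 */
#include <stdbool.h>
#include <stddef.h>

struct engine_stub {
	bool preempt_timer_expired;	/* stand-in for timer_expired(&execlists.preempt) */
	const void *pending0;		/* stand-in for execlists.pending[0] */
};

static bool preempt_timed_out(const struct engine_stub *engine)
{
	if (!engine->preempt_timer_expired)
		return false;

	/* No request awaiting promotion => the preemption did complete. */
	return engine->pending0 != NULL;
}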
@@ -1630,23 +2209,39 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - unsigned long flags; + bool timeout = preempt_timeout(engine); process_csb(engine); - if (!READ_ONCE(engine->execlists.pending[0])) { + if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { + unsigned long flags; + spin_lock_irqsave(&engine->active.lock, flags); __execlists_submission_tasklet(engine); spin_unlock_irqrestore(&engine->active.lock, flags); + + /* Recheck after serialising with direct-submission */ + if (timeout && preempt_timeout(engine)) + preempt_reset(engine); } } -static void execlists_submission_timer(struct timer_list *timer) +static void __execlists_kick(struct intel_engine_execlists *execlists) { - struct intel_engine_cs *engine = - from_timer(engine, timer, execlists.timer); - /* Kick the tasklet for some interrupt coalescing and reset handling */ - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&execlists->tasklet); +} + +#define execlists_kick(t, member) \ + __execlists_kick(container_of(t, struct intel_engine_execlists, member)) + +static void execlists_timeslice(struct timer_list *timer) +{ + execlists_kick(timer, timer); +} + +static void execlists_preempt(struct timer_list *timer) +{ + execlists_kick(timer, preempt); } static void queue_request(struct intel_engine_cs *engine, @@ -1726,7 +2321,6 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; - vaddr += LRC_HEADER_PAGES * PAGE_SIZE; vaddr += engine->context_size; memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); @@ -1738,7 +2332,6 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; - vaddr += LRC_HEADER_PAGES * PAGE_SIZE; vaddr += engine->context_size; if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) @@ -1752,14 +2345,13 @@ static void execlists_context_unpin(struct intel_context *ce) check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE, ce->engine); - i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); intel_ring_reset(ce->ring, ce->ring->tail); } static void -__execlists_update_reg_state(struct intel_context *ce, - struct intel_engine_cs *engine) +__execlists_update_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) { struct intel_ring *ring = ce->ring; u32 *regs = ce->lrc_reg_state; @@ -1767,16 +2359,16 @@ __execlists_update_reg_state(struct intel_context *ce, GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); - regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); - regs[CTX_RING_HEAD + 1] = ring->head; - regs[CTX_RING_TAIL + 1] = ring->tail; + regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_HEAD] = ring->head; + regs[CTX_RING_TAIL] = ring->tail; /* RPCS */ if (engine->class == RENDER_CLASS) { - regs[CTX_R_PWR_CLK_STATE + 1] = + regs[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(engine->i915, &ce->sseu); - i915_oa_init_reg_state(engine, ce, regs); + i915_oa_init_reg_state(ce, engine); } } @@ -1802,18 +2394,12 @@ __execlists_context_pin(struct intel_context *ce, goto unpin_active; } - ret = i915_gem_context_pin_hw_id(ce->gem_context); - if (ret) - goto unpin_map; - ce->lrc_desc = lrc_descriptor(ce, engine); 
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; __execlists_update_reg_state(ce, engine); return 0; -unpin_map: - i915_gem_object_unpin_map(ce->state->obj); unpin_active: intel_context_active_release(ce); err: @@ -1869,7 +2455,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; - GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb); + GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb); cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -1885,7 +2471,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) *cs++ = MI_NOOP; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = rq->timeline->hwsp_offset; + *cs++ = i915_request_timeline(rq)->hwsp_offset; *cs++ = 0; *cs++ = rq->fence.seqno - 1; @@ -1897,60 +2483,6 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) return 0; } -static int emit_pdps(struct i915_request *rq) -{ - const struct intel_engine_cs * const engine = rq->engine; - struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm); - int err, i; - u32 *cs; - - GEM_BUG_ON(intel_vgpu_active(rq->i915)); - - /* - * Beware ye of the dragons, this sequence is magic! - * - * Small changes to this sequence can cause anything from - * GPU hangs to forcewake errors and machine lockups! - */ - - /* Flush any residual operations from the context load */ - err = engine->emit_flush(rq, EMIT_FLUSH); - if (err) - return err; - - /* Magic required to prevent forcewake errors! */ - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - return err; - - cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Ensure the LRI have landed before we invalidate & continue */ - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; - for (i = GEN8_3LVL_PDPES; i--; ) { - const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - u32 base = engine->mmio_base; - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); - *cs++ = lower_32_bits(pd_daddr); - } - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - /* Be doubly sure the LRI have landed before proceeding */ - err = engine->emit_flush(rq, EMIT_FLUSH); - if (err) - return err; - - /* Re-invalidate the TLB for luck */ - return engine->emit_flush(rq, EMIT_INVALIDATE); -} - static int execlists_request_alloc(struct i915_request *request) { int ret; @@ -1973,10 +2505,7 @@ static int execlists_request_alloc(struct i915_request *request) */ /* Unconditionally invalidate GPU caches and TLBs. 
*/ - if (i915_vm_is_4lvl(request->hw_context->vm)) - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); - else - ret = emit_pdps(request); + ret = request->engine->emit_flush(request, EMIT_INVALIDATE); if (ret) return ret; @@ -2028,12 +2557,6 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) return batch; } -static u32 slm_offset(struct intel_engine_cs *engine) -{ - return intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA); -} - /* * Typically we only have one indirect_ctx and per_ctx batch buffer which are * initialized at the beginning and shared across all contexts but this field @@ -2062,10 +2585,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) /* Actual scratch location is at 128 bytes offset */ batch = gen8_emit_pipe_control(batch, PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_STORE_DATA_INDEX | PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE, - slm_offset(engine)); + LRC_PPHWSP_SCRATCH_ADDR); *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -2423,27 +2946,29 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) &execlists->csb_status[reset_value]); } -static struct i915_request *active_request(struct i915_request *rq) +static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) { - const struct intel_context * const ce = rq->hw_context; - struct i915_request *active = NULL; - struct list_head *list; - - if (!i915_request_is_active(rq)) /* unwound, but incomplete! */ - return rq; - - list = &rq->timeline->requests; - list_for_each_entry_from_reverse(rq, list, link) { - if (i915_request_completed(rq)) - break; + if (INTEL_GEN(engine->i915) >= 12) + return 0x60; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x54; + else if (engine->class == RENDER_CLASS) + return 0x58; + else + return -1; +} - if (rq->hw_context != ce) - break; +static void __execlists_reset_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + int x; - active = rq; + x = lrc_ring_mi_mode(engine); + if (x != -1) { + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; } - - return active; } static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) @@ -2451,7 +2976,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_context *ce; struct i915_request *rq; - u32 *regs; + + mb(); /* paranoia: read the CSB pointers from after the reset */ + clflush(execlists->csb_write); + mb(); process_csb(engine); /* drain preemption events */ @@ -2467,16 +2995,23 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) if (!rq) goto unwind; + /* We still have requests in-flight; the engine should be active */ + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + ce = rq->hw_context; - GEM_BUG_ON(i915_active_is_idle(&ce->active)); GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); - rq = active_request(rq); - if (!rq) { - ce->ring->head = ce->ring->tail; + + if (i915_request_completed(rq)) { + /* Idle context; tidy up the ring so we can restart afresh */ + ce->ring->head = intel_ring_wrap(ce->ring, rq->tail); goto out_replay; } + /* Context has requests still in-flight; it should not be idle! 
*/ + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + rq = active_request(ce->timeline, rq); ce->ring->head = intel_ring_wrap(ce->ring, rq->head); + GEM_BUG_ON(ce->ring->head == ce->ring->tail); /* * If this request hasn't started yet, e.g. it is waiting on a @@ -2516,19 +3051,16 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * future request will be after userspace has had the opportunity * to recreate its own state. */ - regs = ce->lrc_reg_state; - if (engine->pinned_default_state) { - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - } - execlists_init_reg_state(regs, ce, engine, ce->ring); + GEM_BUG_ON(!intel_context_is_pinned(ce)); + restore_default_state(ce, engine); out_replay: - GEM_TRACE("%s replay {head:%04x, tail:%04x\n", + GEM_TRACE("%s replay {head:%04x, tail:%04x}\n", engine->name, ce->ring->head, ce->ring->tail); intel_ring_update_space(ce->ring); + __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine); + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -2749,7 +3281,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode) } *cs++ = cmd; - *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = LRC_PPHWSP_SCRATCH_ADDR; *cs++ = 0; /* upper addr */ *cs++ = 0; /* value */ intel_ring_advance(request, cs); @@ -2760,10 +3292,6 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode) static int gen8_emit_flush_render(struct i915_request *request, u32 mode) { - struct intel_engine_cs *engine = request->engine; - u32 scratch_addr = - intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); bool vf_flush_wa = false, dc_flush_wa = false; u32 *cs, flags = 0; int len; @@ -2785,7 +3313,7 @@ static int gen8_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; /* * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL @@ -2818,7 +3346,7 @@ static int gen8_emit_flush_render(struct i915_request *request, cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, 0); - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); if (dc_flush_wa) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); @@ -2831,11 +3359,6 @@ static int gen8_emit_flush_render(struct i915_request *request, static int gen11_emit_flush_render(struct i915_request *request, u32 mode) { - struct intel_engine_cs *engine = request->engine; - const u32 scratch_addr = - intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); - if (mode & EMIT_FLUSH) { u32 *cs; u32 flags = 0; @@ -2848,13 +3371,13 @@ static int gen11_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; cs = intel_ring_begin(request, 6); if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(request, cs); } @@ -2872,14 +3395,106 @@ static int 
gen11_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(request, cs); + } + + return 0; +} + +static u32 preparser_disable(bool state) +{ + return MI_ARB_CHECK | 1 << 8 | state; +} + +static int gen12_emit_flush_render(struct i915_request *request, + u32 mode) +{ + if (mode & EMIT_FLUSH) { + u32 flags = 0; + u32 *cs; + + flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + /* Wa_1409600907:tgl */ + flags |= PIPE_CONTROL_DEPTH_STALL; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + flags |= PIPE_CONTROL_CS_STALL; cs = intel_ring_begin(request, 6); if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(request, cs); + } + + if (mode & EMIT_INVALIDATE) { + u32 flags = 0; + u32 *cs; + + flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + flags |= PIPE_CONTROL_CS_STALL; + + cs = intel_ring_begin(request, 8); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Prevent the pre-parser from skipping past the TLB + * invalidate and loading a stale page for the batch + * buffer / request payload. 
+ */ + *cs++ = preparser_disable(true); + + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + + *cs++ = preparser_disable(false); intel_ring_advance(request, cs); + + /* + * Wa_1604544889:tgl + */ + if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) { + flags = 0; + flags |= PIPE_CONTROL_CS_STALL; + flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, + LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(request, cs); + } } return 0; @@ -2933,7 +3548,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write(cs, request->fence.seqno, - request->timeline->hwsp_offset, + i915_request_active_timeline(request)->hwsp_offset, 0); return gen8_emit_fini_breadcrumb_footer(request, cs); @@ -2941,28 +3556,28 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write_rcs(cs, - request->fence.seqno, - request->timeline->hwsp_offset, - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE); - - /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ cs = gen8_emit_pipe_control(cs, - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_CS_STALL, + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE, 0); + /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ + cs = gen8_emit_ggtt_write_rcs(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL); + return gen8_emit_fini_breadcrumb_footer(request, cs); } -static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, - u32 *cs) +static u32 * +gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, - request->timeline->hwsp_offset, + i915_request_active_timeline(request)->hwsp_offset, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | @@ -2973,9 +3588,88 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, return gen8_emit_fini_breadcrumb_footer(request, cs); } +/* + * Note that the CS instruction pre-parser will not stall on the breadcrumb + * flush and will continue pre-fetching the instructions after it before the + * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at + * BB_START/END instructions, so, even though we might pre-fetch the pre-amble + * of the next request before the memory has been flushed, we're guaranteed that + * we won't access the batch itself too early. + * However, on gen12+ the parser can pre-fetch across the BB_START/END commands, + * so, if the current request is modifying an instruction in the next request on + * the same intel_context, we might pre-fetch and then execute the pre-update + * instruction. To avoid this, the users of self-modifying code should either + * disable the parser around the code emitting the memory writes, via a new flag + * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For + * the in-kernel use-cases we've opted to use a separate context, see + * reloc_gpu() as an example. 
+ * All the above applies only to the instructions themselves. Non-inline data + * used by the instructions is not pre-fetched. + */ + +static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) +{ + *cs++ = MI_SEMAPHORE_WAIT_TOKEN | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = intel_hws_preempt_address(request->engine); + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + + return cs; +} + +static __always_inline u32* +gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) +{ + *cs++ = MI_USER_INTERRUPT; + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + if (intel_engine_has_semaphores(request->engine)) + cs = gen12_emit_preempt_busywait(request, cs); + + request->tail = intel_ring_offset(request, cs); + assert_ring_tail_valid(request->ring, request->tail); + + return gen8_emit_wa_tail(request, cs); +} + +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +{ + cs = gen8_emit_ggtt_write(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + 0); + + return gen12_emit_fini_breadcrumb_footer(request, cs); +} + +static u32 * +gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) +{ + cs = gen8_emit_ggtt_write_rcs(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + /* Wa_1409600907:tgl */ + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_HDC_PIPELINE_FLUSH); + + return gen12_emit_fini_breadcrumb_footer(request, cs); +} + static void execlists_park(struct intel_engine_cs *engine) { - del_timer(&engine->execlists.timer); + cancel_timer(&engine->execlists.timer); + cancel_timer(&engine->execlists.preempt); } void intel_execlists_set_default_submission(struct intel_engine_cs *engine) @@ -2998,6 +3692,9 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) engine->flags |= I915_ENGINE_HAS_PREEMPTION; } + + if (INTEL_GEN(engine->i915) >= 12) + engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; } static void execlists_destroy(struct intel_engine_cs *engine) @@ -3025,6 +3722,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen8_emit_flush; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; + if (INTEL_GEN(engine->i915) >= 12) + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; engine->set_default_submission = intel_execlists_set_default_submission; @@ -3070,6 +3769,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine) { switch (INTEL_GEN(engine->i915)) { case 12: + engine->emit_flush = gen12_emit_flush_render; + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; + break; case 11: engine->emit_flush = gen11_emit_flush_render; engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; @@ -3085,7 +3787,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) { tasklet_init(&engine->execlists.tasklet, execlists_submission_tasklet, (unsigned long)engine); - timer_setup(&engine->execlists.timer, execlists_submission_timer, 0); + timer_setup(&engine->execlists.timer, execlists_timeslice, 0); + timer_setup(&engine->execlists.preempt, execlists_preempt, 0); logical_ring_default_vfuncs(engine); 
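/*
 * Illustrative sketch (not part of the patch): the two timer_setup() calls
 * just above wire both the timeslice and the preempt-timeout expiry to the
 * same kick path.  container_of() recovers the execlists state from
 * whichever timer_list member fired, and the tasklet is then scheduled to
 * do the real work in softirq context.  The types below are minimal
 * stand-ins so the pattern can be shown self-contained.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct timer_list_stub { int armed; };

struct execlists_stub {
	struct timer_list_stub timer;	/* timeslice expiry */
	struct timer_list_stub preempt;	/* preempt-timeout expiry */
};

static void kick(struct execlists_stub *el)
{
	printf("kick %p\n", (void *)el);	/* stands in for tasklet_hi_schedule() */
}

static void timeslice_expired(struct timer_list_stub *t)
{
	kick(container_of(t, struct execlists_stub, timer));
}

static void preempt_expired(struct timer_list_stub *t)
{
	kick(container_of(t, struct execlists_stub, preempt));
}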
logical_ring_default_irqs(engine); @@ -3142,7 +3845,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) return 0; } -static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) +static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine) { u32 indirect_ctx_offset; @@ -3175,86 +3878,50 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) return indirect_ctx_offset; } -static void execlists_init_reg_state(u32 *regs, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring) -{ - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm); - bool rcs = engine->class == RENDER_CLASS; - u32 base = engine->mmio_base; - /* - * A context is actually a big batch buffer with several - * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The - * values we are setting here are only for the first context restore: - * on a subsequent save, the GPU will recreate this batchbuffer with new - * values (including all the missing MI_LOAD_REGISTER_IMM commands that - * we are not initializing here). - * - * Must keep consistent with virtual_update_register_offsets(). - */ - regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | - MI_LRI_FORCE_POSTED; - - CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base), +static void init_common_reg_state(u32 * const regs, + const struct intel_engine_cs *engine, + const struct intel_ring *ring) +{ + regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH)); - if (INTEL_GEN(engine->i915) < 11) { - regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + if (INTEL_GEN(engine->i915) < 11) + regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | CTX_CTRL_RS_CTX_ENABLE); - } - CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); - CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); - CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); - CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), - RING_CTL_SIZE(ring->size) | RING_VALID); - CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); - CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); - CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); - if (rcs) { - struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - - CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); - CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, - RING_INDIRECT_CTX_OFFSET(base), 0); - if (wa_ctx->indirect_ctx.size) { - u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - - regs[CTX_RCS_INDIRECT_CTX + 1] = - (ggtt_offset + wa_ctx->indirect_ctx.offset) | - (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - - regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = - intel_lr_indirect_ctx_offset(engine) << 6; - } - CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); - if (wa_ctx->per_ctx.size) { - u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); + regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID; + regs[CTX_BB_STATE] = RING_BB_PPGTT; +} - regs[CTX_BB_PER_CTX_PTR + 1] = - (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; - } +static void init_wa_bb_reg_state(u32 * const regs, + const struct intel_engine_cs *engine, + u32 pos_bb_per_ctx) +{ + const struct i915_ctx_workarounds * 
const wa_ctx = &engine->wa_ctx; + + if (wa_ctx->per_ctx.size) { + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); + + regs[pos_bb_per_ctx] = + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } - regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; + if (wa_ctx->indirect_ctx.size) { + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); + + regs[pos_bb_per_ctx + 2] = + (ggtt_offset + wa_ctx->indirect_ctx.offset) | + (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); - /* PDP values well be assigned later if needed */ - CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0); - CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0); - CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0); - CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0); - CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0); - CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0); - CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0); - CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0); + regs[pos_bb_per_ctx + 4] = + intel_lr_indirect_ctx_offset(engine) << 6; + } +} +static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt) +{ if (i915_vm_is_4lvl(&ppgtt->vm)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and @@ -3267,15 +3934,47 @@ static void execlists_init_reg_state(u32 *regs, ASSIGN_CTX_PDP(ppgtt, regs, 1); ASSIGN_CTX_PDP(ppgtt, regs, 0); } +} - if (rcs) { - regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); +static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) +{ + if (i915_is_ggtt(vm)) + return i915_vm_to_ggtt(vm)->alias; + else + return i915_vm_to_ppgtt(vm); +} + +static void execlists_init_reg_state(u32 *regs, + const struct intel_context *ce, + const struct intel_engine_cs *engine, + const struct intel_ring *ring, + bool close) +{ + /* + * A context is actually a big batch buffer with several + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The + * values we are setting here are only for the first context restore: + * on a subsequent save, the GPU will recreate this batchbuffer with new + * values (including all the missing MI_LOAD_REGISTER_IMM commands that + * we are not initializing here). + * + * Must keep consistent with virtual_update_register_offsets(). + */ + u32 *bbe = set_offsets(regs, reg_offsets(engine), engine); + + if (close) { /* Close the batch; used mainly by live_lrc_layout() */ + *bbe = MI_BATCH_BUFFER_END; + if (INTEL_GEN(engine->i915) >= 10) + *bbe |= BIT(0); } - regs[CTX_END] = MI_BATCH_BUFFER_END; - if (INTEL_GEN(engine->i915) >= 10) - regs[CTX_END] |= BIT(0); + init_common_reg_state(regs, engine, ring); + init_ppgtt_reg_state(regs, vm_alias(ce->vm)); + + init_wa_bb_reg_state(regs, engine, + INTEL_GEN(engine->i915) >= 12 ? + GEN12_CTX_BB_PER_CTX_PTR : + CTX_BB_PER_CTX_PTR); } static int @@ -3284,6 +3983,7 @@ populate_lr_context(struct intel_context *ce, struct intel_engine_cs *engine, struct intel_ring *ring) { + bool inhibit = true; void *vaddr; u32 *regs; int ret; @@ -3298,12 +3998,6 @@ populate_lr_context(struct intel_context *ce, set_redzone(vaddr, engine); if (engine->default_state) { - /* - * We only want to copy over the template context state; - * skipping over the headers reserved for GuC communication, - * leaving those as zero. 
- */ - const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE; void *defaults; defaults = i915_gem_object_pin_map(engine->default_state, @@ -3313,23 +4007,22 @@ populate_lr_context(struct intel_context *ce, goto err_unpin_ctx; } - memcpy(vaddr + start, defaults + start, engine->context_size); + memcpy(vaddr, defaults, engine->context_size); i915_gem_object_unpin_map(engine->default_state); + inhibit = false; } /* The second page of the context object contains some fields which must * be set up prior to the first execution. */ regs = vaddr + LRC_STATE_PN * PAGE_SIZE; - execlists_init_reg_state(regs, ce, engine, ring); - if (!engine->default_state) - regs[CTX_CONTEXT_CONTROL + 1] |= + execlists_init_reg_state(regs, ce, engine, ring, inhibit); + if (inhibit) + regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); ret = 0; err_unpin_ctx: - __i915_gem_object_flush_map(ctx_obj, - LRC_HEADER_PAGES * PAGE_SIZE, - engine->context_size); + __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); i915_gem_object_unpin_map(ctx_obj); return ret; } @@ -3346,11 +4039,6 @@ static int __execlists_context_alloc(struct intel_context *ce, GEM_BUG_ON(ce->state); context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); - /* - * Before the actual start of the context image, we insert a few pages - * for our own use and for sharing with the GuC. - */ - context_size += LRC_HEADER_PAGES * PAGE_SIZE; if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) context_size += I915_GTT_PAGE_SIZE; /* for redzone */ @@ -3462,8 +4150,9 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve) return; swap(ve->siblings[swp], ve->siblings[0]); - virtual_update_register_offsets(ve->context.lrc_reg_state, - ve->siblings[0]); + if (!intel_engine_has_relative_mmio(ve->siblings[0])) + virtual_update_register_offsets(ve->context.lrc_reg_state, + ve->siblings[0]); } static int virtual_context_pin(struct intel_context *ce) @@ -3714,6 +4403,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, ve->base.i915 = ctx->i915; ve->base.gt = siblings[0]->gt; + ve->base.uncore = siblings[0]->uncore; ve->base.id = -1; ve->base.class = OTHER_CLASS; ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; @@ -3737,6 +4427,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); + intel_engine_init_breadcrumbs(&ve->base); intel_engine_init_execlists(&ve->base); @@ -3899,6 +4590,18 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, return 0; } +struct intel_engine_cs * +intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, + unsigned int sibling) +{ + struct virtual_engine *ve = to_virtual_engine(engine); + + if (sibling >= ve->num_siblings) + return NULL; + + return ve->siblings[sibling]; +} + void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, @@ -3987,6 +4690,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, u32 head, bool scrub) { + GEM_BUG_ON(!intel_context_is_pinned(ce)); + /* * We want a simple context + ring to execute the breadcrumb update. * We cannot rely on the context being intact across the GPU hang, @@ -3995,16 +4700,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. 
*/ - if (scrub) { - u32 *regs = ce->lrc_reg_state; - - if (engine->pinned_default_state) { - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - } - execlists_init_reg_state(regs, ce, engine, ce->ring); - } + if (scrub) + restore_default_state(ce, engine); /* Rerun the request; its payload has been neutered (if guilty). */ ce->ring->head = head; @@ -4013,6 +4710,13 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, __execlists_update_reg_state(ce, engine); } +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) +{ + return engine->set_default_submission == + intel_execlists_set_default_submission; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_lrc.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index c2bba82bcc16..04511d8ebdc1 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -43,6 +43,7 @@ struct intel_engine_cs; #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) @@ -66,6 +67,12 @@ struct intel_engine_cs; #define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8) #define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) +#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ +#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ +#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ +/* in Gen12 ID 0x7FF is reserved to indicate idle */ +#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) + enum { INTEL_CONTEXT_SCHEDULE_IN = 0, INTEL_CONTEXT_SCHEDULE_OUT, @@ -79,30 +86,15 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine); int intel_execlists_submission_init(struct intel_engine_cs *engine); /* Logical Ring Contexts */ - -/* - * We allocate a header at the start of the context image for our own - * use, therefore the actual location of the logical state is offset - * from the start of the VMA. The layout is - * - * | [guc] | [hwsp] [logical state] | - * |<- our header ->|<- context image ->| - * - */ -/* The first page is used for sharing data with the GuC */ -#define LRC_GUCSHR_PN (0) -#define LRC_GUCSHR_SZ (1) /* At the start of the context image is its per-process HWS page */ -#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ) +#define LRC_PPHWSP_PN (0) #define LRC_PPHWSP_SZ (1) -/* Finally we have the logical state for the context */ +/* After the PPHWSP we have the logical state for the context */ #define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ) -/* - * Currently we include the PPHWSP in __intel_engine_context_size() so - * the size of the header is synonymous with the start of the PPHWSP. 
- */ -#define LRC_HEADER_PAGES LRC_PPHWSP_PN +/* Space within PPHWSP reserved to be used as scratch */ +#define LRC_PPHWSP_SCRATCH 0x34 +#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32)) void intel_execlists_set_default_submission(struct intel_engine_cs *engine); @@ -131,4 +123,11 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, const struct intel_engine_cs *master, const struct intel_engine_cs *sibling); +struct intel_engine_cs * +intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, + unsigned int sibling); + +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine); + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index b8f20ad71169..06ab0276e10e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -9,55 +9,41 @@ #include <linux/types.h> -/* GEN8+ Reg State Context */ -#define CTX_LRI_HEADER_0 0x01 -#define CTX_CONTEXT_CONTROL 0x02 -#define CTX_RING_HEAD 0x04 -#define CTX_RING_TAIL 0x06 -#define CTX_RING_BUFFER_START 0x08 -#define CTX_RING_BUFFER_CONTROL 0x0a -#define CTX_BB_HEAD_U 0x0c -#define CTX_BB_HEAD_L 0x0e -#define CTX_BB_STATE 0x10 -#define CTX_SECOND_BB_HEAD_U 0x12 -#define CTX_SECOND_BB_HEAD_L 0x14 -#define CTX_SECOND_BB_STATE 0x16 -#define CTX_BB_PER_CTX_PTR 0x18 -#define CTX_RCS_INDIRECT_CTX 0x1a -#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c -#define CTX_LRI_HEADER_1 0x21 -#define CTX_CTX_TIMESTAMP 0x22 -#define CTX_PDP3_UDW 0x24 -#define CTX_PDP3_LDW 0x26 -#define CTX_PDP2_UDW 0x28 -#define CTX_PDP2_LDW 0x2a -#define CTX_PDP1_UDW 0x2c -#define CTX_PDP1_LDW 0x2e -#define CTX_PDP0_UDW 0x30 -#define CTX_PDP0_LDW 0x32 -#define CTX_LRI_HEADER_2 0x41 -#define CTX_R_PWR_CLK_STATE 0x42 -#define CTX_END 0x44 - -#define CTX_REG(reg_state, pos, reg, val) do { \ - u32 *reg_state__ = (reg_state); \ - const u32 pos__ = (pos); \ - (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \ - (reg_state__)[(pos__) + 1] = (val); \ -} while (0) +/* GEN8 to GEN11 Reg State Context */ +#define CTX_CONTEXT_CONTROL (0x02 + 1) +#define CTX_RING_HEAD (0x04 + 1) +#define CTX_RING_TAIL (0x06 + 1) +#define CTX_RING_BUFFER_START (0x08 + 1) +#define CTX_RING_BUFFER_CONTROL (0x0a + 1) +#define CTX_BB_STATE (0x10 + 1) +#define CTX_BB_PER_CTX_PTR (0x18 + 1) +#define CTX_PDP3_UDW (0x24 + 1) +#define CTX_PDP3_LDW (0x26 + 1) +#define CTX_PDP2_UDW (0x28 + 1) +#define CTX_PDP2_LDW (0x2a + 1) +#define CTX_PDP1_UDW (0x2c + 1) +#define CTX_PDP1_LDW (0x2e + 1) +#define CTX_PDP0_UDW (0x30 + 1) +#define CTX_PDP0_LDW (0x32 + 1) +#define CTX_R_PWR_CLK_STATE (0x42 + 1) + +#define GEN9_CTX_RING_MI_MODE 0x54 + +/* GEN12+ Reg State Context */ +#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1) #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \ u32 *reg_state__ = (reg_state); \ const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \ - (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \ - (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \ + (reg_state__)[CTX_PDP ## n ## _UDW] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP ## n ## _LDW] = lower_32_bits(addr__); \ } while (0) #define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ u32 *reg_state__ = (reg_state); \ const u64 addr__ = px_dma(ppgtt->pd); \ - (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \ - (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \ + (reg_state__)[CTX_PDP0_UDW] = upper_32_bits(addr__); \ + 
(reg_state__)[CTX_PDP0_LDW] = lower_32_bits(addr__); \ } while (0) #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index cea184a7dde9..2b977991b785 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -26,6 +26,7 @@ #include "intel_gt.h" #include "intel_mocs.h" #include "intel_lrc.h" +#include "intel_ring.h" /* structures required */ struct drm_i915_mocs_entry { @@ -279,10 +280,9 @@ static const struct drm_i915_mocs_entry icelake_mocs_table[] = { GEN11_MOCS_ENTRIES }; -static bool get_mocs_settings(struct intel_gt *gt, +static bool get_mocs_settings(const struct drm_i915_private *i915, struct drm_i915_mocs_table *table) { - struct drm_i915_private *i915 = gt->i915; bool result = false; if (INTEL_GEN(i915) >= 12) { @@ -323,9 +323,9 @@ static bool get_mocs_settings(struct intel_gt *gt, return result; } -static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) +static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index) { - switch (engine_id) { + switch (engine->id) { case RCS0: return GEN9_GFX_MOCS(index); case VCS0: @@ -339,7 +339,7 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) case VCS2: return GEN11_MFX2_MOCS(index); default: - MISSING_CASE(engine_id); + MISSING_CASE(engine->id); return INVALID_MMIO_REG; } } @@ -357,118 +357,25 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table, return table->table[I915_MOCS_PTE].control_value; } -/** - * intel_mocs_init_engine() - emit the mocs control table - * @engine: The engine for whom to emit the registers. - * - * This function simply emits a MI_LOAD_REGISTER_IMM command for the - * given table starting at the given address. - */ -void intel_mocs_init_engine(struct intel_engine_cs *engine) +static void init_mocs_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table) { - struct intel_gt *gt = engine->gt; - struct intel_uncore *uncore = gt->uncore; - struct drm_i915_mocs_table table; - unsigned int index; - u32 unused_value; - - /* Platforms with global MOCS do not need per-engine initialization. 
*/ - if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) - return; - - /* Called under a blanket forcewake */ - assert_forcewakes_active(uncore, FORCEWAKE_ALL); - - if (!get_mocs_settings(gt, &table)) - return; - - /* Set unused values to PTE */ - unused_value = table.table[I915_MOCS_PTE].control_value; - - for (index = 0; index < table.size; index++) { - u32 value = get_entry_control(&table, index); + struct intel_uncore *uncore = engine->uncore; + u32 unused_value = table->table[I915_MOCS_PTE].control_value; + unsigned int i; + for (i = 0; i < table->size; i++) intel_uncore_write_fw(uncore, - mocs_register(engine->id, index), - value); - } + mocs_register(engine, i), + get_entry_control(table, i)); - /* All remaining entries are also unused */ - for (; index < table.n_entries; index++) + /* All remaining entries are unused */ + for (; i < table->n_entries; i++) intel_uncore_write_fw(uncore, - mocs_register(engine->id, index), + mocs_register(engine, i), unused_value); } -static void intel_mocs_init_global(struct intel_gt *gt) -{ - struct intel_uncore *uncore = gt->uncore; - struct drm_i915_mocs_table table; - unsigned int index; - - GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); - - if (!get_mocs_settings(gt, &table)) - return; - - if (GEM_DEBUG_WARN_ON(table.size > table.n_entries)) - return; - - for (index = 0; index < table.size; index++) - intel_uncore_write(uncore, - GEN12_GLOBAL_MOCS(index), - table.table[index].control_value); - - /* - * Ok, now set the unused entries to the invalid entry (index 0). These - * entries are officially undefined and no contract for the contents and - * settings is given for these entries. - */ - for (; index < table.n_entries; index++) - intel_uncore_write(uncore, - GEN12_GLOBAL_MOCS(index), - table.table[0].control_value); -} - -static int emit_mocs_control_table(struct i915_request *rq, - const struct drm_i915_mocs_table *table) -{ - enum intel_engine_id engine = rq->engine->id; - unsigned int index; - u32 unused_value; - u32 *cs; - - if (GEM_WARN_ON(table->size > table->n_entries)) - return -ENODEV; - - /* Set unused values to PTE */ - unused_value = table->table[I915_MOCS_PTE].control_value; - - cs = intel_ring_begin(rq, 2 + 2 * table->n_entries); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries); - - for (index = 0; index < table->size; index++) { - u32 value = get_entry_control(table, index); - - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = value; - } - - /* All remaining entries are also unused */ - for (; index < table->n_entries; index++) { - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = unused_value; - } - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - /* * Get l3cc_value from MOCS entry taking into account when it's not used: * I915_MOCS_PTE's value is returned in this case. 
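/*
 * Illustrative sketch (not part of the patch): each GEN9_LNCFCMOCS register
 * packs two 16-bit L3CC entries, the even entry in bits 15:0 and the odd one
 * in bits 31:16.  The (u32) cast added to l3cc_combine() in the hunk below
 * keeps the shift in unsigned arithmetic; without it 'high' is promoted to a
 * signed int and shifting a value with bit 15 set into bit 31 is undefined.
 */
#include <stdint.h>

static uint32_t l3cc_pack(uint16_t low, uint16_t high)
{
	return low | (uint32_t)high << 16;
}

/* e.g. l3cc_pack(0x0030, 0x0010) == 0x00100030 */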
@@ -486,141 +393,99 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, u16 low, u16 high) { - return low | high << 16; + return low | (u32)high << 16; } -static int emit_mocs_l3cc_table(struct i915_request *rq, - const struct drm_i915_mocs_table *table) +static void init_l3cc_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table) { - u16 unused_value; + struct intel_uncore *uncore = engine->uncore; + u16 unused_value = table->table[I915_MOCS_PTE].l3cc_value; unsigned int i; - u32 *cs; - - if (GEM_WARN_ON(table->size > table->n_entries)) - return -ENODEV; - - /* Set unused values to PTE */ - unused_value = table->table[I915_MOCS_PTE].l3cc_value; - - cs = intel_ring_begin(rq, 2 + table->n_entries); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); for (i = 0; i < table->size / 2; i++) { u16 low = get_entry_l3cc(table, 2 * i); u16 high = get_entry_l3cc(table, 2 * i + 1); - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, low, high); + intel_uncore_write(uncore, + GEN9_LNCFCMOCS(i), + l3cc_combine(table, low, high)); } /* Odd table size - 1 left over */ - if (table->size & 0x01) { + if (table->size & 1) { u16 low = get_entry_l3cc(table, 2 * i); - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, low, unused_value); + intel_uncore_write(uncore, + GEN9_LNCFCMOCS(i), + l3cc_combine(table, low, unused_value)); i++; } /* All remaining entries are also unused */ - for (; i < table->n_entries / 2; i++) { - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, unused_value, unused_value); - } + for (; i < table->n_entries / 2; i++) + intel_uncore_write(uncore, + GEN9_LNCFCMOCS(i), + l3cc_combine(table, unused_value, + unused_value)); +} + +void intel_mocs_init_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_mocs_table table; + + /* Called under a blanket forcewake */ + assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); + if (!get_mocs_settings(engine->i915, &table)) + return; + + /* Platforms with global MOCS do not need per-engine initialization. 
*/ + if (!HAS_GLOBAL_MOCS_REGISTERS(engine->i915)) + init_mocs_table(engine, &table); - return 0; + if (engine->class == RENDER_CLASS) + init_l3cc_table(engine, &table); } -static void intel_mocs_init_l3cc_table(struct intel_gt *gt) +static void intel_mocs_init_global(struct intel_gt *gt) { struct intel_uncore *uncore = gt->uncore; struct drm_i915_mocs_table table; - unsigned int i; - u16 unused_value; + unsigned int index; - if (!get_mocs_settings(gt, &table)) + /* + * LLC and eDRAM control values are not applicable to dgfx + */ + if (IS_DGFX(gt->i915)) return; - /* Set unused values to PTE */ - unused_value = table.table[I915_MOCS_PTE].l3cc_value; - - for (i = 0; i < table.size / 2; i++) { - u16 low = get_entry_l3cc(&table, 2 * i); - u16 high = get_entry_l3cc(&table, 2 * i + 1); + GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(&table, low, high)); - } + if (!get_mocs_settings(gt->i915, &table)) + return; - /* Odd table size - 1 left over */ - if (table.size & 0x01) { - u16 low = get_entry_l3cc(&table, 2 * i); + if (GEM_DEBUG_WARN_ON(table.size > table.n_entries)) + return; + for (index = 0; index < table.size; index++) intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(&table, low, unused_value)); - i++; - } + GEN12_GLOBAL_MOCS(index), + table.table[index].control_value); - /* All remaining entries are also unused */ - for (; i < table.n_entries / 2; i++) + /* + * Ok, now set the unused entries to the invalid entry (index 0). These + * entries are officially undefined and no contract for the contents and + * settings is given for these entries. + */ + for (; index < table.n_entries; index++) intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(&table, unused_value, - unused_value)); -} - -/** - * intel_mocs_emit() - program the MOCS register. - * @rq: Request to use to set up the MOCS tables. - * - * This function will emit a batch buffer with the values required for - * programming the MOCS register values for all the currently supported - * rings. - * - * These registers are partially stored in the RCS context, so they are - * emitted at the same time so that when a context is created these registers - * are set up. These registers have to be emitted into the start of the - * context as setting the ELSP will re-init some of these registers back - * to the hw values. - * - * Return: 0 on success, otherwise the error status. - */ -int intel_mocs_emit(struct i915_request *rq) -{ - struct drm_i915_mocs_table t; - int ret; - - if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915) || - rq->engine->class != RENDER_CLASS) - return 0; - - if (get_mocs_settings(rq->engine->gt, &t)) { - /* Program the RCS control registers */ - ret = emit_mocs_control_table(rq, &t); - if (ret) - return ret; - - /* Now program the l3cc registers */ - ret = emit_mocs_l3cc_table(rq, &t); - if (ret) - return ret; - } - - return 0; + GEN12_GLOBAL_MOCS(index), + table.table[0].control_value); } void intel_mocs_init(struct intel_gt *gt) { - intel_mocs_init_l3cc_table(gt); - if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) intel_mocs_init_global(gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index 2ae816b7ca19..83371f3e6ba1 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -49,13 +49,10 @@ * context handling keep the MOCS in step. 
*/ -struct i915_request; struct intel_engine_cs; struct intel_gt; void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); -int intel_mocs_emit(struct i915_request *rq); - #endif diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c new file mode 100644 index 000000000000..700104b90163 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -0,0 +1,787 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/pm_runtime.h> + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_rc6.h" +#include "intel_sideband.h" + +/** + * DOC: RC6 + * + * RC6 is a special power stage which allows the GPU to enter an very + * low-voltage mode when idle, using down to 0V while at this stage. This + * stage is entered automatically when the GPU is idle when RC6 support is + * enabled, and as soon as new workload arises GPU wakes up automatically as + * well. + * + * There are different RC6 modes available in Intel GPU, which differentiate + * among each other with the latency required to enter and leave RC6 and + * voltage consumed by the GPU in different states. + * + * The combination of the following flags define which states GPU is allowed + * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and + * RC6pp is deepest RC6. Their support by hardware varies according to the + * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one + * which brings the most power savings; deeper states save more power, but + * require higher latency to switch to and wake up. + */ + +static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6) +{ + return container_of(rc6, struct intel_gt, rc6); +} + +static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc) +{ + return rc6_to_gt(rc)->uncore; +} + +static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc) +{ + return rc6_to_gt(rc)->i915; +} + +static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) +{ + intel_uncore_write_fw(uncore, reg, val); +} + +static void gen11_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + + set(uncore, GEN6_RC_SLEEP, 0); + + set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + + /* + * 2c: Program Coarse Power Gating Policies. + * + * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we + * use instead is a more conservative estimate for the maximum time + * it takes us to service a CS interrupt and submit a new ELSP - that + * is the time which the GPU is idle waiting for the CPU to select the + * next request to execute. If the idle hysteresis is less than that + * interrupt service latency, the hardware will automatically gate + * the power well and we will then incur the wake up cost on top of + * the service latency. A similar guide from plane_state is that we + * do not want the enable hysteresis to less than the wakeup latency. 
+ * + * igt/gem_exec_nop/sequential provides a rough estimate for the + * service latency, and puts it around 10us for Broadwell (and other + * big core) and around 40us for Broxton (and other low power cores). + * [Note that for legacy ringbuffer submission, this is less than 1us!] + * However, the wakeup latency on Broxton is closer to 100us. To be + * conservative, we have to factor in a context switch on top (due + * to ksoftirqd). + */ + set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); + set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); + + /* 3a: Enable RC6 */ + set(uncore, GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1)); + + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE); +} + +static void gen9_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 rc6_mode; + + /* 2b: Program RC6 thresholds.*/ + if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) { + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + } else if (IS_SKYLAKE(rc6_to_i915(rc6))) { + /* + * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only + * when CPG is enabled + */ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); + } else { + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); + } + + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + + set(uncore, GEN6_RC_SLEEP, 0); + + /* + * 2c: Program Coarse Power Gating Policies. + * + * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we + * use instead is a more conservative estimate for the maximum time + * it takes us to service a CS interrupt and submit a new ELSP - that + * is the time which the GPU is idle waiting for the CPU to select the + * next request to execute. If the idle hysteresis is less than that + * interrupt service latency, the hardware will automatically gate + * the power well and we will then incur the wake up cost on top of + * the service latency. A similar guide from plane_state is that we + * do not want the enable hysteresis to less than the wakeup latency. + * + * igt/gem_exec_nop/sequential provides a rough estimate for the + * service latency, and puts it around 10us for Broadwell (and other + * big core) and around 40us for Broxton (and other low power cores). + * [Note that for legacy ringbuffer submission, this is less than 1us!] + * However, the wakeup latency on Broxton is closer to 100us. To be + * conservative, we have to factor in a context switch on top (due + * to ksoftirqd). + */ + set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); + set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); + + /* 3a: Enable RC6 */ + set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + + /* WaRsUseTimeoutMode:cnl (pre-prod) */ + if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0)) + rc6_mode = GEN7_RC_CTL_TO_MODE; + else + rc6_mode = GEN6_RC_CTL_EI_MODE(1); + + set(uncore, GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + rc6_mode); + + /* + * WaRsDisableCoarsePowerGating:skl,cnl + * - Render/Media PG need to be disabled with RC6. 
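The thresholds and hysteresis values above are written in the hardware's ~1.28 us tick (1280 ns), as the inline comments note; the exact meaning of each register is platform specific, so the sketch below only does the unit conversion, with rounding behaviour chosen for illustration:

#include <stdint.h>
#include <stdio.h>

#define RC6_TICK_NS 1280u       /* one hardware count ~= 1.28 us */

static uint32_t us_to_rc6_ticks(uint32_t us)
{
        return (uint64_t)us * 1000 / RC6_TICK_NS;
}

static uint32_t rc6_ticks_to_us(uint32_t ticks)
{
        return (uint64_t)ticks * RC6_TICK_NS / 1000;
}

int main(void)
{
        /* e.g. the gen9 GEN6_RC6_THRESHOLD value of 37500 counts */
        printf("37500 ticks ~= %u us\n", (unsigned)rc6_ticks_to_us(37500));
        printf("50 ms ~= %u ticks\n", (unsigned)us_to_rc6_ticks(50 * 1000));
        return 0;
}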
+ */ + if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6))) + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); +} + +static void gen8_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ + + /* 3: Enable RC6 */ + set(uncore, GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN7_RC_CTL_TO_MODE | + GEN6_RC_CTL_RC6_ENABLE); +} + +static void gen6_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 rc6vids, rc6_mask; + int ret; + + set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); + set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); + + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC1e_THRESHOLD, 1000); + if (IS_IVYBRIDGE(i915)) + set(uncore, GEN6_RC6_THRESHOLD, 125000); + else + set(uncore, GEN6_RC6_THRESHOLD, 50000); + set(uncore, GEN6_RC6p_THRESHOLD, 150000); + set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */ + + /* We don't use those on Haswell */ + rc6_mask = GEN6_RC_CTL_RC6_ENABLE; + if (HAS_RC6p(i915)) + rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; + if (HAS_RC6pp(i915)) + rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; + set(uncore, GEN6_RC_CONTROL, + rc6_mask | + GEN6_RC_CTL_EI_MODE(1) | + GEN6_RC_CTL_HW_ENABLE); + + rc6vids = 0; + ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, + &rc6vids, NULL); + if (IS_GEN(i915, 6) && ret) { + DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); + } else if (IS_GEN(i915, 6) && + (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { + DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", + GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); + rc6vids &= 0xffff00; + rc6vids |= GEN6_ENCODE_RC6_VID(450); + ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); + if (ret) + DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); + } +} + +/* Check that the pcbr address is not empty. 
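chv_rc6_init() below only has to fix up PCBR when the BIOS left it zero: it carves the power-context save area out of the top of stolen memory and writes back a 4 KiB-aligned address that must still fit in 32 bits. The address math in isolation; the stolen-memory end used here is invented for the sketch:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t dsm_end = 0xdfffffffull;       /* invented end of stolen memory */
        uint64_t pctx_size = 32 * 1024;         /* CHV power context: 32 KiB */
        uint64_t paddr = dsm_end + 1 - pctx_size;
        uint64_t pcbr = paddr & ~4095ull;       /* PCBR wants 4 KiB alignment */

        assert(pcbr <= UINT32_MAX);             /* must fit in the 32-bit register */
        printf("PCBR = 0x%llx\n", (unsigned long long)pcbr);
        return 0;
}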
*/ +static int chv_rc6_init(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + resource_size_t pctx_paddr, paddr; + resource_size_t pctx_size = 32 * SZ_1K; + u32 pcbr; + + pcbr = intel_uncore_read(uncore, VLV_PCBR); + if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size; + GEM_BUG_ON(paddr > U32_MAX); + + pctx_paddr = (paddr & ~4095); + intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); + } + + return 0; +} + +static int vlv_rc6_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_gem_object *pctx; + resource_size_t pctx_paddr; + resource_size_t pctx_size = 24 * SZ_1K; + u32 pcbr; + + pcbr = intel_uncore_read(uncore, VLV_PCBR); + if (pcbr) { + /* BIOS set it up already, grab the pre-alloc'd space */ + resource_size_t pcbr_offset; + + pcbr_offset = (pcbr & ~4095) - i915->dsm.start; + pctx = i915_gem_object_create_stolen_for_preallocated(i915, + pcbr_offset, + I915_GTT_OFFSET_NONE, + pctx_size); + if (IS_ERR(pctx)) + return PTR_ERR(pctx); + + goto out; + } + + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + + /* + * From the Gunit register HAS: + * The Gfx driver is expected to program this register and ensure + * proper allocation within Gfx stolen memory. For example, this + * register should be programmed such than the PCBR range does not + * overlap with other ranges, such as the frame buffer, protected + * memory, or any other relevant ranges. + */ + pctx = i915_gem_object_create_stolen(i915, pctx_size); + if (IS_ERR(pctx)) { + DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); + return PTR_ERR(pctx); + } + + GEM_BUG_ON(range_overflows_t(u64, + i915->dsm.start, + pctx->stolen->start, + U32_MAX)); + pctx_paddr = i915->dsm.start + pctx->stolen->start; + intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); + +out: + rc6->pctx = pctx; + return 0; +} + +static void chv_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2a: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_SLEEP, 0); + + /* TO threshold set to 500 us (0x186 * 1.28 us) */ + set(uncore, GEN6_RC6_THRESHOLD, 0x186); + + /* Allows RC6 residency counter to work */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + /* 3: Enable RC6 */ + set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE); +} + +static void vlv_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); + + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GEN6_RC6_THRESHOLD, 0x557); + + /* Allows RC6 residency counter to work */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC0_COUNT_EN | + VLV_RENDER_RC0_COUNT_EN | 
+ VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + set(uncore, GEN6_RC_CONTROL, + GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); +} + +static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_private *i915 = rc6_to_i915(rc6); + u32 rc6_ctx_base, rc_ctl, rc_sw_target; + bool enable_rc6 = true; + + rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL); + rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE); + rc_sw_target &= RC_SW_TARGET_STATE_MASK; + rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT; + DRM_DEBUG_DRIVER("BIOS enabled RC states: " + "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", + onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), + onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), + rc_sw_target); + + if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) { + DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); + enable_rc6 = false; + } + + /* + * The exact context size is not known for BXT, so assume a page size + * for this check. + */ + rc6_ctx_base = + intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK; + if (!(rc6_ctx_base >= i915->dsm_reserved.start && + rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) { + DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); + enable_rc6 = false; + } + + if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) { + DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) || + !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) || + !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) { + DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) { + DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) { + DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); + enable_rc6 = false; + } + + return enable_rc6; +} + +static bool rc6_supported(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!HAS_RC6(i915)) + return false; + + if (intel_vgpu_active(i915)) + return false; + + if (is_mock_gt(rc6_to_gt(rc6))) + return false; + + if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) { + dev_notice(i915->drm.dev, + "RC6 and powersaving disabled by BIOS\n"); + return false; + } + + return true; +} + +static void rpm_get(struct intel_rc6 *rc6) +{ + GEM_BUG_ON(rc6->wakeref); + pm_runtime_get_sync(&rc6_to_i915(rc6)->drm.pdev->dev); + rc6->wakeref = true; +} + +static void rpm_put(struct intel_rc6 *rc6) +{ + GEM_BUG_ON(!rc6->wakeref); + pm_runtime_put(&rc6_to_i915(rc6)->drm.pdev->dev); + rc6->wakeref = false; +} + +static bool intel_rc6_ctx_corrupted(struct intel_rc6 *rc6) +{ + return !intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO); +} + +static void intel_rc6_ctx_wa_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return; + + if (intel_rc6_ctx_corrupted(rc6)) { + DRM_INFO("RC6 context corrupted, disabling runtime power management\n"); + rc6->ctx_corrupted = true; + } +} + +/** + * intel_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA + * @rc6: 
rc6 state + * + * Perform any steps needed to re-init the RC6 CTX WA after system resume. + */ +void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6) +{ + if (rc6->ctx_corrupted && !intel_rc6_ctx_corrupted(rc6)) { + DRM_INFO("RC6 context restored, re-enabling runtime power management\n"); + rc6->ctx_corrupted = false; + } +} + +/** + * intel_rc6_ctx_wa_check - check for a new RC6 CTX corruption + * @rc6: rc6 state + * + * Check if an RC6 CTX corruption has happened since the last check and if so + * disable RC6 and runtime power management. +*/ +void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return; + + if (rc6->ctx_corrupted) + return; + + if (!intel_rc6_ctx_corrupted(rc6)) + return; + + DRM_NOTE("RC6 context corruption, disabling runtime power management\n"); + + intel_rc6_disable(rc6); + rc6->ctx_corrupted = true; + + return; +} + +static void __intel_rc6_disable(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + if (INTEL_GEN(i915) >= 9) + set(uncore, GEN9_PG_ENABLE, 0); + set(uncore, GEN6_RC_CONTROL, 0); + set(uncore, GEN6_RC_STATE, 0); + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); +} + +void intel_rc6_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + int err; + + /* Disable runtime-pm until we can save the GPU state with rc6 pctx */ + rpm_get(rc6); + + if (!rc6_supported(rc6)) + return; + + intel_rc6_ctx_wa_init(rc6); + + if (IS_CHERRYVIEW(i915)) + err = chv_rc6_init(rc6); + else if (IS_VALLEYVIEW(i915)) + err = vlv_rc6_init(rc6); + else + err = 0; + + /* Sanitize rc6, ensure it is disabled before we are ready. */ + __intel_rc6_disable(rc6); + + rc6->supported = err == 0; +} + +void intel_rc6_sanitize(struct intel_rc6 *rc6) +{ + if (rc6->enabled) { /* unbalanced suspend/resume */ + rpm_get(rc6); + rc6->enabled = false; + } + + if (rc6->supported) + __intel_rc6_disable(rc6); +} + +void intel_rc6_enable(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + if (!rc6->supported) + return; + + GEM_BUG_ON(rc6->enabled); + + if (rc6->ctx_corrupted) + return; + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + if (IS_CHERRYVIEW(i915)) + chv_rc6_enable(rc6); + else if (IS_VALLEYVIEW(i915)) + vlv_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 11) + gen11_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 9) + gen9_rc6_enable(rc6); + else if (IS_BROADWELL(i915)) + gen8_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 6) + gen6_rc6_enable(rc6); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + /* rc6 is ready, runtime-pm is go! */ + rpm_put(rc6); + rc6->enabled = true; +} + +void intel_rc6_disable(struct intel_rc6 *rc6) +{ + if (!rc6->enabled) + return; + + rpm_get(rc6); + rc6->enabled = false; + + __intel_rc6_disable(rc6); +} + +void intel_rc6_fini(struct intel_rc6 *rc6) +{ + struct drm_i915_gem_object *pctx; + + intel_rc6_disable(rc6); + + pctx = fetch_and_zero(&rc6->pctx); + if (pctx) + i915_gem_object_put(pctx); + + if (rc6->wakeref) + rpm_put(rc6); +} + +static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg) +{ + u32 lower, upper, tmp; + int loop = 2; + + /* + * The register accessed do not need forcewake. We borrow + * uncore lock to prevent concurrent access to range reg. 
+ */ + lockdep_assert_held(&uncore->lock); + + /* + * vlv and chv residency counters are 40 bits in width. + * With a control bit, we can choose between upper or lower + * 32bit window into this counter. + * + * Although we always use the counter in high-range mode elsewhere, + * userspace may attempt to read the value before rc6 is initialised, + * before we have set the default VLV_COUNTER_CONTROL value. So always + * set the high bit to be safe. + */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + upper = intel_uncore_read_fw(uncore, reg); + do { + tmp = upper; + + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); + lower = intel_uncore_read_fw(uncore, reg); + + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + upper = intel_uncore_read_fw(uncore, reg); + } while (upper != tmp && --loop); + + /* + * Everywhere else we always use VLV_COUNTER_CONTROL with the + * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set + * now. + */ + + return lower | (u64)upper << 8; +} + +u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + u64 time_hw, prev_hw, overflow_hw; + unsigned int fw_domains; + unsigned long flags; + unsigned int i; + u32 mul, div; + + if (!rc6->supported) + return 0; + + /* + * Store previous hw counter values for counter wrap-around handling. + * + * There are only four interesting registers and they live next to each + * other so we can use the relative address, compared to the smallest + * one as the index into driver storage. + */ + i = (i915_mmio_reg_offset(reg) - + i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32); + if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency))) + return 0; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + mul = 1000000; + div = i915->czclk_freq; + overflow_hw = BIT_ULL(40); + time_hw = vlv_residency_raw(uncore, reg); + } else { + /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ + if (IS_GEN9_LP(i915)) { + mul = 10000; + div = 12; + } else { + mul = 1280; + div = 1; + } + + overflow_hw = BIT_ULL(32); + time_hw = intel_uncore_read_fw(uncore, reg); + } + + /* + * Counter wrap handling. + * + * But relying on a sufficient frequency of queries otherwise counters + * can still wrap. + */ + prev_hw = rc6->prev_hw_residency[i]; + rc6->prev_hw_residency[i] = time_hw; + + /* RC6 delta from last sample. */ + if (time_hw >= prev_hw) + time_hw -= prev_hw; + else + time_hw += overflow_hw - prev_hw; + + /* Add delta to RC6 extended raw driver copy. 
*/ + time_hw += rc6->cur_residency[i]; + rc6->cur_residency[i] = time_hw; + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irqrestore(&uncore->lock, flags); + + return mul_u64_u32_div(time_hw, mul, div); +} + +u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg) +{ + return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000); +} diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h new file mode 100644 index 000000000000..1370f6834a4c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RC6_H +#define INTEL_RC6_H + +#include "i915_reg.h" + +struct intel_engine_cs; +struct intel_rc6; + +void intel_rc6_init(struct intel_rc6 *rc6); +void intel_rc6_fini(struct intel_rc6 *rc6); + +void intel_rc6_sanitize(struct intel_rc6 *rc6); +void intel_rc6_enable(struct intel_rc6 *rc6); +void intel_rc6_disable(struct intel_rc6 *rc6); + +u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg); +u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg); + +void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6); +void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6); + +#endif /* INTEL_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h new file mode 100644 index 000000000000..89ad5697a8d4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RC6_TYPES_H +#define INTEL_RC6_TYPES_H + +#include <linux/spinlock.h> +#include <linux/types.h> + +#include "intel_engine_types.h" + +struct drm_i915_gem_object; + +struct intel_rc6 { + u64 prev_hw_residency[4]; + u64 cur_residency[4]; + + struct drm_i915_gem_object *pctx; + + bool supported : 1; + bool enabled : 1; + bool wakeref : 1; + bool ctx_corrupted : 1; +}; + +#endif /* INTEL_RC6_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index 6d05f9c64178..c4edc35e7d89 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -27,6 +27,7 @@ #include "i915_drv.h" #include "intel_renderstate.h" +#include "intel_ring.h" struct intel_renderstate { const struct intel_renderstate_rodata *rodata; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 8cea42379dd7..f03e000051c1 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -282,14 +282,14 @@ static int gen6_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct intel_engine_cs *engine; - const u32 hw_engine_mask[] = { + static const u32 hw_engine_mask[] = { [RCS0] = GEN6_GRDOM_RENDER, [BCS0] = GEN6_GRDOM_BLT, [VCS0] = GEN6_GRDOM_MEDIA, [VCS1] = GEN8_GRDOM_MEDIA2, [VECS0] = GEN6_GRDOM_VECS, }; + struct intel_engine_cs *engine; u32 hw_mask; if (engine_mask == ALL_ENGINES) { @@ -298,7 +298,7 @@ static int gen6_reset_engines(struct intel_gt *gt, intel_engine_mask_t tmp; hw_mask = 0; - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; } @@ -307,7 +307,7 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, 
hw_mask); } -static u32 gen11_lock_sfc(struct intel_engine_cs *engine) +static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask) { struct intel_uncore *uncore = engine->uncore; u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access; @@ -316,6 +316,7 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine) i915_reg_t sfc_usage; u32 sfc_usage_bit; u32 sfc_reset_bit; + int ret; switch (engine->class) { case VIDEO_DECODE_CLASS: @@ -350,27 +351,33 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine) } /* - * Tell the engine that a software reset is going to happen. The engine - * will then try to force lock the SFC (if currently locked, it will - * remain so until we tell the engine it is safe to unlock; if currently - * unlocked, it will ignore this and all new lock requests). If SFC - * ends up being locked to the engine we want to reset, we have to reset - * it as well (we will unlock it once the reset sequence is completed). + * If the engine is using a SFC, tell the engine that a software reset + * is going to happen. The engine will then try to force lock the SFC. + * If SFC ends up being locked to the engine we want to reset, we have + * to reset it as well (we will unlock it once the reset sequence is + * completed). */ + if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)) + return 0; + rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); - if (__intel_wait_for_register_fw(uncore, - sfc_forced_lock_ack, - sfc_forced_lock_ack_bit, - sfc_forced_lock_ack_bit, - 1000, 0, NULL)) { - DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); + ret = __intel_wait_for_register_fw(uncore, + sfc_forced_lock_ack, + sfc_forced_lock_ack_bit, + sfc_forced_lock_ack_bit, + 1000, 0, NULL); + + /* Was the SFC released while we were trying to lock it? */ + if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)) return 0; - } - if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit) - return sfc_reset_bit; + if (ret) { + DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); + return ret; + } + *hw_mask |= sfc_reset_bit; return 0; } @@ -406,7 +413,7 @@ static int gen11_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - const u32 hw_engine_mask[] = { + static const u32 hw_engine_mask[] = { [RCS0] = GEN11_GRDOM_RENDER, [BCS0] = GEN11_GRDOM_BLT, [VCS0] = GEN11_GRDOM_MEDIA, @@ -425,17 +432,26 @@ static int gen11_reset_engines(struct intel_gt *gt, hw_mask = GEN11_GRDOM_FULL; } else { hw_mask = 0; - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; - hw_mask |= gen11_lock_sfc(engine); + ret = gen11_lock_sfc(engine, &hw_mask); + if (ret) + goto sfc_unlock; } } ret = gen6_hw_domain_reset(gt, hw_mask); +sfc_unlock: + /* + * We unlock the SFC based on the lock status and not the result of + * gen11_lock_sfc to make sure that we clean properly if something + * wrong happened during the lock (e.g. lock acquired after timeout + * expiration). 
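The reworked gen11_lock_sfc() above follows a lock/recheck pattern: skip engines whose SFC is idle, request the forced lock, wait for the ack, re-read the usage bit, and only then mark the SFC for reset alongside the engine. A condensed sketch of that control flow; read_reg()/write_reg()/wait_for_bit() are stand-in helpers, not uncore calls, and all bit values are illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t regs[3];
enum { SFC_USAGE, SFC_FORCED_LOCK, SFC_FORCED_LOCK_ACK };

static uint32_t read_reg(int r)             { return regs[r]; }
static void write_reg(int r, uint32_t v)    { regs[r] = v; }
static bool wait_for_bit(int r, uint32_t b) { regs[r] |= b; return true; } /* pretend the HW acks */

static int lock_sfc(uint32_t usage_bit, uint32_t lock_bit, uint32_t ack_bit,
                    uint32_t reset_bit, uint32_t *hw_mask)
{
        bool acked;

        if (!(read_reg(SFC_USAGE) & usage_bit))
                return 0;                       /* SFC idle: nothing to lock or reset */

        write_reg(SFC_FORCED_LOCK, read_reg(SFC_FORCED_LOCK) | lock_bit);
        acked = wait_for_bit(SFC_FORCED_LOCK_ACK, ack_bit);

        /* was the SFC released while we were trying to lock it? */
        if (!(read_reg(SFC_USAGE) & usage_bit))
                return 0;

        if (!acked)
                return -1;                      /* propagate the timeout to the caller */

        *hw_mask |= reset_bit;                  /* reset the SFC along with the engine */
        return 0;
}

int main(void)
{
        uint32_t hw_mask = 0;

        regs[SFC_USAGE] = 0x1;                  /* pretend the SFC is busy */
        lock_sfc(0x1, 0x1, 0x1, 0x2, &hw_mask);
        printf("hw_mask = 0x%x\n", (unsigned)hw_mask);
        return 0;
}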
+ */ if (engine_mask != ALL_ENGINES) - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) + for_each_engine_masked(engine, gt, engine_mask, tmp) gen11_unlock_sfc(engine); return ret; @@ -494,7 +510,7 @@ static int gen8_reset_engines(struct intel_gt *gt, intel_engine_mask_t tmp; int ret; - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) goto skip_reset; @@ -520,19 +536,30 @@ static int gen8_reset_engines(struct intel_gt *gt, ret = gen6_reset_engines(gt, engine_mask, retry); skip_reset: - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) + for_each_engine_masked(engine, gt, engine_mask, tmp) gen8_engine_reset_cancel(engine); return ret; } +static int mock_reset(struct intel_gt *gt, + intel_engine_mask_t mask, + unsigned int retry) +{ + return 0; +} + typedef int (*reset_func)(struct intel_gt *, intel_engine_mask_t engine_mask, unsigned int retry); -static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) +static reset_func intel_get_gpu_reset(const struct intel_gt *gt) { - if (INTEL_GEN(i915) >= 8) + struct drm_i915_private *i915 = gt->i915; + + if (is_mock_gt(gt)) + return mock_reset; + else if (INTEL_GEN(i915) >= 8) return gen8_reset_engines; else if (INTEL_GEN(i915) >= 6) return gen6_reset_engines; @@ -555,7 +582,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) int ret = -ETIMEDOUT; int retry; - reset = intel_get_gpu_reset(gt->i915); + reset = intel_get_gpu_reset(gt); if (!reset) return -ENODEV; @@ -575,17 +602,20 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) return ret; } -bool intel_has_gpu_reset(struct drm_i915_private *i915) +bool intel_has_gpu_reset(const struct intel_gt *gt) { if (!i915_modparams.reset) return NULL; - return intel_get_gpu_reset(i915); + return intel_get_gpu_reset(gt); } -bool intel_has_reset_engine(struct drm_i915_private *i915) +bool intel_has_reset_engine(const struct intel_gt *gt) { - return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2; + if (i915_modparams.reset < 2) + return false; + + return INTEL_INFO(gt->i915)->has_reset_engine; } int intel_reset_guc(struct intel_gt *gt) @@ -652,7 +682,7 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt) intel_engine_mask_t awake = 0; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (intel_engine_pm_get_if_awake(engine)) awake |= engine->mask; reset_prepare_engine(engine); @@ -682,10 +712,10 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) if (err) return err; - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) __intel_engine_reset(engine, stalled_mask & engine->mask); - i915_gem_restore_fences(gt->i915); + i915_gem_restore_fences(gt->ggtt); return err; } @@ -695,7 +725,7 @@ static void reset_finish_engine(struct intel_engine_cs *engine) engine->reset.finish(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); - intel_engine_signal_breadcrumbs(engine); + intel_engine_breadcrumbs_irq(engine); } static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) @@ -703,7 +733,7 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { reset_finish_engine(engine); if (awake & engine->mask) 
intel_engine_pm_put(engine); @@ -739,7 +769,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) { struct drm_printer p = drm_debug_printer(__func__); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) intel_engine_dump(engine, &p, "%s\n", engine->name); } @@ -756,7 +786,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) __intel_gt_reset(gt, ALL_ENGINES); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) engine->submit_request = nop_submit_request; /* @@ -768,7 +798,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) set_bit(I915_WEDGED, >->reset.flags); /* Mark all executing requests as skipped */ - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) engine->cancel_requests(engine); reset_finish(gt, awake); @@ -781,7 +811,7 @@ void intel_gt_set_wedged(struct intel_gt *gt) intel_wakeref_t wakeref; mutex_lock(>->reset.mutex); - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) __intel_gt_set_wedged(gt); mutex_unlock(>->reset.mutex); } @@ -791,11 +821,13 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) struct intel_gt_timelines *timelines = >->timelines; struct intel_timeline *tl; unsigned long flags; + bool ok; if (!test_bit(I915_WEDGED, >->reset.flags)) return true; - if (!gt->scratch) /* Never full initialised, recovery impossible */ + /* Never fully initialised, recovery impossible */ + if (test_bit(I915_WEDGED_ON_INIT, >->reset.flags)) return false; GEM_TRACE("start\n"); @@ -812,10 +844,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) */ spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry(tl, &timelines->active_list, link) { - struct i915_request *rq; + struct dma_fence *fence; - rq = i915_active_request_get_unlocked(&tl->last_request); - if (!rq) + fence = i915_active_fence_get(&tl->last_request); + if (!fence) continue; spin_unlock_irqrestore(&timelines->lock, flags); @@ -827,8 +859,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) * (I915_FENCE_TIMEOUT) so this wait should not be unbounded * in the worst case. */ - dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); + dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT); + dma_fence_put(fence); /* Restart iteration after droping lock */ spin_lock_irqsave(&timelines->lock, flags); @@ -836,7 +868,18 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) } spin_unlock_irqrestore(&timelines->lock, flags); - intel_gt_sanitize(gt, false); + /* We must reset pending GPU events before restoring our submission */ + ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */ + if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + ok = __intel_gt_reset(gt, ALL_ENGINES) == 0; + if (!ok) { + /* + * Warn CI about the unrecoverable wedged condition. + * Time for a reboot. + */ + add_taint_for_CI(TAINT_WARN); + return false; + } /* * Undo nop_submit_request. 
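The timeline walk above uses a common kernel pattern: take a reference to the object of interest while holding the spinlock, drop the lock for the potentially long wait, then retake the lock and restart the iteration from the head, since the list may have changed while the lock was released. A user-space approximation of that shape; the refcount here is only sketched and wait_for() is a trivial stub:

#include <pthread.h>
#include <stdio.h>

struct item {
        struct item *next;
        int refcount;
        int done;               /* stands in for "fence already signalled" */
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *head;

static void wait_for(struct item *it)   /* stand-in for dma_fence_default_wait() */
{
        it->done = 1;
}

static void drain_all(void)
{
        struct item *it;

restart:
        pthread_mutex_lock(&lock);
        for (it = head; it; it = it->next) {
                if (it->done)
                        continue;

                it->refcount++;                 /* keep it alive across the unlock */
                pthread_mutex_unlock(&lock);

                wait_for(it);                   /* potentially long, so done unlocked */

                pthread_mutex_lock(&lock);
                it->refcount--;
                pthread_mutex_unlock(&lock);
                goto restart;                   /* list may have changed: start over */
        }
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        struct item a = { .next = NULL }, b = { .next = &a };

        head = &b;
        drain_all();
        printf("all items drained\n");
        return 0;
}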
We prevent all new i915 requests from @@ -891,7 +934,7 @@ static int resume(struct intel_gt *gt) enum intel_engine_id id; int ret; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { ret = engine->resume(engine); if (ret) return ret; @@ -941,7 +984,7 @@ void intel_gt_reset(struct intel_gt *gt, awake = reset_prepare(gt); - if (!intel_has_gpu_reset(gt->i915)) { + if (!intel_has_gpu_reset(gt)) { if (i915_modparams.reset) dev_err(gt->i915->drm.dev, "GPU reset not supported\n"); else @@ -970,7 +1013,7 @@ void intel_gt_reset(struct intel_gt *gt, * was running at the time of the reset (i.e. we weren't VT * switched away). */ - ret = i915_gem_init_hw(gt->i915); + ret = intel_gt_init_hw(gt); if (ret) { DRM_ERROR("Failed to initialise HW following reset (%d)\n", ret); @@ -981,8 +1024,6 @@ void intel_gt_reset(struct intel_gt *gt, if (ret) goto taint; - intel_gt_queue_hangcheck(gt); - finish: reset_finish(gt, awake); unlock: @@ -1149,7 +1190,7 @@ void intel_gt_handle_error(struct intel_gt *gt, * isn't the case at least when we get here by doing a * simulated reset via debugfs, so get an RPM reference. */ - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); engine_mask &= INTEL_INFO(gt->i915)->engine_mask; @@ -1162,8 +1203,8 @@ void intel_gt_handle_error(struct intel_gt *gt, * Try engine reset when available. We fall back to full reset if * single reset fails. */ - if (intel_has_reset_engine(gt->i915) && !intel_gt_is_wedged(gt)) { - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { + if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)) @@ -1191,7 +1232,7 @@ void intel_gt_handle_error(struct intel_gt *gt, synchronize_rcu_expedited(); /* Prevent any other reset-engine attempt. */ - for_each_engine(engine, gt->i915, tmp) { + for_each_engine(engine, gt, tmp) { while (test_and_set_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)) wait_on_bit(>->reset.flags, @@ -1201,7 +1242,7 @@ void intel_gt_handle_error(struct intel_gt *gt, intel_gt_reset_global(gt, engine_mask, msg); - for_each_engine(engine, gt->i915, tmp) + for_each_engine(engine, gt, tmp) clear_bit_unlock(I915_RESET_ENGINE + engine->id, >->reset.flags); clear_bit_unlock(I915_RESET_BACKOFF, >->reset.flags); @@ -1209,7 +1250,7 @@ void intel_gt_handle_error(struct intel_gt *gt, wake_up_all(>->reset.queue); out: - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); } int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu) @@ -1251,10 +1292,6 @@ int intel_gt_terminally_wedged(struct intel_gt *gt) if (!test_bit(I915_RESET_BACKOFF, >->reset.flags)) return -EIO; - /* XXX intel_reset_finish() still takes struct_mutex!!! */ - if (mutex_is_locked(>->i915->drm.struct_mutex)) - return -EAGAIN; - if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, >->reset.flags))) @@ -1263,6 +1300,14 @@ int intel_gt_terminally_wedged(struct intel_gt *gt) return intel_gt_is_wedged(gt) ? 
-EIO : 0; } +void intel_gt_set_wedged_on_init(struct intel_gt *gt) +{ + BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES > + I915_WEDGED_ON_INIT); + intel_gt_set_wedged(gt); + set_bit(I915_WEDGED_ON_INIT, >->reset.flags); +} + void intel_gt_init_reset(struct intel_gt *gt) { init_waitqueue_head(>->reset.queue); @@ -1306,4 +1351,5 @@ void __intel_fini_wedge(struct intel_wedge_me *w) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_reset.c" +#include "selftest_hangcheck.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 52c00199e069..8e8d5f761166 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -14,7 +14,6 @@ #include "intel_engine_types.h" #include "intel_reset_types.h" -struct drm_i915_private; struct i915_request; struct intel_engine_cs; struct intel_gt; @@ -45,6 +44,12 @@ void intel_gt_set_wedged(struct intel_gt *gt); bool intel_gt_unset_wedged(struct intel_gt *gt); int intel_gt_terminally_wedged(struct intel_gt *gt); +/* + * There's no unset_wedged_on_init paired with this one. + * Once we're wedged on init, there's no going back. + */ +void intel_gt_set_wedged_on_init(struct intel_gt *gt); + int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask); int intel_reset_guc(struct intel_gt *gt); @@ -68,10 +73,13 @@ void __intel_fini_wedge(struct intel_wedge_me *w); static inline bool __intel_reset_failed(const struct intel_reset *reset) { + GEM_BUG_ON(test_bit(I915_WEDGED_ON_INIT, &reset->flags) ? + !test_bit(I915_WEDGED, &reset->flags) : false); + return unlikely(test_bit(I915_WEDGED, &reset->flags)); } -bool intel_has_gpu_reset(struct drm_i915_private *i915); -bool intel_has_reset_engine(struct drm_i915_private *i915); +bool intel_has_gpu_reset(const struct intel_gt *gt); +bool intel_has_reset_engine(const struct intel_gt *gt); #endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h index 31968356e0c0..f43bc3a0fe4f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset_types.h +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -29,11 +29,17 @@ struct intel_reset { * we set the #I915_WEDGED bit. Prior to command submission, e.g. * i915_request_alloc(), this bit is checked and the sequence * aborted (with -EIO reported to userspace) if set. + * + * #I915_WEDGED_ON_INIT - If we fail to initialize the GPU we can no + * longer use the GPU - similar to #I915_WEDGED bit. The difference in + * in the way we're handling "forced" unwedged (e.g. through debugfs), + * which is not allowed in case we failed to initialize. 
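The new I915_WEDGED_ON_INIT bit (defined just below) sits one position under I915_WEDGED at the top of the flags word, while the per-engine reset bits grow upward from I915_RESET_ENGINE; the BUILD_BUG_ON above guarantees the two ranges can never meet. A minimal illustration of that layout, with an invented engine count and a C11 static assert:

#include <assert.h>
#include <limits.h>
#include <stdio.h>

#define RESET_BACKOFF   0
#define RESET_MODESET   1
#define RESET_ENGINE    2                       /* first of NUM_ENGINES consecutive bits */
#define NUM_ENGINES     8                       /* invented for the sketch */
#define WEDGED_ON_INIT  (sizeof(unsigned long) * CHAR_BIT - 2)
#define WEDGED          (sizeof(unsigned long) * CHAR_BIT - 1)

/* engine bits grow upward, sticky bits sit at the top: they must not meet */
static_assert(RESET_ENGINE + NUM_ENGINES <= WEDGED_ON_INIT,
              "per-engine reset bits collide with the wedged bits");

int main(void)
{
        unsigned long flags = 0;

        flags |= 1ul << (RESET_ENGINE + 3);     /* engine 3 is being reset */
        flags |= 1ul << WEDGED_ON_INIT;         /* init failed: wedged for good */
        flags |= 1ul << WEDGED;

        printf("flags = %#lx\n", flags);
        return 0;
}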
*/ unsigned long flags; #define I915_RESET_BACKOFF 0 #define I915_RESET_MODESET 1 #define I915_RESET_ENGINE 2 +#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 2) #define I915_WEDGED (BITS_PER_LONG - 1) struct mutex mutex; /* serialises wedging/unwedging */ diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c new file mode 100644 index 000000000000..ece20504d240 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -0,0 +1,323 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "gem/i915_gem_object.h" +#include "i915_drv.h" +#include "i915_vma.h" +#include "intel_engine.h" +#include "intel_ring.h" +#include "intel_timeline.h" + +unsigned int intel_ring_update_space(struct intel_ring *ring) +{ + unsigned int space; + + space = __intel_ring_space(ring->head, ring->emit, ring->size); + + ring->space = space; + return space; +} + +int intel_ring_pin(struct intel_ring *ring) +{ + struct i915_vma *vma = ring->vma; + unsigned int flags; + void *addr; + int ret; + + if (atomic_fetch_inc(&ring->pin_count)) + return 0; + + flags = PIN_GLOBAL; + + /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ + flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + + if (vma->obj->stolen) + flags |= PIN_MAPPABLE; + else + flags |= PIN_HIGH; + + ret = i915_vma_pin(vma, 0, 0, flags); + if (unlikely(ret)) + goto err_unpin; + + if (i915_vma_is_map_and_fenceable(vma)) + addr = (void __force *)i915_vma_pin_iomap(vma); + else + addr = i915_gem_object_pin_map(vma->obj, + i915_coherent_map_type(vma->vm->i915)); + if (IS_ERR(addr)) { + ret = PTR_ERR(addr); + goto err_ring; + } + + i915_vma_make_unshrinkable(vma); + + GEM_BUG_ON(ring->vaddr); + ring->vaddr = addr; + + return 0; + +err_ring: + i915_vma_unpin(vma); +err_unpin: + atomic_dec(&ring->pin_count); + return ret; +} + +void intel_ring_reset(struct intel_ring *ring, u32 tail) +{ + tail = intel_ring_wrap(ring, tail); + ring->tail = tail; + ring->head = tail; + ring->emit = tail; + intel_ring_update_space(ring); +} + +void intel_ring_unpin(struct intel_ring *ring) +{ + struct i915_vma *vma = ring->vma; + + if (!atomic_dec_and_test(&ring->pin_count)) + return; + + /* Discard any unused bytes beyond that submitted to hw. */ + intel_ring_reset(ring, ring->emit); + + i915_vma_unset_ggtt_write(vma); + if (i915_vma_is_map_and_fenceable(vma)) + i915_vma_unpin_iomap(vma); + else + i915_gem_object_unpin_map(vma->obj); + + GEM_BUG_ON(!ring->vaddr); + ring->vaddr = NULL; + + i915_vma_unpin(vma); + i915_vma_make_purgeable(vma); +} + +static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) +{ + struct i915_address_space *vm = &ggtt->vm; + struct drm_i915_private *i915 = vm->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = ERR_PTR(-ENODEV); + if (i915_ggtt_has_aperture(ggtt)) + obj = i915_gem_object_create_stolen(i915, size); + if (IS_ERR(obj)) + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + /* + * Mark ring buffers as read-only from GPU side (so no stray overwrites) + * if supported by the platform's GGTT. 
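intel_ring_pin()/intel_ring_unpin() above rely on a common refcounted-pin idiom: atomic_fetch_inc() returns the old count, so only the 0 -> 1 caller performs the actual mapping, and only the matching dec-and-test caller unmaps. A user-space sketch of that idiom with C11 atomics; map_ring()/unmap_ring() are placeholders for the real pin work:

#include <stdatomic.h>
#include <stdio.h>

struct ring {
        atomic_int pin_count;
        void *vaddr;
};

static void *map_ring(struct ring *r)  { return (void *)r; }    /* placeholder */
static void unmap_ring(struct ring *r) { (void)r; }             /* placeholder */

static int ring_pin(struct ring *r)
{
        /* only the first pinner (old count == 0) does the real work */
        if (atomic_fetch_add(&r->pin_count, 1))
                return 0;

        r->vaddr = map_ring(r);
        if (!r->vaddr) {
                atomic_fetch_sub(&r->pin_count, 1);     /* undo on failure */
                return -1;
        }
        return 0;
}

static void ring_unpin(struct ring *r)
{
        /* only the last unpin (count hits 0) tears the mapping down */
        if (atomic_fetch_sub(&r->pin_count, 1) != 1)
                return;

        unmap_ring(r);
        r->vaddr = NULL;
}

int main(void)
{
        struct ring r = { .pin_count = 0 };

        ring_pin(&r);
        ring_pin(&r);           /* nested pin: no second mapping */
        ring_unpin(&r);
        ring_unpin(&r);         /* last unpin unmaps */
        printf("pin_count = %d\n", atomic_load(&r.pin_count));
        return 0;
}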
+ */ + if (vm->has_read_only) + i915_gem_object_set_readonly(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return vma; +} + +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size) +{ + struct drm_i915_private *i915 = engine->i915; + struct intel_ring *ring; + struct i915_vma *vma; + + GEM_BUG_ON(!is_power_of_2(size)); + GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); + + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + if (!ring) + return ERR_PTR(-ENOMEM); + + kref_init(&ring->ref); + ring->size = size; + + /* + * Workaround an erratum on the i830 which causes a hang if + * the TAIL pointer points to within the last 2 cachelines + * of the buffer. + */ + ring->effective_size = size; + if (IS_I830(i915) || IS_I845G(i915)) + ring->effective_size -= 2 * CACHELINE_BYTES; + + intel_ring_update_space(ring); + + vma = create_ring_vma(engine->gt->ggtt, size); + if (IS_ERR(vma)) { + kfree(ring); + return ERR_CAST(vma); + } + ring->vma = vma; + + return ring; +} + +void intel_ring_free(struct kref *ref) +{ + struct intel_ring *ring = container_of(ref, typeof(*ring), ref); + + i915_vma_put(ring->vma); + kfree(ring); +} + +static noinline int +wait_for_space(struct intel_ring *ring, + struct intel_timeline *tl, + unsigned int bytes) +{ + struct i915_request *target; + long timeout; + + if (intel_ring_update_space(ring) >= bytes) + return 0; + + GEM_BUG_ON(list_empty(&tl->requests)); + list_for_each_entry(target, &tl->requests, link) { + if (target->ring != ring) + continue; + + /* Would completion of this request free enough space? */ + if (bytes <= __intel_ring_space(target->postfix, + ring->emit, ring->size)) + break; + } + + if (GEM_WARN_ON(&target->link == &tl->requests)) + return -ENOSPC; + + timeout = i915_request_wait(target, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) + return timeout; + + i915_request_retire_upto(target); + + intel_ring_update_space(ring); + GEM_BUG_ON(ring->space < bytes); + return 0; +} + +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) +{ + struct intel_ring *ring = rq->ring; + const unsigned int remain_usable = ring->effective_size - ring->emit; + const unsigned int bytes = num_dwords * sizeof(u32); + unsigned int need_wrap = 0; + unsigned int total_bytes; + u32 *cs; + + /* Packets must be qword aligned. */ + GEM_BUG_ON(num_dwords & 1); + + total_bytes = bytes + rq->reserved_space; + GEM_BUG_ON(total_bytes > ring->effective_size); + + if (unlikely(total_bytes > remain_usable)) { + const int remain_actual = ring->size - ring->emit; + + if (bytes > remain_usable) { + /* + * Not enough space for the basic request. So need to + * flush out the remainder and then wait for + * base + reserved. + */ + total_bytes += remain_actual; + need_wrap = remain_actual | 1; + } else { + /* + * The base request will fit but the reserved space + * falls off the end. So we don't need an immediate + * wrap and only need to effectively wait for the + * reserved size from the start of ringbuffer. + */ + total_bytes = rq->reserved_space + remain_actual; + } + } + + if (unlikely(total_bytes > ring->space)) { + int ret; + + /* + * Space is reserved in the ringbuffer for finalising the + * request, as that cannot be allowed to fail. During request + * finalisation, reserved_space is set to 0 to stop the + * overallocation and the assumption is that then we never need + * to wait (which has the risk of failing with EINTR). 
+ * + * See also i915_request_alloc() and i915_request_add(). + */ + GEM_BUG_ON(!rq->reserved_space); + + ret = wait_for_space(ring, + i915_request_timeline(rq), + total_bytes); + if (unlikely(ret)) + return ERR_PTR(ret); + } + + if (unlikely(need_wrap)) { + need_wrap &= ~1; + GEM_BUG_ON(need_wrap > ring->space); + GEM_BUG_ON(ring->emit + need_wrap > ring->size); + GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); + + /* Fill the tail with MI_NOOP */ + memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); + ring->space -= need_wrap; + ring->emit = 0; + } + + GEM_BUG_ON(ring->emit > ring->size - bytes); + GEM_BUG_ON(ring->space < bytes); + cs = ring->vaddr + ring->emit; + GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); + ring->emit += bytes; + ring->space -= bytes; + + return cs; +} + +/* Align the ring tail to a cacheline boundary */ +int intel_ring_cacheline_align(struct i915_request *rq) +{ + int num_dwords; + void *cs; + + num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); + if (num_dwords == 0) + return 0; + + num_dwords = CACHELINE_DWORDS - num_dwords; + GEM_BUG_ON(num_dwords & 1); + + cs = intel_ring_begin(rq, num_dwords); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); + intel_ring_advance(rq, cs + num_dwords); + + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h new file mode 100644 index 000000000000..ea2839d9e044 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -0,0 +1,131 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RING_H +#define INTEL_RING_H + +#include "i915_gem.h" /* GEM_BUG_ON */ +#include "i915_request.h" +#include "intel_ring_types.h" + +struct intel_engine_cs; + +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size); + +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords); +int intel_ring_cacheline_align(struct i915_request *rq); + +unsigned int intel_ring_update_space(struct intel_ring *ring); + +int intel_ring_pin(struct intel_ring *ring); +void intel_ring_unpin(struct intel_ring *ring); +void intel_ring_reset(struct intel_ring *ring, u32 tail); + +void intel_ring_free(struct kref *ref); + +static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) +{ + kref_get(&ring->ref); + return ring; +} + +static inline void intel_ring_put(struct intel_ring *ring) +{ + kref_put(&ring->ref, intel_ring_free); +} + +static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) +{ + /* Dummy function. + * + * This serves as a placeholder in the code so that the reader + * can compare against the preceding intel_ring_begin() and + * check that the number of dwords emitted matches the space + * reserved for the command packet (i.e. the value passed to + * intel_ring_begin()). 
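intel_ring_begin() above juggles three quantities: the bytes requested, the per-request reserved space, and the distance to the physical end of the power-of-two ring; if the request cannot fit in the remaining usable space, the tail is padded with MI_NOOPs and emission wraps to offset 0. The free-space rule itself is the one used by the __intel_ring_space() helper further below: always keep one cacheline between tail and head. A simplified sketch of that arithmetic, with no request reservation and a 64-byte cacheline assumed:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CACHELINE       64u

struct ring {
        uint8_t *vaddr;
        uint32_t size;          /* power of two */
        uint32_t head;          /* last known hardware read position */
        uint32_t emit;          /* software write position */
};

/* Free bytes, always leaving one cacheline between tail and head. */
static uint32_t ring_space(const struct ring *r)
{
        return (r->head - r->emit - CACHELINE) & (r->size - 1);
}

/* Reserve 'bytes' of contiguous space, wrapping with NOOP padding if needed. */
static void *ring_begin(struct ring *r, uint32_t bytes)
{
        uint32_t remain = r->size - r->emit;    /* bytes to the physical end */
        void *out;

        if (bytes > remain) {
                if (ring_space(r) < remain + bytes)
                        return NULL;            /* caller would have to wait for the GPU */

                memset(r->vaddr + r->emit, 0, remain);  /* MI_NOOP is 0 */
                r->emit = 0;
        } else if (ring_space(r) < bytes) {
                return NULL;
        }

        out = r->vaddr + r->emit;
        r->emit = (r->emit + bytes) & (r->size - 1);
        return out;
}

int main(void)
{
        static uint8_t backing[4096];
        struct ring r = { .vaddr = backing, .size = sizeof(backing) };

        printf("space = %u\n", (unsigned)ring_space(&r));  /* 4032: size minus one cacheline */
        printf("begin(64) -> %p\n", ring_begin(&r, 64));
        return 0;
}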
+ */ + GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); +} + +static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) +{ + return pos & (ring->size - 1); +} + +static inline bool +intel_ring_offset_valid(const struct intel_ring *ring, + unsigned int pos) +{ + if (pos & -ring->size) /* must be strictly within the ring */ + return false; + + if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ + return false; + + return true; +} + +static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) +{ + /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ + u32 offset = addr - rq->ring->vaddr; + GEM_BUG_ON(offset > rq->ring->size); + return intel_ring_wrap(rq->ring, offset); +} + +static inline void +assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) +{ + GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); + + /* + * "Ring Buffer Use" + * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 + * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 + * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + * + * We use ring->head as the last known location of the actual RING_HEAD, + * it may have advanced but in the worst case it is equally the same + * as ring->head and so we should never program RING_TAIL to advance + * into the same cacheline as ring->head. + */ +#define cacheline(a) round_down(a, CACHELINE_BYTES) + GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && + tail < ring->head); +#undef cacheline +} + +static inline unsigned int +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) +{ + /* Whilst writes to the tail are strictly order, there is no + * serialisation between readers and the writers. The tail may be + * read by i915_request_retire() just as it is being updated + * by execlists, as although the breadcrumb is complete, the context + * switch hasn't been seen. + */ + assert_ring_tail_valid(ring, tail); + ring->tail = tail; + return tail; +} + +static inline unsigned int +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) +{ + /* + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." 
+ */ + GEM_BUG_ON(!is_power_of_2(size)); + return (head - tail - CACHELINE_BYTES) & (size - 1); +} + +#endif /* INTEL_RING_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index bacaa7bb8c9a..a47d5a7c32c9 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -40,6 +40,7 @@ #include "intel_gt_irq.h" #include "intel_gt_pm_irq.h" #include "intel_reset.h" +#include "intel_ring.h" #include "intel_workarounds.h" /* Rough estimate of the typical request size, performing a flush, @@ -47,16 +48,6 @@ */ #define LEGACY_REQUEST_SIZE 200 -unsigned int intel_ring_update_space(struct intel_ring *ring) -{ - unsigned int space; - - space = __intel_ring_space(ring->head, ring->emit, ring->size); - - ring->space = space; - return space; -} - static int gen2_render_ring_flush(struct i915_request *rq, u32 mode) { @@ -322,7 +313,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); - *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = i915_request_active_timeline(rq)->hwsp_offset | + PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -425,7 +417,7 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); - *cs++ = rq->timeline->hwsp_offset; + *cs++ = i915_request_active_timeline(rq)->hwsp_offset; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -439,8 +431,8 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; @@ -459,8 +451,8 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; @@ -930,6 +922,7 @@ static void cancel_requests(struct intel_engine_cs *engine) static void i9xx_submit_request(struct i915_request *request) { i915_request_submit(request); + wmb(); /* paranoid flush writes out of the WCB before mmio */ ENGINE_WRITE(request->engine, RING_TAIL, intel_ring_set_tail(request->ring, request->tail)); @@ -937,8 +930,8 @@ static void i9xx_submit_request(struct i915_request *request) static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + 
GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH; @@ -960,8 +953,8 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH; @@ -1184,167 +1177,9 @@ i915_emit_bb_start(struct i915_request *rq, return 0; } -int intel_ring_pin(struct intel_ring *ring) -{ - struct i915_vma *vma = ring->vma; - unsigned int flags; - void *addr; - int ret; - - if (atomic_fetch_inc(&ring->pin_count)) - return 0; - - flags = PIN_GLOBAL; - - /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - - if (vma->obj->stolen) - flags |= PIN_MAPPABLE; - else - flags |= PIN_HIGH; - - ret = i915_vma_pin(vma, 0, 0, flags); - if (unlikely(ret)) - goto err_unpin; - - if (i915_vma_is_map_and_fenceable(vma)) - addr = (void __force *)i915_vma_pin_iomap(vma); - else - addr = i915_gem_object_pin_map(vma->obj, - i915_coherent_map_type(vma->vm->i915)); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_ring; - } - - i915_vma_make_unshrinkable(vma); - - GEM_BUG_ON(ring->vaddr); - ring->vaddr = addr; - - return 0; - -err_ring: - i915_vma_unpin(vma); -err_unpin: - atomic_dec(&ring->pin_count); - return ret; -} - -void intel_ring_reset(struct intel_ring *ring, u32 tail) -{ - tail = intel_ring_wrap(ring, tail); - ring->tail = tail; - ring->head = tail; - ring->emit = tail; - intel_ring_update_space(ring); -} - -void intel_ring_unpin(struct intel_ring *ring) -{ - struct i915_vma *vma = ring->vma; - - if (!atomic_dec_and_test(&ring->pin_count)) - return; - - /* Discard any unused bytes beyond that submitted to hw. */ - intel_ring_reset(ring, ring->emit); - - i915_vma_unset_ggtt_write(vma); - if (i915_vma_is_map_and_fenceable(vma)) - i915_vma_unpin_iomap(vma); - else - i915_gem_object_unpin_map(vma->obj); - - GEM_BUG_ON(!ring->vaddr); - ring->vaddr = NULL; - - i915_vma_unpin(vma); - i915_vma_make_purgeable(vma); -} - -static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) -{ - struct i915_address_space *vm = &ggtt->vm; - struct drm_i915_private *i915 = vm->i915; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - - obj = i915_gem_object_create_stolen(i915, size); - if (!obj) - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - /* - * Mark ring buffers as read-only from GPU side (so no stray overwrites) - * if supported by the platform's GGTT. 
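The intel_ring_pin()/intel_ring_unpin() bodies above follow a first-pin-maps, last-unpin-unmaps pattern around an atomic pin count; a minimal user-space sketch of that idiom, with a hypothetical object and stand-in map/unmap stubs, might look like this.

    /* Sketch of the first-pin-maps / last-unpin-unmaps pattern; the object and stubs are hypothetical. */
    #include <stdatomic.h>
    #include <stdio.h>

    struct obj {
        atomic_int pin_count;
        int mapped;                         /* stands in for ring->vaddr */
    };

    static void obj_pin(struct obj *o)
    {
        if (atomic_fetch_add(&o->pin_count, 1))
            return;                         /* already pinned elsewhere: nothing to do */
        o->mapped = 1;                      /* only the first pin performs the mapping */
    }

    static void obj_unpin(struct obj *o)
    {
        if (atomic_fetch_sub(&o->pin_count, 1) != 1)
            return;                         /* not the last unpin */
        o->mapped = 0;                      /* only the last unpin tears the mapping down */
    }

    int main(void)
    {
        struct obj o = { 0 };

        obj_pin(&o);
        obj_pin(&o);
        obj_unpin(&o);
        printf("after one unpin: mapped=%d\n", o.mapped);  /* still 1 */
        obj_unpin(&o);
        printf("after last unpin: mapped=%d\n", o.mapped); /* 0 */
        return 0;
    }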
- */ - if (vm->has_read_only) - i915_gem_object_set_readonly(obj); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return vma; -} - -struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size) -{ - struct drm_i915_private *i915 = engine->i915; - struct intel_ring *ring; - struct i915_vma *vma; - - GEM_BUG_ON(!is_power_of_2(size)); - GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); - - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - return ERR_PTR(-ENOMEM); - - kref_init(&ring->ref); - - ring->size = size; - /* Workaround an erratum on the i830 which causes a hang if - * the TAIL pointer points to within the last 2 cachelines - * of the buffer. - */ - ring->effective_size = size; - if (IS_I830(i915) || IS_I845G(i915)) - ring->effective_size -= 2 * CACHELINE_BYTES; - - intel_ring_update_space(ring); - - vma = create_ring_vma(engine->gt->ggtt, size); - if (IS_ERR(vma)) { - kfree(ring); - return ERR_CAST(vma); - } - ring->vma = vma; - - return ring; -} - -void intel_ring_free(struct kref *ref) -{ - struct intel_ring *ring = container_of(ref, typeof(*ring), ref); - - i915_vma_close(ring->vma); - i915_vma_put(ring->vma); - - kfree(ring); -} - static void __ring_context_fini(struct intel_context *ce) { - i915_gem_object_put(ce->state->obj); + i915_vma_put(ce->state); } static void ring_context_destroy(struct kref *ref) @@ -1609,7 +1444,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) struct intel_engine_cs *signaller; *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { + for_each_engine(signaller, engine->gt, id) { if (signaller == engine) continue; @@ -1663,7 +1498,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) i915_reg_t last_reg = {}; /* keep gcc quiet */ *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { + for_each_engine(signaller, engine->gt, id) { if (signaller == engine) continue; @@ -1676,7 +1511,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) /* Insert a delay before the next switch! */ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = intel_gt_scratch_offset(rq->engine->gt, + *cs++ = intel_gt_scratch_offset(engine->gt, INTEL_GT_SCRATCH_FIELD_DEFAULT); *cs++ = MI_NOOP; } @@ -1741,46 +1576,22 @@ static int remap_l3(struct i915_request *rq) static int switch_context(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; - struct i915_address_space *vm = vm_alias(rq->hw_context); - unsigned int unwind_mm = 0; - u32 hw_flags = 0; + struct intel_context *ce = rq->hw_context; + struct i915_address_space *vm = vm_alias(ce); int ret; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); if (vm) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - int loops; - - /* - * Baytail takes a little more convincing that it really needs - * to reload the PD between contexts. It is not just a little - * longer, as adding more stalls after the load_pd_dir (i.e. - * adding a long loop around flush_pd_dir) is not as effective - * as reloading the PD umpteen times. 32 is derived from - * experimentation (gem_exec_parallel/fds) and has no good - * explanation. 
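A small worked example of the effective-size computation in intel_engine_create_ring() above: on i830/i845 the last two cachelines are kept off-limits, so a hypothetical 16 KiB ring exposes slightly less usable space. The helper name and ring size below are illustrative only.

    /* Sketch: usable ring bytes with the i830/i845 two-cacheline reservation (hypothetical size). */
    #include <stdio.h>

    #define CACHELINE_BYTES 64u

    static unsigned int effective_size(unsigned int size, int is_i830_or_i845)
    {
        unsigned int eff = size;

        if (is_i830_or_i845)
            eff -= 2 * CACHELINE_BYTES;     /* keep TAIL out of the last two cachelines */
        return eff;
    }

    int main(void)
    {
        printf("16 KiB ring on i830:   %u usable bytes\n", effective_size(16384, 1)); /* 16256 */
        printf("16 KiB ring elsewhere: %u usable bytes\n", effective_size(16384, 0)); /* 16384 */
        return 0;
    }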
- */ - loops = 1; - if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915)) - loops = 32; - - do { - ret = load_pd_dir(rq, ppgtt); - if (ret) - goto err; - } while (--loops); - - if (ppgtt->pd_dirty_engines & engine->mask) { - unwind_mm = engine->mask; - ppgtt->pd_dirty_engines &= ~unwind_mm; - hw_flags = MI_FORCE_RESTORE; - } + ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm)); + if (ret) + return ret; } - if (rq->hw_context->state) { - GEM_BUG_ON(engine->id != RCS0); + if (ce->state) { + u32 hw_flags; + + GEM_BUG_ON(rq->engine->id != RCS0); /* * The kernel context(s) is treated as pure scratch and is not @@ -1789,22 +1600,25 @@ static int switch_context(struct i915_request *rq) * as nothing actually executes using the kernel context; it * is purely used for flushing user contexts. */ + hw_flags = 0; if (i915_gem_context_is_kernel(rq->gem_context)) hw_flags = MI_RESTORE_INHIBIT; ret = mi_set_context(rq, hw_flags); if (ret) - goto err_mm; + return ret; } if (vm) { + struct intel_engine_cs *engine = rq->engine; + ret = engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) - goto err_mm; + return ret; ret = flush_pd_dir(rq); if (ret) - goto err_mm; + return ret; /* * Not only do we need a full barrier (post-sync write) after @@ -1816,24 +1630,18 @@ static int switch_context(struct i915_request *rq) */ ret = engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) - goto err_mm; + return ret; ret = engine->emit_flush(rq, EMIT_FLUSH); if (ret) - goto err_mm; + return ret; } ret = remap_l3(rq); if (ret) - goto err_mm; + return ret; return 0; - -err_mm: - if (unwind_mm) - i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm; -err: - return ret; } static int ring_request_alloc(struct i915_request *request) @@ -1841,7 +1649,7 @@ static int ring_request_alloc(struct i915_request *request) int ret; GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); - GEM_BUG_ON(request->timeline->has_initial_breadcrumb); + GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb); /* * Flush enough space to reduce the likelihood of waiting after @@ -1863,146 +1671,6 @@ static int ring_request_alloc(struct i915_request *request) return 0; } -static noinline int -wait_for_space(struct intel_ring *ring, - struct intel_timeline *tl, - unsigned int bytes) -{ - struct i915_request *target; - long timeout; - - if (intel_ring_update_space(ring) >= bytes) - return 0; - - GEM_BUG_ON(list_empty(&tl->requests)); - list_for_each_entry(target, &tl->requests, link) { - if (target->ring != ring) - continue; - - /* Would completion of this request free enough space? */ - if (bytes <= __intel_ring_space(target->postfix, - ring->emit, ring->size)) - break; - } - - if (GEM_WARN_ON(&target->link == &tl->requests)) - return -ENOSPC; - - timeout = i915_request_wait(target, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (timeout < 0) - return timeout; - - i915_request_retire_upto(target); - - intel_ring_update_space(ring); - GEM_BUG_ON(ring->space < bytes); - return 0; -} - -u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) -{ - struct intel_ring *ring = rq->ring; - const unsigned int remain_usable = ring->effective_size - ring->emit; - const unsigned int bytes = num_dwords * sizeof(u32); - unsigned int need_wrap = 0; - unsigned int total_bytes; - u32 *cs; - - /* Packets must be qword aligned. 
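The removed intel_ring_begin() whose body follows decides when a request must wrap back to the start of the ring; below is a compact user-space sketch of that decision with hypothetical sizes. The low bit the kernel folds into need_wrap as a flag is left out for clarity.

    /* Sketch of the wrap decision in the ring-begin path; all sizes are hypothetical. */
    #include <stdio.h>

    struct wrap_decision {
        unsigned int total_bytes;   /* space that must be free before emitting */
        unsigned int need_wrap;     /* NOOP padding to write before wrapping (0 = no wrap) */
    };

    static struct wrap_decision
    decide(unsigned int size, unsigned int effective_size, unsigned int emit,
           unsigned int bytes, unsigned int reserved)
    {
        const unsigned int remain_usable = effective_size - emit;
        const unsigned int remain_actual = size - emit;
        struct wrap_decision d = { bytes + reserved, 0 };

        if (d.total_bytes > remain_usable) {
            if (bytes > remain_usable) {
                /* even the packet alone does not fit: pad to the end and wrap */
                d.total_bytes += remain_actual;
                d.need_wrap = remain_actual;
            } else {
                /* the packet fits, only the reservation spills past the end */
                d.total_bytes = reserved + remain_actual;
            }
        }
        return d;
    }

    int main(void)
    {
        /* 4 KiB ring, 128 reserved bytes, cursor 32 bytes from the end, 64-byte packet */
        struct wrap_decision d = decide(4096, 4096, 4064, 64, 128);

        printf("total %u bytes, wrap padding %u bytes\n", d.total_bytes, d.need_wrap); /* 224, 32 */
        return 0;
    }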
*/ - GEM_BUG_ON(num_dwords & 1); - - total_bytes = bytes + rq->reserved_space; - GEM_BUG_ON(total_bytes > ring->effective_size); - - if (unlikely(total_bytes > remain_usable)) { - const int remain_actual = ring->size - ring->emit; - - if (bytes > remain_usable) { - /* - * Not enough space for the basic request. So need to - * flush out the remainder and then wait for - * base + reserved. - */ - total_bytes += remain_actual; - need_wrap = remain_actual | 1; - } else { - /* - * The base request will fit but the reserved space - * falls off the end. So we don't need an immediate - * wrap and only need to effectively wait for the - * reserved size from the start of ringbuffer. - */ - total_bytes = rq->reserved_space + remain_actual; - } - } - - if (unlikely(total_bytes > ring->space)) { - int ret; - - /* - * Space is reserved in the ringbuffer for finalising the - * request, as that cannot be allowed to fail. During request - * finalisation, reserved_space is set to 0 to stop the - * overallocation and the assumption is that then we never need - * to wait (which has the risk of failing with EINTR). - * - * See also i915_request_alloc() and i915_request_add(). - */ - GEM_BUG_ON(!rq->reserved_space); - - ret = wait_for_space(ring, rq->timeline, total_bytes); - if (unlikely(ret)) - return ERR_PTR(ret); - } - - if (unlikely(need_wrap)) { - need_wrap &= ~1; - GEM_BUG_ON(need_wrap > ring->space); - GEM_BUG_ON(ring->emit + need_wrap > ring->size); - GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); - - /* Fill the tail with MI_NOOP */ - memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); - ring->space -= need_wrap; - ring->emit = 0; - } - - GEM_BUG_ON(ring->emit > ring->size - bytes); - GEM_BUG_ON(ring->space < bytes); - cs = ring->vaddr + ring->emit; - GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); - ring->emit += bytes; - ring->space -= bytes; - - return cs; -} - -/* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct i915_request *rq) -{ - int num_dwords; - void *cs; - - num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); - if (num_dwords == 0) - return 0; - - num_dwords = CACHELINE_DWORDS - num_dwords; - GEM_BUG_ON(num_dwords & 1); - - cs = intel_ring_begin(rq, num_dwords); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); - intel_ring_advance(rq, cs); - - GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); - return 0; -} - static void gen6_bsd_submit_request(struct i915_request *request) { struct intel_uncore *uncore = request->engine->uncore; diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h new file mode 100644 index 000000000000..d9f17f38e0cc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RING_TYPES_H +#define INTEL_RING_TYPES_H + +#include <linux/atomic.h> +#include <linux/kref.h> +#include <linux/types.h> + +/* + * Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, + * but keeps the logic simple. Indeed, the whole purpose of this macro is just + * to give some inclination as to some of the magic values used in the various + * workarounds! 
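A quick worked example of the padding computed by intel_ring_cacheline_align() above: the number of MI_NOOP dwords needed to reach the next 64-byte boundary, for hypothetical emit offsets.

    /* Sketch: MI_NOOP dwords needed to align the tail to a cacheline (hypothetical emit offsets). */
    #include <stdio.h>

    #define CACHELINE_BYTES  64u
    #define CACHELINE_DWORDS (CACHELINE_BYTES / 4u)

    static unsigned int align_pad_dwords(unsigned int emit)
    {
        unsigned int partial = (emit & (CACHELINE_BYTES - 1)) / 4u;

        return partial ? CACHELINE_DWORDS - partial : 0;   /* already aligned: nothing to pad */
    }

    int main(void)
    {
        printf("emit=0x48 -> pad %u dwords\n", align_pad_dwords(0x48)); /* 2 dwords in, pad 14 */
        printf("emit=0x40 -> pad %u dwords\n", align_pad_dwords(0x40)); /* aligned, pad 0 */
        return 0;
    }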
+ */ +#define CACHELINE_BYTES 64 +#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) + +struct i915_vma; + +struct intel_ring { + struct kref ref; + struct i915_vma *vma; + void *vaddr; + + /* + * As we have two types of rings, one global to the engine used + * by ringbuffer submission and those that are exclusive to a + * context used by execlists, we have to play safe and allow + * atomic updates to the pin_count. However, the actual pinning + * of the context is either done during initialisation for + * ringbuffer submission or serialised as part of the context + * pinning for execlists, and so we do not need a mutex ourselves + * to serialise intel_ring_pin/intel_ring_unpin. + */ + atomic_t pin_count; + + u32 head; + u32 tail; + u32 emit; + + u32 space; + u32 size; + u32 effective_size; +}; + +#endif /* INTEL_RING_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c new file mode 100644 index 000000000000..20d6ee148afc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -0,0 +1,1872 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_gt_pm_irq.h" +#include "intel_rps.h" +#include "intel_sideband.h" +#include "../../../platform/x86/intel_ips.h" + +/* + * Lock protecting IPS related data structures + */ +static DEFINE_SPINLOCK(mchdev_lock); + +static struct intel_gt *rps_to_gt(struct intel_rps *rps) +{ + return container_of(rps, struct intel_gt, rps); +} + +static struct drm_i915_private *rps_to_i915(struct intel_rps *rps) +{ + return rps_to_gt(rps)->i915; +} + +static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) +{ + return rps_to_gt(rps)->uncore; +} + +static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) +{ + return mask & ~rps->pm_intrmsk_mbz; +} + +static u32 rps_pm_mask(struct intel_rps *rps, u8 val) +{ + u32 mask = 0; + + /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ + if (val > rps->min_freq_softlimit) + mask |= (GEN6_PM_RP_UP_EI_EXPIRED | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + + if (val < rps->max_freq_softlimit) + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; + + mask &= rps->pm_events; + + return rps_pm_sanitize_mask(rps, ~mask); +} + +static void rps_reset_ei(struct intel_rps *rps) +{ + memset(&rps->ei, 0, sizeof(rps->ei)); +} + +static void rps_enable_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + rps_reset_ei(rps); + + if (IS_VALLEYVIEW(gt->i915)) + /* WaGsvRC0ResidencyMethod:vlv */ + rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; + else + rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_enable_irq(gt, rps->pm_events); + spin_unlock_irq(>->irq_lock); + + intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, + rps_pm_mask(rps, rps->cur_freq)); +} + +static void gen6_rps_reset_interrupts(struct intel_rps *rps) +{ + gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS); +} + +static void gen11_rps_reset_interrupts(struct intel_rps *rps) +{ + while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM)) + ; +} + +static void rps_reset_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + spin_lock_irq(>->irq_lock); + if (INTEL_GEN(gt->i915) >= 11) + gen11_rps_reset_interrupts(rps); + else + gen6_rps_reset_interrupts(rps); + + rps->pm_iir = 0; + 
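rps_pm_mask() above chooses which up/down interrupts to leave unmasked depending on where the requested frequency sits between the softlimits; here is a standalone sketch of that selection. The event bit positions and limit values are hypothetical, and the pm_intrmsk_mbz sanitisation is omitted.

    /* Sketch of the up/down interrupt selection in rps_pm_mask(); bits and limits are hypothetical. */
    #include <stdint.h>
    #include <stdio.h>

    #define EV_UP_EI_EXPIRED  (1u << 2)
    #define EV_DOWN_TIMEOUT   (1u << 4)
    #define EV_UP_THRESHOLD   (1u << 5)
    #define EV_DOWN_THRESHOLD (1u << 6)

    static uint32_t pm_mask(uint8_t val, uint8_t min_soft, uint8_t max_soft, uint32_t pm_events)
    {
        uint32_t mask = 0;

        if (val > min_soft)   /* room to clock down: keep the "down" interrupts */
            mask |= EV_UP_EI_EXPIRED | EV_DOWN_THRESHOLD | EV_DOWN_TIMEOUT;
        if (val < max_soft)   /* room to clock up: keep the "up" interrupts */
            mask |= EV_UP_EI_EXPIRED | EV_UP_THRESHOLD;

        mask &= pm_events;
        return ~mask;         /* the register holds the interrupts to *mask*, hence the inversion */
    }

    int main(void)
    {
        uint32_t events = EV_UP_THRESHOLD | EV_DOWN_THRESHOLD | EV_DOWN_TIMEOUT;

        printf("at the floor: 0x%08x\n", pm_mask(5, 5, 20, events));
        printf("mid range:    0x%08x\n", pm_mask(12, 5, 20, events));
        return 0;
    }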
spin_unlock_irq(>->irq_lock); +} + +static void rps_disable_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + rps->pm_events = 0; + + intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, + rps_pm_sanitize_mask(rps, ~0u)); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); + spin_unlock_irq(>->irq_lock); + + intel_synchronize_irq(gt->i915); + + /* + * Now that we will not be generating any more work, flush any + * outstanding tasks. As we are called on the RPS idle path, + * we will reset the GPU to minimum frequencies, so the current + * state of the worker can be discarded. + */ + cancel_work_sync(&rps->work); + + rps_reset_interrupts(rps); +} + +static const struct cparams { + u16 i; + u16 t; + u16 m; + u16 c; +} cparams[] = { + { 1, 1333, 301, 28664 }, + { 1, 1066, 294, 24460 }, + { 1, 800, 294, 25192 }, + { 0, 1333, 276, 27605 }, + { 0, 1066, 276, 27605 }, + { 0, 800, 231, 23784 }, +}; + +static void gen5_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + u8 fmax, fmin, fstart; + u32 rgvmodectl; + int c_m, i; + + if (i915->fsb_freq <= 3200) + c_m = 0; + else if (i915->fsb_freq <= 4800) + c_m = 1; + else + c_m = 2; + + for (i = 0; i < ARRAY_SIZE(cparams); i++) { + if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) { + rps->ips.m = cparams[i].m; + rps->ips.c = cparams[i].c; + break; + } + } + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + + /* Set up min, max, and cur for interrupt handling */ + fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; + fmin = (rgvmodectl & MEMMODE_FMIN_MASK); + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; + DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", + fmax, fmin, fstart); + + rps->min_freq = fmax; + rps->max_freq = fmin; + + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; +} + +static unsigned long +__ips_chipset_val(struct intel_ips *ips) +{ + struct intel_uncore *uncore = + rps_to_uncore(container_of(ips, struct intel_rps, ips)); + unsigned long now = jiffies_to_msecs(jiffies), dt; + unsigned long result; + u64 total, delta; + + lockdep_assert_held(&mchdev_lock); + + /* + * Prevent division-by-zero if we are asking too fast. + * Also, we don't get interesting results if we are polling + * faster than once in 10ms, so just return the saved value + * in such cases. 
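__ips_chipset_val(), whose body continues below, turns the energy-counter delta into a power estimate using the m/c coefficients from the cparams table above; a worked sketch with one of those rows and hypothetical counter and time values:

    /* Sketch: chipset power estimate from energy-counter deltas, using the cparams row
     * m = 301, c = 28664 and hypothetical counter/time values. */
    #include <stdint.h>
    #include <stdio.h>

    static unsigned long chipset_power(uint64_t delta_counts, unsigned long dt_ms,
                                       unsigned long m, unsigned long c)
    {
        /* same shape as div_u64(div_u64(m * delta, dt) + c, 10) in the code below */
        return (unsigned long)((m * delta_counts) / dt_ms + c) / 10;
    }

    int main(void)
    {
        printf("%lu (driver power units)\n", chipset_power(120000, 100, 301, 28664)); /* 38986 */
        return 0;
    }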
+ */ + dt = now - ips->last_time1; + if (dt <= 10) + return ips->chipset_power; + + /* FIXME: handle per-counter overflow */ + total = intel_uncore_read(uncore, DMIEC); + total += intel_uncore_read(uncore, DDREC); + total += intel_uncore_read(uncore, CSIEC); + + delta = total - ips->last_count1; + + result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10); + + ips->last_count1 = total; + ips->last_time1 = now; + + ips->chipset_power = result; + + return result; +} + +static unsigned long ips_mch_val(struct intel_uncore *uncore) +{ + unsigned int m, x, b; + u32 tsfs; + + tsfs = intel_uncore_read(uncore, TSFS); + x = intel_uncore_read8(uncore, TR1); + + b = tsfs & TSFS_INTR_MASK; + m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT; + + return m * x / 127 - b; +} + +static int _pxvid_to_vd(u8 pxvid) +{ + if (pxvid == 0) + return 0; + + if (pxvid >= 8 && pxvid < 31) + pxvid = 31; + + return (pxvid + 2) * 125; +} + +static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid) +{ + const int vd = _pxvid_to_vd(pxvid); + + if (INTEL_INFO(i915)->is_mobile) + return max(vd - 1125, 0); + + return vd; +} + +static void __gen5_ips_update(struct intel_ips *ips) +{ + struct intel_uncore *uncore = + rps_to_uncore(container_of(ips, struct intel_rps, ips)); + u64 now, delta, dt; + u32 count; + + lockdep_assert_held(&mchdev_lock); + + now = ktime_get_raw_ns(); + dt = now - ips->last_time2; + do_div(dt, NSEC_PER_MSEC); + + /* Don't divide by 0 */ + if (dt <= 10) + return; + + count = intel_uncore_read(uncore, GFXEC); + delta = count - ips->last_count2; + + ips->last_count2 = count; + ips->last_time2 = now; + + /* More magic constants... */ + ips->gfx_power = div_u64(delta * 1181, dt * 10); +} + +static void gen5_rps_update(struct intel_rps *rps) +{ + spin_lock_irq(&mchdev_lock); + __gen5_ips_update(&rps->ips); + spin_unlock_irq(&mchdev_lock); +} + +static bool gen5_rps_set(struct intel_rps *rps, u8 val) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u16 rgvswctl; + + lockdep_assert_held(&mchdev_lock); + + rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + if (rgvswctl & MEMCTL_CMD_STS) { + DRM_DEBUG("gpu busy, RCS change rejected\n"); + return false; /* still busy with another command */ + } + + /* Invert the frequency bin into an ips delay */ + val = rps->max_freq - val; + val = rps->min_freq + val; + + rgvswctl = + (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | + (val << MEMCTL_FREQ_SHIFT) | + MEMCTL_SFCAVM; + intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); + intel_uncore_posting_read16(uncore, MEMSWCTL); + + rgvswctl |= MEMCTL_CMD_STS; + intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); + + return true; +} + +static unsigned long intel_pxfreq(u32 vidfreq) +{ + int div = (vidfreq & 0x3f0000) >> 16; + int post = (vidfreq & 0x3000) >> 12; + int pre = (vidfreq & 0x7); + + if (!pre) + return 0; + + return div * 133333 / (pre << post); +} + +static unsigned int init_emon(struct intel_uncore *uncore) +{ + u8 pxw[16]; + int i; + + /* Disable to program */ + intel_uncore_write(uncore, ECR, 0); + intel_uncore_posting_read(uncore, ECR); + + /* Program energy weights for various events */ + intel_uncore_write(uncore, SDEW, 0x15040d00); + intel_uncore_write(uncore, CSIEW0, 0x007f0000); + intel_uncore_write(uncore, CSIEW1, 0x1e220004); + intel_uncore_write(uncore, CSIEW2, 0x04000004); + + for (i = 0; i < 5; i++) + intel_uncore_write(uncore, PEW(i), 0); + for (i = 0; i < 3; i++) + intel_uncore_write(uncore, DEW(i), 0); + + /* Program P-state weights to account for frequency power adjustment */ + for (i = 
0; i < 16; i++) { + u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i)); + unsigned int freq = intel_pxfreq(pxvidfreq); + unsigned int vid = + (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; + unsigned int val; + + val = vid * vid * freq / 1000 * 255; + val /= 127 * 127 * 900; + + pxw[i] = val; + } + /* Render standby states get 0 weight */ + pxw[14] = 0; + pxw[15] = 0; + + for (i = 0; i < 4; i++) { + intel_uncore_write(uncore, PXW(i), + pxw[i * 4 + 0] << 24 | + pxw[i * 4 + 1] << 16 | + pxw[i * 4 + 2] << 8 | + pxw[i * 4 + 3] << 0); + } + + /* Adjust magic regs to magic values (more experimental results) */ + intel_uncore_write(uncore, OGW0, 0); + intel_uncore_write(uncore, OGW1, 0); + intel_uncore_write(uncore, EG0, 0x00007f00); + intel_uncore_write(uncore, EG1, 0x0000000e); + intel_uncore_write(uncore, EG2, 0x000e0000); + intel_uncore_write(uncore, EG3, 0x68000300); + intel_uncore_write(uncore, EG4, 0x42000000); + intel_uncore_write(uncore, EG5, 0x00140031); + intel_uncore_write(uncore, EG6, 0); + intel_uncore_write(uncore, EG7, 0); + + for (i = 0; i < 8; i++) + intel_uncore_write(uncore, PXWL(i), 0); + + /* Enable PMON + select events */ + intel_uncore_write(uncore, ECR, 0x80000019); + + return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK; +} + +static bool gen5_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u8 fstart, vstart; + u32 rgvmodectl; + + spin_lock_irq(&mchdev_lock); + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + + /* Enable temp reporting */ + intel_uncore_write16(uncore, PMMISC, + intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN); + intel_uncore_write16(uncore, TSC1, + intel_uncore_read16(uncore, TSC1) | TSE); + + /* 100ms RC evaluation intervals */ + intel_uncore_write(uncore, RCUPEI, 100000); + intel_uncore_write(uncore, RCDNEI, 100000); + + /* Set max/min thresholds to 90ms and 80ms respectively */ + intel_uncore_write(uncore, RCBMAXAVG, 90000); + intel_uncore_write(uncore, RCBMINAVG, 80000); + + intel_uncore_write(uncore, MEMIHYST, 1); + + /* Set up min, max, and cur for interrupt handling */ + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; + + vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & + PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; + + intel_uncore_write(uncore, + MEMINTREN, + MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); + + intel_uncore_write(uncore, VIDSTART, vstart); + intel_uncore_posting_read(uncore, VIDSTART); + + rgvmodectl |= MEMMODE_SWMODE_EN; + intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); + + if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & + MEMCTL_CMD_STS) == 0, 10)) + DRM_ERROR("stuck trying to change perf mode\n"); + mdelay(1); + + gen5_rps_set(rps, rps->cur_freq); + + rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); + rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); + rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC); + rps->ips.last_time1 = jiffies_to_msecs(jiffies); + + rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); + rps->ips.last_time2 = ktime_get_raw_ns(); + + spin_unlock_irq(&mchdev_lock); + + rps->ips.corr = init_emon(uncore); + + return true; +} + +static void gen5_rps_disable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u16 rgvswctl; + + spin_lock_irq(&mchdev_lock); + + rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + + /* Ack interrupts, disable EFC interrupt */ + intel_uncore_write(uncore, MEMINTREN, + intel_uncore_read(uncore, MEMINTREN) & + ~MEMINT_EVAL_CHG_EN); + 
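The P-state weight loop in init_emon() above folds the decoded voltage and frequency into a single byte per state; a standalone sketch of that weight calculation with hypothetical vid and freq inputs:

    /* Sketch: the P-state energy weight computed in the init_emon() loop above;
     * the vid and freq values are hypothetical. */
    #include <stdio.h>

    static unsigned int pstate_weight(unsigned int vid, unsigned int freq)
    {
        unsigned int val;

        val = vid * vid * freq / 1000 * 255;   /* same expression as the kernel loop */
        val /= 127 * 127 * 900;
        return val;                            /* small enough to fit the PXW byte */
    }

    int main(void)
    {
        printf("vid=80, freq=400000 -> weight %u\n", pstate_weight(80, 400000)); /* 44 */
        return 0;
    }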
intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); + intel_uncore_write(uncore, DEIER, + intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT); + intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT); + intel_uncore_write(uncore, DEIMR, + intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT); + + /* Go back to the starting frequency */ + gen5_rps_set(rps, rps->idle_freq); + mdelay(1); + rgvswctl |= MEMCTL_CMD_STS; + intel_uncore_write(uncore, MEMSWCTL, rgvswctl); + mdelay(1); + + spin_unlock_irq(&mchdev_lock); +} + +static u32 rps_limits(struct intel_rps *rps, u8 val) +{ + u32 limits; + + /* + * Only set the down limit when we've reached the lowest level to avoid + * getting more interrupts, otherwise leave this clear. This prevents a + * race in the hw when coming out of rc6: There's a tiny window where + * the hw runs at the minimal clock before selecting the desired + * frequency, if the down threshold expires in that window we will not + * receive a down interrupt. + */ + if (INTEL_GEN(rps_to_i915(rps)) >= 9) { + limits = rps->max_freq_softlimit << 23; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 14; + } else { + limits = rps->max_freq_softlimit << 24; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 16; + } + + return limits; +} + +static void rps_set_power(struct intel_rps *rps, int new_power) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 threshold_up = 0, threshold_down = 0; /* in % */ + u32 ei_up = 0, ei_down = 0; + + lockdep_assert_held(&rps->power.mutex); + + if (new_power == rps->power.mode) + return; + + /* Note the units here are not exactly 1us, but 1280ns. */ + switch (new_power) { + case LOW_POWER: + /* Upclock if more than 95% busy over 16ms */ + ei_up = 16000; + threshold_up = 95; + + /* Downclock if less than 85% busy over 32ms */ + ei_down = 32000; + threshold_down = 85; + break; + + case BETWEEN: + /* Upclock if more than 90% busy over 13ms */ + ei_up = 13000; + threshold_up = 90; + + /* Downclock if less than 75% busy over 32ms */ + ei_down = 32000; + threshold_down = 75; + break; + + case HIGH_POWER: + /* Upclock if more than 85% busy over 10ms */ + ei_up = 10000; + threshold_up = 85; + + /* Downclock if less than 60% busy over 32ms */ + ei_down = 32000; + threshold_down = 60; + break; + } + + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(i915)) + goto skip_hw_write; + + intel_uncore_write(uncore, GEN6_RP_UP_EI, + GT_INTERVAL_FROM_US(i915, ei_up)); + intel_uncore_write(uncore, GEN6_RP_UP_THRESHOLD, + GT_INTERVAL_FROM_US(i915, + ei_up * threshold_up / 100)); + + intel_uncore_write(uncore, GEN6_RP_DOWN_EI, + GT_INTERVAL_FROM_US(i915, ei_down)); + intel_uncore_write(uncore, GEN6_RP_DOWN_THRESHOLD, + GT_INTERVAL_FROM_US(i915, + ei_down * threshold_down / 100)); + + intel_uncore_write(uncore, GEN6_RP_CONTROL, + (INTEL_GEN(i915) > 9 ? 
0 : GEN6_RP_MEDIA_TURBO) | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + +skip_hw_write: + rps->power.mode = new_power; + rps->power.up_threshold = threshold_up; + rps->power.down_threshold = threshold_down; +} + +static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) +{ + int new_power; + + new_power = rps->power.mode; + switch (rps->power.mode) { + case LOW_POWER: + if (val > rps->efficient_freq + 1 && + val > rps->cur_freq) + new_power = BETWEEN; + break; + + case BETWEEN: + if (val <= rps->efficient_freq && + val < rps->cur_freq) + new_power = LOW_POWER; + else if (val >= rps->rp0_freq && + val > rps->cur_freq) + new_power = HIGH_POWER; + break; + + case HIGH_POWER: + if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && + val < rps->cur_freq) + new_power = BETWEEN; + break; + } + /* Max/min bins are special */ + if (val <= rps->min_freq_softlimit) + new_power = LOW_POWER; + if (val >= rps->max_freq_softlimit) + new_power = HIGH_POWER; + + mutex_lock(&rps->power.mutex); + if (rps->power.interactive) + new_power = HIGH_POWER; + rps_set_power(rps, new_power); + mutex_unlock(&rps->power.mutex); +} + +void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) +{ + mutex_lock(&rps->power.mutex); + if (interactive) { + if (!rps->power.interactive++ && rps->active) + rps_set_power(rps, HIGH_POWER); + } else { + GEM_BUG_ON(!rps->power.interactive); + rps->power.interactive--; + } + mutex_unlock(&rps->power.mutex); +} + +static int gen6_rps_set(struct intel_rps *rps, u8 val) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 swreq; + + if (INTEL_GEN(i915) >= 9) + swreq = GEN9_FREQUENCY(val); + else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + swreq = HSW_FREQUENCY(val); + else + swreq = (GEN6_FREQUENCY(val) | + GEN6_OFFSET(0) | + GEN6_AGGRESSIVE_TURBO); + intel_uncore_write(uncore, GEN6_RPNSWREQ, swreq); + + return 0; +} + +static int vlv_rps_set(struct intel_rps *rps, u8 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int err; + + vlv_punit_get(i915); + err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); + vlv_punit_put(i915); + + return err; +} + +static int rps_set(struct intel_rps *rps, u8 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int err; + + if (INTEL_GEN(i915) < 6) + return 0; + + if (val == rps->last_freq) + return 0; + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + err = vlv_rps_set(rps, val); + else + err = gen6_rps_set(rps, val); + if (err) + return err; + + gen6_rps_set_thresholds(rps, val); + rps->last_freq = val; + + return 0; +} + +void intel_rps_unpark(struct intel_rps *rps) +{ + u8 freq; + + if (!rps->enabled) + return; + + /* + * Use the user's desired frequency as a guide, but for better + * performance, jump directly to RPe as our starting frequency. 
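gen6_rps_set_thresholds() above picks one of three power zones from where the requested frequency sits relative to RPe, RP1 and RP0; a condensed sketch of that state machine follows. The frequencies are hypothetical and the "interactive" override is left out.

    /* Condensed sketch of the LOW/BETWEEN/HIGH zone selection; frequencies are hypothetical. */
    #include <stdio.h>

    enum power_zone { LOW_POWER, BETWEEN, HIGH_POWER };

    static enum power_zone pick_zone(enum power_zone cur, int val, int cur_freq,
                                     int rpe, int rp1, int rp0,
                                     int min_soft, int max_soft)
    {
        enum power_zone next = cur;

        switch (cur) {
        case LOW_POWER:
            if (val > rpe + 1 && val > cur_freq)
                next = BETWEEN;
            break;
        case BETWEEN:
            if (val <= rpe && val < cur_freq)
                next = LOW_POWER;
            else if (val >= rp0 && val > cur_freq)
                next = HIGH_POWER;
            break;
        case HIGH_POWER:
            if (val < (rp1 + rp0) / 2 && val < cur_freq)
                next = BETWEEN;
            break;
        }

        if (val <= min_soft)          /* the extremes always force their zone */
            next = LOW_POWER;
        if (val >= max_soft)
            next = HIGH_POWER;
        return next;
    }

    int main(void)
    {
        /* RPe=9, RP1=11, RP0=20, softlimits 5..20: a jump from 8 to 14 leaves LOW_POWER */
        printf("zone %d\n", pick_zone(LOW_POWER, 14, 8, 9, 11, 20, 5, 20)); /* 1 = BETWEEN */
        return 0;
    }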
+ */ + mutex_lock(&rps->lock); + rps->active = true; + freq = max(rps->cur_freq, rps->efficient_freq), + freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit); + intel_rps_set(rps, freq); + rps->last_adj = 0; + mutex_unlock(&rps->lock); + + if (INTEL_GEN(rps_to_i915(rps)) >= 6) + rps_enable_interrupts(rps); + + if (IS_GEN(rps_to_i915(rps), 5)) + gen5_rps_update(rps); +} + +void intel_rps_park(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (!rps->enabled) + return; + + if (INTEL_GEN(i915) >= 6) + rps_disable_interrupts(rps); + + rps->active = false; + if (rps->last_freq <= rps->idle_freq) + return; + + /* + * The punit delays the write of the frequency and voltage until it + * determines the GPU is awake. During normal usage we don't want to + * waste power changing the frequency if the GPU is sleeping (rc6). + * However, the GPU and driver is now idle and we do not want to delay + * switching to minimum voltage (reducing power whilst idle) as we do + * not expect to be woken in the near future and so must flush the + * change by waking the device. + * + * We choose to take the media powerwell (either would do to trick the + * punit into committing the voltage change) as that takes a lot less + * power than the render powerwell. + */ + intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); + rps_set(rps, rps->idle_freq); + intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); +} + +void intel_rps_boost(struct i915_request *rq) +{ + struct intel_rps *rps = &rq->engine->gt->rps; + unsigned long flags; + + if (i915_request_signaled(rq) || !rps->active) + return; + + /* Serializes with i915_request_retire() */ + spin_lock_irqsave(&rq->lock, flags); + if (!i915_request_has_waitboost(rq) && + !dma_fence_is_signaled_locked(&rq->fence)) { + rq->flags |= I915_REQUEST_WAITBOOST; + + if (!atomic_fetch_inc(&rps->num_waiters) && + READ_ONCE(rps->cur_freq) < rps->boost_freq) + schedule_work(&rps->work); + + atomic_inc(&rps->boosts); + } + spin_unlock_irqrestore(&rq->lock, flags); +} + +int intel_rps_set(struct intel_rps *rps, u8 val) +{ + int err = 0; + + lockdep_assert_held(&rps->lock); + GEM_BUG_ON(val > rps->max_freq); + GEM_BUG_ON(val < rps->min_freq); + + if (rps->active) { + err = rps_set(rps, val); + + /* + * Make sure we continue to get interrupts + * until we hit the minimum or maximum frequencies. 
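intel_rps_set(), whose body continues below, reprograms GEN6_RP_INTERRUPT_LIMITS with the value packed by rps_limits() earlier in this file; a small sketch of the gen9-style packing, using the field positions shown there and hypothetical softlimits:

    /* Sketch of the gen9-style GEN6_RP_INTERRUPT_LIMITS packing; softlimit values are hypothetical. */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t rp_limits_gen9(uint8_t val, uint8_t min_soft, uint8_t max_soft)
    {
        uint32_t limits = (uint32_t)max_soft << 23;   /* upper bound is always programmed */

        if (val <= min_soft)                          /* lower bound only at the floor,
                                                       * to avoid extra down interrupts */
            limits |= (uint32_t)min_soft << 14;
        return limits;
    }

    int main(void)
    {
        printf("mid range: 0x%08x\n", rp_limits_gen9(12, 5, 20)); /* 0x0a000000 */
        printf("at floor:  0x%08x\n", rp_limits_gen9(5, 5, 20));  /* 0x0a014000 */
        return 0;
    }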
+ */ + if (INTEL_GEN(rps_to_i915(rps)) >= 6) { + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_write(uncore, GEN6_RP_INTERRUPT_LIMITS, + rps_limits(rps, val)); + + intel_uncore_write(uncore, GEN6_PMINTRMSK, + rps_pm_mask(rps, val)); + } + } + + if (err == 0) + rps->cur_freq = val; + + return err; +} + +static void gen6_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* All of these values are in units of 50MHz */ + + /* static values from HW: RP0 > RP1 > RPn (min_freq) */ + if (IS_GEN9_LP(i915)) { + u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + + rps->rp0_freq = (rp_state_cap >> 16) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 0) & 0xff; + } else { + u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + + rps->rp0_freq = (rp_state_cap >> 0) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 16) & 0xff; + } + + /* hw_max = RP0 until we check for overclocking */ + rps->max_freq = rps->rp0_freq; + + rps->efficient_freq = rps->rp1_freq; + if (IS_HASWELL(i915) || IS_BROADWELL(i915) || + IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + u32 ddcc_status = 0; + + if (sandybridge_pcode_read(i915, + HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, + &ddcc_status, NULL) == 0) + rps->efficient_freq = + clamp_t(u8, + (ddcc_status >> 8) & 0xff, + rps->min_freq, + rps->max_freq); + } + + if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + /* Store the frequency values in 16.66 MHZ units, which is + * the natural hardware unit for SKL + */ + rps->rp0_freq *= GEN9_FREQ_SCALER; + rps->rp1_freq *= GEN9_FREQ_SCALER; + rps->min_freq *= GEN9_FREQ_SCALER; + rps->max_freq *= GEN9_FREQ_SCALER; + rps->efficient_freq *= GEN9_FREQ_SCALER; + } +} + +static bool rps_reset(struct intel_rps *rps) +{ + /* force a reset */ + rps->power.mode = -1; + rps->last_freq = -1; + + if (rps_set(rps, rps->min_freq)) { + DRM_ERROR("Failed to reset RPS to initial values\n"); + return false; + } + + rps->cur_freq = rps->min_freq; + return true; +} + +/* See the Gen9_GT_PM_Programming_Guide doc for the below */ +static bool gen9_rps_enable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* Program defaults and thresholds for RPS */ + if (IS_GEN(i915, 9)) + intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, + GEN9_FREQUENCY(rps->rp1_freq)); + + /* 1 second timeout */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, + GT_INTERVAL_FROM_US(i915, 1000000)); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); + + return rps_reset(rps); +} + +static bool gen8_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, + HSW_FREQUENCY(rps->rp1_freq)); + + /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, + 100000000 / 128); /* 1 second timeout */ + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + return rps_reset(rps); +} + +static bool gen6_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* Power down if completely idle for over 50ms */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + return rps_reset(rps); +} + +static int chv_rps_max_freq(struct 
intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); + + switch (RUNTIME_INFO(i915)->sseu.eu_total) { + case 8: + /* (2 * 4) config */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT; + break; + case 12: + /* (2 * 6) config */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT; + break; + case 16: + /* (2 * 8) config */ + default: + /* Setting (2 * 8) Min RP0 for any other combination */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT; + break; + } + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static int chv_rps_rpe_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG); + val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT; + + return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; +} + +static int chv_rps_guar_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static u32 chv_rps_min_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE); + val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT; + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static bool chv_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + /* 1: Program defaults and thresholds for RPS*/ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + /* 2: Enable RPS */ + intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + + /* Setting Fixed Bias */ + vlv_punit_get(i915); + + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; + vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + + vlv_punit_put(i915); + + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + + DRM_DEBUG_DRIVER("GPLL enabled? 
%s\n", yesno(val & GPLLENABLE)); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + return rps_reset(rps); +} + +static int vlv_rps_guar_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rp1; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); + + rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK; + rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; + + return rp1; +} + +static int vlv_rps_max_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rp0; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); + + rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; + /* Clamp to max */ + rp0 = min_t(u32, rp0, 0xea); + + return rp0; +} + +static int vlv_rps_rpe_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rpe; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO); + rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI); + rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; + + return rpe; +} + +static int vlv_rps_min_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff; + /* + * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value + * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on + * a BYT-M B0 the above register contains 0xbf. Moreover when setting + * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 + * to make sure it matches what Punit accepts. + */ + return max_t(u32, val, 0xc0); +} + +static bool vlv_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_CONT); + + vlv_punit_get(i915); + + /* Setting Fixed Bias */ + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; + vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + + vlv_punit_put(i915); + + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + + DRM_DEBUG_DRIVER("GPLL enabled? 
%s\n", yesno(val & GPLLENABLE)); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + return rps_reset(rps); +} + +static unsigned long __ips_gfx_val(struct intel_ips *ips) +{ + struct intel_rps *rps = container_of(ips, typeof(*rps), ips); + struct intel_uncore *uncore = rps_to_uncore(rps); + unsigned long t, corr, state1, corr2, state2; + u32 pxvid, ext_v; + + lockdep_assert_held(&mchdev_lock); + + pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq)); + pxvid = (pxvid >> 24) & 0x7f; + ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid); + + state1 = ext_v; + + /* Revel in the empirically derived constants */ + + /* Correction factor in 1/100000 units */ + t = ips_mch_val(uncore); + if (t > 80) + corr = t * 2349 + 135940; + else if (t >= 50) + corr = t * 964 + 29317; + else /* < 50 */ + corr = t * 301 + 1004; + + corr = corr * 150142 * state1 / 10000 - 78642; + corr /= 100000; + corr2 = corr * ips->corr; + + state2 = corr2 * state1 / 10000; + state2 /= 100; /* convert to mW */ + + __gen5_ips_update(ips); + + return ips->gfx_power + state2; +} + +void intel_rps_enable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + if (IS_CHERRYVIEW(i915)) + rps->enabled = chv_rps_enable(rps); + else if (IS_VALLEYVIEW(i915)) + rps->enabled = vlv_rps_enable(rps); + else if (INTEL_GEN(i915) >= 9) + rps->enabled = gen9_rps_enable(rps); + else if (INTEL_GEN(i915) >= 8) + rps->enabled = gen8_rps_enable(rps); + else if (INTEL_GEN(i915) >= 6) + rps->enabled = gen6_rps_enable(rps); + else if (IS_IRONLAKE_M(i915)) + rps->enabled = gen5_rps_enable(rps); + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + if (!rps->enabled) + return; + + WARN_ON(rps->max_freq < rps->min_freq); + WARN_ON(rps->idle_freq > rps->max_freq); + + WARN_ON(rps->efficient_freq < rps->min_freq); + WARN_ON(rps->efficient_freq > rps->max_freq); +} + +static void gen6_rps_disable(struct intel_rps *rps) +{ + intel_uncore_write(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); +} + +void intel_rps_disable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + rps->enabled = false; + + if (INTEL_GEN(i915) >= 6) + gen6_rps_disable(rps); + else if (IS_IRONLAKE_M(i915)) + gen5_rps_disable(rps); +} + +static int byt_gpu_freq(struct intel_rps *rps, int val) +{ + /* + * N = val - 0xb7 + * Slow = Fast = GPLL ref * N + */ + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); +} + +static int byt_freq_opcode(struct intel_rps *rps, int val) +{ + return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; +} + +static int chv_gpu_freq(struct intel_rps *rps, int val) +{ + /* + * N = val / 2 + * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 + */ + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); +} + +static int chv_freq_opcode(struct intel_rps *rps, int val) +{ + /* CHV needs even values */ + return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; +} + +int intel_gpu_freq(struct intel_rps *rps, int val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (INTEL_GEN(i915) >= 9) + return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, + GEN9_FREQ_SCALER); + else if (IS_CHERRYVIEW(i915)) + return chv_gpu_freq(rps, val); + else if (IS_VALLEYVIEW(i915)) + return byt_gpu_freq(rps, val); + else + return val * GT_FREQUENCY_MULTIPLIER; +} + +int intel_freq_opcode(struct intel_rps *rps, int val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if 
(INTEL_GEN(i915) >= 9) + return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, + GT_FREQUENCY_MULTIPLIER); + else if (IS_CHERRYVIEW(i915)) + return chv_freq_opcode(rps, val); + else if (IS_VALLEYVIEW(i915)) + return byt_freq_opcode(rps, val); + else + return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); +} + +static void vlv_init_gpll_ref_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + rps->gpll_ref_freq = + vlv_get_cck_clock(i915, "GPLL ref", + CCK_GPLL_CLOCK_CONTROL, + i915->czclk_freq); + + DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq); +} + +static void vlv_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + vlv_iosf_sb_get(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + vlv_init_gpll_ref_freq(rps); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + switch ((val >> 6) & 3) { + case 0: + case 1: + i915->mem_freq = 800; + break; + case 2: + i915->mem_freq = 1066; + break; + case 3: + i915->mem_freq = 1333; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + + rps->max_freq = vlv_rps_max_freq(rps); + rps->rp0_freq = rps->max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), + rps->max_freq); + + rps->efficient_freq = vlv_rps_rpe_freq(rps); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), + rps->efficient_freq); + + rps->rp1_freq = vlv_rps_guar_freq(rps); + DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), + rps->rp1_freq); + + rps->min_freq = vlv_rps_min_freq(rps); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), + rps->min_freq); + + vlv_iosf_sb_put(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); +} + +static void chv_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + vlv_iosf_sb_get(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + vlv_init_gpll_ref_freq(rps); + + val = vlv_cck_read(i915, CCK_FUSE_REG); + + switch ((val >> 2) & 0x7) { + case 3: + i915->mem_freq = 2000; + break; + default: + i915->mem_freq = 1600; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + + rps->max_freq = chv_rps_max_freq(rps); + rps->rp0_freq = rps->max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), + rps->max_freq); + + rps->efficient_freq = chv_rps_rpe_freq(rps); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), + rps->efficient_freq); + + rps->rp1_freq = chv_rps_guar_freq(rps); + DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), + rps->rp1_freq); + + rps->min_freq = chv_rps_min_freq(rps); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), + rps->min_freq); + + vlv_iosf_sb_put(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | + rps->min_freq) & 1, + "Odd GPU freq values\n"); +} + +static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) +{ + ei->ktime = ktime_get_raw(); + ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT); + ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT); +} + +static u32 vlv_wa_c0_ei(struct 
intel_rps *rps, u32 pm_iir) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + const struct intel_rps_ei *prev = &rps->ei; + struct intel_rps_ei now; + u32 events = 0; + + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) + return 0; + + vlv_c0_read(uncore, &now); + + if (prev->ktime) { + u64 time, c0; + u32 render, media; + + time = ktime_us_delta(now.ktime, prev->ktime); + + time *= rps_to_i915(rps)->czclk_freq; + + /* Workload can be split between render + media, + * e.g. SwapBuffers being blitted in X after being rendered in + * mesa. To account for this we need to combine both engines + * into our activity counter. + */ + render = now.render_c0 - prev->render_c0; + media = now.media_c0 - prev->media_c0; + c0 = max(render, media); + c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ + + if (c0 > time * rps->power.up_threshold) + events = GEN6_PM_RP_UP_THRESHOLD; + else if (c0 < time * rps->power.down_threshold) + events = GEN6_PM_RP_DOWN_THRESHOLD; + } + + rps->ei = now; + return events; +} + +static void rps_work(struct work_struct *work) +{ + struct intel_rps *rps = container_of(work, typeof(*rps), work); + struct intel_gt *gt = rps_to_gt(rps); + bool client_boost = false; + int new_freq, adj, min, max; + u32 pm_iir = 0; + + spin_lock_irq(>->irq_lock); + pm_iir = fetch_and_zero(&rps->pm_iir); + client_boost = atomic_read(&rps->num_waiters); + spin_unlock_irq(>->irq_lock); + + /* Make sure we didn't queue anything we're not going to process. */ + if ((pm_iir & rps->pm_events) == 0 && !client_boost) + goto out; + + mutex_lock(&rps->lock); + + pm_iir |= vlv_wa_c0_ei(rps, pm_iir); + + adj = rps->last_adj; + new_freq = rps->cur_freq; + min = rps->min_freq_softlimit; + max = rps->max_freq_softlimit; + if (client_boost) + max = rps->max_freq; + if (client_boost && new_freq < rps->boost_freq) { + new_freq = rps->boost_freq; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { + if (adj > 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1; + + if (new_freq >= rps->max_freq_softlimit) + adj = 0; + } else if (client_boost) { + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { + if (rps->cur_freq > rps->efficient_freq) + new_freq = rps->efficient_freq; + else if (rps->cur_freq > rps->min_freq_softlimit) + new_freq = rps->min_freq_softlimit; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { + if (adj < 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1; + + if (new_freq <= rps->min_freq_softlimit) + adj = 0; + } else { /* unknown event */ + adj = 0; + } + + rps->last_adj = adj; + + /* + * Limit deboosting and boosting to keep ourselves at the extremes + * when in the respective power modes (i.e. slowly decrease frequencies + * while in the HIGH_POWER zone and slowly increase frequencies while + * in the LOW_POWER zone). On idle, we will hit the timeout and drop + * to the next level quickly, and conversely if busy we expect to + * hit a waitboost and rapidly switch into max power. 
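vlv_wa_c0_ei() above converts raw C0 residency deltas into synthetic up/down events by comparing busyness against the percentage thresholds programmed in rps_set_power(); a standalone sketch of that comparison follows. The counter values are hypothetical and the czclk/fixed-point scaling is simplified away.

    /* Sketch: turning C0 residency into synthetic up/down RPS events (simplified scaling). */
    #include <stdint.h>
    #include <stdio.h>

    enum rps_event { EV_NONE, EV_UP, EV_DOWN };

    static enum rps_event c0_event(uint64_t busy, uint64_t total,
                                   unsigned int up_pct, unsigned int down_pct)
    {
        /* compare busy/total against the thresholds without dividing */
        if (busy * 100 > total * up_pct)
            return EV_UP;
        if (busy * 100 < total * down_pct)
            return EV_DOWN;
        return EV_NONE;
    }

    int main(void)
    {
        /* HIGH_POWER thresholds from rps_set_power(): up at 85%, down at 60% */
        printf("96%% busy -> %d\n", c0_event(96, 100, 85, 60)); /* 1 = EV_UP */
        printf("40%% busy -> %d\n", c0_event(40, 100, 85, 60)); /* 2 = EV_DOWN */
        return 0;
    }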
+ */ + if ((adj < 0 && rps->power.mode == HIGH_POWER) || + (adj > 0 && rps->power.mode == LOW_POWER)) + rps->last_adj = 0; + + /* sysfs frequency interfaces may have snuck in while servicing the + * interrupt + */ + new_freq += adj; + new_freq = clamp_t(int, new_freq, min, max); + + if (intel_rps_set(rps, new_freq)) { + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); + rps->last_adj = 0; + } + + mutex_unlock(&rps->lock); + +out: + spin_lock_irq(>->irq_lock); + gen6_gt_pm_unmask_irq(gt, rps->pm_events); + spin_unlock_irq(>->irq_lock); +} + +void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) +{ + struct intel_gt *gt = rps_to_gt(rps); + const u32 events = rps->pm_events & pm_iir; + + lockdep_assert_held(>->irq_lock); + + if (unlikely(!events)) + return; + + gen6_gt_pm_mask_irq(gt, events); + + rps->pm_iir |= events; + schedule_work(&rps->work); +} + +void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) +{ + struct intel_gt *gt = rps_to_gt(rps); + + if (pm_iir & rps->pm_events) { + spin_lock(>->irq_lock); + gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events); + rps->pm_iir |= pm_iir & rps->pm_events; + schedule_work(&rps->work); + spin_unlock(>->irq_lock); + } + + if (INTEL_GEN(gt->i915) >= 8) + return; + + if (pm_iir & PM_VEBOX_USER_INTERRUPT) + intel_engine_breadcrumbs_irq(gt->engine[VECS0]); + + if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) + DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); +} + +void gen5_rps_irq_handler(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 busy_up, busy_down, max_avg, min_avg; + u8 new_freq; + + spin_lock(&mchdev_lock); + + intel_uncore_write16(uncore, + MEMINTRSTS, + intel_uncore_read(uncore, MEMINTRSTS)); + + intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); + busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); + busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); + max_avg = intel_uncore_read(uncore, RCBMAXAVG); + min_avg = intel_uncore_read(uncore, RCBMINAVG); + + /* Handle RCS change request from hw */ + new_freq = rps->cur_freq; + if (busy_up > max_avg) + new_freq++; + else if (busy_down < min_avg) + new_freq--; + new_freq = clamp(new_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq)) + rps->cur_freq = new_freq; + + spin_unlock(&mchdev_lock); +} + +void intel_rps_init_early(struct intel_rps *rps) +{ + mutex_init(&rps->lock); + mutex_init(&rps->power.mutex); + + INIT_WORK(&rps->work, rps_work); + + atomic_set(&rps->num_waiters, 0); +} + +void intel_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (IS_CHERRYVIEW(i915)) + chv_rps_init(rps); + else if (IS_VALLEYVIEW(i915)) + vlv_rps_init(rps); + else if (INTEL_GEN(i915) >= 6) + gen6_rps_init(rps); + else if (IS_IRONLAKE_M(i915)) + gen5_rps_init(rps); + + /* Derive initial user preferences/limits from the hardware limits */ + rps->max_freq_softlimit = rps->max_freq; + rps->min_freq_softlimit = rps->min_freq; + + /* After setting max-softlimit, find the overclock max freq */ + if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { + u32 params = 0; + + sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS, + ¶ms, NULL); + if (params & BIT(31)) { /* OC supported */ + DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (rps->max_freq & 0xff) * 50, + (params & 0xff) * 50); + rps->max_freq = params & 0xff; + } + } + + /* Finally allow us to boost to max by default */ + rps->boost_freq = 
rps->max_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; + + rps->pm_intrmsk_mbz = 0; + + /* + * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer + * if GEN6_PM_UP_EI_EXPIRED is masked. + * + * TODO: verify if this can be reproduced on VLV,CHV. + */ + if (INTEL_GEN(i915) <= 7) + rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; + + if (INTEL_GEN(i915) >= 8) + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; +} + +u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 cagf; + + if (INTEL_GEN(i915) >= 9) + cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; + else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; + else + cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; + + return cagf; +} + +/* External interface for intel_ips.ko */ + +static struct drm_i915_private __rcu *ips_mchdev; + +/** + * Tells the intel_ips driver that the i915 driver is now loaded, if + * IPS got loaded first. + * + * This awkward dance is so that neither module has to depend on the + * other in order for IPS to do the appropriate communication of + * GPU turbo limits to i915. + */ +static void +ips_ping_for_i915_load(void) +{ + void (*link)(void); + + link = symbol_get(ips_link_to_i915_driver); + if (link) { + link(); + symbol_put(ips_link_to_i915_driver); + } +} + +void intel_rps_driver_register(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + /* + * We only register the i915 ips part with intel-ips once everything is + * set up, to avoid intel-ips sneaking in and reading bogus values. + */ + if (IS_GEN(gt->i915, 5)) { + rcu_assign_pointer(ips_mchdev, gt->i915); + ips_ping_for_i915_load(); + } +} + +void intel_rps_driver_unregister(struct intel_rps *rps) +{ + rcu_assign_pointer(ips_mchdev, NULL); +} + +static struct drm_i915_private *mchdev_get(void) +{ + struct drm_i915_private *i915; + + rcu_read_lock(); + i915 = rcu_dereference(ips_mchdev); + if (!kref_get_unless_zero(&i915->drm.ref)) + i915 = NULL; + rcu_read_unlock(); + + return i915; +} + +/** + * i915_read_mch_val - return value for IPS use + * + * Calculate and return a value for the IPS driver to use when deciding whether + * we have thermal and power headroom to increase CPU or GPU power budget. + */ +unsigned long i915_read_mch_val(void) +{ + struct drm_i915_private *i915; + unsigned long chipset_val = 0; + unsigned long graphics_val = 0; + intel_wakeref_t wakeref; + + i915 = mchdev_get(); + if (!i915) + return 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + struct intel_ips *ips = &i915->gt.rps.ips; + + spin_lock_irq(&mchdev_lock); + chipset_val = __ips_chipset_val(ips); + graphics_val = __ips_gfx_val(ips); + spin_unlock_irq(&mchdev_lock); + } + + drm_dev_put(&i915->drm); + return chipset_val + graphics_val; +} +EXPORT_SYMBOL_GPL(i915_read_mch_val); + +/** + * i915_gpu_raise - raise GPU frequency limit + * + * Raise the limit; IPS indicates we have thermal headroom. 
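Earlier in this hunk, intel_rps_init() probes pcode for overclocking support and treats the reply as a packed word: bit 31 flags that overclocking is available and the low byte carries the overclocked ceiling, in the same 50 MHz-per-step units the debug message prints. A minimal user-space sketch of just that decoding follows; the register layout is taken from the code above and the sample value is invented.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Decode a GEN6_READ_OC_PARAMS-style reply the way intel_rps_init() does:
 * bit 31 flags overclock support, bits 7:0 hold the max ratio, and one
 * ratio step corresponds to 50 MHz on these parts.
 */
static bool decode_oc_params(uint32_t params, unsigned int *oc_mhz)
{
        if (!(params & (1u << 31)))
                return false;          /* overclocking not advertised */

        *oc_mhz = (params & 0xff) * 50;
        return true;
}

int main(void)
{
        uint32_t sample = (1u << 31) | 0x2c;   /* hypothetical pcode reply */
        unsigned int mhz;

        if (decode_oc_params(sample, &mhz))
                printf("overclock ceiling: %u MHz\n", mhz);
        else
                printf("no overclock support advertised\n");

        return 0;
}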
+ */ +bool i915_gpu_raise(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + if (rps->max_freq_softlimit < rps->max_freq) + rps->max_freq_softlimit++; + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_raise); + +/** + * i915_gpu_lower - lower GPU frequency limit + * + * IPS indicates we're close to a thermal limit, so throttle back the GPU + * frequency maximum. + */ +bool i915_gpu_lower(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + if (rps->max_freq_softlimit > rps->min_freq) + rps->max_freq_softlimit--; + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_lower); + +/** + * i915_gpu_busy - indicate GPU business to IPS + * + * Tell the IPS driver whether or not the GPU is busy. + */ +bool i915_gpu_busy(void) +{ + struct drm_i915_private *i915; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; + + ret = i915->gt.awake; + + drm_dev_put(&i915->drm); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_busy); + +/** + * i915_gpu_turbo_disable - disable graphics turbo + * + * Disable graphics turbo by resetting the max frequency and setting the + * current frequency to the default. + */ +bool i915_gpu_turbo_disable(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + rps->max_freq_softlimit = rps->min_freq; + ret = gen5_rps_set(&i915->gt.rps, rps->min_freq); + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h new file mode 100644 index 000000000000..9518c66c9792 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RPS_H +#define INTEL_RPS_H + +#include "intel_rps_types.h" + +struct i915_request; + +void intel_rps_init_early(struct intel_rps *rps); +void intel_rps_init(struct intel_rps *rps); + +void intel_rps_driver_register(struct intel_rps *rps); +void intel_rps_driver_unregister(struct intel_rps *rps); + +void intel_rps_enable(struct intel_rps *rps); +void intel_rps_disable(struct intel_rps *rps); + +void intel_rps_park(struct intel_rps *rps); +void intel_rps_unpark(struct intel_rps *rps); +void intel_rps_boost(struct i915_request *rq); + +int intel_rps_set(struct intel_rps *rps, u8 val); +void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); + +int intel_gpu_freq(struct intel_rps *rps, int val); +int intel_freq_opcode(struct intel_rps *rps, int val); +u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat1); + +void gen5_rps_irq_handler(struct intel_rps *rps); +void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); +void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); + +#endif /* INTEL_RPS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h new file mode 100644 index 000000000000..c2e279154bd5 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -0,0 +1,93 @@ +/* + * SPDX-License-Identifier: MIT + * + 
* Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RPS_TYPES_H +#define INTEL_RPS_TYPES_H + +#include <linux/atomic.h> +#include <linux/ktime.h> +#include <linux/mutex.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +struct intel_ips { + u64 last_count1; + unsigned long last_time1; + unsigned long chipset_power; + u64 last_count2; + u64 last_time2; + unsigned long gfx_power; + u8 corr; + + int c, m; +}; + +struct intel_rps_ei { + ktime_t ktime; + u32 render_c0; + u32 media_c0; +}; + +struct intel_rps { + struct mutex lock; /* protects enabling and the worker */ + + /* + * work, interrupts_enabled and pm_iir are protected by + * dev_priv->irq_lock + */ + struct work_struct work; + bool enabled; + bool active; + u32 pm_iir; + + /* PM interrupt bits that should never be masked */ + u32 pm_intrmsk_mbz; + u32 pm_events; + + /* Frequencies are stored in potentially platform dependent multiples. + * In other words, *_freq needs to be multiplied by X to be interesting. + * Soft limits are those which are used for the dynamic reclocking done + * by the driver (raise frequencies under heavy loads, and lower for + * lighter loads). Hard limits are those imposed by the hardware. + * + * A distinction is made for overclocking, which is never enabled by + * default, and is considered to be above the hard limit if it's + * possible at all. + */ + u8 cur_freq; /* Current frequency (cached, may not == HW) */ + u8 last_freq; /* Last SWREQ frequency */ + u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */ + u8 max_freq_softlimit; /* Max frequency permitted by the driver */ + u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ + u8 min_freq; /* AKA RPn. Minimum frequency */ + u8 boost_freq; /* Frequency to request when wait boosting */ + u8 idle_freq; /* Frequency to request when we are idle */ + u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ + u8 rp1_freq; /* "less than" RP0 power/freqency */ + u8 rp0_freq; /* Non-overclocked max frequency. 
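The struct comment above distinguishes hard limits (imposed by hardware) from soft limits (set by the driver's dynamic reclocking), with the soft range always nested inside the hard range. A small sketch of that ordering and of the clamp the worker applies to a requested codepoint; it uses plain ints instead of the driver's u8 fields and the numbers are made up.

#include <assert.h>
#include <stdio.h>

struct freq_limits {
        int min_freq, max_freq;                       /* hard, from hardware */
        int min_freq_softlimit, max_freq_softlimit;   /* soft, from the driver */
};

static int clamp_request(const struct freq_limits *l, int req)
{
        /* Soft limits must always sit inside the hard limits. */
        assert(l->min_freq <= l->min_freq_softlimit);
        assert(l->max_freq_softlimit <= l->max_freq);

        if (req < l->min_freq_softlimit)
                req = l->min_freq_softlimit;
        if (req > l->max_freq_softlimit)
                req = l->max_freq_softlimit;
        return req;
}

int main(void)
{
        struct freq_limits l = { 3, 20, 5, 16 };   /* invented codepoints */

        printf("%d %d %d\n",
               clamp_request(&l, 1),    /* -> 5, raised to the soft floor */
               clamp_request(&l, 12),   /* -> 12, already in range */
               clamp_request(&l, 40));  /* -> 16, capped at the soft ceiling */
        return 0;
}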
*/ + u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */ + + int last_adj; + + struct { + struct mutex mutex; + + enum { LOW_POWER, BETWEEN, HIGH_POWER } mode; + unsigned int interactive; + + u8 up_threshold; /* Current %busy required to uplock */ + u8 down_threshold; /* Current %busy required to downclock */ + } power; + + atomic_t num_waiters; + atomic_t boosts; + + /* manual wa residency calculations */ + struct intel_rps_ei ei; + struct intel_ips ips; +}; + +#endif /* INTEL_RPS_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 6bf2d87da109..74f793423231 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -8,6 +8,19 @@ #include "intel_lrc_reg.h" #include "intel_sseu.h" +void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, + u8 max_subslices, u8 max_eus_per_subslice) +{ + sseu->max_slices = max_slices; + sseu->max_subslices = max_subslices; + sseu->max_eus_per_subslice = max_eus_per_subslice; + + sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices); + GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE); + sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); + GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE); +} + unsigned int intel_sseu_subslice_total(const struct sseu_dev_info *sseu) { @@ -19,10 +32,32 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) return total; } +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) +{ + int i, offset = slice * sseu->ss_stride; + u32 mask = 0; + + GEM_BUG_ON(slice >= sseu->max_slices); + + for (i = 0; i < sseu->ss_stride; i++) + mask |= (u32)sseu->subslice_mask[offset + i] << + i * BITS_PER_BYTE; + + return mask; +} + +void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, + u32 ss_mask) +{ + int offset = slice * sseu->ss_stride; + + memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride); +} + unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) { - return hweight8(sseu->subslice_mask[slice]); + return hweight32(intel_sseu_get_subslices(sseu, slice)); } u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index b50d0401a4e2..d1d225204f09 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -10,15 +10,21 @@ #include <linux/types.h> #include <linux/kernel.h> +#include "i915_gem.h" + struct drm_i915_private; #define GEN_MAX_SLICES (6) /* CNL upper bound */ #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) +#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) +#define GEN_MAX_EUS (16) /* TGL upper bound */ +#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) struct sseu_dev_info { u8 slice_mask; - u8 subslice_mask[GEN_MAX_SLICES]; + u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; @@ -33,11 +39,8 @@ struct sseu_dev_info { u8 max_subslices; u8 max_eus_per_subslice; - /* We don't have more than 8 eus per subslice at the moment and as we - * store eus enabled using bits, no need to multiply by eus per - * subslice. 
- */ - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; + u8 ss_stride; + u8 eu_stride; }; /* @@ -63,12 +66,34 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu) return value; } +static inline bool +intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice, + int subslice) +{ + u8 mask; + int ss_idx = subslice / BITS_PER_BYTE; + + GEM_BUG_ON(ss_idx >= sseu->ss_stride); + + mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx]; + + return mask & BIT(subslice % BITS_PER_BYTE); +} + +void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, + u8 max_subslices, u8 max_eus_per_subslice); + unsigned int intel_sseu_subslice_total(const struct sseu_dev_info *sseu); unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); + +void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, + u32 ss_mask); + u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, const struct intel_sseu *req_sseu); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 9cb01d9828f1..14ad10acd548 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -4,13 +4,13 @@ * Copyright © 2016-2018 Intel Corporation */ -#include "gt/intel_gt_types.h" - #include "i915_drv.h" #include "i915_active.h" #include "i915_syncmap.h" -#include "gt/intel_timeline.h" +#include "intel_gt.h" +#include "intel_ring.h" +#include "intel_timeline.h" #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) @@ -136,6 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) kfree(cl); } +__i915_active_call static void __cacheline_retire(struct i915_active *active) { struct intel_timeline_cacheline *cl = @@ -177,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) cl->hwsp = hwsp; cl->vaddr = page_pack_bits(vaddr, cacheline); - i915_active_init(hwsp->gt->i915, &cl->active, - __cacheline_active, __cacheline_retire); + i915_active_init(&cl->active, __cacheline_active, __cacheline_retire); return cl; } @@ -254,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline, mutex_init(&timeline->mutex); - INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); @@ -442,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, * free it after the current request is retired, which ensures that * all writes into the cacheline from previous requests are complete. 
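Stepping back to the sseu helpers earlier in this diff: intel_sseu_set_info() computes ss_stride as DIV_ROUND_UP(max_subslices, 8), and the get/set helpers then pack each slice's subslice mask into that many bytes of one flat array. A standalone round-trip sketch of the same packing; the driver's setter uses memcpy, which is equivalent to the byte-wise store below on a little-endian host, and the mask value here is arbitrary.

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_BYTE   8
#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

#define MAX_SLICES      6
#define MAX_SUBSLICES   8
#define SS_STRIDE       DIV_ROUND_UP(MAX_SUBSLICES, BITS_PER_BYTE)

/* Store one slice's subslice mask into its slot of the flat byte array. */
static void set_subslices(uint8_t *mask, int slice, uint32_t ss_mask)
{
        int i;

        for (i = 0; i < SS_STRIDE; i++)
                mask[slice * SS_STRIDE + i] =
                        (ss_mask >> (i * BITS_PER_BYTE)) & 0xff;
}

/* Reassemble the mask byte by byte, as intel_sseu_get_subslices() does. */
static uint32_t get_subslices(const uint8_t *mask, int slice)
{
        uint32_t out = 0;
        int i;

        for (i = 0; i < SS_STRIDE; i++)
                out |= (uint32_t)mask[slice * SS_STRIDE + i] << (i * BITS_PER_BYTE);

        return out;
}

int main(void)
{
        uint8_t subslice_mask[MAX_SLICES * SS_STRIDE] = { 0 };

        set_subslices(subslice_mask, 2, 0xb5);  /* arbitrary example mask */
        printf("slice 2 mask: %#x, subslices: %d\n",
               get_subslices(subslice_mask, 2),
               __builtin_popcount(get_subslices(subslice_mask, 2)));
        return 0;
}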
*/ - err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq); + err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence); if (err) goto err_cacheline; @@ -493,24 +493,39 @@ int intel_timeline_get_seqno(struct intel_timeline *tl, static int cacheline_ref(struct intel_timeline_cacheline *cl, struct i915_request *rq) { - return i915_active_ref(&cl->active, rq->timeline, rq); + return i915_active_add_request(&cl->active, rq); } int intel_timeline_read_hwsp(struct i915_request *from, struct i915_request *to, u32 *hwsp) { - struct intel_timeline_cacheline *cl = from->hwsp_cacheline; - struct intel_timeline *tl = from->timeline; + struct intel_timeline *tl; int err; - GEM_BUG_ON(to->timeline == tl); + rcu_read_lock(); + tl = rcu_dereference(from->timeline); + if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref)) + tl = NULL; + rcu_read_unlock(); + if (!tl) /* already completed */ + return 1; + + GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl); + + err = -EBUSY; + if (mutex_trylock(&tl->mutex)) { + struct intel_timeline_cacheline *cl = from->hwsp_cacheline; + + if (i915_request_completed(from)) { + err = 1; + goto unlock; + } - mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); - err = i915_request_completed(from); - if (!err) err = cacheline_ref(cl, to); - if (!err) { + if (err) + goto unlock; + if (likely(cl == tl->hwsp_cacheline)) { *hwsp = tl->hwsp_offset; } else { /* across a seqno wrap, recover the original offset */ @@ -518,8 +533,11 @@ int intel_timeline_read_hwsp(struct i915_request *from, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES; } + +unlock: + mutex_unlock(&tl->mutex); } - mutex_unlock(&tl->mutex); + intel_timeline_put(tl); return err; } @@ -541,7 +559,7 @@ void __intel_timeline_free(struct kref *kref) container_of(kref, typeof(*timeline), kref); intel_timeline_fini(timeline); - kfree(timeline); + kfree_rcu(timeline, rcu); } static void timelines_fini(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 2b1baf2fcc8e..98d9ee166379 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -58,12 +58,13 @@ struct intel_timeline { */ struct list_head requests; - /* Contains an RCU guarded pointer to the last request. No reference is + /* + * Contains an RCU guarded pointer to the last request. No reference is * held to the request, users must carefully acquire a reference to - * the request using i915_active_request_get_request_rcu(), or hold the - * struct_mutex. + * the request using i915_active_fence_get(), or manage the RCU + * protection themselves (cf the i915_active_fence API). 
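The reworked intel_timeline_read_hwsp() above looks the timeline up under rcu_read_lock() and only keeps it if kref_get_unless_zero() succeeds, so a lookup can never resurrect an object whose last reference has already been dropped. A stripped-down user-space model of that "get unless already zero" rule using C11 atomics; there is no RCU here, only the refcount decision, and the object lifetime is simplified to a print.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct object {
        atomic_int refcount;
};

/* Take a reference only if the object is still live; once the count has
 * reached zero the lookup must fail rather than revive the object.
 */
static bool get_unless_zero(struct object *obj)
{
        int old = atomic_load(&obj->refcount);

        while (old != 0) {
                if (atomic_compare_exchange_weak(&obj->refcount, &old, old + 1))
                        return true;
                /* old was reloaded by the failed exchange; retry */
        }
        return false;
}

static void put(struct object *obj)
{
        if (atomic_fetch_sub(&obj->refcount, 1) == 1)
                printf("last reference dropped: free the object here\n");
}

int main(void)
{
        struct object obj = { .refcount = 1 };

        printf("lookup while live: %d\n", get_unless_zero(&obj)); /* 1 */
        put(&obj);      /* drop the reference the lookup just took */
        put(&obj);      /* drop the initial reference, count hits zero */
        printf("lookup after teardown: %d\n", get_unless_zero(&obj)); /* 0 */
        return 0;
}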
*/ - struct i915_active_request last_request; + struct i915_active_fence last_request; /** * We track the most recent seqno that we wait on in every context so @@ -80,6 +81,7 @@ struct intel_timeline { struct intel_gt *gt; struct kref kref; + struct rcu_head rcu; }; #endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 5f6ec2fd29a0..e4bccc14602f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -7,6 +7,7 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_gt.h" +#include "intel_ring.h" #include "intel_workarounds.h" /** @@ -567,6 +568,9 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + /* Wa_1409142259:tgl */ + WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); } static void @@ -796,11 +800,10 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) } slice = fls(sseu->slice_mask) - 1; - GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); - subslice = fls(l3_en & sseu->subslice_mask[slice]); + subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice)); if (!subslice) { DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n", - sseu->subslice_mask[slice], l3_en); + intel_sseu_get_subslices(sseu, slice), l3_en); subslice = fls(l3_en); WARN_ON(!subslice); } @@ -890,11 +893,27 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_L3_COH_PIPE); + + /* Wa_1607087056:icl */ + wa_write_or(wal, + SLICE_UNIT_LEVEL_CLKGATE, + L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); } static void tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { + /* Wa_1409420604:tgl */ + if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) + wa_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE2, + CPSSUNIT_CLKGATE_DIS); + + /* Wa_1409180338:tgl */ + if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) + wa_write_or(wal, + SLICE_UNIT_LEVEL_CLKGATE, + L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); } static void @@ -1197,6 +1216,26 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) static void tgl_whitelist_build(struct intel_engine_cs *engine) { + struct i915_wa_list *w = &engine->whitelist; + + switch (engine->class) { + case RENDER_CLASS: + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl + * + * This covers 4 registers which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); + break; + default: + break; + } } void intel_engine_init_whitelist(struct intel_engine_cs *engine) @@ -1258,6 +1297,26 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) { + /* Wa_1606700617:tgl */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); + + /* Wa_1607138336:tgl */ + wa_write_or(wal, + GEN9_CTX_PREEMPT_REG, + GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); + + /* Wa_1607030317:tgl */ + /* Wa_1607186500:tgl */ + /* Wa_1607297627:tgl */ + wa_masked_en(wal, + GEN6_RC_SLEEP_PSMI_CONTROL, + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + 
GEN8_RC_SEMA_IDLE_MSG_DISABLE); + } + if (IS_GEN(i915, 11)) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, @@ -1452,7 +1511,7 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset) * which only controls CPU initiated MMIO. Routing does not * work for CS access so we cannot verify them on this path. */ - if (INTEL_GEN(i915) >= 8 && (offset >= 0xb100 && offset <= 0xb3ff)) + if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff)) return true; return false; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 5d43cbc3f345..83f549d203a0 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -23,6 +23,7 @@ */ #include "gem/i915_gem_context.h" +#include "gt/intel_ring.h" #include "i915_drv.h" #include "intel_context.h" @@ -240,6 +241,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, struct mock_engine *engine; GEM_BUG_ON(id >= I915_NUM_ENGINES); + GEM_BUG_ON(!i915->gt.uncore); engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); if (!engine) @@ -248,9 +250,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* minimal engine setup for requests */ engine->base.i915 = i915; engine->base.gt = &i915->gt; + engine->base.uncore = i915->gt.uncore; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; engine->base.mask = BIT(id); + engine->base.legacy_idx = INVALID_ENGINE; engine->base.instance = id; engine->base.status_page.addr = (void *)(engine + 1); @@ -265,6 +269,9 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.reset.finish = mock_reset_finish; engine->base.cancel_requests = mock_cancel_requests; + i915->gt.engine[id] = &engine->base; + i915->gt.engine_class[0][id] = &engine->base; + /* fake hw queue */ spin_lock_init(&engine->hw_lock); timer_setup(&engine->hw_delay, hw_delay_complete, 0); diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 9d1ea26c7a2d..bc720defc6b8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -14,22 +14,28 @@ static int request_sync(struct i915_request *rq) { + struct intel_timeline *tl = i915_request_timeline(rq); long timeout; int err = 0; + intel_timeline_get(tl); i915_request_get(rq); - i915_request_add(rq); + /* Opencode i915_request_add() so we can keep the timeline locked. 
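The mcr_range() change earlier in this chunk widens the gen8+ window of MCR-steered offsets that are skipped during workaround readback verification from 0xb100..0xb3ff to 0xb000..0xb4ff. A trivial standalone version of that predicate; the generation numbers and sample offsets below are just for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror the widened gen8+ window from the hunk above: offsets in this
 * range are steered per-slice and cannot be verified via plain CPU MMIO.
 */
static bool mcr_range(int gen, uint32_t offset)
{
        return gen >= 8 && offset >= 0xb000 && offset <= 0xb4ff;
}

int main(void)
{
        printf("%d %d %d\n",
               mcr_range(9, 0xb0a0),    /* 1: inside the new window */
               mcr_range(9, 0xb500),    /* 0: just past the end */
               mcr_range(7, 0xb0a0));   /* 0: pre-gen8, always verified */
        return 0;
}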
*/ + __i915_request_commit(rq); + __i915_request_queue(rq, NULL); + timeout = i915_request_wait(rq, 0, HZ / 10); - if (timeout < 0) { + if (timeout < 0) err = timeout; - } else { - mutex_lock(&rq->timeline->mutex); + else i915_request_retire_upto(rq); - mutex_unlock(&rq->timeline->mutex); - } + + lockdep_unpin_lock(&tl->mutex, rq->cookie); + mutex_unlock(&tl->mutex); i915_request_put(rq); + intel_timeline_put(tl); return err; } @@ -41,24 +47,20 @@ static int context_sync(struct intel_context *ce) mutex_lock(&tl->mutex); do { - struct i915_request *rq; + struct dma_fence *fence; long timeout; - rcu_read_lock(); - rq = rcu_dereference(tl->last_request.request); - if (rq) - rq = i915_request_get_rcu(rq); - rcu_read_unlock(); - if (!rq) + fence = i915_active_fence_get(&tl->last_request); + if (!fence) break; - timeout = i915_request_wait(rq, 0, HZ / 10); + timeout = dma_fence_wait_timeout(fence, false, HZ / 10); if (timeout < 0) err = timeout; else - i915_request_retire_upto(rq); + i915_request_retire_upto(to_request(fence)); - i915_request_put(rq); + dma_fence_put(fence); } while (!err); mutex_unlock(&tl->mutex); @@ -101,9 +103,6 @@ static int __live_context_size(struct intel_engine_cs *engine, * * TLDR; this overlaps with the execlists redzone. */ - if (HAS_EXECLISTS(engine->i915)) - vaddr += LRC_HEADER_PAGES * PAGE_SIZE; - vaddr += engine->context_size - I915_GTT_PAGE_SIZE; memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); @@ -153,15 +152,11 @@ static int live_context_size(void *arg) * HW tries to write past the end of one. */ - mutex_lock(>->i915->drm.struct_mutex); - fixme = kernel_context(gt->i915); - if (IS_ERR(fixme)) { - err = PTR_ERR(fixme); - goto unlock; - } + if (IS_ERR(fixme)) + return PTR_ERR(fixme); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct { struct drm_i915_gem_object *state; void *pinned; @@ -199,8 +194,6 @@ static int live_context_size(void *arg) } kernel_context_close(fixme); -unlock: - mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -303,26 +296,23 @@ static int live_active_context(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); - fixme = live_context(gt->i915, file); if (IS_ERR(fixme)) { err = PTR_ERR(fixme); - goto unlock; + goto out_file; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { err = __live_active_context(engine, fixme); if (err) break; - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } -unlock: - mutex_unlock(>->i915->drm.struct_mutex); +out_file: mock_file_free(gt->i915, file); return err; } @@ -416,26 +406,23 @@ static int live_remote_context(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); - fixme = live_context(gt->i915, file); if (IS_ERR(fixme)) { err = PTR_ERR(fixme); - goto unlock; + goto out_file; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { err = __live_remote_context(engine, fixme); if (err) break; - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } -unlock: - mutex_unlock(>->i915->drm.struct_mutex); +out_file: mock_file_free(gt->i915, file); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c new file mode 100644 index 000000000000..e864406bd2d9 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -0,0 +1,350 @@ +/* + * SPDX-License-Identifier: 
MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include <linux/sort.h> + +#include "i915_drv.h" + +#include "intel_gt_requests.h" +#include "i915_selftest.h" + +struct pulse { + struct i915_active active; + struct kref kref; +}; + +static int pulse_active(struct i915_active *active) +{ + kref_get(&container_of(active, struct pulse, active)->kref); + return 0; +} + +static void pulse_free(struct kref *kref) +{ + kfree(container_of(kref, struct pulse, kref)); +} + +static void pulse_put(struct pulse *p) +{ + kref_put(&p->kref, pulse_free); +} + +static void pulse_retire(struct i915_active *active) +{ + pulse_put(container_of(active, struct pulse, active)); +} + +static struct pulse *pulse_create(void) +{ + struct pulse *p; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return p; + + kref_init(&p->kref); + i915_active_init(&p->active, pulse_active, pulse_retire); + + return p; +} + +static void pulse_unlock_wait(struct pulse *p) +{ + mutex_lock(&p->active.mutex); + mutex_unlock(&p->active.mutex); + flush_work(&p->active.work); +} + +static int __live_idle_pulse(struct intel_engine_cs *engine, + int (*fn)(struct intel_engine_cs *cs)) +{ + struct pulse *p; + int err; + + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + + p = pulse_create(); + if (!p) + return -ENOMEM; + + err = i915_active_acquire(&p->active); + if (err) + goto out; + + err = i915_active_acquire_preallocate_barrier(&p->active, engine); + if (err) { + i915_active_release(&p->active); + goto out; + } + + i915_active_acquire_barrier(&p->active); + i915_active_release(&p->active); + + GEM_BUG_ON(i915_active_is_idle(&p->active)); + GEM_BUG_ON(llist_empty(&engine->barrier_tasks)); + + err = fn(engine); + if (err) + goto out; + + GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); + + if (intel_gt_retire_requests_timeout(engine->gt, HZ / 5)) { + err = -ETIME; + goto out; + } + + GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial); + + pulse_unlock_wait(p); /* synchronize with the retirement callback */ + + if (!i915_active_is_idle(&p->active)) { + struct drm_printer m = drm_err_printer("pulse"); + + pr_err("%s: heartbeat pulse did not flush idle tasks\n", + engine->name); + i915_active_print(&p->active, &m); + + err = -EINVAL; + goto out; + } + +out: + pulse_put(p); + return err; +} + +static int live_idle_flush(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that we can flush the idle barriers */ + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = __live_idle_pulse(engine, intel_engine_flush_barriers); + intel_engine_pm_put(engine); + if (err) + break; + } + + return err; +} + +static int live_idle_pulse(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that heartbeat pulses flush the idle barriers */ + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = __live_idle_pulse(engine, intel_engine_pulse); + intel_engine_pm_put(engine); + if (err && err != -ENODEV) + break; + + err = 0; + } + + return err; +} + +static int cmp_u32(const void *_a, const void *_b) +{ + const u32 *a = _a, *b = _b; + + return *a - *b; +} + +static int __live_heartbeat_fast(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct i915_request *rq; + ktime_t t0, t1; + u32 times[5]; + int err; + int i; + + ce = intel_context_create(engine->kernel_context->gem_context, + engine); + if (IS_ERR(ce)) + return 
PTR_ERR(ce); + + intel_engine_pm_get(engine); + + err = intel_engine_set_heartbeat(engine, 1); + if (err) + goto err_pm; + + for (i = 0; i < ARRAY_SIZE(times); i++) { + /* Manufacture a tick */ + do { + while (READ_ONCE(engine->heartbeat.systole)) + flush_delayed_work(&engine->heartbeat.work); + + engine->serial++; /* quick, pretend we are not idle! */ + flush_delayed_work(&engine->heartbeat.work); + if (!delayed_work_pending(&engine->heartbeat.work)) { + pr_err("%s: heartbeat did not start\n", + engine->name); + err = -EINVAL; + goto err_pm; + } + + rcu_read_lock(); + rq = READ_ONCE(engine->heartbeat.systole); + if (rq) + rq = i915_request_get_rcu(rq); + rcu_read_unlock(); + } while (!rq); + + t0 = ktime_get(); + while (rq == READ_ONCE(engine->heartbeat.systole)) + yield(); /* work is on the local cpu! */ + t1 = ktime_get(); + + i915_request_put(rq); + times[i] = ktime_us_delta(t1, t0); + } + + sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL); + + pr_info("%s: Heartbeat delay: %uus [%u, %u]\n", + engine->name, + times[ARRAY_SIZE(times) / 2], + times[0], + times[ARRAY_SIZE(times) - 1]); + + /* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */ + if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) { + pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", + engine->name, + times[ARRAY_SIZE(times) / 2], + jiffies_to_usecs(6)); + err = -EINVAL; + } + + intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL); +err_pm: + intel_engine_pm_put(engine); + intel_context_put(ce); + return err; +} + +static int live_heartbeat_fast(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that the heartbeat ticks at the desired rate. 
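__live_heartbeat_fast() above takes five latency samples, sorts them, and judges the median against a small bound so a single slow sample cannot fail the test on its own. A user-space sketch of that reduction with qsort(); the sample values and the bound are invented, and the comparator is written as a three-way compare rather than the subtraction used in the selftest.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int cmp_u32(const void *_a, const void *_b)
{
        const uint32_t *a = _a, *b = _b;

        return (*a > *b) - (*a < *b);
}

int main(void)
{
        uint32_t times[5] = { 180, 40, 55, 9000, 61 }; /* made-up delays, us */
        const size_t n = sizeof(times) / sizeof(times[0]);
        const uint32_t bound_us = 600;                 /* hypothetical limit */

        qsort(times, n, sizeof(times[0]), cmp_u32);

        printf("delay: %uus [%u, %u]\n", times[n / 2], times[0], times[n - 1]);
        if (times[n / 2] > bound_us)
                printf("median above bound: this run would fail\n");
        else
                printf("median within bound: the outlier is ignored\n");
        return 0;
}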
*/ + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) + return 0; + + for_each_engine(engine, gt, id) { + err = __live_heartbeat_fast(engine); + if (err) + break; + } + + return err; +} + +static int __live_heartbeat_off(struct intel_engine_cs *engine) +{ + int err; + + intel_engine_pm_get(engine); + + engine->serial++; + flush_delayed_work(&engine->heartbeat.work); + if (!delayed_work_pending(&engine->heartbeat.work)) { + pr_err("%s: heartbeat not running\n", + engine->name); + err = -EINVAL; + goto err_pm; + } + + err = intel_engine_set_heartbeat(engine, 0); + if (err) + goto err_pm; + + engine->serial++; + flush_delayed_work(&engine->heartbeat.work); + if (delayed_work_pending(&engine->heartbeat.work)) { + pr_err("%s: heartbeat still running\n", + engine->name); + err = -EINVAL; + goto err_beat; + } + + if (READ_ONCE(engine->heartbeat.systole)) { + pr_err("%s: heartbeat still allocated\n", + engine->name); + err = -EINVAL; + goto err_beat; + } + +err_beat: + intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL); +err_pm: + intel_engine_pm_put(engine); + return err; +} + +static int live_heartbeat_off(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* Check that we can turn off heartbeat and not interrupt VIP */ + if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) + return 0; + + for_each_engine(engine, gt, id) { + if (!intel_engine_has_preemption(engine)) + continue; + + err = __live_heartbeat_off(engine); + if (err) + break; + } + + return err; +} + +int intel_heartbeat_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_idle_flush), + SUBTEST(live_idle_pulse), + SUBTEST(live_heartbeat_fast), + SUBTEST(live_heartbeat_off), + }; + int saved_hangcheck; + int err; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + saved_hangcheck = i915_modparams.enable_hangcheck; + i915_modparams.enable_hangcheck = INT_MAX; + + err = intel_gt_live_subtests(tests, &i915->gt); + + i915_modparams.enable_hangcheck = saved_hangcheck; + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 3a1419376912..20b9c83f43ad 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -25,7 +25,7 @@ static int live_engine_pm(void *arg) } GEM_BUG_ON(intel_gt_pm_is_awake(gt)); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { const typeof(*igt_atomic_phases) *p; for (p = igt_atomic_phases; p->name; p++) { diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c new file mode 100644 index 000000000000..d1752f15702a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -0,0 +1,60 @@ + +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "selftest_llc.h" + +static int live_gt_resume(void *arg) +{ + struct intel_gt *gt = arg; + IGT_TIMEOUT(end_time); + int err; + + /* Do several suspend/resume cycles to check we don't explode! 
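The gt_pm selftest here bounds its suspend/resume cycling with an IGT_TIMEOUT deadline instead of a fixed iteration count, stopping early only if one of its checks fails. A rough user-space analogue of that time-bounded retry loop using clock_gettime(); the one-second budget and the stub cycle function are invented.

#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <time.h>

static double now_seconds(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec + ts.tv_nsec / 1e9;
}

/* Stand-in for one suspend/resume cycle plus its sanity checks. */
static int one_cycle(void)
{
        return 0;       /* 0 == success, like the selftest's err */
}

int main(void)
{
        const double deadline = now_seconds() + 1.0;    /* arbitrary budget */
        unsigned long cycles = 0;
        int err;

        /* Keep cycling until a check fails or the time budget is spent. */
        do {
                err = one_cycle();
                if (err)
                        break;
                cycles++;
        } while (now_seconds() < deadline);

        printf("completed %lu cycles, err=%d\n", cycles, err);
        return err ? 1 : 0;
}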
*/ + do { + intel_gt_suspend_prepare(gt); + intel_gt_suspend_late(gt); + + if (gt->rc6.enabled) { + pr_err("rc6 still enabled after suspend!\n"); + intel_gt_set_wedged_on_init(gt); + err = -EINVAL; + break; + } + + err = intel_gt_resume(gt); + if (err) + break; + + if (gt->rc6.supported && !gt->rc6.enabled) { + pr_err("rc6 not enabled upon resume!\n"); + intel_gt_set_wedged_on_init(gt); + err = -EINVAL; + break; + } + + err = st_llc_verify(>->llc); + if (err) { + pr_err("llc state not restored upon resume!\n"); + intel_gt_set_wedged_on_init(gt); + break; + } + } while (!__igt_timeout(end_time, NULL)); + + return err; +} + +int intel_gt_pm_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_gt_resume), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index a0098fc35921..85e9ccf5c304 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -131,7 +131,7 @@ static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { struct intel_gt *gt = h->gt; - struct i915_address_space *vm = h->ctx->vm ?: &engine->gt->ggtt->vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx); struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; @@ -141,12 +141,15 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) int err; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); - if (IS_ERR(obj)) + if (IS_ERR(obj)) { + i915_vm_put(vm); return ERR_CAST(obj); + } vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915)); if (IS_ERR(vaddr)) { i915_gem_object_put(obj); + i915_vm_put(vm); return ERR_CAST(vaddr); } @@ -157,16 +160,22 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) h->batch = vaddr; vma = i915_vma_instance(h->obj, vm, NULL); - if (IS_ERR(vma)) + if (IS_ERR(vma)) { + i915_vm_put(vm); return ERR_CAST(vma); + } hws = i915_vma_instance(h->hws, vm, NULL); - if (IS_ERR(hws)) + if (IS_ERR(hws)) { + i915_vm_put(vm); return ERR_CAST(hws); + } err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) + if (err) { + i915_vm_put(vm); return ERR_PTR(err); + } err = i915_vma_pin(hws, 0, 0, PIN_USER); if (err) @@ -264,6 +273,7 @@ unpin_hws: i915_vma_unpin(hws); unpin_vma: i915_vma_unpin(vma); + i915_vm_put(vm); return err ? 
ERR_PTR(err) : rq; } @@ -285,7 +295,7 @@ static void hang_fini(struct hang *h) kernel_context_close(h->ctx); - igt_flush_test(h->gt->i915, I915_WAIT_LOCKED); + igt_flush_test(h->gt->i915); } static bool wait_until_running(struct hang *h, struct i915_request *rq) @@ -309,12 +319,11 @@ static int igt_hang_sanitycheck(void *arg) /* Basic check that we can execute our hanging batch */ - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) - goto unlock; + return err; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct intel_wedge_me w; long timeout; @@ -355,8 +364,6 @@ static int igt_hang_sanitycheck(void *arg) fini: hang_fini(&h); -unlock: - mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -383,9 +390,7 @@ static int igt_reset_nop(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); ctx = live_context(gt->i915, file); - mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; @@ -395,9 +400,7 @@ static int igt_reset_nop(void *arg) reset_count = i915_reset_count(global); count = 0; do { - mutex_lock(>->i915->drm.struct_mutex); - - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { int i; for (i = 0; i < 16; i++) { @@ -417,7 +420,6 @@ static int igt_reset_nop(void *arg) intel_gt_reset(gt, ALL_ENGINES, NULL); igt_global_reset_unlock(gt); - mutex_unlock(>->i915->drm.struct_mutex); if (intel_gt_is_wedged(gt)) { err = -EIO; break; @@ -429,16 +431,13 @@ static int igt_reset_nop(void *arg) break; } - err = igt_flush_test(gt->i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } while (time_before(jiffies, end_time)); pr_info("%s: %d resets\n", __func__, count); - mutex_lock(>->i915->drm.struct_mutex); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); - + err = igt_flush_test(gt->i915); out: mock_file_free(gt->i915, file); if (intel_gt_is_wedged(gt)) @@ -458,23 +457,21 @@ static int igt_reset_nop_engine(void *arg) /* Check that we can engine-reset during non-user portions */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; file = mock_file(gt->i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); ctx = live_context(gt->i915, file); - mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; } i915_gem_context_clear_bannable(ctx); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; unsigned int count; IGT_TIMEOUT(end_time); @@ -494,7 +491,6 @@ static int igt_reset_nop_engine(void *arg) break; } - mutex_lock(>->i915->drm.struct_mutex); for (i = 0; i < 16; i++) { struct i915_request *rq; @@ -507,7 +503,6 @@ static int igt_reset_nop_engine(void *arg) i915_request_add(rq); } err = intel_engine_reset(engine, NULL); - mutex_unlock(>->i915->drm.struct_mutex); if (err) { pr_err("i915_reset_engine failed\n"); break; @@ -533,15 +528,12 @@ static int igt_reset_nop_engine(void *arg) if (err) break; - err = igt_flush_test(gt->i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } - mutex_lock(>->i915->drm.struct_mutex); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); - + err = igt_flush_test(gt->i915); out: mock_file_free(gt->i915, file); if (intel_gt_is_wedged(gt)) @@ -559,18 +551,16 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) /* Check that we can issue an 
engine reset on an idle engine (no-op) */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (active) { - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); - mutex_unlock(>->i915->drm.struct_mutex); if (err) return err; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; IGT_TIMEOUT(end_time); @@ -593,17 +583,14 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (active) { struct i915_request *rq; - mutex_lock(>->i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(>->i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -647,7 +634,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (err) break; - err = igt_flush_test(gt->i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } @@ -655,11 +642,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (intel_gt_is_wedged(gt)) err = -EIO; - if (active) { - mutex_lock(>->i915->drm.struct_mutex); + if (active) hang_fini(&h); - mutex_unlock(>->i915->drm.struct_mutex); - } return err; } @@ -725,9 +709,7 @@ static int active_engine(void *data) return PTR_ERR(file); for (count = 0; count < ARRAY_SIZE(ctx); count++) { - mutex_lock(&engine->i915->drm.struct_mutex); ctx[count] = live_context(engine->i915, file); - mutex_unlock(&engine->i915->drm.struct_mutex); if (IS_ERR(ctx[count])) { err = PTR_ERR(ctx[count]); while (--count) @@ -741,10 +723,8 @@ static int active_engine(void *data) struct i915_request *old = rq[idx]; struct i915_request *new; - mutex_lock(&engine->i915->drm.struct_mutex); new = igt_request_alloc(ctx[idx], engine); if (IS_ERR(new)) { - mutex_unlock(&engine->i915->drm.struct_mutex); err = PTR_ERR(new); break; } @@ -755,7 +735,6 @@ static int active_engine(void *data) rq[idx] = i915_request_get(new); i915_request_add(new); - mutex_unlock(&engine->i915->drm.struct_mutex); err = active_request_put(old); if (err) @@ -791,13 +770,11 @@ static int __igt_reset_engines(struct intel_gt *gt, * with any other engine. 
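active_engine() above keeps a small array of in-flight requests and, on each pass, installs a freshly submitted request in one slot before dropping its previous occupant. A simplified sketch of that replace-then-release bookkeeping, with heap-allocated tokens standing in for requests and a plain refcount standing in for i915_request_get/put.

#include <stdio.h>
#include <stdlib.h>

struct token {          /* stands in for an i915_request */
        int refcount;
        unsigned long serial;
};

static void token_put(struct token *t)
{
        if (t && --t->refcount == 0)
                free(t);
}

int main(void)
{
        struct token *slots[8] = { 0 }; /* small window of in-flight work */
        unsigned long count;

        for (count = 0; count < 64; count++) {
                unsigned int idx = count % (sizeof(slots) / sizeof(slots[0]));
                struct token *old = slots[idx];
                struct token *new = malloc(sizeof(*new));

                if (!new)
                        break;
                new->refcount = 1;      /* reference owned by the window slot */
                new->serial = count;

                /* Install the new item, then release the slot's previous
                 * occupant, as active_engine() does with rq[idx].
                 */
                slots[idx] = new;
                token_put(old);
        }

        for (count = 0; count < sizeof(slots) / sizeof(slots[0]); count++)
                token_put(slots[count]);

        printf("done\n");
        return 0;
}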
*/ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (flags & TEST_ACTIVE) { - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); - mutex_unlock(>->i915->drm.struct_mutex); if (err) return err; @@ -805,7 +782,7 @@ static int __igt_reset_engines(struct intel_gt *gt, h.ctx->sched.priority = 1024; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; @@ -823,7 +800,7 @@ static int __igt_reset_engines(struct intel_gt *gt, } memset(threads, 0, sizeof(threads)); - for_each_engine(other, gt->i915, tmp) { + for_each_engine(other, gt, tmp) { struct task_struct *tsk; threads[tmp].resets = @@ -849,23 +826,22 @@ static int __igt_reset_engines(struct intel_gt *gt, get_task_struct(tsk); } + yield(); /* start all threads before we begin */ + intel_engine_pm_get(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { struct i915_request *rq = NULL; if (flags & TEST_ACTIVE) { - mutex_lock(>->i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(>->i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -940,7 +916,7 @@ static int __igt_reset_engines(struct intel_gt *gt, } unwind: - for_each_engine(other, gt->i915, tmp) { + for_each_engine(other, gt, tmp) { int ret; if (!threads[tmp].task) @@ -977,9 +953,7 @@ unwind: if (err) break; - mutex_lock(>->i915->drm.struct_mutex); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); + err = igt_flush_test(gt->i915); if (err) break; } @@ -987,11 +961,8 @@ unwind: if (intel_gt_is_wedged(gt)) err = -EIO; - if (flags & TEST_ACTIVE) { - mutex_lock(>->i915->drm.struct_mutex); + if (flags & TEST_ACTIVE) hang_fini(&h); - mutex_unlock(>->i915->drm.struct_mutex); - } return err; } @@ -1047,7 +1018,7 @@ static int igt_reset_wait(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine = gt->i915->engine[RCS0]; + struct intel_engine_cs *engine = gt->engine[RCS0]; struct i915_request *rq; unsigned int reset_count; struct hang h; @@ -1061,7 +1032,6 @@ static int igt_reset_wait(void *arg) igt_global_reset_lock(gt); - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) goto unlock; @@ -1109,7 +1079,6 @@ out_rq: fini: hang_fini(&h); unlock: - mutex_unlock(>->i915->drm.struct_mutex); igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) @@ -1127,15 +1096,14 @@ static int evict_vma(void *data) { struct evict_vma *arg = data; struct i915_address_space *vm = arg->vma->vm; - struct drm_i915_private *i915 = vm->i915; struct drm_mm_node evict = arg->vma->node; int err; complete(&arg->completion); - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&vm->mutex); err = i915_gem_evict_for_node(vm, &evict, 0); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&vm->mutex); return err; } @@ -1143,39 +1111,33 @@ static int evict_vma(void *data) static int evict_fence(void *data) { struct evict_vma *arg = data; - struct drm_i915_private *i915 = arg->vma->vm->i915; int err; complete(&arg->completion); - mutex_lock(&i915->drm.struct_mutex); - /* Mark the fence register as dirty to force the mmio update. 
*/ err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512); if (err) { pr_err("Invalid Y-tiling settings; err:%d\n", err); - goto out_unlock; + return err; } err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE); if (err) { pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err); - goto out_unlock; + return err; } err = i915_vma_pin_fence(arg->vma); i915_vma_unpin(arg->vma); if (err) { pr_err("Unable to pin Y-tiled fence; err:%d\n", err); - goto out_unlock; + return err; } i915_vma_unpin_fence(arg->vma); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - - return err; + return 0; } static int __igt_reset_evict_vma(struct intel_gt *gt, @@ -1183,23 +1145,26 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, int (*fn)(void *), unsigned int flags) { - struct intel_engine_cs *engine = gt->i915->engine[RCS0]; + struct intel_engine_cs *engine = gt->engine[RCS0]; struct drm_i915_gem_object *obj; struct task_struct *tsk = NULL; struct i915_request *rq; struct evict_vma arg; struct hang h; + unsigned int pin_flags; int err; + if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE) + return 0; + if (!engine || !intel_engine_can_store_dword(engine)) return 0; /* Check that we can recover an unbind stuck on a hanging request */ - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) - goto unlock; + return err; obj = i915_gem_object_create_internal(gt->i915, SZ_1M); if (IS_ERR(obj)) { @@ -1227,10 +1192,12 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, goto out_obj; } - err = i915_vma_pin(arg.vma, 0, 0, - i915_vma_is_ggtt(arg.vma) ? - PIN_GLOBAL | PIN_MAPPABLE : - PIN_USER); + pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER; + + if (flags & EXEC_OBJECT_NEEDS_FENCE) + pin_flags |= PIN_MAPPABLE; + + err = i915_vma_pin(arg.vma, 0, 0, pin_flags); if (err) { i915_request_add(rq); goto out_obj; @@ -1262,8 +1229,6 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, if (err) goto out_rq; - mutex_unlock(>->i915->drm.struct_mutex); - if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1312,16 +1277,12 @@ out_reset: put_task_struct(tsk); } - mutex_lock(>->i915->drm.struct_mutex); out_rq: i915_request_put(rq); out_obj: i915_gem_object_put(obj); fini: hang_fini(&h); -unlock: - mutex_unlock(>->i915->drm.struct_mutex); - if (intel_gt_is_wedged(gt)) return -EIO; @@ -1340,6 +1301,7 @@ static int igt_reset_evict_ppgtt(void *arg) { struct intel_gt *gt = arg; struct i915_gem_context *ctx; + struct i915_address_space *vm; struct drm_file *file; int err; @@ -1347,18 +1309,20 @@ static int igt_reset_evict_ppgtt(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); ctx = live_context(gt->i915, file); - mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; } err = 0; - if (ctx->vm) /* aliasing == global gtt locking, covered above */ - err = __igt_reset_evict_vma(gt, ctx->vm, + vm = i915_gem_context_get_vm_rcu(ctx); + if (!i915_is_ggtt(vm)) { + /* aliasing == global gtt locking, covered above */ + err = __igt_reset_evict_vma(gt, vm, evict_vma, EXEC_OBJECT_WRITE); + } + i915_vm_put(vm); out: mock_file_free(gt->i915, file); @@ -1379,7 +1343,7 @@ static int wait_for_others(struct intel_gt *gt, struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (engine == exclude) continue; @@ -1403,12 +1367,11 @@ static int igt_reset_queue(void *arg) 
igt_global_reset_lock(gt); - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) goto unlock; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; @@ -1518,7 +1481,7 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } @@ -1526,7 +1489,6 @@ static int igt_reset_queue(void *arg) fini: hang_fini(&h); unlock: - mutex_unlock(>->i915->drm.struct_mutex); igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) @@ -1539,7 +1501,7 @@ static int igt_handle_error(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine = gt->i915->engine[RCS0]; + struct intel_engine_cs *engine = gt->engine[RCS0]; struct hang h; struct i915_request *rq; struct i915_gpu_state *error; @@ -1547,17 +1509,15 @@ static int igt_handle_error(void *arg) /* Check that we can issue a global GPU and engine reset */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (!engine || !intel_engine_can_store_dword(engine)) return 0; - mutex_lock(>->i915->drm.struct_mutex); - err = hang_init(&h, gt); if (err) - goto err_unlock; + return err; rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { @@ -1581,8 +1541,6 @@ static int igt_handle_error(void *arg) goto err_request; } - mutex_unlock(>->i915->drm.struct_mutex); - /* Temporarily disable error capture */ error = xchg(&global->first_error, (void *)-1); @@ -1590,8 +1548,6 @@ static int igt_handle_error(void *arg) xchg(&global->first_error, error); - mutex_lock(>->i915->drm.struct_mutex); - if (rq->fence.error != -EIO) { pr_err("Guilty request not identified!\n"); err = -EINVAL; @@ -1602,8 +1558,6 @@ err_request: i915_request_put(rq); err_fini: hang_fini(&h); -err_unlock: - mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -1617,7 +1571,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", engine->name, mode, p->name); - tasklet_disable_nosync(t); + tasklet_disable(t); p->critical_section_begin(); err = intel_engine_reset(engine, NULL); @@ -1689,14 +1643,13 @@ static int igt_reset_engines_atomic(void *arg) /* Check that the engines resets are usable from atomic context */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (USES_GUC_SUBMISSION(gt->i915)) return 0; igt_global_reset_lock(gt); - mutex_lock(>->i915->drm.struct_mutex); /* Flush any requests before we get started and check basics */ if (!igt_force_reset(gt)) @@ -1706,7 +1659,7 @@ static int igt_reset_engines_atomic(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { err = igt_atomic_reset_engine(engine, p); if (err) goto out; @@ -1716,9 +1669,7 @@ static int igt_reset_engines_atomic(void *arg) out: /* As we poke around the guts, do a full reset before continuing. 
*/ igt_force_reset(gt); - unlock: - mutex_unlock(>->i915->drm.struct_mutex); igt_global_reset_unlock(gt); return err; @@ -1743,27 +1694,19 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) }; struct intel_gt *gt = &i915->gt; intel_wakeref_t wakeref; - bool saved_hangcheck; int err; - if (!intel_has_gpu_reset(gt->i915)) + if (!intel_has_gpu_reset(gt)) return 0; if (intel_gt_is_wedged(gt)) return -EIO; /* we're long past hope of a successful reset */ - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); - saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); - drain_delayed_work(>->hangcheck.work); /* flush param */ + wakeref = intel_runtime_pm_get(gt->uncore->rpm); err = intel_gt_live_subtests(tests, gt); - mutex_lock(>->i915->drm.struct_mutex); - igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); - - i915_modparams.enable_hangcheck = saved_hangcheck; - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c new file mode 100644 index 000000000000..fd3770e48ac7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_llc.c @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_pm.h" /* intel_gpu_freq() */ +#include "selftest_llc.h" +#include "intel_rps.h" + +static int gen6_verify_ring_freq(struct intel_llc *llc) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + struct ia_constants consts; + intel_wakeref_t wakeref; + unsigned int gpu_freq; + int err = 0; + + wakeref = intel_runtime_pm_get(llc_to_gt(llc)->uncore->rpm); + + if (!get_ia_constants(llc, &consts)) { + err = -ENODEV; + goto out_rpm; + } + + for (gpu_freq = consts.min_gpu_freq; + gpu_freq <= consts.max_gpu_freq; + gpu_freq++) { + struct intel_rps *rps = &llc_to_gt(llc)->rps; + + unsigned int ia_freq, ring_freq, found; + u32 val; + + calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq); + + val = gpu_freq; + if (sandybridge_pcode_read(i915, + GEN6_PCODE_READ_MIN_FREQ_TABLE, + &val, NULL)) { + pr_err("Failed to read freq table[%d], range [%d, %d]\n", + gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq); + err = -ENXIO; + break; + } + + found = (val >> 0) & 0xff; + if (found != ia_freq) { + pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n", + gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, + intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + found, ia_freq); + err = -EINVAL; + break; + } + + found = (val >> 8) & 0xff; + if (found != ring_freq) { + pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n", + gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, + intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? 
GEN9_FREQ_SCALER : 1)), + found, ring_freq); + err = -EINVAL; + break; + } + } + +out_rpm: + intel_runtime_pm_put(llc_to_gt(llc)->uncore->rpm, wakeref); + return err; +} + +int st_llc_verify(struct intel_llc *llc) +{ + int err = 0; + + if (HAS_LLC(llc_to_gt(llc)->i915)) + err = gen6_verify_ring_freq(llc); + + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.h b/drivers/gpu/drm/i915/gt/selftest_llc.h new file mode 100644 index 000000000000..873f896e72f2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_llc.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef SELFTEST_LLC_H +#define SELFTEST_LLC_H + +struct intel_llc; + +int st_llc_verify(struct intel_llc *llc); + +#endif /* SELFTEST_LLC_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index d791158988d6..eb71ac2f992c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_engine_heartbeat.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -19,26 +20,52 @@ #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) +#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */ + +static struct i915_vma *create_scratch(struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + static int live_sanitycheck(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_engines_iter it; struct i915_gem_context *ctx; struct intel_context *ce; struct igt_spinner spin; - intel_wakeref_t wakeref; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_CONTEXTS(i915)) + if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (igt_spinner_init(&spin, &i915->gt)) - goto err_unlock; + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (!ctx) goto err_spin; @@ -55,13 +82,13 @@ static int live_sanitycheck(void *arg) if (!igt_wait_for_spinner(&spin, rq)) { GEM_TRACE("spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx; } igt_spinner_end(&spin); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(gt->i915)) { err = -EIO; goto err_ctx; } @@ -73,12 +100,175 @@ err_ctx: kernel_context_close(ctx); err_spin: igt_spinner_fini(&spin); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } +static int live_unlite_restore(struct intel_gt *gt, int prio) +{ + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + enum intel_engine_id id; + struct igt_spinner spin; + int err = -ENOMEM; + + /* + * Check that we can correctly context switch between 2 instances + * on the same engine from the same parent context. 
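For orientation, the two-instance setup that live_unlite_restore() builds below reduces to the following sketch (not part of the patch; the helper name is hypothetical, the intel_context calls are those used in the hunk):

static int sketch_two_instances(struct i915_gem_context *ctx,
                                struct intel_engine_cs *engine)
{
        struct intel_context *ce[2] = {};
        int err = 0, n;

        for (n = 0; n < ARRAY_SIZE(ce); n++) {
                struct intel_context *tmp;

                tmp = intel_context_create(ctx, engine);        /* same parent ctx */
                if (IS_ERR(tmp)) {
                        err = PTR_ERR(tmp);
                        goto out;
                }

                err = intel_context_pin(tmp);
                if (err) {
                        intel_context_put(tmp);
                        goto out;
                }

                ce[n] = tmp;
        }

        /* ... submit to ce[0] and ce[1] and check the switch ... */

out:
        for (n = 0; n < ARRAY_SIZE(ce); n++) {
                if (!ce[n])
                        break;
                intel_context_unpin(ce[n]);
                intel_context_put(ce[n]);
        }
        return err;
}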
+ */ + + if (igt_spinner_init(&spin, gt)) + return err; + + ctx = kernel_context(gt->i915); + if (!ctx) + goto err_spin; + + err = 0; + for_each_engine(engine, gt, id) { + struct intel_context *ce[2] = {}; + struct i915_request *rq[2]; + struct igt_live_test t; + int n; + + if (prio && !intel_engine_has_preemption(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + break; + } + + for (n = 0; n < ARRAY_SIZE(ce); n++) { + struct intel_context *tmp; + + tmp = intel_context_create(ctx, engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err_ce; + } + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err_ce; + } + + /* + * Setup the pair of contexts such that if we + * lite-restore using the RING_TAIL from ce[1] it + * will execute garbage from ce[0]->ring. + */ + memset(tmp->ring->vaddr, + POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ + tmp->ring->vma->size); + + ce[n] = tmp; + } + GEM_BUG_ON(!ce[1]->ring->size); + intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); + __execlists_update_reg_state(ce[1], engine); + + rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + goto err_ce; + } + + i915_request_get(rq[0]); + i915_request_add(rq[0]); + GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); + + if (!igt_wait_for_spinner(&spin, rq[0])) { + i915_request_put(rq[0]); + goto err_ce; + } + + rq[1] = i915_request_create(ce[1]); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + i915_request_put(rq[0]); + goto err_ce; + } + + if (!prio) { + /* + * Ensure we do the switch to ce[1] on completion. + * + * rq[0] is already submitted, so this should reduce + * to a no-op (a wait on a request on the same engine + * uses the submit fence, not the completion fence), + * but it will install a dependency on rq[1] for rq[0] + * that will prevent the pair being reordered by + * timeslicing. 
+ */ + i915_request_await_dma_fence(rq[1], &rq[0]->fence); + } + + i915_request_get(rq[1]); + i915_request_add(rq[1]); + GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); + i915_request_put(rq[0]); + + if (prio) { + struct i915_sched_attr attr = { + .priority = prio, + }; + + /* Alternatively preempt the spinner with ce[1] */ + engine->schedule(rq[1], &attr); + } + + /* And switch back to ce[0] for good measure */ + rq[0] = i915_request_create(ce[0]); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + i915_request_put(rq[1]); + goto err_ce; + } + + i915_request_await_dma_fence(rq[0], &rq[1]->fence); + i915_request_get(rq[0]); + i915_request_add(rq[0]); + GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); + i915_request_put(rq[1]); + i915_request_put(rq[0]); + +err_ce: + tasklet_kill(&engine->execlists.tasklet); /* flush submission */ + igt_spinner_end(&spin); + for (n = 0; n < ARRAY_SIZE(ce); n++) { + if (IS_ERR_OR_NULL(ce[n])) + break; + + intel_context_unpin(ce[n]); + intel_context_put(ce[n]); + } + + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + break; + } + + kernel_context_close(ctx); +err_spin: + igt_spinner_fini(&spin); + return err; +} + +static int live_unlite_switch(void *arg) +{ + return live_unlite_restore(arg, 0); +} + +static int live_unlite_preempt(void *arg) +{ + return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); +} + static int emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) { @@ -131,7 +321,13 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) if (IS_ERR(rq)) goto out_ctx; - err = emit_semaphore_chain(rq, vma, idx); + err = 0; + if (rq->engine->emit_init_breadcrumb) + err = rq->engine->emit_init_breadcrumb(rq); + if (err == 0) + err = emit_semaphore_chain(rq, vma, idx); + if (err == 0) + i915_request_get(rq); i915_request_add(rq); if (err) rq = ERR_PTR(err); @@ -144,10 +340,10 @@ out_ctx: static int release_queue(struct intel_engine_cs *engine, struct i915_vma *vma, - int idx) + int idx, int prio) { struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + .priority = prio, }; struct i915_request *rq; u32 *cs; @@ -168,9 +364,15 @@ release_queue(struct intel_engine_cs *engine, *cs++ = 1; intel_ring_advance(rq, cs); + + i915_request_get(rq); i915_request_add(rq); + local_bh_disable(); engine->schedule(rq, &attr); + local_bh_enable(); /* kick tasklet */ + + i915_request_put(rq); return 0; } @@ -189,8 +391,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, if (IS_ERR(head)) return PTR_ERR(head); - i915_request_get(head); - for_each_engine(engine, outer->i915, id) { + for_each_engine(engine, outer->gt, id) { for (i = 0; i < count; i++) { struct i915_request *rq; @@ -199,15 +400,16 @@ slice_semaphore_queue(struct intel_engine_cs *outer, err = PTR_ERR(rq); goto out; } + + i915_request_put(rq); } } - err = release_queue(outer, vma, n); + err = release_queue(outer, vma, n, INT_MAX); if (err) goto out; - if (i915_request_wait(head, - I915_WAIT_LOCKED, + if (i915_request_wait(head, 0, 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", count, n); @@ -223,9 +425,8 @@ out: static int live_timeslice_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; struct i915_vma *vma; void *vaddr; int err = 0; @@ -239,17 +440,14 @@ static int live_timeslice_preempt(void *arg) * need to preempt the 
current task and replace it with another * ready task. */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_unlock; - } + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -269,7 +467,7 @@ static int live_timeslice_preempt(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { if (!intel_engine_has_preemption(engine)) continue; @@ -279,7 +477,7 @@ static int live_timeslice_preempt(void *arg) if (err) goto err_pin; - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(gt->i915)) { err = -EIO; goto err_pin; } @@ -292,22 +490,168 @@ err_map: i915_gem_object_unpin_map(obj); err_obj: i915_gem_object_put(obj); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static struct i915_request *nop_request(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + return rq; +} + +static void wait_for_submit(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + do { + cond_resched(); + intel_engine_flush_submission(engine); + } while (!i915_request_is_active(rq)); +} + +static long timeslice_threshold(const struct intel_engine_cs *engine) +{ + return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1; +} + +static int live_timeslice_queue(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct i915_vma *vma; + void *vaddr; + int err = 0; + + /* + * Make sure that even if ELSP[0] and ELSP[1] are filled with + * timeslicing between them disabled, we *do* enable timeslicing + * if the queue demands it. (Normally, we do not submit if + * ELSP[1] is already occupied, so must rely on timeslicing to + * eject ELSP[0] in favour of the queue.) 
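The helpers just added (nop_request(), wait_for_submit(), timeslice_threshold()) combine along these lines in the test below; a purely illustrative fragment with an invented name:

static int sketch_wait_until_running(struct intel_engine_cs *engine)
{
        struct i915_request *rq;
        int err = 0;

        rq = nop_request(engine);               /* returns with a reference held */
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        wait_for_submit(engine, rq);            /* spin until it reaches ELSP */

        /* two timeslice intervals should be ample for a nop to retire */
        if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0)
                err = -ETIME;

        i915_request_put(rq);
        return err;
}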
+ */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_map; + + for_each_engine(engine, gt, id) { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + }; + struct i915_request *rq, *nop; + + if (!intel_engine_has_preemption(engine)) + continue; + + memset(vaddr, 0, PAGE_SIZE); + + /* ELSP[0]: semaphore wait */ + rq = semaphore_queue(engine, vma, 0); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_pin; + } + engine->schedule(rq, &attr); + wait_for_submit(engine, rq); + + /* ELSP[1]: nop request */ + nop = nop_request(engine); + if (IS_ERR(nop)) { + err = PTR_ERR(nop); + i915_request_put(rq); + goto err_pin; + } + wait_for_submit(engine, nop); + i915_request_put(nop); + + GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + /* Queue: semaphore signal, matching priority as semaphore */ + err = release_queue(engine, vma, 1, effective_prio(rq)); + if (err) { + i915_request_put(rq); + goto err_pin; + } + + intel_engine_flush_submission(engine); + if (!READ_ONCE(engine->execlists.timer.expires) && + !i915_request_completed(rq)) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + + GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + GEM_TRACE_DUMP(); + + memset(vaddr, 0xff, PAGE_SIZE); + err = -EINVAL; + } + + /* Timeslice every jiffy, so within 2 we should signal */ + if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + + pr_err("%s: Failed to timeslice into queue\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + memset(vaddr, 0xff, PAGE_SIZE); + err = -EIO; + } + i915_request_put(rq); + if (err) + break; + } + +err_pin: + i915_vma_unpin(vma); +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); return err; } static int live_busywait_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct intel_engine_cs *engine; struct drm_i915_gem_object *obj; struct i915_vma *vma; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; u32 *map; @@ -316,22 +660,19 @@ static int live_busywait_preempt(void *arg) * preempt the busywaits used to synchronise between rings. 
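Most of the preemption tests below share the same fixture: one maximum-priority and one minimum-priority kernel context. A minimal sketch of that setup (helper name invented, not part of the patch):

static int sketch_hi_lo_fixture(struct intel_gt *gt)
{
        struct i915_gem_context *ctx_hi, *ctx_lo;

        ctx_hi = kernel_context(gt->i915);
        if (!ctx_hi)
                return -ENOMEM;
        ctx_hi->sched.priority =
                I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

        ctx_lo = kernel_context(gt->i915);
        if (!ctx_lo) {
                kernel_context_close(ctx_hi);
                return -ENOMEM;
        }
        ctx_lo->sched.priority =
                I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

        /* ... start a spinner on ctx_lo, then submit from ctx_hi ... */

        kernel_context_close(ctx_lo);
        kernel_context_close(ctx_hi);
        return 0;
}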
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) - goto err_unlock; + return -ENOMEM; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_ctx_lo; @@ -343,7 +684,7 @@ static int live_busywait_preempt(void *arg) goto err_obj; } - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_map; @@ -353,7 +694,7 @@ static int live_busywait_preempt(void *arg) if (err) goto err_map; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *lo, *hi; struct igt_live_test t; u32 *cs; @@ -364,7 +705,7 @@ static int live_busywait_preempt(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_vma; } @@ -444,7 +785,7 @@ static int live_busywait_preempt(void *arg) i915_request_add(hi); if (i915_request_wait(lo, 0, HZ / 5) < 0) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to preempt semaphore busywait!\n", engine->name); @@ -452,7 +793,7 @@ static int live_busywait_preempt(void *arg) intel_engine_dump(engine, &p, "%s\n", engine->name); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_vma; } @@ -475,9 +816,6 @@ err_ctx_lo: kernel_context_close(ctx_lo); err_ctx_hi: kernel_context_close(ctx_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -501,49 +839,45 @@ spinner_create_request(struct igt_spinner *spin, static int live_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) pr_err("Logical preemption supported, but not exposed\n"); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (igt_spinner_init(&spin_hi, &i915->gt)) - goto err_unlock; + if (igt_spinner_init(&spin_hi, gt)) + return -ENOMEM; - if (igt_spinner_init(&spin_lo, &i915->gt)) + if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct 
igt_live_test t; struct i915_request *rq; if (!intel_engine_has_preemption(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_ctx_lo; } @@ -559,7 +893,7 @@ static int live_preempt(void *arg) if (!igt_wait_for_spinner(&spin_lo, rq)) { GEM_TRACE("lo spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -576,7 +910,7 @@ static int live_preempt(void *arg) if (!igt_wait_for_spinner(&spin_hi, rq)) { GEM_TRACE("hi spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -599,54 +933,47 @@ err_spin_lo: igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } static int live_late_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; struct i915_sched_attr attr = {}; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (igt_spinner_init(&spin_hi, &i915->gt)) - goto err_unlock; + if (igt_spinner_init(&spin_hi, gt)) + return -ENOMEM; - if (igt_spinner_init(&spin_lo, &i915->gt)) + if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
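The late-preemption case depends on raising a request's priority only after it is already in flight; that step, as used throughout these tests (illustrative, assumes the engine exposes a scheduler):

static void sketch_late_priority_bump(struct intel_engine_cs *engine,
                                      struct i915_request *rq)
{
        struct i915_sched_attr attr = {
                .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
        };

        /* rq was added at default priority; promote it after submission */
        engine->schedule(rq, &attr);
}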
*/ ctx_lo->sched.priority = I915_USER_PRIORITY(1); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct igt_live_test t; struct i915_request *rq; if (!intel_engine_has_preemption(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_ctx_lo; } @@ -705,15 +1032,12 @@ err_spin_lo: igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: igt_spinner_end(&spin_hi); igt_spinner_end(&spin_lo); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -723,14 +1047,13 @@ struct preempt_client { struct i915_gem_context *ctx; }; -static int preempt_client_init(struct drm_i915_private *i915, - struct preempt_client *c) +static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) { - c->ctx = kernel_context(i915); + c->ctx = kernel_context(gt->i915); if (!c->ctx) return -ENOMEM; - if (igt_spinner_init(&c->spin, &i915->gt)) + if (igt_spinner_init(&c->spin, gt)) goto err_ctx; return 0; @@ -748,11 +1071,10 @@ static void preempt_client_fini(struct preempt_client *c) static int live_nopreempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct preempt_client a, b; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -760,19 +1082,16 @@ static int live_nopreempt(void *arg) * that may be being observed and not want to be interrupted. */ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (preempt_client_init(i915, &a)) - goto err_unlock; - if (preempt_client_init(i915, &b)) + if (preempt_client_init(gt, &a)) + return -ENOMEM; + if (preempt_client_init(gt, &b)) goto err_client_a; b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *rq_a, *rq_b; if (!intel_engine_has_preemption(engine)) @@ -832,7 +1151,7 @@ static int live_nopreempt(void *arg) goto err_wedged; } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) goto err_wedged; } @@ -841,29 +1160,344 @@ err_client_b: preempt_client_fini(&b); err_client_a: preempt_client_fini(&a); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: igt_spinner_end(&b.spin); igt_spinner_end(&a.spin); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_b; } +struct live_preempt_cancel { + struct intel_engine_cs *engine; + struct preempt_client a, b; +}; + +static int __cancel_active0(struct live_preempt_cancel *arg) +{ + struct i915_request *rq; + struct igt_live_test t; + int err; + + /* Preempt cancel of ELSP0 */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); + rq = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&arg->a.spin, rq)) { + err = -EIO; + goto out; + } + + 
i915_gem_context_set_banned(arg->a.ctx); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_active1(struct live_preempt_cancel *arg) +{ + struct i915_request *rq[2] = {}; + struct igt_live_test t; + int err; + + /* Preempt cancel of ELSP1 */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); + rq[0] = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_NOOP); /* no preemption */ + if (IS_ERR(rq[0])) + return PTR_ERR(rq[0]); + + i915_request_get(rq[0]); + i915_request_add(rq[0]); + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { + err = -EIO; + goto out; + } + + clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags); + rq[1] = spinner_create_request(&arg->b.spin, + arg->b.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + goto out; + } + + i915_request_get(rq[1]); + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); + i915_request_add(rq[1]); + if (err) + goto out; + + i915_gem_context_set_banned(arg->b.ctx); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + igt_spinner_end(&arg->a.spin); + if (i915_request_wait(rq[1], 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq[0]->fence.error != 0) { + pr_err("Normal inflight0 request did not complete\n"); + err = -EINVAL; + goto out; + } + + if (rq[1]->fence.error != -EIO) { + pr_err("Cancelled inflight1 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq[1]); + i915_request_put(rq[0]); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_queued(struct live_preempt_cancel *arg) +{ + struct i915_request *rq[3] = {}; + struct igt_live_test t; + int err; + + /* Full ELSP and one in the wings */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); + rq[0] = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[0])) + return PTR_ERR(rq[0]); + + i915_request_get(rq[0]); + i915_request_add(rq[0]); + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { + err = -EIO; + goto out; + } + + clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags); + rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + goto out; + } + + i915_request_get(rq[1]); + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); + i915_request_add(rq[1]); + if (err) + goto out; + + rq[2] = spinner_create_request(&arg->b.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[2])) { + err = PTR_ERR(rq[2]); + goto out; + } + + i915_request_get(rq[2]); + err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); + i915_request_add(rq[2]); + if (err) + goto out; + + i915_gem_context_set_banned(arg->a.ctx); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq[0]->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not 
report -EIO\n"); + err = -EINVAL; + goto out; + } + + if (rq[1]->fence.error != 0) { + pr_err("Normal inflight1 request did not complete\n"); + err = -EINVAL; + goto out; + } + + if (rq[2]->fence.error != -EIO) { + pr_err("Cancelled queued request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq[2]); + i915_request_put(rq[1]); + i915_request_put(rq[0]); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_hostile(struct live_preempt_cancel *arg) +{ + struct i915_request *rq; + int err; + + /* Preempt cancel non-preemptible spinner in ELSP0 */ + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return 0; + + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); + rq = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_NOOP); /* preemption disabled */ + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&arg->a.spin, rq)) { + err = -EIO; + goto out; + } + + i915_gem_context_set_banned(arg->a.ctx); + err = intel_engine_pulse(arg->engine); /* force reset */ + if (err) + goto out; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq); + if (igt_flush_test(arg->engine->i915)) + err = -EIO; + return err; +} + +static int live_preempt_cancel(void *arg) +{ + struct intel_gt *gt = arg; + struct live_preempt_cancel data; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * To cancel an inflight context, we need to first remove it from the + * GPU. That sounds like preemption! Plus a little bit of bookkeeping. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + if (preempt_client_init(gt, &data.a)) + return -ENOMEM; + if (preempt_client_init(gt, &data.b)) + goto err_client_a; + + for_each_engine(data.engine, gt, id) { + if (!intel_engine_has_preemption(data.engine)) + continue; + + err = __cancel_active0(&data); + if (err) + goto err_wedged; + + err = __cancel_active1(&data); + if (err) + goto err_wedged; + + err = __cancel_queued(&data); + if (err) + goto err_wedged; + + err = __cancel_hostile(&data); + if (err) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&data.b); +err_client_a: + preempt_client_fini(&data.a); + return err; + +err_wedged: + GEM_TRACE_DUMP(); + igt_spinner_end(&data.b.spin); + igt_spinner_end(&data.a.spin); + intel_gt_set_wedged(gt); + goto err_client_b; +} + static int live_suppress_self_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct i915_sched_attr attr = { .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) }; struct preempt_client a, b; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -873,30 +1507,31 @@ static int live_suppress_self_preempt(void *arg) * completion event. 
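Each __cancel_*() helper above follows the same shape: start a spinner, ban its context, then use intel_engine_pulse() to knock it off the hardware and verify the fence reports -EIO. Condensed into one illustrative routine (hypothetical name, calls taken from the hunks above):

static int sketch_cancel_spinner(struct preempt_client *c,
                                 struct intel_engine_cs *engine)
{
        struct i915_request *rq;
        int err = 0;

        rq = spinner_create_request(&c->spin, c->ctx, engine, MI_ARB_CHECK);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        i915_request_get(rq);
        i915_request_add(rq);
        if (!igt_wait_for_spinner(&c->spin, rq)) {
                err = -EIO;
                goto out;
        }

        i915_gem_context_set_banned(c->ctx);    /* mark the context guilty */
        err = intel_engine_pulse(engine);       /* preempt it off the HW */
        if (err)
                goto out;

        if (i915_request_wait(rq, 0, HZ / 5) < 0)
                err = -EIO;
        else if (rq->fence.error != -EIO)
                err = -EINVAL;                  /* cancellation must be reported */
out:
        i915_request_put(rq);
        return err;
}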
*/ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; /* presume black blox */ - if (intel_vgpu_active(i915)) + if (intel_vgpu_active(gt->i915)) return 0; /* GVT forces single port & request submission */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (preempt_client_init(i915, &a)) - goto err_unlock; - if (preempt_client_init(i915, &b)) + if (preempt_client_init(gt, &a)) + return -ENOMEM; + if (preempt_client_init(gt, &b)) goto err_client_a; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *rq_a, *rq_b; int depth; if (!intel_engine_has_preemption(engine)) continue; + if (igt_flush_test(gt->i915)) + goto err_wedged; + + intel_engine_pm_get(engine); engine->execlists.preempt_hang.count = 0; rq_a = spinner_create_request(&a.spin, @@ -904,12 +1539,14 @@ static int live_suppress_self_preempt(void *arg) MI_NOOP); if (IS_ERR(rq_a)) { err = PTR_ERR(rq_a); + intel_engine_pm_put(engine); goto err_client_b; } i915_request_add(rq_a); if (!igt_wait_for_spinner(&a.spin, rq_a)) { pr_err("First client failed to start\n"); + intel_engine_pm_put(engine); goto err_wedged; } @@ -921,6 +1558,7 @@ static int live_suppress_self_preempt(void *arg) MI_NOOP); if (IS_ERR(rq_b)) { err = PTR_ERR(rq_b); + intel_engine_pm_put(engine); goto err_client_b; } i915_request_add(rq_b); @@ -931,6 +1569,7 @@ static int live_suppress_self_preempt(void *arg) if (!igt_wait_for_spinner(&b.spin, rq_b)) { pr_err("Second client failed to start\n"); + intel_engine_pm_put(engine); goto err_wedged; } @@ -944,11 +1583,13 @@ static int live_suppress_self_preempt(void *arg) engine->name, engine->execlists.preempt_hang.count, depth); + intel_engine_pm_put(engine); err = -EINVAL; goto err_client_b; } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + intel_engine_pm_put(engine); + if (igt_flush_test(gt->i915)) goto err_wedged; } @@ -957,15 +1598,12 @@ err_client_b: preempt_client_fini(&b); err_client_a: preempt_client_fini(&a); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: igt_spinner_end(&b.spin); igt_spinner_end(&a.spin); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_b; } @@ -984,9 +1622,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine) if (!rq) return NULL; - INIT_LIST_HEAD(&rq->active_list); rq->engine = engine; + spin_lock_init(&rq->lock); + INIT_LIST_HEAD(&rq->fence.cb_list); + rq->fence.lock = &rq->lock; + rq->fence.ops = &i915_fence_ops; + i915_sched_node_init(&rq->sched); /* mark this request as permanently incomplete */ @@ -1021,11 +1663,10 @@ static void dummy_request_free(struct i915_request *dummy) static int live_suppress_wait_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct preempt_client client[4]; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; int i; @@ -1035,22 +1676,19 @@ static int live_suppress_wait_preempt(void *arg) * not needlessly generate preempt-to-idle cycles. 
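Note that the suppress-self-preempt hunks above now bracket the probe of execlists state with an explicit engine power-management reference; in isolation the pattern is simply (illustrative sketch, invented name):

static unsigned int sketch_read_preempt_hangs(struct intel_engine_cs *engine)
{
        unsigned int count;

        intel_engine_pm_get(engine);    /* keep the engine awake while probing */
        count = engine->execlists.preempt_hang.count;
        intel_engine_pm_put(engine);

        return count;
}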
*/ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (preempt_client_init(i915, &client[0])) /* ELSP[0] */ - goto err_unlock; - if (preempt_client_init(i915, &client[1])) /* ELSP[1] */ + if (preempt_client_init(gt, &client[0])) /* ELSP[0] */ + return -ENOMEM; + if (preempt_client_init(gt, &client[1])) /* ELSP[1] */ goto err_client_0; - if (preempt_client_init(i915, &client[2])) /* head of queue */ + if (preempt_client_init(gt, &client[2])) /* head of queue */ goto err_client_1; - if (preempt_client_init(i915, &client[3])) /* bystander */ + if (preempt_client_init(gt, &client[3])) /* bystander */ goto err_client_2; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { int depth; if (!intel_engine_has_preemption(engine)) @@ -1079,8 +1717,8 @@ static int live_suppress_wait_preempt(void *arg) } /* Disable NEWCLIENT promotion */ - __i915_active_request_set(&rq[i]->timeline->last_request, - dummy); + __i915_active_fence_set(&i915_request_timeline(rq[i])->last_request, + &dummy->fence); i915_request_add(rq[i]); } @@ -1105,7 +1743,7 @@ static int live_suppress_wait_preempt(void *arg) for (i = 0; i < ARRAY_SIZE(client); i++) igt_spinner_end(&client[i].spin); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) goto err_wedged; if (engine->execlists.preempt_hang.count) { @@ -1128,26 +1766,22 @@ err_client_1: preempt_client_fini(&client[1]); err_client_0: preempt_client_fini(&client[0]); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: for (i = 0; i < ARRAY_SIZE(client); i++) igt_spinner_end(&client[i].spin); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_3; } static int live_chain_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct preempt_client hi, lo; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -1156,19 +1790,16 @@ static int live_chain_preempt(void *arg) * the previously submitted spinner in B. 
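Several of these tests report failure the same way before wedging the device; pulled out as an illustrative helper (name invented, calls are the ones used in these hunks):

static int sketch_wait_or_wedge(struct intel_gt *gt,
                                struct intel_engine_cs *engine,
                                struct i915_request *rq)
{
        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

                intel_engine_dump(engine, &p, "%s\n", engine->name);
                GEM_TRACE_DUMP();

                intel_gt_set_wedged(gt);        /* declare the GPU lost */
                return -EIO;
        }

        return 0;
}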
*/ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (preempt_client_init(i915, &hi)) - goto err_unlock; + if (preempt_client_init(gt, &hi)) + return -ENOMEM; - if (preempt_client_init(i915, &lo)) + if (preempt_client_init(gt, &lo)) goto err_client_hi; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_sched_attr attr = { .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), }; @@ -1199,7 +1830,7 @@ static int live_chain_preempt(void *arg) goto err_wedged; } - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_wedged; } @@ -1237,7 +1868,7 @@ static int live_chain_preempt(void *arg) igt_spinner_end(&hi.spin); if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("Failed to preempt over chain of %d\n", count); @@ -1253,7 +1884,7 @@ static int live_chain_preempt(void *arg) i915_request_add(rq); if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("Failed to flush low priority chain of %d requests\n", count); @@ -1274,57 +1905,50 @@ err_client_lo: preempt_client_fini(&lo); err_client_hi: preempt_client_fini(&hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: igt_spinner_end(&hi.spin); igt_spinner_end(&lo.spin); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_lo; } static int live_preempt_hang(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (igt_spinner_init(&spin_hi, &i915->gt)) - goto err_unlock; + if (igt_spinner_init(&spin_hi, gt)) + return -ENOMEM; - if (igt_spinner_init(&spin_lo, &i915->gt)) + if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *rq; if (!intel_engine_has_preemption(engine)) @@ -1341,7 +1965,7 @@ static int live_preempt_hang(void *arg) if (!igt_wait_for_spinner(&spin_lo, rq)) { GEM_TRACE("lo spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -1363,28 +1987,28 @@ static int live_preempt_hang(void *arg) HZ / 10)) { pr_err("Preemption did not occur within timeout!"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } - 
set_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); intel_engine_reset(engine, NULL); - clear_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); engine->execlists.preempt_hang.inject_hang = false; if (!igt_wait_for_spinner(&spin_hi, rq)) { GEM_TRACE("hi spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } igt_spinner_end(&spin_hi); igt_spinner_end(&spin_lo); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(gt->i915)) { err = -EIO; goto err_ctx_lo; } @@ -1399,9 +2023,105 @@ err_spin_lo: igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_preempt_timeout(void *arg) +{ + struct intel_gt *gt = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct igt_spinner spin_lo; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * Check that we force preemption to occur by cancelling the previous + * context if it refuses to yield the GPU. + */ + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return 0; + + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) + return 0; + + if (!intel_has_reset_engine(gt)) + return 0; + + if (igt_spinner_init(&spin_lo, gt)) + return -ENOMEM; + + ctx_hi = kernel_context(gt->i915); + if (!ctx_hi) + goto err_spin_lo; + ctx_hi->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + + ctx_lo = kernel_context(gt->i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + + for_each_engine(engine, gt, id) { + unsigned long saved_timeout; + struct i915_request *rq; + + if (!intel_engine_has_preemption(engine)) + continue; + + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_NOOP); /* preemption disabled */ + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin_lo, rq)) { + intel_gt_set_wedged(gt); + err = -EIO; + goto err_ctx_lo; + } + + rq = igt_request_alloc(ctx_hi, engine); + if (IS_ERR(rq)) { + igt_spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + /* Flush the previous CS ack before changing timeouts */ + while (READ_ONCE(engine->execlists.pending[0])) + cpu_relax(); + + saved_timeout = engine->props.preempt_timeout_ms; + engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ + + i915_request_get(rq); + i915_request_add(rq); + + intel_engine_flush_submission(engine); + engine->props.preempt_timeout_ms = saved_timeout; + + if (i915_request_wait(rq, 0, HZ / 10) < 0) { + intel_gt_set_wedged(gt); + i915_request_put(rq); + err = -ETIME; + goto err_ctx_lo; + } + + igt_spinner_end(&spin_lo); + i915_request_put(rq); + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + igt_spinner_fini(&spin_lo); return err; } @@ -1416,7 +2136,7 @@ static int random_priority(struct rnd_state *rnd) } struct preempt_smoke { - struct drm_i915_private *i915; + struct intel_gt *gt; struct i915_gem_context **contexts; struct intel_engine_cs *engine; struct drm_i915_gem_object *batch; @@ -1440,7 +2160,11 @@ static int smoke_submit(struct preempt_smoke *smoke, int err = 0; if (batch) { - vma = i915_vma_instance(batch, ctx->vm, NULL); + struct 
i915_address_space *vm; + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(batch, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -1489,11 +2213,9 @@ static int smoke_crescendo_thread(void *arg) struct i915_gem_context *ctx = smoke_context(smoke); int err; - mutex_lock(&smoke->i915->drm.struct_mutex); err = smoke_submit(smoke, ctx, count % I915_PRIORITY_MAX, smoke->batch); - mutex_unlock(&smoke->i915->drm.struct_mutex); if (err) return err; @@ -1514,9 +2236,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) unsigned long count; int err = 0; - mutex_unlock(&smoke->i915->drm.struct_mutex); - - for_each_engine(engine, smoke->i915, id) { + for_each_engine(engine, smoke->gt, id) { arg[id] = *smoke; arg[id].engine = engine; if (!(flags & BATCH)) @@ -1532,8 +2252,10 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) get_task_struct(tsk[id]); } + yield(); /* start all threads before we kthread_stop() */ + count = 0; - for_each_engine(engine, smoke->i915, id) { + for_each_engine(engine, smoke->gt, id) { int status; if (IS_ERR_OR_NULL(tsk[id])) @@ -1548,11 +2270,9 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) put_task_struct(tsk[id]); } - mutex_lock(&smoke->i915->drm.struct_mutex); - pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", count, flags, - RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); + RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); return 0; } @@ -1564,7 +2284,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) count = 0; do { - for_each_engine(smoke->engine, smoke->i915, id) { + for_each_engine(smoke->engine, smoke->gt, id) { struct i915_gem_context *ctx = smoke_context(smoke); int err; @@ -1580,25 +2300,24 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", count, flags, - RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); + RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); return 0; } static int live_preempt_smoke(void *arg) { struct preempt_smoke smoke = { - .i915 = arg, + .gt = arg, .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), .ncontext = 1024, }; const unsigned int phase[] = { 0, BATCH }; - intel_wakeref_t wakeref; struct igt_live_test t; int err = -ENOMEM; u32 *cs; int n; - if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) return 0; smoke.contexts = kmalloc_array(smoke.ncontext, @@ -1607,13 +2326,11 @@ static int live_preempt_smoke(void *arg) if (!smoke.contexts) return -ENOMEM; - mutex_lock(&smoke.i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm); - - smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE); + smoke.batch = + i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); if (IS_ERR(smoke.batch)) { err = PTR_ERR(smoke.batch); - goto err_unlock; + goto err_free; } cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); @@ -1627,13 +2344,13 @@ static int live_preempt_smoke(void *arg) i915_gem_object_flush_map(smoke.batch); i915_gem_object_unpin_map(smoke.batch); - if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) { + if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { err = -EIO; goto err_batch; } for (n = 0; n < smoke.ncontext; n++) { - smoke.contexts[n] = kernel_context(smoke.i915); + smoke.contexts[n] = 
kernel_context(smoke.gt->i915); if (!smoke.contexts[n]) goto err_ctx; } @@ -1660,15 +2377,13 @@ err_ctx: err_batch: i915_gem_object_put(smoke.batch); -err_unlock: - intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref); - mutex_unlock(&smoke.i915->drm.struct_mutex); +err_free: kfree(smoke.contexts); return err; } -static int nop_virtual_engine(struct drm_i915_private *i915, +static int nop_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling, unsigned int nctx, @@ -1687,7 +2402,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx)); for (n = 0; n < nctx; n++) { - ctx[n] = kernel_context(i915); + ctx[n] = kernel_context(gt->i915); if (!ctx[n]) { err = -ENOMEM; nctx = n; @@ -1712,7 +2427,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, } } - err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name); + err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); if (err) goto out; @@ -1759,7 +2474,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, request[nc]->fence.context, request[nc]->fence.seqno); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); break; } } @@ -1781,7 +2496,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, prime, div64_u64(ktime_to_ns(times[1]), prime)); out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; for (nc = 0; nc < nctx; nc++) { @@ -1794,25 +2509,22 @@ out: static int live_virtual_engine(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; struct intel_engine_cs *engine; - struct intel_gt *gt = &i915->gt; enum intel_engine_id id; unsigned int class, inst; - int err = -ENODEV; + int err; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - - for_each_engine(engine, i915, id) { - err = nop_virtual_engine(i915, &engine, 1, 1, 0); + for_each_engine(engine, gt, id) { + err = nop_virtual_engine(gt, &engine, 1, 1, 0); if (err) { pr_err("Failed to wrap engine %s: err=%d\n", engine->name, err); - goto out_unlock; + return err; } } @@ -1830,23 +2542,21 @@ static int live_virtual_engine(void *arg) continue; for (n = 1; n <= nsibling + 1; n++) { - err = nop_virtual_engine(i915, siblings, nsibling, + err = nop_virtual_engine(gt, siblings, nsibling, n, 0); if (err) - goto out_unlock; + return err; } - err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN); + err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); if (err) - goto out_unlock; + return err; } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } -static int mask_virtual_engine(struct drm_i915_private *i915, +static int mask_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling) { @@ -1862,7 +2572,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, * restrict it to our desired engine within the virtual engine. 
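The virtual-engine tests below pin a virtual context built from a set of siblings and then steer individual requests with rq->execution_mask; a reduced sketch (not part of the patch, helper name hypothetical):

static int sketch_restrict_to_sibling(struct i915_gem_context *ctx,
                                      struct intel_engine_cs **siblings,
                                      unsigned int nsibling)
{
        struct intel_context *ve;
        struct i915_request *rq;
        int err;

        ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
        if (IS_ERR(ve))
                return PTR_ERR(ve);

        err = intel_context_pin(ve);
        if (err)
                goto out_put;

        rq = i915_request_create(ve);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto out_unpin;
        }

        rq->execution_mask = siblings[0]->mask; /* only run on sibling 0 */
        i915_request_add(rq);

out_unpin:
        intel_context_unpin(ve);
out_put:
        intel_context_put(ve);
        return err;
}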
*/ - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (!ctx) return -ENOMEM; @@ -1876,7 +2586,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, if (err) goto out_put; - err = igt_live_test_begin(&t, i915, __func__, ve->engine->name); + err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); if (err) goto out_unpin; @@ -1907,7 +2617,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, request[n]->fence.context, request[n]->fence.seqno); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto out; } @@ -1922,11 +2632,8 @@ static int mask_virtual_engine(struct drm_i915_private *i915, } err = igt_live_test_end(&t); - if (err) - goto out; - out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; for (n = 0; n < nsibling; n++) @@ -1943,17 +2650,14 @@ out_close: static int live_virtual_mask(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - struct intel_gt *gt = &i915->gt; unsigned int class, inst; - int err = 0; + int err; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { unsigned int nsibling; @@ -1967,17 +2671,166 @@ static int live_virtual_mask(void *arg) if (nsibling < 2) continue; - err = mask_virtual_engine(i915, siblings, nsibling); + err = mask_virtual_engine(gt, siblings, nsibling); if (err) - goto out_unlock; + return err; + } + + return 0; +} + +static int preserved_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + struct i915_request *last = NULL; + struct i915_gem_context *ctx; + struct intel_context *ve; + struct i915_vma *scratch; + struct igt_live_test t; + unsigned int n; + int err = 0; + u32 *cs; + + ctx = kernel_context(gt->i915); + if (!ctx) + return -ENOMEM; + + scratch = create_scratch(siblings[0]->gt); + if (IS_ERR(scratch)) { + err = PTR_ERR(scratch); + goto out_close; } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); + ve = intel_execlists_create_virtual(ctx, siblings, nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + goto out_scratch; + } + + err = intel_context_pin(ve); + if (err) + goto out_put; + + err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); + if (err) + goto out_unpin; + + for (n = 0; n < NUM_GPR_DW; n++) { + struct intel_engine_cs *engine = siblings[n % nsibling]; + struct i915_request *rq; + + rq = i915_request_create(ve); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_end; + } + + i915_request_put(last); + last = i915_request_get(rq); + + cs = intel_ring_begin(rq, 8); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto out_end; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); + *cs++ = n + 1; + + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + /* Restrict this request to run on a particular engine */ + rq->execution_mask = engine->mask; + i915_request_add(rq); + } + + if (i915_request_wait(last, 0, HZ / 5) < 0) { + err = -ETIME; + goto out_end; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out_end; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + if 
(cs[n] != n) { + pr_err("Incorrect value[%d] found for GPR[%d]\n", + cs[n], n); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +out_end: + if (igt_live_test_end(&t)) + err = -EIO; + i915_request_put(last); +out_unpin: + intel_context_unpin(ve); +out_put: + intel_context_put(ve); +out_scratch: + i915_vma_unpin_and_release(&scratch, 0); +out_close: + kernel_context_close(ctx); return err; } -static int bond_virtual_engine(struct drm_i915_private *i915, +static int live_virtual_preserved(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class, inst; + + /* + * Check that the context image retains non-privileged (user) registers + * from one engine to the next. For this we check that the CS_GPR + * are preserved. + */ + + if (USES_GUC_SUBMISSION(gt->i915)) + return 0; + + /* As we use CS_GPR we cannot run before they existed on all engines. */ + if (INTEL_GEN(gt->i915) < 9) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + int nsibling, err; + + nsibling = 0; + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!gt->engine_class[class][inst]) + continue; + + siblings[nsibling++] = gt->engine_class[class][inst]; + } + if (nsibling < 2) + continue; + + err = preserved_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + +static int bond_virtual_engine(struct intel_gt *gt, unsigned int class, struct intel_engine_cs **siblings, unsigned int nsibling, @@ -1993,13 +2846,13 @@ static int bond_virtual_engine(struct drm_i915_private *i915, GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (!ctx) return -ENOMEM; err = 0; rq[0] = ERR_PTR(-ENOMEM); - for_each_engine(master, i915, id) { + for_each_engine(master, gt, id) { struct i915_sw_fence fence = {}; if (master->class == class) @@ -2104,7 +2957,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915, out: for (n = 0; !IS_ERR(rq[n]); n++) i915_request_put(rq[n]); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; kernel_context_close(ctx); @@ -2121,17 +2974,14 @@ static int live_virtual_bond(void *arg) { "schedule", BOND_SCHEDULE }, { }, }; - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - struct intel_gt *gt = &i915->gt; unsigned int class, inst; - int err = 0; + int err; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { const struct phase *p; int nsibling; @@ -2148,38 +2998,42 @@ static int live_virtual_bond(void *arg) continue; for (p = phases; p->name; p++) { - err = bond_virtual_engine(i915, + err = bond_virtual_engine(gt, class, siblings, nsibling, p->flags); if (err) { pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", __func__, p->name, class, nsibling, err); - goto out_unlock; + return err; } } } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } int intel_execlists_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_sanitycheck), + SUBTEST(live_unlite_switch), + SUBTEST(live_unlite_preempt), SUBTEST(live_timeslice_preempt), + SUBTEST(live_timeslice_queue), SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), SUBTEST(live_late_preempt), SUBTEST(live_nopreempt), + 
SUBTEST(live_preempt_cancel), SUBTEST(live_suppress_self_preempt), SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt), SUBTEST(live_preempt_hang), + SUBTEST(live_preempt_timeout), SUBTEST(live_preempt_smoke), SUBTEST(live_virtual_engine), SUBTEST(live_virtual_mask), + SUBTEST(live_virtual_preserved), SUBTEST(live_virtual_bond), }; @@ -2189,5 +3043,512 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_live_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); +} + +static void hexdump(const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + pr_info("*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + pr_info("[%04zx] %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + +static int live_lrc_layout(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 *mem; + int err; + + /* + * Check the registers offsets we use to create the initial reg state + * match the layout saved by HW. + */ + + mem = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!mem) + return -ENOMEM; + + err = 0; + for_each_engine(engine, gt, id) { + u32 *hw, *lrc; + int dw; + + if (!engine->default_state) + continue; + + hw = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + break; + } + hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + + lrc = memset(mem, 0, PAGE_SIZE); + execlists_init_reg_state(lrc, + engine->kernel_context, + engine, + engine->kernel_context->ring, + true); + + dw = 0; + do { + u32 lri = hw[dw]; + + if (lri == 0) { + dw++; + continue; + } + + if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + pr_err("%s: Expected LRI command at dword %d, found %08x\n", + engine->name, dw, lri); + err = -EINVAL; + break; + } + + if (lrc[dw] != lri) { + pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", + engine->name, dw, lri, lrc[dw]); + err = -EINVAL; + break; + } + + lri &= 0x7f; + lri++; + dw++; + + while (lri) { + if (hw[dw] != lrc[dw]) { + pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", + engine->name, dw, hw[dw], lrc[dw]); + err = -EINVAL; + break; + } + + /* + * Skip over the actual register value as we + * expect that to differ. + */ + dw += 2; + lri -= 2; + } + } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + if (err) { + pr_info("%s: HW register image:\n", engine->name); + hexdump(hw, PAGE_SIZE); + + pr_info("%s: SW register image:\n", engine->name); + hexdump(lrc, PAGE_SIZE); + } + + i915_gem_object_unpin_map(engine->default_state); + if (err) + break; + } + + kfree(mem); + return err; +} + +static int find_offset(const u32 *lri, u32 offset) +{ + int i; + + for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) + if (lri[i] == offset) + return i; + + return -1; +} + +static int live_lrc_fixed(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * Check the assumed register offsets match the actual locations in + * the context image. 
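live_lrc_fixed() below cross-checks each assumed register offset against the engine's saved default context image; the lookup step in isolation (illustrative sketch, invented helper name):

static int sketch_find_ring_head(struct intel_engine_cs *engine)
{
        u32 *hw;
        int dw;

        if (!engine->default_state)
                return -ENODEV;

        hw = i915_gem_object_pin_map(engine->default_state, I915_MAP_WB);
        if (IS_ERR(hw))
                return PTR_ERR(hw);
        hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);   /* skip to the register state */

        dw = find_offset(hw, i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)));

        i915_gem_object_unpin_map(engine->default_state);
        return dw;      /* dword index within the image, or -1 */
}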
+ */ + + for_each_engine(engine, gt, id) { + const struct { + u32 reg; + u32 offset; + const char *name; + } tbl[] = { + { + i915_mmio_reg_offset(RING_START(engine->mmio_base)), + CTX_RING_BUFFER_START - 1, + "RING_START" + }, + { + i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), + CTX_RING_BUFFER_CONTROL - 1, + "RING_CTL" + }, + { + i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), + CTX_RING_HEAD - 1, + "RING_HEAD" + }, + { + i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), + CTX_RING_TAIL - 1, + "RING_TAIL" + }, + { + i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), + lrc_ring_mi_mode(engine), + "RING_MI_MODE" + }, + { + engine->mmio_base + 0x110, + CTX_BB_STATE - 1, + "BB_STATE" + }, + { }, + }, *t; + u32 *hw; + + if (!engine->default_state) + continue; + + hw = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + break; + } + hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + + for (t = tbl; t->name; t++) { + int dw = find_offset(hw, t->reg); + + if (dw != t->offset) { + pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", + engine->name, + t->name, + t->reg, + dw, + t->offset); + err = -EINVAL; + } + } + + i915_gem_object_unpin_map(engine->default_state); + } + + return err; +} + +static int __live_lrc_state(struct i915_gem_context *fixme, + struct intel_engine_cs *engine, + struct i915_vma *scratch) +{ + struct intel_context *ce; + struct i915_request *rq; + enum { + RING_START_IDX = 0, + RING_TAIL_IDX, + MAX_IDX + }; + u32 expected[MAX_IDX]; + u32 *cs; + int err; + int n; + + ce = intel_context_create(fixme, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) + goto err_put; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + cs = intel_ring_begin(rq, 4 * MAX_IDX); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(rq); + goto err_unpin; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); + *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); + *cs++ = 0; + + expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); + *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); + *cs++ = 0; + + i915_request_get(rq); + i915_request_add(rq); + + intel_engine_flush_submission(engine); + expected[RING_TAIL_IDX] = ce->ring->tail; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto err_rq; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + for (n = 0; n < MAX_IDX; n++) { + if (cs[n] != expected[n]) { + pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", + engine->name, n, cs[n], expected[n]); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +err_rq: + i915_request_put(rq); +err_unpin: + intel_context_unpin(ce); +err_put: + intel_context_put(ce); + return err; +} + +static int live_lrc_state(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *fixme; + struct i915_vma *scratch; + enum intel_engine_id id; + int err = 0; + + /* + * Check the live register state matches what we expect for this + * intel_context. 
+ */ + + fixme = kernel_context(gt->i915); + if (!fixme) + return -ENOMEM; + + scratch = create_scratch(gt); + if (IS_ERR(scratch)) { + err = PTR_ERR(scratch); + goto out_close; + } + + for_each_engine(engine, gt, id) { + err = __live_lrc_state(fixme, engine, scratch); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + i915_vma_unpin_and_release(&scratch, 0); +out_close: + kernel_context_close(fixme); + return err; +} + +static int gpr_make_dirty(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + u32 *cs; + int n; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = CS_GPR(engine, n); + *cs++ = STACK_MAGIC; + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + i915_request_add(rq); + + return 0; +} + +static int __live_gpr_clear(struct i915_gem_context *fixme, + struct intel_engine_cs *engine, + struct i915_vma *scratch) +{ + struct intel_context *ce; + struct i915_request *rq; + u32 *cs; + int err; + int n; + + if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) + return 0; /* GPR only on rcs0 for gen8 */ + + err = gpr_make_dirty(engine); + if (err) + return err; + + ce = intel_context_create(fixme, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_put; + } + + cs = intel_ring_begin(rq, 4 * NUM_GPR_DW); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(rq); + goto err_put; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto err_rq; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + if (cs[n]) { + pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", + engine->name, + n / 2, n & 1 ? "udw" : "ldw", + cs[n]); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +err_rq: + i915_request_put(rq); +err_put: + intel_context_put(ce); + return err; +} + +static int live_gpr_clear(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *fixme; + struct i915_vma *scratch; + enum intel_engine_id id; + int err = 0; + + /* + * Check that GPR registers are cleared in new contexts as we need + * to avoid leaking any information from previous contexts. 
+ */ + + fixme = kernel_context(gt->i915); + if (!fixme) + return -ENOMEM; + + scratch = create_scratch(gt); + if (IS_ERR(scratch)) { + err = PTR_ERR(scratch); + goto out_close; + } + + for_each_engine(engine, gt, id) { + err = __live_gpr_clear(fixme, engine, scratch); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + i915_vma_unpin_and_release(&scratch, 0); +out_close: + kernel_context_close(fixme); + return err; +} + +int intel_lrc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_lrc_layout), + SUBTEST(live_lrc_fixed), + SUBTEST(live_lrc_state), + SUBTEST(live_gpr_clear), + }; + + if (!HAS_LOGICAL_RING_CONTEXTS(i915)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 00a4f60cdfd5..6ad6aca315f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -17,7 +17,7 @@ static int igt_global_reset(void *arg) /* Check that we can issue a global GPU reset */ igt_global_reset_lock(gt); - wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); reset_count = i915_reset_count(&gt->i915->gpu_error); @@ -28,7 +28,7 @@ static int igt_global_reset(void *arg) err = -EINVAL; } - intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) @@ -45,14 +45,14 @@ static int igt_wedged_reset(void *arg) /* Check that we can recover a wedged device with a GPU reset */ igt_global_reset_lock(gt); - wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); intel_gt_set_wedged(gt); GEM_BUG_ON(!intel_gt_is_wedged(gt)); intel_gt_reset(gt, ALL_ENGINES, NULL); - intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); return intel_gt_is_wedged(gt) ?
-EIO : 0; @@ -112,7 +112,7 @@ static int igt_atomic_engine_reset(void *arg) /* Check that the resets are usable from atomic context */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (USES_GUC_SUBMISSION(gt->i915)) @@ -125,8 +125,8 @@ static int igt_atomic_engine_reset(void *arg) if (!igt_force_reset(gt)) goto out_unlock; - for_each_engine(engine, gt->i915, id) { - tasklet_disable_nosync(&engine->execlists.tasklet); + for_each_engine(engine, gt, id) { + tasklet_disable(&engine->execlists.tasklet); intel_engine_pm_get(engine); for (p = igt_atomic_phases; p->name; p++) { @@ -170,7 +170,7 @@ int intel_reset_live_selftests(struct drm_i915_private *i915) }; struct intel_gt *gt = &i915->gt; - if (!intel_has_gpu_reset(gt->i915)) + if (!intel_has_gpu_reset(gt)) return 0; if (intel_gt_is_wedged(gt)) diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 321481403165..f04a59fe5d2c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -6,8 +6,10 @@ #include <linux/prime_numbers.h> -#include "gem/i915_gem_pm.h" +#include "intel_engine_pm.h" #include "intel_gt.h" +#include "intel_gt_requests.h" +#include "intel_ring.h" #include "../selftests/i915_random.h" #include "../i915_selftest.h" @@ -34,7 +36,7 @@ static unsigned long hwsp_cacheline(struct intel_timeline *tl) #define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) struct mock_hwsp_freelist { - struct drm_i915_private *i915; + struct intel_gt *gt; struct radix_tree_root cachelines; struct intel_timeline **history; unsigned long count, max; @@ -67,7 +69,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, unsigned long cacheline; int err; - tl = intel_timeline_create(&state->i915->gt, NULL); + tl = intel_timeline_create(state->gt, NULL); if (IS_ERR(tl)) return PTR_ERR(tl); @@ -105,6 +107,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, static int mock_hwsp_freelist(void *arg) { struct mock_hwsp_freelist state; + struct drm_i915_private *i915; const struct { const char *name; unsigned int flags; @@ -116,12 +119,14 @@ static int mock_hwsp_freelist(void *arg) unsigned int na; int err = 0; + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); - state.i915 = mock_gem_device(); - if (!state.i915) - return -ENOMEM; + state.gt = &i915->gt; /* * Create a bunch of timelines and check that their HWSP do not overlap. 
@@ -136,7 +141,6 @@ static int mock_hwsp_freelist(void *arg) goto err_put; } - mutex_lock(&state.i915->drm.struct_mutex); for (p = phases; p->name; p++) { pr_debug("%s(%s)\n", __func__, p->name); for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { @@ -149,10 +153,9 @@ static int mock_hwsp_freelist(void *arg) out: for (na = 0; na < state.max; na++) __mock_hwsp_record(&state, na, NULL); - mutex_unlock(&state.i915->drm.struct_mutex); kfree(state.history); err_put: - drm_dev_put(&state.i915->drm); + drm_dev_put(&i915->drm); return err; } @@ -449,8 +452,6 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) struct i915_request *rq; int err; - lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */ - err = intel_timeline_pin(tl); if (err) { rq = ERR_PTR(err); @@ -461,10 +462,14 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) if (IS_ERR(rq)) goto out_unpin; + i915_request_get(rq); + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); i915_request_add(rq); - if (err) + if (err) { + i915_request_put(rq); rq = ERR_PTR(err); + } out_unpin: intel_timeline_unpin(tl); @@ -475,11 +480,11 @@ out: } static struct intel_timeline * -checked_intel_timeline_create(struct drm_i915_private *i915) +checked_intel_timeline_create(struct intel_gt *gt) { struct intel_timeline *tl; - tl = intel_timeline_create(&i915->gt, NULL); + tl = intel_timeline_create(gt, NULL); if (IS_ERR(tl)) return tl; @@ -496,11 +501,10 @@ checked_intel_timeline_create(struct drm_i915_private *i915) static int live_hwsp_engine(void *arg) { #define NUM_TIMELINES 4096 - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_timeline **timelines; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count, n; int err = 0; @@ -515,37 +519,40 @@ static int live_hwsp_engine(void *arg) if (!timelines) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); + for (n = 0; n < NUM_TIMELINES; n++) { struct intel_timeline *tl; struct i915_request *rq; - tl = checked_intel_timeline_create(i915); + tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { err = PTR_ERR(tl); - goto out; + break; } rq = tl_write(tl, engine, count); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); - goto out; + break; } timelines[count++] = tl; + i915_request_put(rq); } + + intel_engine_pm_put(engine); + if (err) + break; } -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; for (n = 0; n < count; n++) { @@ -559,11 +566,7 @@ out: intel_timeline_put(tl); } - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - kvfree(timelines); - return err; #undef NUM_TIMELINES } @@ -571,11 +574,10 @@ out: static int live_hwsp_alternate(void *arg) { #define NUM_TIMELINES 4096 - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_timeline **timelines; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count, n; int err = 0; @@ -591,25 +593,25 @@ static int live_hwsp_alternate(void *arg) if (!timelines) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; for (n = 0; n < NUM_TIMELINES; 
n++) { - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct intel_timeline *tl; struct i915_request *rq; if (!intel_engine_can_store_dword(engine)) continue; - tl = checked_intel_timeline_create(i915); + tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { + intel_engine_pm_put(engine); err = PTR_ERR(tl); goto out; } + intel_engine_pm_get(engine); rq = tl_write(tl, engine, count); + intel_engine_pm_put(engine); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); @@ -617,11 +619,12 @@ static int live_hwsp_alternate(void *arg) } timelines[count++] = tl; + i915_request_put(rq); } } out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; for (n = 0; n < count; n++) { @@ -635,22 +638,17 @@ out: intel_timeline_put(tl); } - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - kvfree(timelines); - return err; #undef NUM_TIMELINES } static int live_hwsp_wrap(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct intel_timeline *tl; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = 0; /* @@ -658,14 +656,10 @@ static int live_hwsp_wrap(void *arg) * foreign GPU references. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + tl = intel_timeline_create(gt, NULL); + if (IS_ERR(tl)) + return PTR_ERR(tl); - tl = intel_timeline_create(&i915->gt, NULL); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out_rpm; - } if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) goto out_free; @@ -673,7 +667,7 @@ static int live_hwsp_wrap(void *arg) if (err) goto out_free; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { const u32 *hwsp_seqno[2]; struct i915_request *rq; u32 seqno[2]; @@ -681,7 +675,9 @@ static int live_hwsp_wrap(void *arg) if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); rq = i915_request_create(engine->kernel_context); + intel_engine_pm_put(engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out; @@ -743,29 +739,24 @@ static int live_hwsp_wrap(void *arg) goto out; } - i915_retire_requests(i915); /* recycle HWSP */ + intel_gt_retire_requests(gt); /* recycle HWSP */ } out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; intel_timeline_unpin(tl); out_free: intel_timeline_put(tl); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; } static int live_hwsp_recycle(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count; int err = 0; @@ -775,38 +766,38 @@ static int live_hwsp_recycle(void *arg) * want to confuse ourselves or the GPU. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { IGT_TIMEOUT(end_time); if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); + do { struct intel_timeline *tl; struct i915_request *rq; - tl = checked_intel_timeline_create(i915); + tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { err = PTR_ERR(tl); - goto out; + break; } rq = tl_write(tl, engine, count); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); - goto out; + break; } if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Wait for timeline writes timed out!\n"); + i915_request_put(rq); intel_timeline_put(tl); err = -EIO; - goto out; + break; } if (*tl->hwsp_seqno != count) { @@ -815,17 +806,18 @@ static int live_hwsp_recycle(void *arg) err = -EINVAL; } + i915_request_put(rq); intel_timeline_put(tl); count++; if (err) - goto out; + break; } while (!__igt_timeout(end_time, NULL)); - } -out: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + intel_engine_pm_put(engine); + if (err) + break; + } return err; } @@ -842,5 +834,5 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_live_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index d06d68ac2a3b..abce6e4ec9c0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -33,8 +33,32 @@ struct wa_lists { } engine[I915_NUM_ENGINES]; }; +static int request_add_sync(struct i915_request *rq, int err) +{ + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -EIO; + i915_request_put(rq); + + return err; +} + +static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) +{ + int err = 0; + + i915_request_get(rq); + i915_request_add(rq); + if (spin && !igt_wait_for_spinner(spin, rq)) + err = -ETIMEDOUT; + i915_request_put(rq); + + return err; +} + static void -reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) +reference_lists_init(struct intel_gt *gt, struct wa_lists *lists) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -42,10 +66,10 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) memset(lists, 0, sizeof(*lists)); wa_init_start(&lists->gt_wa_list, "GT_REF", "global"); - gt_init_workarounds(i915, &lists->gt_wa_list); + gt_init_workarounds(gt->i915, &lists->gt_wa_list); wa_init_finish(&lists->gt_wa_list); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_wa_list *wal = &lists->engine[id].wa_list; wa_init_start(wal, "REF", engine->name); @@ -59,12 +83,12 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) } static void -reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists) +reference_lists_fini(struct intel_gt *gt, struct wa_lists *lists) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) intel_wa_list_free(&lists->engine[id].wa_list); intel_wa_list_free(&lists->gt_wa_list); @@ -191,10 +215,10 @@ static int check_whitelist(struct i915_gem_context *ctx, err = 0; i915_gem_object_lock(results); - 
intel_wedge_on_timeout(&wedge, &ctx->i915->gt, HZ / 5) /* safety net! */ + intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */ err = i915_gem_object_set_to_cpu_domain(results, false); i915_gem_object_unlock(results); - if (intel_gt_is_wedged(&ctx->i915->gt)) + if (intel_gt_is_wedged(engine->gt)) err = -EIO; if (err) goto out_put; @@ -243,7 +267,6 @@ switch_to_scratch_context(struct intel_engine_cs *engine, struct i915_gem_context *ctx; struct intel_context *ce; struct i915_request *rq; - intel_wakeref_t wakeref; int err = 0; ctx = kernel_context(engine->i915); @@ -255,12 +278,9 @@ switch_to_scratch_context(struct intel_engine_cs *engine, ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(&engine->i915->runtime_pm, wakeref) - rq = igt_spinner_create_request(spin, ce, MI_NOOP); + rq = igt_spinner_create_request(spin, ce, MI_NOOP); intel_context_put(ce); - kernel_context_close(ctx); if (IS_ERR(rq)) { spin = NULL; @@ -268,17 +288,12 @@ switch_to_scratch_context(struct intel_engine_cs *engine, goto err; } - i915_request_add(rq); - - if (spin && !igt_wait_for_spinner(spin, rq)) { - pr_err("Spinner failed to start\n"); - err = -ETIMEDOUT; - } - + err = request_add_spin(rq, spin); err: if (err && spin) igt_spinner_end(spin); + kernel_context_close(ctx); return err; } @@ -313,7 +328,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, if (err) goto out_spin; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) + with_intel_runtime_pm(engine->uncore->rpm, wakeref) err = reset(engine); igt_spinner_end(&spin); @@ -355,6 +370,7 @@ out_ctx: static struct i915_vma *create_batch(struct i915_gem_context *ctx) { struct drm_i915_gem_object *obj; + struct i915_address_space *vm; struct i915_vma *vma; int err; @@ -362,7 +378,9 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx) if (IS_ERR(obj)) return ERR_CAST(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -463,12 +481,15 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, 0xffff00ff, 0xffffffff, }; + struct i915_address_space *vm; struct i915_vma *scratch; struct i915_vma *batch; int err = 0, i, v; u32 *cs, *results; - scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1); + vm = i915_gem_context_get_vm_rcu(ctx); + scratch = create_scratch(vm, 2 * ARRAY_SIZE(values) + 1); + i915_vm_put(vm); if (IS_ERR(scratch)) return PTR_ERR(scratch); @@ -492,6 +513,9 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, ro_reg = ro_register(reg); + /* Clear non priv flags */ + reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; + srm = MI_STORE_REGISTER_MEM; lrm = MI_LOAD_REGISTER_MEM; if (INTEL_GEN(ctx->i915) >= 8) @@ -565,6 +589,14 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, goto err_request; } + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + if (err) + goto err_request; + err = engine->emit_bb_start(rq, batch->node.start, PAGE_SIZE, 0); @@ -572,15 +604,11 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, goto err_request; err_request: - i915_request_add(rq); - if (err) - goto out_batch; - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = request_add_sync(rq, err); + if (err) { 
pr_err("%s: Futzing %x timedout; cancelling test\n", engine->name, reg); - intel_gt_set_wedged(&ctx->i915->gt); - err = -EIO; + intel_gt_set_wedged(engine->gt); goto out_batch; } @@ -668,7 +696,7 @@ out_unpin: break; } - if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED)) + if (igt_flush_test(ctx->i915)) err = -EIO; out_batch: i915_vma_unpin_and_release(&batch, 0); @@ -679,36 +707,29 @@ out_scratch: static int live_dirty_whitelist(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct i915_gem_context *ctx; enum intel_engine_id id; - intel_wakeref_t wakeref; struct drm_file *file; int err = 0; /* Can the user write to the whitelisted registers? */ - if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */ + if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */ return 0; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + file = mock_file(gt->i915); + if (IS_ERR(file)) + return PTR_ERR(file); - mutex_unlock(&i915->drm.struct_mutex); - file = mock_file(i915); - mutex_lock(&i915->drm.struct_mutex); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto out_rpm; - } - - ctx = live_context(i915, file); + ctx = live_context(gt->i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_file; } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { if (engine->whitelist.count == 0) continue; @@ -718,45 +739,43 @@ static int live_dirty_whitelist(void *arg) } out_file: - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); - mutex_lock(&i915->drm.struct_mutex); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mock_file_free(gt->i915, file); return err; } static int live_reset_whitelist(void *arg) { - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS0]; + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; int err = 0; /* If we reset the gpu, we should not lose the RING_NONPRIV */ + igt_global_reset_lock(gt); - if (!engine || engine->whitelist.count == 0) - return 0; - - igt_global_reset_lock(&i915->gt); + for_each_engine(engine, gt, id) { + if (engine->whitelist.count == 0) + continue; - if (intel_has_reset_engine(i915)) { - err = check_whitelist_across_reset(engine, - do_engine_reset, - "engine"); - if (err) - goto out; - } + if (intel_has_reset_engine(gt)) { + err = check_whitelist_across_reset(engine, + do_engine_reset, + "engine"); + if (err) + goto out; + } - if (intel_has_gpu_reset(i915)) { - err = check_whitelist_across_reset(engine, - do_device_reset, - "device"); - if (err) - goto out; + if (intel_has_gpu_reset(gt)) { + err = check_whitelist_across_reset(engine, + do_device_reset, + "device"); + if (err) + goto out; + } } out: - igt_global_reset_unlock(&i915->gt); + igt_global_reset_unlock(gt); return err; } @@ -772,6 +791,14 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, if (IS_ERR(rq)) return PTR_ERR(rq); + i915_vma_lock(results); + err = i915_request_await_object(rq, results->obj, true); + if (err == 0) + err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(results); + if (err) + goto err_req; + srm = MI_STORE_REGISTER_MEM; if (INTEL_GEN(ctx->i915) >= 8) srm++; @@ -786,8 +813,8 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, u64 offset = results->node.start + sizeof(u32) * i; u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg); - /* Clear access permission field */ - reg &= 
~RING_FORCE_TO_NONPRIV_ACCESS_MASK; + /* Clear non priv flags */ + reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; *cs++ = srm; *cs++ = reg; @@ -797,12 +824,7 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, intel_ring_advance(rq, cs); err_req: - i915_request_add(rq); - - if (i915_request_wait(rq, 0, HZ / 5) < 0) - err = -EIO; - - return err; + return request_add_sync(rq, err); } static int scrub_whitelisted_registers(struct i915_gem_context *ctx, @@ -830,6 +852,9 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx, if (ro_register(reg)) continue; + /* Clear non priv flags */ + reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; + *cs++ = reg; *cs++ = 0xffffffff; } @@ -850,13 +875,19 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx, goto err_request; } + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + if (err) + goto err_request; + /* Perform the writes from an unprivileged "user" batch */ err = engine->emit_bb_start(rq, batch->node.start, 0, 0); err_request: - i915_request_add(rq); - if (i915_request_wait(rq, 0, HZ / 5) < 0) - err = -EIO; + err = request_add_sync(rq, err); err_unpin: i915_gem_object_unpin_map(batch->obj); @@ -973,7 +1004,7 @@ err_a: static int live_isolated_whitelist(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct { struct i915_gem_context *ctx; struct i915_vma *scratch[2]; @@ -987,40 +1018,46 @@ static int live_isolated_whitelist(void *arg) * invisible to a second context. */ - if (!intel_engines_has_context_isolation(i915)) - return 0; - - if (!i915->kernel_context->vm) + if (!intel_engines_has_context_isolation(gt->i915)) return 0; for (i = 0; i < ARRAY_SIZE(client); i++) { + struct i915_address_space *vm; struct i915_gem_context *c; - c = kernel_context(i915); + c = kernel_context(gt->i915); if (IS_ERR(c)) { err = PTR_ERR(c); goto err; } - client[i].scratch[0] = create_scratch(c->vm, 1024); + vm = i915_gem_context_get_vm_rcu(c); + + client[i].scratch[0] = create_scratch(vm, 1024); if (IS_ERR(client[i].scratch[0])) { err = PTR_ERR(client[i].scratch[0]); + i915_vm_put(vm); kernel_context_close(c); goto err; } - client[i].scratch[1] = create_scratch(c->vm, 1024); + client[i].scratch[1] = create_scratch(vm, 1024); if (IS_ERR(client[i].scratch[1])) { err = PTR_ERR(client[i].scratch[1]); i915_vma_unpin_and_release(&client[i].scratch[0], 0); + i915_vm_put(vm); kernel_context_close(c); goto err; } client[i].ctx = c; + i915_vm_put(vm); } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { + if (!engine->kernel_context->vm) + continue; + if (!whitelist_writable_count(engine)) continue; @@ -1074,7 +1111,7 @@ err: kernel_context_close(client[i].ctx); } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; return err; @@ -1109,16 +1146,16 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists, static int live_gpu_reset_workarounds(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx; intel_wakeref_t wakeref; struct wa_lists lists; bool ok; - if (!intel_has_gpu_reset(i915)) + if (!intel_has_gpu_reset(gt)) return 0; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1126,25 +1163,25 @@ live_gpu_reset_workarounds(void *arg) pr_info("Verifying after GPU reset...\n"); - 
igt_global_reset_lock(&i915->gt); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(i915, &lists); + reference_lists_init(gt, &lists); ok = verify_wa_lists(ctx, &lists, "before reset"); if (!ok) goto out; - intel_gt_reset(&i915->gt, ALL_ENGINES, "live_workarounds"); + intel_gt_reset(gt, ALL_ENGINES, "live_workarounds"); ok = verify_wa_lists(ctx, &lists, "after reset"); out: i915_gem_context_unlock_engines(ctx); kernel_context_close(ctx); - reference_lists_fini(i915, &lists); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(&i915->gt); + reference_lists_fini(gt, &lists); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + igt_global_reset_unlock(gt); return ok ? 0 : -ESRCH; } @@ -1152,7 +1189,7 @@ out: static int live_engine_reset_workarounds(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_engines_iter it; struct i915_gem_context *ctx; struct intel_context *ce; @@ -1162,17 +1199,17 @@ live_engine_reset_workarounds(void *arg) struct wa_lists lists; int ret = 0; - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt)) return 0; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (IS_ERR(ctx)) return PTR_ERR(ctx); - igt_global_reset_lock(&i915->gt); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(i915, &lists); + reference_lists_init(gt, &lists); for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { struct intel_engine_cs *engine = ce->engine; @@ -1205,12 +1242,10 @@ live_engine_reset_workarounds(void *arg) goto err; } - i915_request_add(rq); - - if (!igt_wait_for_spinner(&spin, rq)) { + ret = request_add_spin(rq, &spin); + if (ret) { pr_err("Spinner failed to start\n"); igt_spinner_fini(&spin); - ret = -ETIMEDOUT; goto err; } @@ -1227,12 +1262,12 @@ live_engine_reset_workarounds(void *arg) } err: i915_gem_context_unlock_engines(ctx); - reference_lists_fini(i915, &lists); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(&i915->gt); + reference_lists_fini(gt, &lists); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + igt_global_reset_unlock(gt); kernel_context_close(ctx); - igt_flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(gt->i915); return ret; } @@ -1246,14 +1281,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) SUBTEST(live_gpu_reset_workarounds), SUBTEST(live_engine_reset_workarounds), }; - int err; if (intel_gt_is_wedged(&i915->gt)) return 0; - mutex_lock(&i915->drm.struct_mutex); - err = i915_subtests(tests, i915); - mutex_unlock(&i915->drm.struct_mutex); - - return err; + return intel_gt_live_subtests(tests, &i915->gt); } diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c index 598170efcaf6..2a77c051f36a 100644 --- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c @@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context) mutex_init(&timeline->mutex); - INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 
249c747e9756..3ee4a4e7689d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -4,11 +4,34 @@ */ #include "gt/intel_gt.h" +#include "gt/intel_gt_irq.h" +#include "gt/intel_gt_pm_irq.h" #include "intel_guc.h" #include "intel_guc_ads.h" #include "intel_guc_submission.h" #include "i915_drv.h" +/** + * DOC: GuC + * + * The GuC is a microcontroller inside the GT HW, introduced in gen9. The GuC is + * designed to offload some of the functionality usually performed by the host + * driver; currently the main operations it can take care of are: + * + * - Authentication of the HuC, which is required to fully enable HuC usage. + * - Low latency graphics context scheduling (a.k.a. GuC submission). + * - GT Power management. + * + * The enable_guc module parameter can be used to select which of those + * operations to enable within GuC. Note that not all the operations are + * supported on all gen9+ platforms. + * + * Enabling the GuC is not mandatory and therefore the firmware is only loaded + * if at least one of the operations is selected. However, not loading the GuC + * might result in the loss of some features that do require the GuC (currently + * just the HuC, but more are expected to land in the future). + */ + static void gen8_guc_raise_irq(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -56,6 +79,93 @@ void intel_guc_init_send_regs(struct intel_guc *guc) guc->send_regs.fw_domains = fw_domains; } +static void gen9_reset_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + assert_rpm_wakelock_held(&gt->i915->runtime_pm); + + spin_lock_irq(&gt->irq_lock); + gen6_gt_pm_reset_iir(gt, gt->pm_guc_events); + spin_unlock_irq(&gt->irq_lock); +} + +static void gen9_enable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + assert_rpm_wakelock_held(&gt->i915->runtime_pm); + + spin_lock_irq(&gt->irq_lock); + if (!guc->interrupts.enabled) { + WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) & + gt->pm_guc_events); + guc->interrupts.enabled = true; + gen6_gt_pm_enable_irq(gt, gt->pm_guc_events); + } + spin_unlock_irq(&gt->irq_lock); +} + +static void gen9_disable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + assert_rpm_wakelock_held(&gt->i915->runtime_pm); + + spin_lock_irq(&gt->irq_lock); + guc->interrupts.enabled = false; + + gen6_gt_pm_disable_irq(gt, gt->pm_guc_events); + + spin_unlock_irq(&gt->irq_lock); + intel_synchronize_irq(gt->i915); + + gen9_reset_guc_interrupts(guc); +} + +static void gen11_reset_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(&gt->irq_lock); + gen11_gt_reset_one_iir(gt, 0, GEN11_GUC); + spin_unlock_irq(&gt->irq_lock); +} + +static void gen11_enable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(&gt->irq_lock); + if (!guc->interrupts.enabled) { + u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); + + WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC)); + intel_uncore_write(gt->uncore, + GEN11_GUC_SG_INTR_ENABLE, events); + intel_uncore_write(gt->uncore, + GEN11_GUC_SG_INTR_MASK, ~events); + guc->interrupts.enabled = true; + } + spin_unlock_irq(&gt->irq_lock); +} + +static void gen11_disable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(&gt->irq_lock); + guc->interrupts.enabled = false; + + intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0); +
intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0); + + spin_unlock_irq(&gt->irq_lock); + intel_synchronize_irq(gt->i915); + + gen11_reset_guc_interrupts(guc); +} + void intel_guc_init_early(struct intel_guc *guc) { struct drm_i915_private *i915 = guc_to_gt(guc)->i915; @@ -82,32 +192,6 @@ void intel_guc_init_early(struct intel_guc *guc) } } -static int guc_shared_data_create(struct intel_guc *guc) -{ - struct i915_vma *vma; - void *vaddr; - - vma = intel_guc_allocate_vma(guc, PAGE_SIZE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - i915_vma_unpin_and_release(&vma, 0); - return PTR_ERR(vaddr); - } - - guc->shared_data = vma; - guc->shared_data_vaddr = vaddr; - - return 0; -} - -static void guc_shared_data_destroy(struct intel_guc *guc) -{ - i915_vma_unpin_and_release(&guc->shared_data, I915_VMA_RELEASE_MAP); -} - static u32 guc_ctl_debug_flags(struct intel_guc *guc) { u32 level = intel_guc_log_get_level(&guc->log); @@ -254,14 +338,9 @@ int intel_guc_init(struct intel_guc *guc) if (ret) goto err_fetch; - ret = guc_shared_data_create(guc); - if (ret) - goto err_fw; - GEM_BUG_ON(!guc->shared_data); - ret = intel_guc_log_create(&guc->log); if (ret) - goto err_shared; + goto err_fw; ret = intel_guc_ads_create(guc); if (ret) @@ -296,8 +375,6 @@ err_ads: intel_guc_ads_destroy(guc); err_log: intel_guc_log_destroy(&guc->log); -err_shared: - guc_shared_data_destroy(guc); err_fw: intel_uc_fw_fini(&guc->fw); err_fetch: @@ -322,7 +399,6 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); - guc_shared_data_destroy(guc); intel_uc_fw_fini(&guc->fw); intel_uc_fw_cleanup_fetch(&guc->fw); } @@ -478,6 +554,13 @@ int intel_guc_suspend(struct intel_guc *guc) }; /* + * If GuC communication is enabled but submission is not supported, + * we do not need to suspend the GuC. + */ + if (!intel_guc_submission_is_enabled(guc)) + return 0; + + /* * The ENTER_S_STATE action queues the save/restore operation in GuC FW * and then returns, so waiting on the H2G is not enough to guarantee * GuC is done. When all the processing is done, GuC writes @@ -518,19 +601,9 @@ int intel_guc_suspend(struct intel_guc *guc) int intel_guc_reset_engine(struct intel_guc *guc, struct intel_engine_cs *engine) { - u32 data[7]; - - GEM_BUG_ON(!guc->execbuf_client); - - data[0] = INTEL_GUC_ACTION_REQUEST_ENGINE_RESET; - data[1] = engine->guc_id; - data[2] = 0; - data[3] = 0; - data[4] = 0; - data[5] = guc->execbuf_client->stage_id; - data[6] = intel_guc_ggtt_offset(guc, guc->shared_data); + /* XXX: to be implemented with submission interface rework */ - return intel_guc_send(guc, data, ARRAY_SIZE(data)); + return -ENODEV; } /** @@ -544,13 +617,27 @@ int intel_guc_resume(struct intel_guc *guc) GUC_POWER_D0, }; + /* + * If GuC communication is enabled but submission is not supported, + * we do not need to resume the GuC but we do need to enable the + * GuC communication on resume (above). + */ + if (!intel_guc_submission_is_enabled(guc)) + return 0; + return intel_guc_send(guc, action, ARRAY_SIZE(action)); } /** - * DOC: GuC Address Space + * DOC: GuC Memory Management * - * The layout of GuC address space is shown below: + * GuC can't allocate any memory for its own usage, so all the allocations must + * be handled by the host driver.
GuC accesses the memory via the GGTT, with the + * exception of the top and bottom parts of the 4GB address space, which are + * instead re-mapped by the GuC HW to memory location of the FW itself (WOPCM) + * or other parts of the HW. The driver must take care not to place objects that + * the GuC is going to access in these reserved ranges. The layout of the GuC + * address space is shown below: * * :: * diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2b2f046d3cc3..e6400204a2bd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -47,8 +47,6 @@ struct intel_guc { struct i915_vma *stage_desc_pool; void *stage_desc_pool_vaddr; struct ida stage_ids; - struct i915_vma *shared_data; - void *shared_data_vaddr; struct intel_guc_client *execbuf_client; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 1d3cdd67ca2f..a26a85d50209 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -548,6 +548,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30, + INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40, INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, @@ -556,7 +557,6 @@ enum intel_guc_action { INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, - INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 36332064de9c..caed0d57e704 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -226,7 +226,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) mutex_lock(&log->relay.lock); - if (WARN_ON(!intel_guc_log_relay_enabled(log))) + if (WARN_ON(!intel_guc_log_relay_created(log))) goto out_unlock; /* Get the pointer to shared GuC log buffer */ @@ -361,6 +361,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log) { mutex_init(&log->relay.lock); INIT_WORK(&log->relay.flush_work, capture_logs_work); + log->relay.started = false; } static int guc_log_relay_create(struct intel_guc_log *log) @@ -546,7 +547,7 @@ out_unlock: return ret; } -bool intel_guc_log_relay_enabled(const struct intel_guc_log *log) +bool intel_guc_log_relay_created(const struct intel_guc_log *log) { return log->relay.buf_addr; } @@ -560,7 +561,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) mutex_lock(&log->relay.lock); - if (intel_guc_log_relay_enabled(log)) { + if (intel_guc_log_relay_created(log)) { ret = -EEXIST; goto out_unlock; } @@ -585,6 +586,21 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) mutex_unlock(&log->relay.lock); + return 0; + +out_relay: + guc_log_relay_destroy(log); +out_unlock: + mutex_unlock(&log->relay.lock); + + return ret; +} + +int intel_guc_log_relay_start(struct intel_guc_log *log) +{ + if (log->relay.started) + return -EEXIST; + guc_log_enable_flush_events(log); /* @@ -594,47 +610,59 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) */ queue_work(system_highpri_wq, &log->relay.flush_work); - return 0; + log->relay.started = true; -out_relay: - 
guc_log_relay_destroy(log); -out_unlock: - mutex_unlock(&log->relay.lock); - - return ret; + return 0; } void intel_guc_log_relay_flush(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; intel_wakeref_t wakeref; + if (!log->relay.started) + return; + /* * Before initiating the forceful flush, wait for any pending/ongoing * flush to complete otherwise forceful flush may not actually happen. */ flush_work(&log->relay.flush_work); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) + with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref) guc_action_flush_log(guc); /* GuC would have updated log buffer by now, so capture it */ guc_log_capture_logs(log); } -void intel_guc_log_relay_close(struct intel_guc_log *log) +/* + * Stops the relay log. Called from intel_guc_log_relay_close(), so no + * possibility of race with start/flush since relay_write cannot race + * relay_close. + */ +static void guc_log_relay_stop(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + if (!log->relay.started) + return; + guc_log_disable_flush_events(log); intel_synchronize_irq(i915); flush_work(&log->relay.flush_work); + log->relay.started = false; +} + +void intel_guc_log_relay_close(struct intel_guc_log *log) +{ + guc_log_relay_stop(log); + mutex_lock(&log->relay.lock); - GEM_BUG_ON(!intel_guc_log_relay_enabled(log)); + GEM_BUG_ON(!intel_guc_log_relay_created(log)); guc_log_unmap(log); guc_log_relay_destroy(log); mutex_unlock(&log->relay.lock); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 6f764879acb1..c252c022c5fc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -47,6 +47,7 @@ struct intel_guc_log { struct i915_vma *vma; struct { void *buf_addr; + bool started; struct work_struct flush_work; struct rchan *channel; struct mutex lock; @@ -65,8 +66,9 @@ int intel_guc_log_create(struct intel_guc_log *log); void intel_guc_log_destroy(struct intel_guc_log *log); int intel_guc_log_set_level(struct intel_guc_log *log, u32 level); -bool intel_guc_log_relay_enabled(const struct intel_guc_log *log); +bool intel_guc_log_relay_created(const struct intel_guc_log *log); int intel_guc_log_relay_open(struct intel_guc_log *log); +int intel_guc_log_relay_start(struct intel_guc_log *log); void intel_guc_log_relay_flush(struct intel_guc_log *log); void intel_guc_log_relay_close(struct intel_guc_log *log); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index edf194d23c6b..1949346e714e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -83,6 +83,9 @@ #define GEN8_GTCR _MMIO(0x4274) #define GEN8_GTCR_INVALIDATE (1<<0) +#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) +#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) + #define GUC_ARAT_C6DIS _MMIO(0xA178) #define GUC_SHIM_CONTROL _MMIO(0xc064) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index f325d3dd564f..2498c55e0ea5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -6,12 +6,13 @@ #include <linux/circ_buf.h> #include "gem/i915_gem_context.h" - #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include 
"gt/intel_lrc_reg.h" +#include "gt/intel_ring.h" + #include "intel_guc_submission.h" #include "i915_drv.h" @@ -29,6 +30,12 @@ enum { /** * DOC: GuC-based command submission * + * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC + * firmware is moving to an updated submission interface and we plan to + * turn submission back on when that lands. The below documentation (and related + * code) matches the old submission model and will be updated as part of the + * upgrade to the new flow. + * * GuC client: * A intel_guc_client refers to a submission path through GuC. Currently, there * is only one client, which is charged with all submissions to the GuC. This @@ -1004,7 +1011,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) static void guc_interrupts_capture(struct intel_gt *gt) { - struct intel_rps *rps = &gt->i915->gt_pm.rps; + struct intel_rps *rps = &gt->rps; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1014,7 +1021,7 @@ static void guc_interrupts_capture(struct intel_gt *gt) * to GuC */ irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); /* route USER_INTERRUPT to Host, all others are sent to GuC. */ @@ -1050,7 +1057,7 @@ static void guc_interrupts_capture(struct intel_gt *gt) static void guc_interrupts_release(struct intel_gt *gt) { - struct intel_rps *rps = &gt->i915->gt_pm.rps; + struct intel_rps *rps = &gt->rps; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1062,7 +1069,7 @@ static void guc_interrupts_release(struct intel_gt *gt) */ irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); /* route all GT interrupts to the host */ @@ -1119,7 +1126,7 @@ int intel_guc_submission_enable(struct intel_guc *guc) enum intel_engine_id id; int err; - err = i915_inject_load_error(gt->i915, -ENXIO); + err = i915_inject_probe_error(gt->i915, -ENXIO); if (err) return err; @@ -1145,7 +1152,7 @@ int intel_guc_submission_enable(struct intel_guc *guc) /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(gt); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { engine->set_default_submission = guc_set_default_submission; engine->set_default_submission(engine); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index d4625c97b4f9..32a069841c14 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -9,6 +9,34 @@ #include "intel_huc.h" #include "i915_drv.h" +/** + * DOC: HuC + * + * The HuC is a dedicated microcontroller for usage in media HEVC (High + * Efficiency Video Coding) operations. Userspace can directly use the firmware + * capabilities by adding HuC specific commands to batch buffers. + * + * The kernel driver is only responsible for loading the HuC firmware and + * triggering its security authentication, which is performed by the GuC. For + * The GuC to correctly perform the authentication, the HuC binary must be + * loaded before the GuC one. Loading the HuC is optional; however, not using + * the HuC might negatively impact power usage and/or performance of media + * workloads, depending on the use-cases.
+ * + * See https://github.com/intel/media-driver for the latest details on HuC + * functionality. + */ + +/** + * DOC: HuC Memory Management + * + * Similarly to the GuC, the HuC can't do any memory allocations on its own, + * with the difference being that the allocations for HuC usage are handled by + * the userspace driver instead of the kernel one. The HuC accesses the memory + * via the PPGTT belonging to the context loaded on the VCS executing the + * HuC-specific commands. + */ + void intel_huc_init_early(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_gt(huc)->i915; @@ -35,7 +63,7 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc) void *vaddr; int err; - err = i915_inject_load_error(gt->i915, -ENXIO); + err = i915_inject_probe_error(gt->i915, -ENXIO); if (err) return err; @@ -118,10 +146,9 @@ void intel_huc_fini(struct intel_huc *huc) * * Called after HuC and GuC firmware loading during intel_uc_init_hw(). * - * This function pins HuC firmware image object into GGTT. - * Then it invokes GuC action to authenticate passing the offset to RSA - * signature through intel_guc_auth_huc(). It then waits for 50ms for - * firmware verification ACK and unpins the object. + * This function invokes the GuC action to authenticate the HuC firmware, + * passing the offset of the RSA signature to intel_guc_auth_huc(). It then + * waits for up to 50ms for firmware verification ACK. */ int intel_huc_auth(struct intel_huc *huc) { @@ -134,7 +161,7 @@ int intel_huc_auth(struct intel_huc *huc) if (!intel_uc_fw_is_loaded(&huc->fw)) return -ENOEXEC; - ret = i915_inject_load_error(gt->i915, -ENXIO); + ret = i915_inject_probe_error(gt->i915, -ENXIO); if (ret) goto fail; @@ -185,7 +212,7 @@ int intel_huc_check_status(struct intel_huc *huc) if (!intel_huc_is_supported(huc)) return -ENODEV; - with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) status = intel_uncore_read(gt->uncore, huc->status.reg); return (status & huc->status.mask) == huc->status.value; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c index 74602487ed67..d654340d4d03 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -8,21 +8,6 @@ #include "i915_drv.h" /** - * DOC: HuC Firmware - * - * Motivation: - * GEN9 introduces a new dedicated firmware for usage in media HEVC (High - * Efficiency Video Coding) operations. Userspace can use the firmware - * capabilities by adding HuC specific commands to batch buffers. - * - * Implementation: - * The same firmware loader is used as the GuC. However, the actual - * loading to HW is deferred until GEM initialization is done. - * - * Note that HuC firmware loading must be done before GuC loading.
- */ - -/** * intel_huc_fw_init_early() - initializes HuC firmware struct * @huc: intel_huc struct * diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 71ee7ab035cc..629b19377a29 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -20,7 +20,7 @@ static int __intel_uc_reset_hw(struct intel_uc *uc) int ret; u32 guc_status; - ret = i915_inject_load_error(gt->i915, -ENXIO); + ret = i915_inject_probe_error(gt->i915, -ENXIO); if (ret) return ret; @@ -197,7 +197,7 @@ static int guc_enable_communication(struct intel_guc *guc) GEM_BUG_ON(guc_communication_enabled(guc)); - ret = i915_inject_load_error(i915, -ENXIO); + ret = i915_inject_probe_error(i915, -ENXIO); if (ret) return ret; @@ -224,17 +224,7 @@ static int guc_enable_communication(struct intel_guc *guc) return 0; } -static void guc_stop_communication(struct intel_guc *guc) -{ - intel_guc_ct_stop(&guc->ct); - - guc->send = intel_guc_send_nop; - guc->handler = intel_guc_to_host_event_handler_nop; - - guc_clear_mmio_msg(guc); -} - -static void guc_disable_communication(struct intel_guc *guc) +static void __guc_stop_communication(struct intel_guc *guc) { /* * Events generated during or after CT disable are logged by guc in @@ -247,6 +237,20 @@ static void guc_disable_communication(struct intel_guc *guc) guc->send = intel_guc_send_nop; guc->handler = intel_guc_to_host_event_handler_nop; +} + +static void guc_stop_communication(struct intel_guc *guc) +{ + intel_guc_ct_stop(&guc->ct); + + __guc_stop_communication(guc); + + DRM_INFO("GuC communication stopped\n"); +} + +static void guc_disable_communication(struct intel_guc *guc) +{ + __guc_stop_communication(guc); intel_guc_ct_disable(&guc->ct); @@ -368,7 +372,7 @@ static int uc_init_wopcm(struct intel_uc *uc) GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK)); GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK); - err = i915_inject_load_error(gt->i915, -ENXIO); + err = i915_inject_probe_error(gt->i915, -ENXIO); if (err) return err; @@ -537,7 +541,9 @@ void intel_uc_fini_hw(struct intel_uc *uc) if (intel_uc_supports_guc_submission(uc)) intel_guc_submission_disable(guc); - guc_disable_communication(guc); + if (guc_communication_enabled(guc)) + guc_disable_communication(guc); + __uc_sanitize(uc); } @@ -581,7 +587,7 @@ void intel_uc_suspend(struct intel_uc *uc) if (!intel_guc_is_running(guc)) return; - with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) + with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref) intel_uc_runtime_suspend(uc); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index bd22bf11adad..66a30ab7044a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -37,27 +37,34 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, /* * List of required GuC and HuC binaries per-platform. * Must be ordered based on platform + revid, from newer to older. + * + * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas + * between 33.0 and 35.2 are only related to new additions to support new Gen12 + * features. 
*/ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 8, 4, 3238)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 03, 01, 2893)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \ - fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 01, 8, 2893)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 01, 07, 1398)) - -#define __MAKE_UC_FW_PATH(prefix_, name_, separator_, major_, minor_, patch_) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 3)) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ + fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 2, 0, 0)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 2, 0, 0)) + +#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ "i915/" \ __stringify(prefix_) name_ \ - __stringify(major_) separator_ \ - __stringify(minor_) separator_ \ + __stringify(major_) "." \ + __stringify(minor_) "." \ __stringify(patch_) ".bin" #define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \ - __MAKE_UC_FW_PATH(prefix_, "_guc_", ".", major_, minor_, patch_) + __MAKE_UC_FW_PATH(prefix_, "_guc_", major_, minor_, patch_) #define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \ - __MAKE_UC_FW_PATH(prefix_, "_huc_ver", "_", major_, minor_, bld_num_) + __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_) /* All blobs need to be declared via MODULE_FIRMWARE() */ #define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \ @@ -218,29 +225,31 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, { bool user = e == -EINVAL; - if (i915_inject_load_error(i915, e)) { + if (i915_inject_probe_error(i915, e)) { /* non-existing blob */ uc_fw->path = "<invalid>"; uc_fw->user_overridden = user; - } else if (i915_inject_load_error(i915, e)) { + } else if (i915_inject_probe_error(i915, e)) { /* require next major version */ uc_fw->major_ver_wanted += 1; uc_fw->minor_ver_wanted = 0; uc_fw->user_overridden = user; - } else if (i915_inject_load_error(i915, e)) { + } else if (i915_inject_probe_error(i915, e)) { /* require next minor version */ uc_fw->minor_ver_wanted += 1; uc_fw->user_overridden = user; - } else if (uc_fw->major_ver_wanted && i915_inject_load_error(i915, e)) { + } else if (uc_fw->major_ver_wanted && + i915_inject_probe_error(i915, e)) { /* require prev major version */ uc_fw->major_ver_wanted -= 1; uc_fw->minor_ver_wanted = 0; uc_fw->user_overridden = user; - } else if (uc_fw->minor_ver_wanted && i915_inject_load_error(i915, e)) { + } else if (uc_fw->minor_ver_wanted && + i915_inject_probe_error(i915, e)) { /* require prev minor version - hey, this should work! 
*/ uc_fw->minor_ver_wanted -= 1; uc_fw->user_overridden = user; - } else if (user && i915_inject_load_error(i915, e)) { + } else if (user && i915_inject_probe_error(i915, e)) { /* officially unsupported platform */ uc_fw->major_ver_wanted = 0; uc_fw->minor_ver_wanted = 0; @@ -269,7 +278,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915) GEM_BUG_ON(!i915->wopcm.size); GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw)); - err = i915_inject_load_error(i915, -ENXIO); + err = i915_inject_probe_error(i915, -ENXIO); if (err) return err; @@ -337,25 +346,10 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915) } /* Get version numbers from the CSS header */ - switch (uc_fw->type) { - case INTEL_UC_FW_TYPE_GUC: - uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MAJOR, - css->sw_version); - uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MINOR, - css->sw_version); - break; - - case INTEL_UC_FW_TYPE_HUC: - uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MAJOR, - css->sw_version); - uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MINOR, - css->sw_version); - break; - - default: - MISSING_CASE(uc_fw->type); - break; - } + uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, + css->sw_version); + uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, + css->sw_version); if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { @@ -400,7 +394,7 @@ static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt) { struct drm_mm_node *node = &ggtt->uc_fw; - GEM_BUG_ON(!node->allocated); + GEM_BUG_ON(!drm_mm_node_allocated(node)); GEM_BUG_ON(upper_32_bits(node->start)); GEM_BUG_ON(upper_32_bits(node->start + node->size - 1)); @@ -445,7 +439,7 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt, u64 offset; int ret; - ret = i915_inject_load_error(gt->i915, -ETIMEDOUT); + ret = i915_inject_probe_error(gt->i915, -ETIMEDOUT); if (ret) return ret; @@ -506,7 +500,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, /* make sure the status was cleared the last time we reset the uc */ GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw)); - err = i915_inject_load_error(gt->i915, -ENOEXEC); + err = i915_inject_probe_error(gt->i915, -ENOEXEC); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h index ae58e8a8c53b..029214cdedd5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h @@ -39,9 +39,6 @@ * 3. Length info of each component can be found in header, in dwords. * 4. Modulus and exponent key are not required by driver. They may not appear * in fw. So driver will load a truncated firmware in this case. - * - * The only difference between GuC and HuC firmwares is how the version - * information is saved. 
 */
 
 struct uc_css_header {
@@ -69,11 +66,9 @@ struct uc_css_header {
 	char username[8];
 	char buildnumber[12];
 	u32 sw_version;
-#define CSS_SW_VERSION_GUC_MAJOR	(0xFF << 16)
-#define CSS_SW_VERSION_GUC_MINOR	(0xFF << 8)
-#define CSS_SW_VERSION_GUC_PATCH	(0xFF << 0)
-#define CSS_SW_VERSION_HUC_MAJOR	(0xFFFF << 16)
-#define CSS_SW_VERSION_HUC_MINOR	(0xFFFF << 0)
+#define CSS_SW_VERSION_UC_MAJOR	(0xFF << 16)
+#define CSS_SW_VERSION_UC_MINOR	(0xFF << 8)
+#define CSS_SW_VERSION_UC_PATCH	(0xFF << 0)
 	u32 reserved[14];
 	u32 header_info;
 } __packed;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index bba0eafe1cdb..d8a80388bd31 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -108,23 +108,15 @@ static bool client_doorbell_in_sync(struct intel_guc_client *client)
 * validating that the doorbells status expected by the driver matches what the
 * GuC/HW have.
 */
-static int igt_guc_clients(void *args)
+static int igt_guc_clients(void *arg)
 {
-	struct drm_i915_private *dev_priv = args;
+	struct intel_gt *gt = arg;
+	struct intel_guc *guc = &gt->uc.guc;
 	intel_wakeref_t wakeref;
-	struct intel_guc *guc;
 	int err = 0;
 
-	GEM_BUG_ON(!HAS_GT_UC(dev_priv));
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-
-	guc = &dev_priv->gt.uc.guc;
-	if (!guc) {
-		pr_err("No guc object!\n");
-		err = -EINVAL;
-		goto unlock;
-	}
+	GEM_BUG_ON(!HAS_GT_UC(gt->i915));
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
 
 	err = check_all_doorbells(guc);
 	if (err)
@@ -189,8 +181,7 @@ out:
 	guc_clients_create(guc);
 	guc_clients_enable(guc);
 unlock:
-	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
 	return err;
 }
 
@@ -201,22 +192,14 @@ unlock:
 */
 static int igt_guc_doorbells(void *arg)
 {
-	struct drm_i915_private *dev_priv = arg;
+	struct intel_gt *gt = arg;
+	struct intel_guc *guc = &gt->uc.guc;
 	intel_wakeref_t wakeref;
-	struct intel_guc *guc;
 	int i, err = 0;
 	u16 db_id;
 
-	GEM_BUG_ON(!HAS_GT_UC(dev_priv));
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-
-	guc = &dev_priv->gt.uc.guc;
-	if (!guc) {
-		pr_err("No guc object!\n");
-		err = -EINVAL;
-		goto unlock;
-	}
+	GEM_BUG_ON(!HAS_GT_UC(gt->i915));
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
 
 	err = check_all_doorbells(guc);
 	if (err)
@@ -298,20 +281,19 @@ out:
 		guc_client_free(clients[i]);
 	}
 unlock:
-	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
 	return err;
 }
 
-int intel_guc_live_selftest(struct drm_i915_private *dev_priv)
+int intel_guc_live_selftest(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_guc_clients),
 		SUBTEST(igt_guc_doorbells),
 	};
 
-	if (!USES_GUC_SUBMISSION(dev_priv))
+	if (!USES_GUC_SUBMISSION(i915))
 		return 0;
 
-	return i915_subtests(tests, dev_priv);
+	return intel_gt_live_subtests(tests, &i915->gt);
 }
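
For reference, the firmware file names produced by the reworked path macros in intel_uc_fw.c can be reproduced with a small standalone program. The sketch below is illustrative only and not part of the patch: it copies the new __MAKE_UC_FW_PATH()/MAKE_GUC_FW_PATH()/MAKE_HUC_FW_PATH() definitions and feeds them the Tigerlake entries from the updated INTEL_UC_FIRMWARE_DEFS table; the __stringify() helper and the main() harness are added here purely for the demonstration.

/* Illustrative sketch, not part of the patch. */
#include <stdio.h>

/* Minimal stand-in for the kernel's __stringify() from <linux/stringify.h>. */
#define __stringify_1(x)	#x
#define __stringify(x)		__stringify_1(x)

/* New path scheme: one fixed "." separator for both GuC and HuC blobs. */
#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
	"i915/" \
	__stringify(prefix_) name_ \
	__stringify(major_) "." \
	__stringify(minor_) "." \
	__stringify(patch_) ".bin"

#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \
	__MAKE_UC_FW_PATH(prefix_, "_guc_", major_, minor_, patch_)

#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \
	__MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)

int main(void)
{
	/* TGL entries from the new table: GuC 35.2.0 and HuC 7.0.3. */
	puts(MAKE_GUC_FW_PATH(tgl, 35, 2, 0));	/* i915/tgl_guc_35.2.0.bin */
	puts(MAKE_HUC_FW_PATH(tgl, 7, 0, 3));	/* i915/tgl_huc_7.0.3.bin */
	return 0;
}

GuC paths already used the dotted form, so they come out unchanged; the visible effect is on HuC blobs, which move from the old "_huc_ver" naming with "_" separators (for example i915/kbl_huc_ver02_00_1810.bin from the removed Kabylake entry) to the same platform_huc_major.minor.patch.bin pattern as the GuC.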
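
The intel_uc_fw_abi.h hunk above folds the separate GuC/HuC version layouts into a single set of CSS_SW_VERSION_UC_* masks, which is what lets intel_uc_fw_fetch() drop the per-type switch when reading the version out of the CSS header. The following sketch is likewise illustrative and not part of the patch: field_get() is a stand-in for the kernel's FIELD_GET() (built on a GCC/Clang builtin), and the sample sw_version value, packed as 35.2.0, is made up for the demonstration.

/* Illustrative sketch, not part of the patch. */
#include <stdint.h>
#include <stdio.h>

/* Unified uC version fields from the new CSS header layout. */
#define CSS_SW_VERSION_UC_MAJOR	(0xFFu << 16)
#define CSS_SW_VERSION_UC_MINOR	(0xFFu << 8)
#define CSS_SW_VERSION_UC_PATCH	(0xFFu << 0)

/* Stand-in for FIELD_GET(): mask, then shift down by the mask's low bit. */
static uint32_t field_get(uint32_t mask, uint32_t val)
{
	return (val & mask) >> __builtin_ctz(mask);
}

int main(void)
{
	uint32_t sw_version = (35u << 16) | (2u << 8) | 0u;	/* 35.2.0 */

	printf("found %u.%u (patch %u)\n",
	       field_get(CSS_SW_VERSION_UC_MAJOR, sw_version),
	       field_get(CSS_SW_VERSION_UC_MINOR, sw_version),
	       field_get(CSS_SW_VERSION_UC_PATCH, sw_version));
	return 0;
}

The values recovered this way populate major_ver_found/minor_ver_found, which intel_uc_fw_fetch() then checks against the *_ver_wanted numbers from the firmware table: a blob is rejected when its major version differs or its minor version is older than required.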