Diffstat (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c')
-rw-r--r--  drivers/gpu/drm/i915/intel_engine_cs.c | 288
1 file changed, 222 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1590375f31cb..2d1952849d69 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -25,7 +25,6 @@
 #include <drm/drm_print.h>
 
 #include "i915_drv.h"
-#include "i915_vgpu.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
@@ -230,6 +229,7 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 		break;
 	default:
 		MISSING_CASE(class);
+		/* fall through */
 	case VIDEO_DECODE_CLASS:
 	case VIDEO_ENHANCEMENT_CLASS:
 	case COPY_ENGINE_CLASS:
@@ -302,6 +302,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 						     engine->class);
 	if (WARN_ON(engine->context_size > BIT(20)))
 		engine->context_size = 0;
+	if (engine->context_size)
+		DRIVER_CAPS(dev_priv)->has_logical_contexts = true;
 
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
@@ -456,28 +458,16 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
 	i915_gem_batch_pool_init(&engine->batch_pool, engine);
 }
 
-static bool csb_force_mmio(struct drm_i915_private *i915)
-{
-	/* Older GVT emulation depends upon intercepting CSB mmio */
-	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
-		return true;
-
-	return false;
-}
-
 static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
-	execlists->csb_use_mmio = csb_force_mmio(engine->i915);
-
 	execlists->port_mask = 1;
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
 	execlists->queue_priority = INT_MIN;
-	execlists->queue = RB_ROOT;
-	execlists->first = NULL;
+	execlists->queue = RB_ROOT_CACHED;
 }
 
 /**
@@ -492,6 +482,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 void intel_engine_setup_common(struct intel_engine_cs *engine)
 {
 	i915_timeline_init(engine->i915, &engine->timeline, engine->name);
+	lockdep_set_subclass(&engine->timeline.lock, TIMELINE_ENGINE);
 
 	intel_engine_init_execlist(engine);
 	intel_engine_init_hangcheck(engine);
@@ -499,7 +490,8 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_cmd_parser(engine);
 }
 
-int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
+int intel_engine_create_scratch(struct intel_engine_cs *engine,
+				unsigned int size)
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
@@ -515,7 +507,7 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
 		return PTR_ERR(obj);
 	}
 
-	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
+	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err_unref;
@@ -533,7 +525,7 @@ err_unref:
 	return ret;
 }
 
-static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
+void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
 {
 	i915_vma_unpin_and_release(&engine->scratch);
 }
@@ -585,7 +577,7 @@ static int init_status_page(struct intel_engine_cs *engine)
 	if (ret)
 		goto err;
 
-	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
+	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err;
@@ -645,6 +637,12 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
 	return 0;
 }
 
+static void __intel_context_unpin(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine)
+{
+	intel_context_unpin(to_intel_context(ctx, engine));
+}
+
 /**
  * intel_engines_init_common - initialize cengine state which might require hw access
  * @engine: Engine to initialize.
@@ -658,7 +656,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
  */
 int intel_engine_init_common(struct intel_engine_cs *engine)
 {
-	struct intel_ring *ring;
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_context *ce;
 	int ret;
 
 	engine->set_default_submission(engine);
@@ -670,18 +669,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	 * be available. To avoid this we always pin the default
 	 * context.
 	 */
-	ring = intel_context_pin(engine->i915->kernel_context, engine);
-	if (IS_ERR(ring))
-		return PTR_ERR(ring);
+	ce = intel_context_pin(i915->kernel_context, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
 
 	/*
 	 * Similarly the preempt context must always be available so that
 	 * we can interrupt the engine at any time.
 	 */
-	if (engine->i915->preempt_context) {
-		ring = intel_context_pin(engine->i915->preempt_context, engine);
-		if (IS_ERR(ring)) {
-			ret = PTR_ERR(ring);
+	if (i915->preempt_context) {
+		ce = intel_context_pin(i915->preempt_context, engine);
+		if (IS_ERR(ce)) {
+			ret = PTR_ERR(ce);
 			goto err_unpin_kernel;
 		}
 	}
@@ -690,7 +689,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	if (ret)
 		goto err_unpin_preempt;
 
-	if (HWS_NEEDS_PHYSICAL(engine->i915))
+	if (HWS_NEEDS_PHYSICAL(i915))
 		ret = init_phys_status_page(engine);
 	else
 		ret = init_status_page(engine);
@@ -702,10 +701,11 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 err_breadcrumbs:
 	intel_engine_fini_breadcrumbs(engine);
 err_unpin_preempt:
-	if (engine->i915->preempt_context)
-		intel_context_unpin(engine->i915->preempt_context, engine);
+	if (i915->preempt_context)
+		__intel_context_unpin(i915->preempt_context, engine);
+
 err_unpin_kernel:
-	intel_context_unpin(engine->i915->kernel_context, engine);
+	__intel_context_unpin(i915->kernel_context, engine);
 	return ret;
 }
 
@@ -718,6 +718,8 @@ err_unpin_kernel:
 */
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+
 	intel_engine_cleanup_scratch(engine);
 
 	if (HWS_NEEDS_PHYSICAL(engine->i915))
@@ -732,9 +734,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->default_state)
 		i915_gem_object_put(engine->default_state);
 
-	if (engine->i915->preempt_context)
-		intel_context_unpin(engine->i915->preempt_context, engine);
-	intel_context_unpin(engine->i915->kernel_context, engine);
+	if (i915->preempt_context)
+		__intel_context_unpin(i915->preempt_context, engine);
+	__intel_context_unpin(i915->kernel_context, engine);
 
 	i915_timeline_fini(&engine->timeline);
 }
@@ -769,6 +771,35 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
 	return bbaddr;
 }
 
+int intel_engine_stop_cs(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	const u32 base = engine->mmio_base;
+	const i915_reg_t mode = RING_MI_MODE(base);
+	int err;
+
+	if (INTEL_GEN(dev_priv) < 3)
+		return -ENODEV;
+
+	GEM_TRACE("%s\n", engine->name);
+
+	I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING));
+
+	err = 0;
+	if (__intel_wait_for_register_fw(dev_priv,
+					 mode, MODE_IDLE, MODE_IDLE,
+					 1000, 0,
+					 NULL)) {
+		GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
+		err = -ETIMEDOUT;
+	}
+
+	/* A final mmio read to let GPU writes be hopefully flushed to memory */
+	POSTING_READ_FW(mode);
+
+	return err;
+}
+
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 {
 	switch (type) {
@@ -780,12 +811,32 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 	}
 }
 
+u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
+{
+	const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
+	u32 mcr_s_ss_select;
+	u32 slice = fls(sseu->slice_mask);
+	u32 subslice = fls(sseu->subslice_mask[slice]);
+
+	if (INTEL_GEN(dev_priv) == 10)
+		mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
+				  GEN8_MCR_SUBSLICE(subslice);
+	else if (INTEL_GEN(dev_priv) >= 11)
+		mcr_s_ss_select = GEN11_MCR_SLICE(slice) |
+				  GEN11_MCR_SUBSLICE(subslice);
+	else
+		mcr_s_ss_select = 0;
+
+	return mcr_s_ss_select;
+}
+
 static inline uint32_t
 read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 		  int subslice, i915_reg_t reg)
 {
 	uint32_t mcr_slice_subslice_mask;
 	uint32_t mcr_slice_subslice_select;
+	uint32_t default_mcr_s_ss_select;
 	uint32_t mcr;
 	uint32_t ret;
 	enum forcewake_domains fw_domains;
@@ -802,6 +853,8 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 			GEN8_MCR_SUBSLICE(subslice);
 	}
 
+	default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv);
+
 	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
 						    FW_REG_READ);
 	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
@@ -812,11 +865,10 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
 
 	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
-	/*
-	 * The HW expects the slice and sublice selectors to be reset to 0
-	 * after reading out the registers.
-	 */
-	WARN_ON_ONCE(mcr & mcr_slice_subslice_mask);
+
+	WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) !=
+		     default_mcr_s_ss_select);
+
 	mcr &= ~mcr_slice_subslice_mask;
 	mcr |= mcr_slice_subslice_select;
 	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
@@ -824,6 +876,8 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 	ret = I915_READ_FW(reg);
 
 	mcr &= ~mcr_slice_subslice_mask;
+	mcr |= default_mcr_s_ss_select;
+
 	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
 
 	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
@@ -934,11 +988,24 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		return true;
 
 	/* Waiting to drain ELSP? */
-	if (READ_ONCE(engine->execlists.active))
-		return false;
+	if (READ_ONCE(engine->execlists.active)) {
+		struct tasklet_struct *t = &engine->execlists.tasklet;
+
+		local_bh_disable();
+		if (tasklet_trylock(t)) {
+			/* Must wait for any GPU reset in progress. */
+			if (__tasklet_is_enabled(t))
+				t->func(t->data);
+			tasklet_unlock(t);
+		}
+		local_bh_enable();
 
-	/* ELSP is empty, but there are ready requests? */
-	if (READ_ONCE(engine->execlists.first))
+		if (READ_ONCE(engine->execlists.active))
+			return false;
+	}
+
+	/* ELSP is empty, but there are ready requests? E.g. after reset */
+	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
 		return false;
 
 	/* Ring stopped? */
@@ -978,8 +1045,8 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
 */
 bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 {
-	const struct i915_gem_context * const kernel_context =
-		engine->i915->kernel_context;
+	const struct intel_context *kernel_context =
+		to_intel_context(engine->i915->kernel_context, engine);
 	struct i915_request *rq;
 
 	lockdep_assert_held(&engine->i915->drm.struct_mutex);
@@ -991,7 +1058,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 	 */
 	rq = __i915_gem_active_peek(&engine->timeline.last_request);
 	if (rq)
-		return rq->ctx == kernel_context;
+		return rq->hw_context == kernel_context;
 	else
 		return engine->last_retired_context == kernel_context;
 }
@@ -1006,6 +1073,28 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 }
 
 /**
+ * intel_engines_sanitize: called after the GPU has lost power
+ * @i915: the i915 device
+ *
+ * Anytime we reset the GPU, either with an explicit GPU reset or through a
+ * PCI power cycle, the GPU loses state and we must reset our state tracking
+ * to match. Note that calling intel_engines_sanitize() if the GPU has not
+ * been reset results in much confusion!
+ */
+void intel_engines_sanitize(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	GEM_TRACE("\n");
+
+	for_each_engine(engine, i915, id) {
+		if (engine->reset.reset)
+			engine->reset.reset(engine, NULL);
+	}
+}
+
+/**
 * intel_engines_park: called when the GT is transitioning from busy->idle
 * @i915: the i915 device
 *
@@ -1043,6 +1132,11 @@ void intel_engines_park(struct drm_i915_private *i915)
 		if (engine->park)
 			engine->park(engine);
 
+		if (engine->pinned_default_state) {
+			i915_gem_object_unpin_map(engine->default_state);
+			engine->pinned_default_state = NULL;
+		}
+
 		i915_gem_batch_pool_fini(&engine->batch_pool);
 		engine->execlists.no_priolist = false;
 	}
@@ -1060,6 +1154,16 @@ void intel_engines_unpark(struct drm_i915_private *i915)
 	enum intel_engine_id id;
 
 	for_each_engine(engine, i915, id) {
+		void *map;
+
+		/* Pin the default state for fast resets from atomic context. */
+		map = NULL;
+		if (engine->default_state)
+			map = i915_gem_object_pin_map(engine->default_state,
+						      I915_MAP_WB);
+		if (!IS_ERR_OR_NULL(map))
+			engine->pinned_default_state = map;
+
 		if (engine->unpark)
 			engine->unpark(engine);
 
@@ -1067,6 +1171,26 @@ void intel_engines_unpark(struct drm_i915_private *i915)
 	}
 }
 
+/**
+ * intel_engine_lost_context: called when the GPU is reset into unknown state
+ * @engine: the engine
+ *
+ * We have either reset the GPU or otherwise about to lose state tracking of
+ * the current GPU logical state (e.g. suspend). On next use, it is therefore
+ * imperative that we make no presumptions about the current state and load
+ * from scratch.
+ */
+void intel_engine_lost_context(struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+
+	lockdep_assert_held(&engine->i915->drm.struct_mutex);
+
+	ce = fetch_and_zero(&engine->last_retired_context);
+	if (ce)
+		intel_context_unpin(ce);
+}
+
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 {
 	switch (INTEL_GEN(engine->i915)) {
@@ -1151,7 +1275,7 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
 						rowsize, sizeof(u32),
 						line, sizeof(line),
 						false) >= sizeof(line));
-		drm_printf(m, "%08zx %s\n", pos, line);
+		drm_printf(m, "[%04zx] %s\n", pos, line);
 
 		prev = buf + pos;
 		skip = false;
@@ -1166,6 +1290,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		&engine->execlists;
 	u64 addr;
 
+	if (engine->id == RCS && IS_GEN(dev_priv, 4, 7))
+		drm_printf(m, "\tCCID: 0x%08x\n", I915_READ(CCID));
 	drm_printf(m, "\tRING_START: 0x%08x\n",
 		   I915_READ(RING_START(engine->mmio_base)));
 	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
@@ -1232,12 +1358,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 		read = GEN8_CSB_READ_PTR(ptr);
 		write = GEN8_CSB_WRITE_PTR(ptr);
-		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
+		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
			   read, execlists->csb_head,
			   write,
			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
-			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
-					  &engine->irq_posted)),
			   yesno(test_bit(TASKLET_STATE_SCHED,
					  &engine->execlists.tasklet.state)),
			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
@@ -1287,6 +1411,39 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 	}
 }
 
+static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
+{
+	void *ring;
+	int size;
+
+	drm_printf(m,
+		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
+		   rq->head, rq->postfix, rq->tail,
+		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
+		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
+
+	size = rq->tail - rq->head;
+	if (rq->tail < rq->head)
+		size += rq->ring->size;
+
+	ring = kmalloc(size, GFP_ATOMIC);
+	if (ring) {
+		const void *vaddr = rq->ring->vaddr;
+		unsigned int head = rq->head;
+		unsigned int len = 0;
+
+		if (rq->tail < head) {
+			len = rq->ring->size - head;
+			memcpy(ring, vaddr + head, len);
+			head = 0;
+		}
+		memcpy(ring + len, vaddr + head, size - len);
+
+		hexdump(m, ring, size);
+		kfree(ring);
+	}
+}
+
 void intel_engine_dump(struct intel_engine_cs *engine,
 		       struct drm_printer *m,
 		       const char *header, ...)
@@ -1296,6 +1453,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	const struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_request *rq, *last;
+	unsigned long flags;
 	struct rb_node *rb;
 	int count;
 
@@ -1336,11 +1494,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	rq = i915_gem_find_active_request(engine);
 	if (rq) {
 		print_request(m, rq, "\t\tactive ");
-		drm_printf(m,
-			   "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n",
-			   rq->head, rq->postfix, rq->tail,
-			   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
-			   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
+
 		drm_printf(m, "\t\tring->start:  0x%08x\n",
			   i915_ggtt_offset(rq->ring->vma));
 		drm_printf(m, "\t\tring->head:   0x%08x\n",
@@ -1351,6 +1505,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
			   rq->ring->emit);
 		drm_printf(m, "\t\tring->space:  0x%08x\n",
			   rq->ring->space);
+
+		print_request_ring(m, rq);
 	}
 
 	rcu_read_unlock();
@@ -1362,7 +1518,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
 	}
 
-	spin_lock_irq(&engine->timeline.lock);
+	local_irq_save(flags);
+	spin_lock(&engine->timeline.lock);
 
 	last = NULL;
 	count = 0;
@@ -1384,7 +1541,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	last = NULL;
 	count = 0;
 	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
-	for (rb = execlists->first; rb; rb = rb_next(rb)) {
+	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
 		struct i915_priolist *p =
 			rb_entry(rb, typeof(*p), node);
 
@@ -1404,22 +1561,21 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		print_request(m, last, "\t\tQ ");
 	}
 
-	spin_unlock_irq(&engine->timeline.lock);
+	spin_unlock(&engine->timeline.lock);
 
-	spin_lock_irq(&b->rb_lock);
+	spin_lock(&b->rb_lock);
 	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
 		struct intel_wait *w = rb_entry(rb, typeof(*w), node);
 
 		drm_printf(m, "\t%s [%d] waiting for %x\n",
			   w->tsk->comm, w->tsk->pid, w->seqno);
 	}
-	spin_unlock_irq(&b->rb_lock);
+	spin_unlock(&b->rb_lock);
+	local_irq_restore(flags);
 
-	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
+	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
		   engine->irq_posted,
		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-				  &engine->irq_posted)),
-		   yesno(test_bit(ENGINE_IRQ_EXECLIST,
				  &engine->irq_posted)));
 
 	drm_printf(m, "HWSP:\n");
@@ -1468,8 +1624,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	if (!intel_engine_supports_stats(engine))
 		return -ENODEV;
 
-	tasklet_disable(&execlists->tasklet);
-	write_seqlock_irqsave(&engine->stats.lock, flags);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	write_seqlock(&engine->stats.lock);
 
 	if (unlikely(engine->stats.enabled == ~0)) {
 		err = -EBUSY;
@@ -1493,8 +1649,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	}
 
 unlock:
-	write_sequnlock_irqrestore(&engine->stats.lock, flags);
-	tasklet_enable(&execlists->tasklet);
+	write_sequnlock(&engine->stats.lock);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
 	return err;
 }
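A few notes on the kernel idioms this patch leans on, with small illustrative sketches. First, execlists->queue moves from RB_ROOT plus a manually tracked execlists->first pointer to the kernel's cached rbtree (struct rb_root_cached), where the rbtree core maintains the leftmost node itself and rb_first_cached() costs a single pointer load. A kernel-style sketch of inserting into such a priority queue follows; prio_node and prio_insert are hypothetical names, but the rbtree calls are the real <linux/rbtree.h> API:

	#include <linux/rbtree.h>

	struct prio_node {
		struct rb_node node;
		int priority;	/* higher value sorts leftmost */
	};

	/*
	 * Insert into a cached rbtree, tracking whether the new node
	 * becomes the leftmost (highest priority) element, so that
	 * rb_first_cached() is a dereference rather than a tree walk.
	 */
	static void prio_insert(struct rb_root_cached *root,
				struct prio_node *pn)
	{
		struct rb_node **p = &root->rb_root.rb_node;
		struct rb_node *parent = NULL;
		bool leftmost = true;

		while (*p) {
			struct prio_node *cur;

			parent = *p;
			cur = rb_entry(parent, struct prio_node, node);
			if (pn->priority > cur->priority) {
				p = &parent->rb_left;
			} else {
				p = &parent->rb_right;
				leftmost = false;
			}
		}

		rb_link_node(&pn->node, parent, p);
		rb_insert_color_cached(&pn->node, root, leftmost);
	}

Emptiness is tested on the embedded plain root, which is why the patch checks RB_EMPTY_ROOT(&engine->execlists.queue.rb_root) in intel_engine_is_idle().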
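Second, intel_engine_stop_cs() sets STOP_RING through _MASKED_BIT_ENABLE(). Many i915 ring registers are "masked": the upper 16 bits of a write select which of the lower 16 bits actually change, so a single write can flip one bit without a read-modify-write cycle. A standalone sketch of the encoding, assuming STOP_RING is bit 8 of MI_MODE as in i915_reg.h of this era (the macro names here are illustrative stand-ins for the driver's _MASKED_BIT_* helpers):

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * i915-style masked register write: the high 16 bits tell the
	 * hardware which low bits the write may modify.
	 */
	#define MASKED_BIT_ENABLE(a)  ((uint32_t)(((a) << 16) | (a)))
	#define MASKED_BIT_DISABLE(a) ((uint32_t)((a) << 16))

	#define STOP_RING (1 << 8)	/* assumed bit position in MI_MODE */

	int main(void)
	{
		printf("enable:  0x%08x\n", MASKED_BIT_ENABLE(STOP_RING));
		printf("disable: 0x%08x\n", MASKED_BIT_DISABLE(STOP_RING));
		return 0;
	}

This prints 0x01000100 and 0x01000000: same bit, opposite value, and in both cases only bit 8 is unmasked for the write.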
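Third, intel_engine_lost_context() uses fetch_and_zero() to take ownership of engine->last_retired_context while resetting it to NULL. The macro comes from i915_utils.h and is not atomic; here it relies on struct_mutex being held, as the lockdep assertion documents. Its shape is roughly:

	/*
	 * Read *ptr, replace it with zero, and return the old value.
	 * Not atomic: the caller must hold the lock protecting *ptr.
	 */
	#define fetch_and_zero(ptr) ({			\
		typeof(*ptr) __T = *(ptr);		\
		*(ptr) = (typeof(*ptr))0;		\
		__T;					\
	})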
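Finally, print_request_ring() snapshots the request's payload out of the ring before hexdumping it; the only subtlety is that the [head, tail) region may wrap past the end of the buffer, in which case the copy is split in two. A self-contained userspace sketch of the same arithmetic (function and variable names are illustrative, not the driver's):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/*
	 * Copy the [head, tail) region out of a ring buffer, splitting
	 * the memcpy in two when the region wraps past the end of the
	 * backing store, as print_request_ring() does above.
	 */
	static char *copy_ring_region(const char *vaddr, int ring_size,
				      int head, int tail, int *out_size)
	{
		int size = tail - head;
		int len = 0;
		char *copy;

		if (tail < head)
			size += ring_size;	/* region wraps around */

		copy = malloc(size);
		if (!copy)
			return NULL;

		if (tail < head) {
			len = ring_size - head;	/* first chunk: head..end */
			memcpy(copy, vaddr + head, len);
			head = 0;		/* second chunk restarts at 0 */
		}
		memcpy(copy + len, vaddr + head, size - len);

		*out_size = size;
		return copy;
	}

	int main(void)
	{
		const char ring[] = "ABCDEFGH";	/* 8 payload bytes */
		int size;
		char *copy = copy_ring_region(ring, 8, 6, 3, &size);

		if (copy) {
			printf("%.*s\n", size, copy);	/* prints "GHABC" */
			free(copy);
		}
		return 0;
	}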