From ac65bdfef14a902b40ff69a35f5c604dba096547 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 19 Jun 2019 18:01:35 +0100 Subject: drm/i915: Keep rings pinned while the context is active Remember to keep the rings pinned as well as the context image until the GPU is no longer active. v2: Introduce a ring->pin_count primarily to hide the mock_ring that doesn't fit into the normal GGTT vma picture. v3: Order is important in teardown, ringbuffer submission needs to drop the pin count on the engine->kernel_context before it can gleefully free its ring. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110946 Fixes: ce476c80b8bf ("drm/i915: Keep contexts pinned until after the next kernel context switch") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190619170135.15281-1-chris@chris-wilson.co.uk (cherry picked from commit 09c5ab384f6fb30f834a5777888b4486dd7f015d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_context.c | 27 ++++++++++++++++-------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 12 +++++++++++ drivers/gpu/drm/i915/gt/intel_lrc.c | 10 ++------- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 31 ++++++++++++++++++---------- drivers/gpu/drm/i915/gt/mock_engine.c | 1 + 5 files changed, 53 insertions(+), 28 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 2c454f227c2e..23120901c55f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -126,6 +126,7 @@ static void intel_context_retire(struct i915_active *active) if (ce->state) __context_unpin_state(ce->state); + intel_ring_unpin(ce->ring); intel_context_put(ce); } @@ -160,27 +161,35 @@ int intel_context_active_acquire(struct intel_context *ce, unsigned long flags) intel_context_get(ce); + err = intel_ring_pin(ce->ring); + if (err) + goto err_put; + if (!ce->state) return 0; err = __context_pin_state(ce->state, flags); - if (err) { - i915_active_cancel(&ce->active); - intel_context_put(ce); - return err; - } + if (err) + goto err_ring; /* Preallocate tracking nodes */ if (!i915_gem_context_is_kernel(ce->gem_context)) { err = i915_active_acquire_preallocate_barrier(&ce->active, ce->engine); - if (err) { - i915_active_release(&ce->active); - return err; - } + if (err) + goto err_state; } return 0; + +err_state: + __context_unpin_state(ce->state); +err_ring: + intel_ring_unpin(ce->ring); +err_put: + intel_context_put(ce); + i915_active_cancel(&ce->active); + return err; } void intel_context_active_release(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 868b220214f8..43e975a26016 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -70,6 +70,18 @@ struct intel_ring { struct list_head request_list; struct list_head active_link; + /* + * As we have two types of rings, one global to the engine used + * by ringbuffer submission and those that are exclusive to a + * context used by execlists, we have to play safe and allow + * atomic updates to the pin_count. However, the actual pinning + * of the context is either done during initialisation for + * ringbuffer submission or serialised as part of the context + * pinning for execlists, and so we do not need a mutex ourselves + * to serialise intel_ring_pin/intel_ring_unpin. + */ + atomic_t pin_count; + u32 head; u32 tail; u32 emit; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b42b5f158295..82b7ace62d97 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1414,6 +1414,7 @@ static void execlists_context_destroy(struct kref *kref) { struct intel_context *ce = container_of(kref, typeof(*ce), ref); + GEM_BUG_ON(!i915_active_is_idle(&ce->active)); GEM_BUG_ON(intel_context_is_pinned(ce)); if (ce->state) @@ -1426,7 +1427,6 @@ static void execlists_context_unpin(struct intel_context *ce) { i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); - intel_ring_unpin(ce->ring); } static void @@ -1478,13 +1478,9 @@ __execlists_context_pin(struct intel_context *ce, goto unpin_active; } - ret = intel_ring_pin(ce->ring); - if (ret) - goto unpin_map; - ret = i915_gem_context_pin_hw_id(ce->gem_context); if (ret) - goto unpin_ring; + goto unpin_map; ce->lrc_desc = lrc_descriptor(ce, engine); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; @@ -1492,8 +1488,6 @@ __execlists_context_pin(struct intel_context *ce, return 0; -unpin_ring: - intel_ring_unpin(ce->ring); unpin_map: i915_gem_object_unpin_map(ce->state->obj); unpin_active: diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index c6023bc9452d..12010e798868 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1149,16 +1149,16 @@ i915_emit_bb_start(struct i915_request *rq, int intel_ring_pin(struct intel_ring *ring) { struct i915_vma *vma = ring->vma; - enum i915_map_type map = i915_coherent_map_type(vma->vm->i915); unsigned int flags; void *addr; int ret; - GEM_BUG_ON(ring->vaddr); + if (atomic_fetch_inc(&ring->pin_count)) + return 0; ret = i915_timeline_pin(ring->timeline); if (ret) - return ret; + goto err_unpin; flags = PIN_GLOBAL; @@ -1172,26 +1172,31 @@ int intel_ring_pin(struct intel_ring *ring) ret = i915_vma_pin(vma, 0, 0, flags); if (unlikely(ret)) - goto unpin_timeline; + goto err_timeline; if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else - addr = i915_gem_object_pin_map(vma->obj, map); + addr = i915_gem_object_pin_map(vma->obj, + i915_coherent_map_type(vma->vm->i915)); if (IS_ERR(addr)) { ret = PTR_ERR(addr); - goto unpin_ring; + goto err_ring; } vma->obj->pin_global++; + GEM_BUG_ON(ring->vaddr); ring->vaddr = addr; + return 0; -unpin_ring: +err_ring: i915_vma_unpin(vma); -unpin_timeline: +err_timeline: i915_timeline_unpin(ring->timeline); +err_unpin: + atomic_dec(&ring->pin_count); return ret; } @@ -1207,16 +1212,19 @@ void intel_ring_reset(struct intel_ring *ring, u32 tail) void intel_ring_unpin(struct intel_ring *ring) { - GEM_BUG_ON(!ring->vma); - GEM_BUG_ON(!ring->vaddr); + if (!atomic_dec_and_test(&ring->pin_count)) + return; /* Discard any unused bytes beyond that submitted to hw. */ intel_ring_reset(ring, ring->tail); + GEM_BUG_ON(!ring->vma); if (i915_vma_is_map_and_fenceable(ring->vma)) i915_vma_unpin_iomap(ring->vma); else i915_gem_object_unpin_map(ring->vma->obj); + + GEM_BUG_ON(!ring->vaddr); ring->vaddr = NULL; ring->vma->obj->pin_global--; @@ -2081,10 +2089,11 @@ static void ring_destroy(struct intel_engine_cs *engine) WARN_ON(INTEL_GEN(dev_priv) > 2 && (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + intel_engine_cleanup_common(engine); + intel_ring_unpin(engine->buffer); intel_ring_put(engine->buffer); - intel_engine_cleanup_common(engine); kfree(engine); } diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 086801b51441..486c6953dcb1 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -66,6 +66,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->base.effective_size = sz; ring->base.vaddr = (void *)(ring + 1); ring->base.timeline = &ring->timeline; + atomic_set(&ring->base.pin_count, 1); INIT_LIST_HEAD(&ring->base.request_list); intel_ring_update_space(&ring->base); -- cgit v1.2.3 From 248f883db61283b4f5a1c92a5e27277377b09f16 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 25 Jun 2019 10:06:55 +0100 Subject: drm/i915: Disable SAMPLER_STATE prefetching on all Gen11 steppings. The Demand Prefetch workaround (binding table prefetching) only applies to Icelake A0/B0. But the Sampler Prefetch workaround needs to be applied to all Gen11 steppings, according to a programming note in the SARCHKMD documentation. Using the Intel Gallium driver, I have seen intermittent failures in the dEQP-GLES31.functional.copy_image.non_compressed.* tests. After applying this workaround, the tests reliably pass. v2: Remove the overlap with a pre-production w/a BSpec: 9663 Signed-off-by: Kenneth Graunke Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190625090655.19220-1-chris@chris-wilson.co.uk (cherry picked from commit f9a393875d3af13cc3267477746608dadb7f17c1) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 15e90fd2cfdc..50c0060509a6 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1258,8 +1258,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) wa_write_or(wal, GEN7_SARCHKMD, - GEN7_DISABLE_DEMAND_PREFETCH | - GEN7_DISABLE_SAMPLER_PREFETCH); + GEN7_DISABLE_DEMAND_PREFETCH); + + /* Wa_1606682166:icl */ + wa_write_or(wal, + GEN7_SARCHKMD, + GEN7_DISABLE_SAMPLER_PREFETCH); } if (IS_GEN_RANGE(i915, 9, 11)) { -- cgit v1.2.3 From 95eef14cdad150fed43147bcd4f29eea3d0a3f03 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 10 Jun 2019 11:19:14 +0300 Subject: drm/i915/perf: fix ICL perf register offsets We got the wrong offsets (could they have changed?). New values were computed off an error state by looking up the register offset in the context image as written by the HW. Signed-off-by: Lionel Landwerlin Fixes: 1de401c08fa805 ("drm/i915/perf: enable perf support on ICL") Cc: # v4.18+ Acked-by: Kenneth Graunke Link: https://patchwork.freedesktop.org/patch/msgid/20190610081914.25428-1-lionel.g.landwerlin@intel.com (cherry picked from commit 8dcfdfb4501012a8d36d2157dc73925715f2befb) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a700c5c3d167..1ae06a1b6749 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3477,9 +3477,13 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set; - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; - + if (IS_GEN(dev_priv, 10)) { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + } else { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x124; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x78e; + } dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); } } -- cgit v1.2.3 From 7d3cd66261665da491d0ee582beabe23df60f983 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 19 Jun 2019 20:08:39 +0300 Subject: drm/i915: Fix various tracepoints for gen2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gen2 doesn't have a frame counter and apparently we no longer provide a fake .get_vblank_counter() hook for it. That means all tracepoints calling that hook will oops. Update the tracepoints to use intel_crtc_get_vblank_counter() which will gracefully fall back to using the software counter. This is actually a better approach since we now get (hopefully accurate) frame numbers in the traces. This also gets rid of the raw driver->get_vblank_counter() calls, which we need to do in order to switch to the per-crtc vblank vfuncs. v2: Deal with new tracepoints v3: Use a distinct variable name for the internal crtc iterator (Chris) Cc: Shawn Guo Cc: Daniel Vetter Fixes: 967dd4841787 ("drm: remove drm_vblank_no_hw_counter assignment from driver code") Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190619170842.20579-2-ville.syrjala@linux.intel.com (cherry picked from commit 4c888e7bd26f58deb27c2e6ddc90000b89ee9393) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 4 +- drivers/gpu/drm/i915/i915_trace.h | 76 ++++++++++++---------------- 2 files changed, 35 insertions(+), 45 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 30b97ded6fdd..592b92782fab 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1839,7 +1839,7 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) /* FIXME: assert CPU port conditions for SNB+ */ } - trace_intel_pipe_enable(dev_priv, pipe); + trace_intel_pipe_enable(crtc); reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); @@ -1880,7 +1880,7 @@ static void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state) */ assert_planes_disabled(crtc); - trace_intel_pipe_disable(dev_priv, pipe); + trace_intel_pipe_disable(crtc); reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index f4ce643b3bc3..cce426b23a24 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -21,24 +21,22 @@ /* watermark/fifo updates */ TRACE_EVENT(intel_pipe_enable, - TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pipe), - TP_ARGS(dev_priv, pipe), + TP_PROTO(struct intel_crtc *crtc), + TP_ARGS(crtc), TP_STRUCT__entry( __array(u32, frame, 3) __array(u32, scanline, 3) __field(enum pipe, pipe) ), - TP_fast_assign( - enum pipe _pipe; - for_each_pipe(dev_priv, _pipe) { - __entry->frame[_pipe] = - dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, _pipe); - __entry->scanline[_pipe] = - intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, _pipe)); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc *it__; + for_each_intel_crtc(&dev_priv->drm, it__) { + __entry->frame[it__->pipe] = intel_crtc_get_vblank_counter(it__); + __entry->scanline[it__->pipe] = intel_get_crtc_scanline(it__); } - __entry->pipe = pipe; + __entry->pipe = crtc->pipe; ), TP_printk("pipe %c enable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", @@ -49,8 +47,8 @@ TRACE_EVENT(intel_pipe_enable, ); TRACE_EVENT(intel_pipe_disable, - TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pipe), - TP_ARGS(dev_priv, pipe), + TP_PROTO(struct intel_crtc *crtc), + TP_ARGS(crtc), TP_STRUCT__entry( __array(u32, frame, 3) @@ -59,14 +57,13 @@ TRACE_EVENT(intel_pipe_disable, ), TP_fast_assign( - enum pipe _pipe; - for_each_pipe(dev_priv, _pipe) { - __entry->frame[_pipe] = - dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, _pipe); - __entry->scanline[_pipe] = - intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, _pipe)); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc *it__; + for_each_intel_crtc(&dev_priv->drm, it__) { + __entry->frame[it__->pipe] = intel_crtc_get_vblank_counter(it__); + __entry->scanline[it__->pipe] = intel_get_crtc_scanline(it__); } - __entry->pipe = pipe; + __entry->pipe = crtc->pipe; ), TP_printk("pipe %c disable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", @@ -89,8 +86,7 @@ TRACE_EVENT(intel_pipe_crc, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); memcpy(__entry->crcs, crcs, sizeof(__entry->crcs)); ), @@ -112,9 +108,10 @@ TRACE_EVENT(intel_cpu_fifo_underrun, ), TP_fast_assign( + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); __entry->pipe = pipe; - __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); - __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); ), TP_printk("pipe %c, frame=%u, scanline=%u", @@ -134,9 +131,10 @@ TRACE_EVENT(intel_pch_fifo_underrun, TP_fast_assign( enum pipe pipe = pch_transcoder; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); __entry->pipe = pipe; - __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); - __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + __entry->frame = intel_crtc_get_vblank_counter(crtc); + __entry->scanline = intel_get_crtc_scanline(crtc); ), TP_printk("pch transcoder %c, frame=%u, scanline=%u", @@ -156,12 +154,10 @@ TRACE_EVENT(intel_memory_cxsr, ), TP_fast_assign( - enum pipe pipe; - for_each_pipe(dev_priv, pipe) { - __entry->frame[pipe] = - dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); - __entry->scanline[pipe] = - intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + struct intel_crtc *crtc; + for_each_intel_crtc(&dev_priv->drm, crtc) { + __entry->frame[crtc->pipe] = intel_crtc_get_vblank_counter(crtc); + __entry->scanline[crtc->pipe] = intel_get_crtc_scanline(crtc); } __entry->old = old; __entry->new = new; @@ -198,8 +194,7 @@ TRACE_EVENT(g4x_wm, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; __entry->sprite = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; @@ -243,8 +238,7 @@ TRACE_EVENT(vlv_wm, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); __entry->level = wm->level; __entry->cxsr = wm->cxsr; @@ -278,8 +272,7 @@ TRACE_EVENT(vlv_fifo_size, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); __entry->sprite0_start = sprite0_start; __entry->sprite1_start = sprite1_start; @@ -310,8 +303,7 @@ TRACE_EVENT(intel_update_plane, TP_fast_assign( __entry->pipe = crtc->pipe; __entry->name = plane->name; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); memcpy(__entry->src, &plane->state->src, sizeof(__entry->src)); memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst)); @@ -338,8 +330,7 @@ TRACE_EVENT(intel_disable_plane, TP_fast_assign( __entry->pipe = crtc->pipe; __entry->name = plane->name; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); ), @@ -364,8 +355,7 @@ TRACE_EVENT(i915_pipe_update_start, TP_fast_assign( __entry->pipe = crtc->pipe; - __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, - crtc->pipe); + __entry->frame = intel_crtc_get_vblank_counter(crtc); __entry->scanline = intel_get_crtc_scanline(crtc); __entry->min = crtc->debug.min_vbl; __entry->max = crtc->debug.max_vbl; -- cgit v1.2.3 From c270cac40828eca4fb8d7c27cab1d0ac7765ff3d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sat, 29 Jun 2019 14:13:50 +0100 Subject: drm/i915: fix whitelist selftests with readonly registers When a register is readonly there is not much we can tell about its value (apart from its default value?). This can be covered by tests exercising the value of the register from userspace. For PS_INVOCATION_COUNT we've got the following piglit tests : KHR-GL45.pipeline_statistics_query_tests_ARB.functional_fragment_shader_invocations Vulkan CTS tests : dEQP-VK.query_pool.statistics_query.fragment_shader_invocations.* v2: Use a local to shrink under 80cols. Signed-off-by: Lionel Landwerlin Fixes: 86554f48e511 ("drm/i915/selftests: Verify whitelist of context registers") Tested-by: Anuj Phogat Signed-off-by: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190629131350.31185-1-chris@chris-wilson.co.uk (cherry picked from commit 361b69051326ed0e07553315227678d00d651a9e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 9eaf030affd0..44becd9538be 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -925,7 +925,12 @@ check_whitelisted_registers(struct intel_engine_cs *engine, err = 0; for (i = 0; i < engine->whitelist.count; i++) { - if (!fn(engine, a[i], b[i], engine->whitelist.list[i].reg)) + const struct i915_wa *wa = &engine->whitelist.list[i]; + + if (i915_mmio_reg_offset(wa->reg) & RING_FORCE_TO_NONPRIV_RD) + continue; + + if (!fn(engine, a[i], b[i], wa->reg)) err = -EINVAL; } -- cgit v1.2.3 From 6ce5bfe936ac31d5c52c4b1328d0bfda5f97e7ca Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 28 Jun 2019 15:07:19 +0300 Subject: drm/i915: whitelist PS_(DEPTH|INVOCATION)_COUNT CFL:C0+ changed the status of those registers which are now blacklisted by default. This is breaking a number of CTS tests on GL & Vulkan : KHR-GL45.pipeline_statistics_query_tests_ARB.functional_fragment_shader_invocations (GL) dEQP-VK.query_pool.statistics_query.fragment_shader_invocations.* (Vulkan) v2: Only use one whitelist entry (Lionel) Bspec: 14091 Signed-off-by: Lionel Landwerlin Cc: stable@vger.kernel.org # 6883eab27481: drm/i915: Support flags in whitlist WAs Cc: stable@vger.kernel.org Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190628120720.21682-3-lionel.g.landwerlin@intel.com (cherry picked from commit 2c903da50f5a9522b134e488bd0f92646c46f3c0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 50c0060509a6..b26c3549429e 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1098,10 +1098,25 @@ static void glk_whitelist_build(struct intel_engine_cs *engine) static void cfl_whitelist_build(struct intel_engine_cs *engine) { + struct i915_wa_list *w = &engine->whitelist; + if (engine->class != RENDER_CLASS) return; - gen9_whitelist_build(&engine->whitelist); + gen9_whitelist_build(w); + + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml + * + * This covers 4 register which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); } static void cnl_whitelist_build(struct intel_engine_cs *engine) -- cgit v1.2.3 From cf8f9aa1eda7d916bd23f6b8c226404deb11690c Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 28 Jun 2019 15:07:20 +0300 Subject: drm/i915/icl: whitelist PS_(DEPTH|INVOCATION)_COUNT The same tests failing on CFL+ platforms are also failing on ICL. Documentation doesn't list the WaAllowPMDepthAndInvocationCountAccessFromUMD workaround for ICL but applying it fixes the same tests as CFL. v2: Use only one whitelist entry (Lionel) Signed-off-by: Lionel Landwerlin Tested-by: Anuj Phogat Cc: stable@vger.kernel.org # 6883eab27481: drm/i915: Support flags in whitlist WAs Cc: stable@vger.kernel.org Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190628120720.21682-4-lionel.g.landwerlin@intel.com (cherry picked from commit 3fe0107e45ab396342497e06b8924cdd485cde3b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b26c3549429e..98dfb086320f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1144,6 +1144,19 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) /* WaEnableStateCacheRedirectToCS:icl */ whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); + + /* + * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl + * + * This covers 4 register which are next to one another : + * - PS_INVOCATION_COUNT + * - PS_INVOCATION_COUNT_UDW + * - PS_DEPTH_COUNT + * - PS_DEPTH_COUNT_UDW + */ + whitelist_reg_ext(w, PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_RD | + RING_FORCE_TO_NONPRIV_RANGE_4); break; case VIDEO_DECODE_CLASS: -- cgit v1.2.3 From fdcc789a4a0bb2ef01857095752be12b03cbb341 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Mon, 1 Jul 2019 13:44:42 +0300 Subject: drm/i915: Fix memleak in runtime wakeref tracking If we untrack wakerefs, the actual count may reach zero. However the krealloced owners array is still there and needs to be taken care of. Free the owners unconditionally to fix the leak. Fixes: bd780f37a361 ("drm/i915: Track all held rpm wakerefs") Reported-by: Juha-Pekka Heikkila Cc: Juha-Pekka Heikkila Cc: Chris Wilson Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190701104442.9319-1-mika.kuoppala@linux.intel.com (cherry picked from commit c5f846eed2a1856b78e988eeef08215c70598ecd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_runtime_pm.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 502c54428570..8d1aebc3e857 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -221,13 +221,11 @@ __untrack_all_wakerefs(struct intel_runtime_pm_debug *debug, static void dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug) { - struct drm_printer p; + if (debug->count) { + struct drm_printer p = drm_debug_printer("i915"); - if (!debug->count) - return; - - p = drm_debug_printer("i915"); - __print_intel_runtime_pm_wakeref(&p, debug); + __print_intel_runtime_pm_wakeref(&p, debug); + } kfree(debug->owners); } -- cgit v1.2.3 From d1b739f326b960631827f0ea350002c5bc8df443 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 6 Jun 2019 15:42:10 +0300 Subject: drm/i915: Deal with machines that expose less than three QGV points MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When SAGV is forced to disabled/min/med/max in the BIOS pcode will only hand us a single QGV point instead of the normal three. Fix the code to deal with that instead declaring the bandwidth limit to be 0 MB/s (and thus preventing any planes from being enabled). Also shrink the max_bw sturct a bit while at it, and change the deratedbw type to unsigned since the code returns the bw as an unsigned int. Since we now keep track of how many qgv points we got from pcode we can drop the earlier check added for the "pcode doesn't support the memory subsystem query" case. Cc: felix.j.degrood@intel.com Cc: Mark Janes Cc: Matt Roper Cc: Clint Taylor Fixes: c457d9cf256e ("drm/i915: Make sure we have enough memory bandwidth on ICL") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110838 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190606124210.3482-1-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper (cherry picked from commit 56e9371bc3f3e7d6c1a197a45d550b2ce6af25f6) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bw.c | 15 ++++++++++----- drivers/gpu/drm/i915/i915_drv.h | 5 +++-- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 753ac3165061..7b908e10d32e 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -178,6 +178,8 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv) clpchgroup = (sa->deburst * deinterleave / num_channels) << i; bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; + bi->num_qgv_points = qi.num_points; + for (j = 0; j < qi.num_points; j++) { const struct intel_qgv_point *sp = &qi.points[j]; int ct, bw; @@ -195,7 +197,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv) bi->deratedbw[j] = min(maxdebw, bw * 9 / 10); /* 90% */ - DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n", + DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%u\n", i, j, bi->num_planes, bi->deratedbw[j]); } @@ -211,14 +213,17 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, { int i; - /* Did we initialize the bw limits successfully? */ - if (dev_priv->max_bw[0].num_planes == 0) - return UINT_MAX; - for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) { const struct intel_bw_info *bi = &dev_priv->max_bw[i]; + /* + * Pcode will not expose all QGV points when + * SAGV is forced to off/min/med/max. + */ + if (qgv_point >= bi->num_qgv_points) + return UINT_MAX; + if (num_planes >= bi->num_planes) return bi->deratedbw[qgv_point]; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bc909ec5d9c3..fe7a6ec2c199 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1674,8 +1674,9 @@ struct drm_i915_private { } dram_info; struct intel_bw_info { - int num_planes; - int deratedbw[3]; + unsigned int deratedbw[3]; /* for each QGV point */ + u8 num_qgv_points; + u8 num_planes; } max_bw[6]; struct drm_private_obj bw_obj; -- cgit v1.2.3 From f691eaa4801484fffc8a2bcb24caa27fb2edcce3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Jul 2019 18:19:12 +0100 Subject: drm/i915/gtt: Defer the free for alloc error paths If we hit an error while allocating the page tables, we have to unwind the incomplete updates, and wish to free the unused pd. However, we are not allowed to be hoding the spinlock at that point, and so must use the later free to defer it until after we drop the lock. <3> [414.363795] BUG: sleeping function called from invalid context at drivers/gpu/drm/i915/i915_gem_gtt.c:472 <3> [414.364167] in_atomic(): 1, irqs_disabled(): 0, pid: 3905, name: i915_selftest <4> [414.364406] 3 locks held by i915_selftest/3905: <4> [414.364408] #0: 0000000034fe8aa8 (&dev->mutex){....}, at: device_driver_attach+0x18/0x50 <4> [414.364415] #1: 000000006bd8a560 (&dev->struct_mutex){+.+.}, at: igt_ctx_exec+0xb7/0x410 [i915] <4> [414.364476] #2: 000000003dfdc766 (&(&pd->lock)->rlock){+.+.}, at: gen8_ppgtt_alloc_pdp+0x448/0x540 [i915] <3> [414.364529] Preemption disabled at: <4> [414.364530] [<0000000000000000>] 0x0 <4> [414.364696] CPU: 0 PID: 3905 Comm: i915_selftest Tainted: G U 5.2.0-rc7-CI-CI_DRM_6403+ #1 <4> [414.364698] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014 <4> [414.364699] Call Trace: <4> [414.364704] dump_stack+0x67/0x9b <4> [414.364708] ___might_sleep+0x167/0x250 <4> [414.364777] vm_free_page+0x24/0xc0 [i915] <4> [414.364852] free_pd+0xf/0x20 [i915] <4> [414.364897] gen8_ppgtt_alloc_pdp+0x489/0x540 [i915] <4> [414.364946] gen8_ppgtt_alloc_4lvl+0x8e/0x2e0 [i915] <4> [414.364992] ppgtt_bind_vma+0x2e/0x60 [i915] <4> [414.365039] i915_vma_bind+0xe8/0x2c0 [i915] <4> [414.365088] __i915_vma_do_pin+0xa1/0xd20 [i915] Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111050 Fixes: 1d1b5490b91c ("drm/i915/gtt: Replace struct_mutex serialisation for allocation") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190703171913.16585-3-chris@chris-wilson.co.uk (cherry picked from commit 068610895ebd4bd86f496f01eb7b97e56d7269b2) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8ab820145ea6..50fe72d40d8b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1446,7 +1446,8 @@ unwind_pd: gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe); GEM_BUG_ON(!atomic_read(&pdp->used)); atomic_dec(&pdp->used); - free_pd(vm, pd); + GEM_BUG_ON(alloc); + alloc = pd; /* defer the free to after the lock */ } spin_unlock(&pdp->lock); unwind: @@ -1515,7 +1516,8 @@ unwind_pdp: spin_lock(&pml4->lock); if (atomic_dec_and_test(&pdp->used)) { gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); - free_pd(vm, pdp); + GEM_BUG_ON(alloc); + alloc = pdp; /* defer the free until after the lock */ } spin_unlock(&pml4->lock); unwind: -- cgit v1.2.3 From 5f4c82c89ff0e11b31561aa7e547acb10bf650c2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Jul 2019 21:16:56 +0100 Subject: drm/i915/gtt: Mark the freed page table entries with scratch On unwinding the allocation error path and having freed the page table entry, it is imperative that we mark it as scratch. <4> [416.075569] general protection fault: 0000 [#1] PREEMPT SMP PTI <4> [416.075801] CPU: 0 PID: 2385 Comm: kworker/u2:11 Tainted: G U 5.2.0-rc7-CI-Patchwork_13534+ #1 <4> [416.076162] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014 <4> [416.076522] Workqueue: i915 __i915_vm_release [i915] <4> [416.076754] RIP: 0010:gen8_ppgtt_cleanup_3lvl+0x58/0xb0 [i915] <4> [416.077023] Code: 81 e2 04 fe ff ff 81 c2 ff 01 00 00 4c 8d 74 d6 58 4d 8b 65 00 4d 3b a7 28 02 00 00 74 40 49 8d 5c 24 50 49 81 c4 50 10 00 00 <48> 8b 2b 49 3b af 20 02 00 00 74 13 4c 89 ff 48 89 ee e8 01 fb ff <4> [416.077445] RSP: 0018:ffffc9000046bd98 EFLAGS: 00010206 <4> [416.077625] RAX: 0001000000000000 RBX: 6b6b6b6b6b6b6bbb RCX: 8b4b56d500000000 <4> [416.077838] RDX: 00000000000001ff RSI: ffff88805a578008 RDI: ffff88805bd0efc8 <4> [416.078167] RBP: ffff88805bd0efc8 R08: 0000000004e42b93 R09: 0000000000000001 <4> [416.078381] R10: 0000000000000000 R11: ffff888077a1b0b8 R12: 6b6b6b6b6b6b7bbb <4> [416.078594] R13: ffff88805a578058 R14: ffff88805a579058 R15: ffff88805bd0efc8 <4> [416.078815] FS: 0000000000000000(0000) GS:ffff88807da00000(0000) knlGS:0000000000000000 <4> [416.079395] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [416.079851] CR2: 000056160fec2b14 CR3: 0000000071bbc003 CR4: 00000000003606f0 <4> [416.080388] Call Trace: <4> [416.080828] gen8_ppgtt_cleanup+0x64/0x100 [i915] <4> [416.081399] __i915_vm_release+0xfc/0x1d0 [i915] Fixes: 1d1b5490b91c ("drm/i915/gtt: Replace struct_mutex serialisation for allocation") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Mika Kuoppala Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190704201656.15775-1-chris@chris-wilson.co.uk (cherry picked from commit e7539b79f703a6b533385088fc15cb5c9ab3f56f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 50fe72d40d8b..7015a97b1097 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1444,6 +1444,7 @@ unwind_pd: spin_lock(&pdp->lock); if (atomic_dec_and_test(&pd->used)) { gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe); + pdp->entry[pdpe] = vm->scratch_pd; GEM_BUG_ON(!atomic_read(&pdp->used)); atomic_dec(&pdp->used); GEM_BUG_ON(alloc); @@ -1516,6 +1517,7 @@ unwind_pdp: spin_lock(&pml4->lock); if (atomic_dec_and_test(&pdp->used)) { gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + pml4->entry[pml4e] = vm->scratch_pdp; GEM_BUG_ON(alloc); alloc = pdp; /* defer the free until after the lock */ } -- cgit v1.2.3 From aa56a292ce623734ddd30f52d73f527d1f3529b5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 8 Jul 2019 15:03:27 +0100 Subject: drm/i915/userptr: Acquire the page lock around set_page_dirty() set_page_dirty says: For pages with a mapping this should be done under the page lock for the benefit of asynchronous memory errors who prefer a consistent dirty state. This rule can be broken in some special cases, but should be better not to. Under those rules, it is only safe for us to use the plain set_page_dirty calls for shmemfs/anonymous memory. Userptr may be used with real mappings and so needs to use the locked version (set_page_dirty_lock). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203317 Fixes: 5cc9ed4b9a7a ("drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl") References: 6dcc693bc57f ("ext4: warn when page is dirtied without buffers") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190708140327.26825-1-chris@chris-wilson.co.uk (cherry picked from commit cb6d7c7dc7ff8cace666ddec66334117a6068ce2) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 528b61678334..2caa594322bc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -664,7 +664,15 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, for_each_sgt_page(page, sgt_iter, pages) { if (obj->mm.dirty) - set_page_dirty(page); + /* + * As this may not be anonymous memory (e.g. shmem) + * but exist on a real mapping, we have to lock + * the page in order to dirty it -- holding + * the page reference is not sufficient to + * prevent the inode from being truncated. + * Play safe and take the lock. + */ + set_page_dirty_lock(page); mark_page_accessed(page); put_page(page); -- cgit v1.2.3 From 06c12ae3b401238477e65e8c4e04e065699a6115 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 9 Jul 2019 15:33:39 +0300 Subject: drm/i915/perf: ensure we keep a reference on the driver The i915 perf stream has its own file descriptor and is tied to reference of the driver. We haven't taken care of keep the driver alive. Signed-off-by: Lionel Landwerlin Suggested-by: Chris Wilson Fixes: eec688e1420da5 ("drm/i915: Add i915 perf infrastructure") Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190709123351.5645-2-lionel.g.landwerlin@intel.com (cherry picked from commit a5af1df716c123a09341351008fc497bea137b77) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 1ae06a1b6749..629511ea9a18 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2515,6 +2515,9 @@ static int i915_perf_release(struct inode *inode, struct file *file) i915_perf_destroy_locked(stream); mutex_unlock(&dev_priv->perf.lock); + /* Release the reference the perf stream kept on the driver. */ + drm_dev_put(&dev_priv->drm); + return 0; } @@ -2650,6 +2653,11 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, if (!(param->flags & I915_PERF_FLAG_DISABLED)) i915_perf_enable_locked(stream); + /* Take a reference on the driver that will be kept with stream_fd + * until its release. + */ + drm_dev_get(&dev_priv->drm); + return stream_fd; err_open: -- cgit v1.2.3 From 8f48de49795ca52f70c96558ccc6a0c174504779 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 10 Jul 2019 11:55:24 +0100 Subject: drm/i915/perf: add missing delay for OA muxes configuration This was dropped from the original patch series, we weren't sure whether it was needed at the time. More recent tests show it's definitely needed to have acurate performance data. Signed-off-by: Lionel Landwerlin Fixes: 19f81df2859eb1 ("drm/i915/perf: Add OA unit support for Gen 8+") Acked-by: Chris Wilson [ickle: combine duplicate code and comments] Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190710105524.23017-1-chris@chris-wilson.co.uk (cherry picked from commit 14bfcd3e0daeb0f757a02aac85fd03e0933ab37e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 49 +++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 629511ea9a18..5140017f9a39 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1567,28 +1567,10 @@ static void config_oa_regs(struct drm_i915_private *dev_priv, } } -static int hsw_enable_metric_set(struct i915_perf_stream *stream) +static void delay_after_mux(void) { - struct drm_i915_private *dev_priv = stream->dev_priv; - const struct i915_oa_config *oa_config = stream->oa_config; - - /* PRM: - * - * OA unit is using “crclk” for its functionality. When trunk - * level clock gating takes place, OA clock would be gated, - * unable to count the events from non-render clock domain. - * Render clock gating must be disabled when OA is enabled to - * count the events from non-render domain. Unit level clock - * gating for RCS should also be disabled. - */ - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN7_DOP_CLOCK_GATE_ENABLE)); - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | - GEN6_CSUNIT_CLOCK_GATE_DISABLE)); - - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); - - /* It apparently takes a fairly long time for a new MUX + /* + * It apparently takes a fairly long time for a new MUX * configuration to be be applied after these register writes. * This delay duration was derived empirically based on the * render_basic config but hopefully it covers the maximum @@ -1610,6 +1592,30 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) * a delay at this location would mitigate any invalid reports. */ usleep_range(15000, 20000); +} + +static int hsw_enable_metric_set(struct i915_perf_stream *stream) +{ + struct drm_i915_private *dev_priv = stream->dev_priv; + const struct i915_oa_config *oa_config = stream->oa_config; + + /* + * PRM: + * + * OA unit is using “crclk” for its functionality. When trunk + * level clock gating takes place, OA clock would be gated, + * unable to count the events from non-render clock domain. + * Render clock gating must be disabled when OA is enabled to + * count the events from non-render domain. Unit level clock + * gating for RCS should also be disabled. + */ + I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & + ~GEN7_DOP_CLOCK_GATE_ENABLE)); + I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | + GEN6_CSUNIT_CLOCK_GATE_DISABLE)); + + config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); + delay_after_mux(); config_oa_regs(dev_priv, oa_config->b_counter_regs, oa_config->b_counter_regs_len); @@ -1835,6 +1841,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) return ret; config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); + delay_after_mux(); config_oa_regs(dev_priv, oa_config->b_counter_regs, oa_config->b_counter_regs_len); -- cgit v1.2.3 From 982b1d002f16c2695871e005c4132060c836db56 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Jul 2019 09:09:28 +0100 Subject: drm/i915: Lock the engine while dumping the active request We cannot let the request be retired and freed while we are trying to dump it during error capture. It is not sufficient just to grab a reference to the request, as during retirement we may free the ring which we are also dumping. So take the engine lock to prevent retiring and freeing of the request. Reported-by: Alex Shumsky Fixes: 83c317832eb1 ("drm/i915: Dump the ringbuffer of the active request for debugging") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Alex Shumsky Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190715080946.15593-6-chris@chris-wilson.co.uk (cherry picked from commit cfe7288c276e359eebf057699fe86c2f8af14224) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 ++++------- drivers/gpu/drm/i915/i915_gpu_error.c | 6 ++++-- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7fd33e81c2d9..aa5a1f11a91b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1471,6 +1471,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct i915_gpu_error * const error = &engine->i915->gpu_error; struct i915_request *rq; intel_wakeref_t wakeref; + unsigned long flags; if (header) { va_list ap; @@ -1490,10 +1491,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, i915_reset_engine_count(error, engine), i915_reset_count(error)); - rcu_read_lock(); - drm_printf(m, "\tRequests:\n"); + spin_lock_irqsave(&engine->active.lock, flags); rq = intel_engine_find_active_request(engine); if (rq) { print_request(m, rq, "\t\tactive "); @@ -1513,8 +1513,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, print_request_ring(m, rq); } - - rcu_read_unlock(); + spin_unlock_irqrestore(&engine->active.lock, flags); wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm); if (wakeref) { @@ -1672,7 +1671,6 @@ struct i915_request * intel_engine_find_active_request(struct intel_engine_cs *engine) { struct i915_request *request, *active = NULL; - unsigned long flags; /* * We are called by the error capture, reset and to dump engine @@ -1685,7 +1683,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) * At all other times, we must assume the GPU is still running, but * we only care about the snapshot of this moment. */ - spin_lock_irqsave(&engine->active.lock, flags); + lockdep_assert_held(&engine->active.lock); list_for_each_entry(request, &engine->active.requests, sched.link) { if (i915_request_completed(request)) continue; @@ -1700,7 +1698,6 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) active = request; break; } - spin_unlock_irqrestore(&engine->active.lock, flags); return active; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 41a511d5267f..8bc76fcff70d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1418,6 +1418,7 @@ static void gem_record_rings(struct i915_gpu_state *error) struct intel_engine_cs *engine = i915->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; struct i915_request *request; + unsigned long flags; ee->engine_id = -1; @@ -1429,10 +1430,11 @@ static void gem_record_rings(struct i915_gpu_state *error) error_record_engine_registers(error, engine, ee); error_record_engine_execlists(engine, ee); + spin_lock_irqsave(&engine->active.lock, flags); request = intel_engine_find_active_request(engine); if (request) { struct i915_gem_context *ctx = request->gem_context; - struct intel_ring *ring; + struct intel_ring *ring = request->ring; ee->vm = ctx->vm ?: &ggtt->vm; @@ -1462,7 +1464,6 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->rq_post = request->postfix; ee->rq_tail = request->tail; - ring = request->ring; ee->cpu_ring_head = ring->head; ee->cpu_ring_tail = ring->tail; ee->ringbuffer = @@ -1470,6 +1471,7 @@ static void gem_record_rings(struct i915_gpu_state *error) engine_record_requests(engine, request, ee); } + spin_unlock_irqrestore(&engine->active.lock, flags); ee->hws_page = i915_error_object_create(i915, -- cgit v1.2.3 From a8f196a0fa6391a436f63f360a1fb57031fdf26c Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 17 Jul 2019 14:45:36 +0300 Subject: drm/i915: Make sure cdclk is high enough for DP audio on VLV/CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On VLV/CHV there is some kind of linkage between the cdclk frequency and the DP link frequency. The spec says: "For DP audio configuration, cdclk frequency shall be set to meet the following requirements: DP Link Frequency(MHz) | Cdclk frequency(MHz) 270 | 320 or higher 162 | 200 or higher" I suspect that would more accurately be expressed as "cdclk >= DP link clock", and in any case we can express it like that in the code because of the limited set of cdclk (200, 266, 320, 400 MHz) and link frequencies (162 and 270 MHz) we support. Without this we can end up in a situation where the cdclk is too low and enabling DP audio will kill the pipe. Happens eg. with 2560x1440 modes where the 266MHz cdclk is sufficient to pump the pixels (241.5 MHz dotclock) but is too low for the DP audio due to the link frequency being 270 MHz. v2: Spell out the cdclk and link frequencies we actually support Cc: stable@vger.kernel.org Tested-by: Stefan Gottwald Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111149 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190717114536.22937-1-ville.syrjala@linux.intel.com Acked-by: Chris Wilson (cherry picked from commit bffb31f73b29a60ef693842d8744950c2819851d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 8993ab283562..0d19bbd08122 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2239,6 +2239,17 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) min_cdclk = max(2 * 96000, min_cdclk); + /* + * "For DP audio configuration, cdclk frequency shall be set to + * meet the following requirements: + * DP Link Frequency(MHz) | Cdclk frequency(MHz) + * 270 | 320 or higher + * 162 | 200 or higher" + */ + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + intel_crtc_has_dp_encoder(crtc_state) && crtc_state->has_audio) + min_cdclk = max(crtc_state->port_clock, min_cdclk); + /* * On Valleyview some DSI panels lose (v|h)sync when the clock is lower * than 320000KHz. -- cgit v1.2.3 From 6d61f716a01ec0e134de38ae97e71d6fec5a6ff6 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Wed, 17 Jul 2019 15:34:51 -0700 Subject: drm/i915/vbt: Fix VBT parsing for the PSR section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A single 32-bit PSR2 training pattern field follows the sixteen element array of PSR table entries in the VBT spec. But, we incorrectly define this PSR2 field for each of the PSR table entries. As a result, the PSR1 training pattern duration for any panel_type != 0 will be parsed incorrectly. Secondly, PSR2 training pattern durations for VBTs with bdb version >= 226 will also be wrong. Cc: Rodrigo Vivi Cc: José Roberto de Souza Cc: stable@vger.kernel.org Cc: stable@vger.kernel.org #v5.2 Fixes: 88a0d9606aff ("drm/i915/vbt: Parse and use the new field with PSR2 TP2/3 wakeup time") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111088 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204183 Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Acked-by: Rodrigo Vivi Tested-by: François Guerraz Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20190717223451.2595-1-dhinakaran.pandiyan@intel.com (cherry picked from commit b5ea9c9337007d6e700280c8a60b4e10d070fb53) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bios.c | 2 +- drivers/gpu/drm/i915/display/intel_vbt_defs.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index c4710889cb32..3ef4e9f573cf 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -765,7 +765,7 @@ parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) } if (bdb->version >= 226) { - u32 wakeup_time = psr_table->psr2_tp2_tp3_wakeup_time; + u32 wakeup_time = psr->psr2_tp2_tp3_wakeup_time; wakeup_time = (wakeup_time >> (2 * panel_type)) & 0x3; switch (wakeup_time) { diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 2f4894e9a03d..5ddbe71ab423 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -478,13 +478,13 @@ struct psr_table { /* TP wake up time in multiple of 100 */ u16 tp1_wakeup_time; u16 tp2_tp3_wakeup_time; - - /* PSR2 TP2/TP3 wakeup time for 16 panels */ - u32 psr2_tp2_tp3_wakeup_time; } __packed; struct bdb_psr { struct psr_table psr_table[16]; + + /* PSR2 TP2/TP3 wakeup time for 16 panels */ + u32 psr2_tp2_tp3_wakeup_time; } __packed; /* -- cgit v1.2.3 From 0bbfdce345c8cf01a3a985fa99fefd2146dcc748 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 17 Jul 2019 19:06:19 +0100 Subject: drm/i915: Fix GEN8_MCR_SELECTOR programming fls returns bit positions starting from one for the lsb and the MCR register expects zero based (sub)slice addressing. Incorrent MCR programming can have the effect of directing MMIO reads of registers in the 0xb100-0xb3ff range to invalid subslice returning zeroes instead of actual content. Signed-off-by: Tvrtko Ursulin Fixes: 1e40d4aea57b ("drm/i915/cnl: Implement WaProgramMgsrForCorrectSliceSpecificMmioReads") Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190717180624.20354-2-tvrtko.ursulin@linux.intel.com (cherry picked from commit 15160879d47213c32f357bc67b6014d9aaf14ed7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index aa5a1f11a91b..f25632c9b292 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -969,9 +969,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type) u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) { const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; + unsigned int slice = fls(sseu->slice_mask) - 1; + unsigned int subslice; u32 mcr_s_ss_select; - u32 slice = fls(sseu->slice_mask); - u32 subslice = fls(sseu->subslice_mask[slice]); + + GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); + subslice = fls(sseu->subslice_mask[slice]); + GEM_BUG_ON(!subslice); + subslice--; if (IS_GEN(dev_priv, 10)) mcr_s_ss_select = GEN8_MCR_SLICE(slice) | -- cgit v1.2.3 From 89f5752307cf53010d97503ac501b2ca1b089922 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 28 Jun 2019 17:36:18 +0300 Subject: drm/i915: Fix the TBT AUX power well enabling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the mapping from a TBT AUX power well index to the DP_AUX_CH_CTL register. Fixes: c7375d9542f1 ("drm/i915: Configure AUX_CH_CTL when enabling the AUX power domain") Cc: José Roberto de Souza Cc: Rodrigo Vivi Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190628143635.22066-7-imre.deak@intel.com (cherry picked from commit 29ae36abf08f943b76a2959f5000c44efa335be7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_power.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index c93ad512014c..2d1939db108f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -438,16 +438,23 @@ icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv, #define ICL_AUX_PW_TO_CH(pw_idx) \ ((pw_idx) - ICL_PW_CTL_IDX_AUX_A + AUX_CH_A) +#define ICL_TBT_AUX_PW_TO_CH(pw_idx) \ + ((pw_idx) - ICL_PW_CTL_IDX_AUX_TBT1 + AUX_CH_C) + static void icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - enum aux_ch aux_ch = ICL_AUX_PW_TO_CH(power_well->desc->hsw.idx); + int pw_idx = power_well->desc->hsw.idx; + bool is_tbt = power_well->desc->hsw.is_tc_tbt; + enum aux_ch aux_ch; u32 val; + aux_ch = is_tbt ? ICL_TBT_AUX_PW_TO_CH(pw_idx) : + ICL_AUX_PW_TO_CH(pw_idx); val = I915_READ(DP_AUX_CH_CTL(aux_ch)); val &= ~DP_AUX_CH_CTL_TBT_IO; - if (power_well->desc->hsw.is_tc_tbt) + if (is_tbt) val |= DP_AUX_CH_CTL_TBT_IO; I915_WRITE(DP_AUX_CH_CTL(aux_ch), val); -- cgit v1.2.3 From c00f9c6b79f7e1c5caf774c38e9fd5dad2d2ef1c Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Wed, 12 Jun 2019 11:17:46 +0800 Subject: drm/i915/gvt: remove duplicate include of trace.h This removes duplicate include of trace.h. Found by Hariprasad Kelam with includecheck. Reported-by: Hariprasad Kelam Reviewed-by: Yan Zhao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/trace_points.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gvt/trace_points.c b/drivers/gpu/drm/i915/gvt/trace_points.c index a3deed692b9c..fe552e877e09 100644 --- a/drivers/gpu/drm/i915/gvt/trace_points.c +++ b/drivers/gpu/drm/i915/gvt/trace_points.c @@ -28,8 +28,6 @@ * */ -#include "trace.h" - #ifndef __CHECKER__ #define CREATE_TRACE_POINTS #include "trace.h" -- cgit v1.2.3 From d18fd0576e05a4b03b588e131093b0437fccb75f Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Mon, 27 May 2019 13:45:51 +0800 Subject: drm/i915/gvt: Warning for invalid ggtt access Instead of silently return virtual ggtt entries that guest is allowed to access, this patch add extra range check. If guest read out of range, it will print a warning and return 0. If guest write out of range, the write will be dropped without any message. Reviewed-by: Zhenyu Wang Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 53115bdae12b..4b04af569c05 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2141,11 +2141,20 @@ static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; unsigned long index = off >> info->gtt_entry_size_shift; + unsigned long gma; struct intel_gvt_gtt_entry e; if (bytes != 4 && bytes != 8) return -EINVAL; + gma = index << I915_GTT_PAGE_SHIFT; + if (!intel_gvt_ggtt_validate_range(vgpu, + gma, 1 << I915_GTT_PAGE_SHIFT)) { + gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma); + memset(p_data, 0, bytes); + return 0; + } + ggtt_get_guest_entry(ggtt_mm, &e, index); memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)), bytes); -- cgit v1.2.3 From c25144098bee19b089c8a37c54517bf467f06403 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Mon, 27 May 2019 13:45:52 +0800 Subject: drm/i915/gvt: Don't use ggtt_validdate_range() with size=0 Use vgpu_gmadr_is_valid() directly instead. Reviewed-by: Zhenyu Wang Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/fb_decoder.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 65e847392aea..8bb292b01271 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -245,7 +245,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, plane->hw_format = fmt; plane->base = vgpu_vreg_t(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) + if (!vgpu_gmadr_is_valid(vgpu, plane->base)) return -EINVAL; plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); @@ -368,7 +368,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, alpha_plane, alpha_force); plane->base = vgpu_vreg_t(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) + if (!vgpu_gmadr_is_valid(vgpu, plane->base)) return -EINVAL; plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); @@ -472,7 +472,7 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, plane->drm_format = drm_format; plane->base = vgpu_vreg_t(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) + if (!vgpu_gmadr_is_valid(vgpu, plane->base)) return -EINVAL; plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); -- cgit v1.2.3 From 2089a76ade9005a06c5e08e8454f45f3625fdc1c Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Mon, 27 May 2019 13:45:53 +0800 Subject: drm/i915/gvt: Checking workload's gma earlier Workload contains RB and WA_CTX which are in ggtt space, if they aren't in valid ggtt space, the workload shouldn't be shadowed and scanned. So checking them earlier to avoid shadow them. Reviewed-by: Zhenyu Wang Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 10 ---------- drivers/gpu/drm/i915/gvt/scheduler.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 6ea88270c818..b09dc315e2da 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2674,11 +2674,6 @@ static int scan_workload(struct intel_vgpu_workload *workload) gma_head == gma_tail) return 0; - if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { - ret = -EINVAL; - goto out; - } - ret = ip_gma_set(&s, gma_head); if (ret) goto out; @@ -2724,11 +2719,6 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) s.workload = workload; s.is_ctx_wa = true; - if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) { - ret = -EINVAL; - goto out; - } - ret = ip_gma_set(&s, gma_head); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 2144fb46d0e1..6469366c1753 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1492,6 +1492,12 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4); + if (!intel_gvt_ggtt_validate_range(vgpu, start, + _RING_CTL_BUF_SIZE(ctl))) { + gvt_vgpu_err("context contain invalid rb at: 0x%x\n", start); + return ERR_PTR(-EINVAL); + } + workload = alloc_workload(vgpu); if (IS_ERR(workload)) return workload; @@ -1516,9 +1522,31 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, workload->wa_ctx.indirect_ctx.size = (indirect_ctx & INDIRECT_CTX_SIZE_MASK) * CACHELINE_BYTES; + + if (workload->wa_ctx.indirect_ctx.size != 0) { + if (!intel_gvt_ggtt_validate_range(vgpu, + workload->wa_ctx.indirect_ctx.guest_gma, + workload->wa_ctx.indirect_ctx.size)) { + kmem_cache_free(s->workloads, workload); + gvt_vgpu_err("invalid wa_ctx at: 0x%lx\n", + workload->wa_ctx.indirect_ctx.guest_gma); + return ERR_PTR(-EINVAL); + } + } + workload->wa_ctx.per_ctx.guest_gma = per_ctx & PER_CTX_ADDR_MASK; workload->wa_ctx.per_ctx.valid = per_ctx & 1; + if (workload->wa_ctx.per_ctx.valid) { + if (!intel_gvt_ggtt_validate_range(vgpu, + workload->wa_ctx.per_ctx.guest_gma, + CACHELINE_BYTES)) { + kmem_cache_free(s->workloads, workload); + gvt_vgpu_err("invalid per_ctx at: 0x%lx\n", + workload->wa_ctx.per_ctx.guest_gma); + return ERR_PTR(-EINVAL); + } + } } gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n", -- cgit v1.2.3 From 7366aeb77cd840f3edea02c65065d40affaa7f45 Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Thu, 18 Jul 2019 01:10:24 +0800 Subject: drm/i915/gvt: fix incorrect cache entry for guest page mapping GPU hang observed during the guest OCL conformance test which is caused by THP GTT feature used durning the test. It was observed the same GFN with different size (4K and 2M) requested from the guest in GVT. So during the guest page dma map stage, it is required to unmap first with orginal size and then remap again with requested size. Fixes: b901b252b6cf ("drm/i915/gvt: Add 2M huge gtt support") Cc: stable@vger.kernel.org Reviewed-by: Zhenyu Wang Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 144301b778df..23aa3e50cbf8 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1904,6 +1904,18 @@ static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, entry = __gvt_cache_find_gfn(info->vgpu, gfn); if (!entry) { + ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size); + if (ret) + goto err_unlock; + + ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size); + if (ret) + goto err_unmap; + } else if (entry->size != size) { + /* the same gfn with different size: unmap and re-map */ + gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size); + __gvt_cache_remove_entry(vgpu, entry); + ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size); if (ret) goto err_unlock; -- cgit v1.2.3 From ef5b0b444e6297d03ac0bdc0c82f65396ef4dccd Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Thu, 20 Jun 2019 10:29:24 -0400 Subject: drm/i915/gvt: grab runtime pm first for forcewake use in workload_thread, it should grab runtime pm wakelock and later uncore forcewake get will check rpm wakelock held successfully. otherwise, sometimes, rpm wakelock not hold and print call trace below: Call Trace: intel_uncore_forcewake_get+0x15/0x20 [i915] workload_thread+0x5f9/0x16f0 [i915] ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to+0x85/0x3f0 ? __switch_to_asm+0x40/0x70 ? do_wait_intr_irq+0x90/0x90 kthread+0x121/0x140 ? intel_vgpu_clean_workloads+0x100/0x100 [i915] ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 --[ end trace 86525f742a02e12c ]-- v2: adapted to use rpm structure. Fixes: 251d46b0875c ("drm/i915/gvt: Pin the per-engine GVT shadow contexts") Reviewed-by: Zhenyu Wang Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 6469366c1753..196b4155a309 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -990,6 +990,7 @@ static int workload_thread(void *priv) int ret; bool need_force_wake = (INTEL_GEN(gvt->dev_priv) >= 9); DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct intel_runtime_pm *rpm = &gvt->dev_priv->runtime_pm; kfree(p); @@ -1013,6 +1014,8 @@ static int workload_thread(void *priv) workload->ring_id, workload, workload->vgpu->id); + intel_runtime_pm_get(rpm); + gvt_dbg_sched("ring id %d will dispatch workload %p\n", workload->ring_id, workload); @@ -1042,6 +1045,7 @@ complete: intel_uncore_forcewake_put(&gvt->dev_priv->uncore, FORCEWAKE_ALL); + intel_runtime_pm_put_unchecked(rpm); if (ret && (vgpu_is_vm_unhealthy(ret))) enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); } -- cgit v1.2.3 From 4187414808095f645ca0661f8dde77617e2e7cb3 Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Thu, 4 Jul 2019 16:45:06 +0800 Subject: drm/i915/gvt: Adding ppgtt to GVT GEM context after shadow pdps settled. Windows guest can't run after force-TDR with host log: ... gvt: vgpu 1: workload shadow ppgtt isn't ready gvt: vgpu 1: fail to dispatch workload, skip ... The error is raised by set_context_ppgtt_from_shadow(), when it checks and found the shadow_mm isn't marked as shadowed. In work thread before each submission, a shadow_mm is set to shadowed in: shadow_ppgtt_mm() <-intel_vgpu_pin_mm() <-prepare_workload() <-dispatch_workload() <-workload_thread() However checking whether or not shadow_mm is shadowed is prior to it: set_context_ppgtt_from_shadow() <-dispatch_workload() <-workload_thread() In normal case, create workload will check the existence of shadow_mm, if not it will create a new one and marked as shadowed. If already exist it will reuse the old one. Since shadow_mm is reused, checking of shadowed in set_context_ppgtt_from_shadow() actually always see the state set in creation, but not the state set in intel_vgpu_pin_mm(). When force-TDR, all engines are reset, since it's not dmlr level, all ppgtt_mm are invalidated but not destroyed. Invalidation will mark all reused shadow_mm as not shadowed but still keeps in ppgtt_mm_list_head. If workload submission phase those shadow_mm are reused with shadowed not set, then set_context_ppgtt_from_shadow() will report error. Pin for context after shadow_mm pinned and shadow pdps settled. v2: Move set_context_ppgtt_from_shadow() after prepare_workload(). (zhenyu) v3: Move set_context_ppgtt_from_shadow() after shadow pdps updated.(zhenyu) Fixes: 4f15665ccbba ("drm/i915: Add ppgtt to GVT GEM context") Cc: stable@vger.kernel.org Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 196b4155a309..9f3fd7d96a69 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -364,16 +364,13 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx->indirect_ctx.shadow_va = NULL; } -static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, - struct i915_gem_context *ctx) +static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, + struct i915_gem_context *ctx) { struct intel_vgpu_mm *mm = workload->shadow_mm; struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm); int i = 0; - if (mm->type != INTEL_GVT_MM_PPGTT || !mm->ppgtt_mm.shadowed) - return -EINVAL; - if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0]; } else { @@ -384,8 +381,6 @@ static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; } } - - return 0; } static int @@ -614,6 +609,8 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) static int prepare_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + int ring = workload->ring_id; int ret = 0; ret = intel_vgpu_pin_mm(workload->shadow_mm); @@ -622,8 +619,16 @@ static int prepare_workload(struct intel_vgpu_workload *workload) return ret; } + if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT || + !workload->shadow_mm->ppgtt_mm.shadowed) { + gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); + return -EINVAL; + } + update_shadow_pdps(workload); + set_context_ppgtt_from_shadow(workload, s->shadow[ring]->gem_context); + ret = intel_vgpu_sync_oos_pages(workload->vgpu); if (ret) { gvt_vgpu_err("fail to vgpu sync oos pages\n"); @@ -674,7 +679,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_vgpu_submission *s = &vgpu->submission; struct i915_request *rq; int ring_id = workload->ring_id; int ret; @@ -685,13 +689,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) mutex_lock(&vgpu->vgpu_lock); mutex_lock(&dev_priv->drm.struct_mutex); - ret = set_context_ppgtt_from_shadow(workload, - s->shadow[ring_id]->gem_context); - if (ret < 0) { - gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); - goto err_req; - } - ret = intel_gvt_workload_req_alloc(workload); if (ret) goto err_req; -- cgit v1.2.3 From 0de50e40fc685fed4d6896a379b123f859ffb17b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Jun 2019 16:45:49 +0100 Subject: drm/i915: Lift intel_engines_resume() to callers Since the reset path wants to recover the engines itself, it only wants to reinitialise the hardware using i915_gem_init_hw(). Pull the call to intel_engines_resume() to the module init/resume path so we can avoid it during reset. Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy") Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20190626154549.10066-3-chris@chris-wilson.co.uk (cherry picked from commit 092be382a2602067766f190a113514d469162456) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 7 ++++--- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 24 ------------------------ drivers/gpu/drm/i915/gt/intel_engine_pm.h | 2 -- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 21 ++++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 21 ++++++++++++++++++++- drivers/gpu/drm/i915/i915_gem.c | 25 +++++++++++-------------- 7 files changed, 56 insertions(+), 46 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 05011d4a3b88..914b5d4112bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -253,14 +253,15 @@ void i915_gem_resume(struct drm_i915_private *i915) i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(i915); + if (i915_gem_init_hw(i915)) + goto err_wedged; + /* * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. */ - intel_gt_resume(i915); - - if (i915_gem_init_hw(i915)) + if (intel_gt_resume(i915)) goto err_wedged; intel_uc_resume(i915); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 2ce00d3dc42a..ae5b6baf6dff 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -142,27 +142,3 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) { intel_wakeref_init(&engine->wakeref); } - -int intel_engines_resume(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - intel_gt_pm_get(i915); - for_each_engine(engine, i915, id) { - intel_engine_pm_get(engine); - engine->serial++; /* kernel context lost */ - err = engine->resume(engine); - intel_engine_pm_put(engine); - if (err) { - dev_err(i915->drm.dev, - "Failed to restart %s (%d)\n", - engine->name, err); - break; - } - } - intel_gt_pm_put(i915); - - return err; -} diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index b326cd993d60..f6f213fbc98c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -17,6 +17,4 @@ void intel_engine_park(struct intel_engine_cs *engine); void intel_engine_init__pm(struct intel_engine_cs *engine); -int intel_engines_resume(struct drm_i915_private *i915); - #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 7b5967751762..9f8f7f54191f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -5,6 +5,7 @@ */ #include "i915_drv.h" +#include "intel_engine_pm.h" #include "intel_gt_pm.h" #include "intel_pm.h" #include "intel_wakeref.h" @@ -118,10 +119,11 @@ void intel_gt_sanitize(struct drm_i915_private *i915, bool force) intel_engine_reset(engine, false); } -void intel_gt_resume(struct drm_i915_private *i915) +int intel_gt_resume(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; + int err = 0; /* * After resume, we may need to poke into the pinned kernel @@ -129,9 +131,12 @@ void intel_gt_resume(struct drm_i915_private *i915) * Only the kernel contexts should remain pinned over suspend, * allowing us to fixup the user contexts on their first pin. */ + intel_gt_pm_get(i915); for_each_engine(engine, i915, id) { struct intel_context *ce; + intel_engine_pm_get(engine); + ce = engine->kernel_context; if (ce) ce->ops->reset(ce); @@ -139,5 +144,19 @@ void intel_gt_resume(struct drm_i915_private *i915) ce = engine->preempt_context; if (ce) ce->ops->reset(ce); + + engine->serial++; /* kernel context lost */ + err = engine->resume(engine); + + intel_engine_pm_put(engine); + if (err) { + dev_err(i915->drm.dev, + "Failed to restart %s (%d)\n", + engine->name, err); + break; + } } + intel_gt_pm_put(i915); + + return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 7dd1130a19a4..53f342b20181 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -22,6 +22,6 @@ void intel_gt_pm_put(struct drm_i915_private *i915); void intel_gt_pm_init(struct drm_i915_private *i915); void intel_gt_sanitize(struct drm_i915_private *i915, bool force); -void intel_gt_resume(struct drm_i915_private *i915); +int intel_gt_resume(struct drm_i915_private *i915); #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 4c478b38e420..0439ed66e969 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -951,6 +951,21 @@ static int do_reset(struct drm_i915_private *i915, return gt_reset(i915, stalled_mask); } +static int resume(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + for_each_engine(engine, i915, id) { + ret = engine->resume(engine); + if (ret) + return ret; + } + + return 0; +} + /** * i915_reset - reset chip after a hang * @i915: #drm_i915_private to reset @@ -1024,9 +1039,13 @@ void i915_reset(struct drm_i915_private *i915, if (ret) { DRM_ERROR("Failed to initialise HW following reset (%d)\n", ret); - goto error; + goto taint; } + ret = resume(i915); + if (ret) + goto taint; + i915_queue_hangcheck(i915); finish: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 190ad54fb072..8a659d3d7435 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,7 +46,6 @@ #include "gem/i915_gem_ioctls.h" #include "gem/i915_gem_pm.h" #include "gem/i915_gemfs.h" -#include "gt/intel_engine_pm.h" #include "gt/intel_gt_pm.h" #include "gt/intel_mocs.h" #include "gt/intel_reset.h" @@ -1307,21 +1306,13 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) intel_mocs_init_l3cc_table(dev_priv); - /* Only when the HW is re-initialised, can we replay the requests */ - ret = intel_engines_resume(dev_priv); - if (ret) - goto cleanup_uc; - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); intel_engines_set_scheduler_caps(dev_priv); return 0; -cleanup_uc: - intel_uc_fini_hw(dev_priv); out: intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - return ret; } @@ -1580,6 +1571,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) goto err_uc_init; + /* Only when the HW is re-initialised, can we replay the requests */ + ret = intel_gt_resume(dev_priv); + if (ret) + goto err_init_hw; + /* * Despite its name intel_init_clock_gating applies both display * clock gating workarounds; GT mmio workarounds and the occasional @@ -1593,20 +1589,20 @@ int i915_gem_init(struct drm_i915_private *dev_priv) ret = intel_engines_verify_workarounds(dev_priv); if (ret) - goto err_init_hw; + goto err_gt; ret = __intel_engines_record_defaults(dev_priv); if (ret) - goto err_init_hw; + goto err_gt; if (i915_inject_load_failure()) { ret = -ENODEV; - goto err_init_hw; + goto err_gt; } if (i915_inject_load_failure()) { ret = -EIO; - goto err_init_hw; + goto err_gt; } intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); @@ -1620,7 +1616,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * HW as irrevisibly wedged, but keep enough state around that the * driver doesn't explode during runtime. */ -err_init_hw: +err_gt: mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_set_wedged(dev_priv); @@ -1630,6 +1626,7 @@ err_init_hw: i915_gem_drain_workqueue(dev_priv); mutex_lock(&dev_priv->drm.struct_mutex); +err_init_hw: intel_uc_fini_hw(dev_priv); err_uc_init: intel_uc_fini(dev_priv); -- cgit v1.2.3 From b1fa6fd94fc6a5d6be85359743b5f3626f3f881c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Jun 2019 16:45:47 +0100 Subject: drm/i915: Add a wakeref getter for iff the wakeref is already active For use in the next patch, we want to acquire a wakeref without having to wake the device up -- i.e. only acquire the engine wakeref if the engine is already active. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190626154549.10066-1-chris@chris-wilson.co.uk (cherry picked from commit de5147b8ce6d51f634661d7c531385371485cec6) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_engine_pm.h | 10 +++++++++- drivers/gpu/drm/i915/intel_wakeref.h | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index f6f213fbc98c..a11c893f64c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -7,12 +7,20 @@ #ifndef INTEL_ENGINE_PM_H #define INTEL_ENGINE_PM_H +#include "intel_engine_types.h" +#include "intel_wakeref.h" + struct drm_i915_private; -struct intel_engine_cs; void intel_engine_pm_get(struct intel_engine_cs *engine); void intel_engine_pm_put(struct intel_engine_cs *engine); +static inline bool +intel_engine_pm_get_if_awake(struct intel_engine_cs *engine) +{ + return intel_wakeref_get_if_active(&engine->wakeref); +} + void intel_engine_park(struct intel_engine_cs *engine); void intel_engine_init__pm(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 9cbb2ebf575b..38275310b196 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -65,6 +65,21 @@ intel_wakeref_get(struct intel_runtime_pm *rpm, return 0; } +/** + * intel_wakeref_get_if_in_use: Acquire the wakeref + * @wf: the wakeref + * + * Acquire a hold on the wakeref, but only if the wakeref is already + * active. + * + * Returns: true if the wakeref was acquired, false otherwise. + */ +static inline bool +intel_wakeref_get_if_active(struct intel_wakeref *wf) +{ + return atomic_inc_not_zero(&wf->count); +} + /** * intel_wakeref_put: Release the wakeref * @i915: the drm_i915_private device -- cgit v1.2.3 From 4b9bb9728c915c6079619e71e3340fe4840d9d40 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Jun 2019 16:45:48 +0100 Subject: drm/i915: Only recover active engines If we issue a reset to a currently idle engine, leave it idle afterwards. This is useful to excise a linkage between reset and the shrinker. When waking the engine, we need to pin the default context image which we use for overwriting a guilty context -- if the engine is idle we do not need this pinned image! However, this pinning means that waking the engine acquires the FS_RECLAIM, and so may trigger the shrinker. The shrinker itself may need to wait upon the GPU to unbind and object and so may require services of reset; ergo we should avoid the engine wake up path. The danger in skipping the recovery for idle engines is that we leave the engine with no context defined, which may interfere with the operation of the power context on some older platforms. In practice, we should only be resetting an active GPU but it something to look out for on Ironlake (if memory serves). Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy") Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20190626154549.10066-2-chris@chris-wilson.co.uk (cherry picked from commit 18398904ca9e3ddd180e2ecd45886e146b1d9d5b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_reset.c | 37 +++++++++++++++++++------------- drivers/gpu/drm/i915/gt/selftest_reset.c | 5 ++++- 2 files changed, 26 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 0439ed66e969..3f907701ef4d 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -687,7 +687,6 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) * written to the powercontext is undefined and so we may lose * GPU state upon resume, i.e. fail to restart after a reset. */ - intel_engine_pm_get(engine); intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); engine->reset.prepare(engine); } @@ -718,16 +717,21 @@ static void revoke_mmaps(struct drm_i915_private *i915) } } -static void reset_prepare(struct drm_i915_private *i915) +static intel_engine_mask_t reset_prepare(struct drm_i915_private *i915) { struct intel_engine_cs *engine; + intel_engine_mask_t awake = 0; enum intel_engine_id id; - intel_gt_pm_get(i915); - for_each_engine(engine, i915, id) + for_each_engine(engine, i915, id) { + if (intel_engine_pm_get_if_awake(engine)) + awake |= engine->mask; reset_prepare_engine(engine); + } intel_uc_reset_prepare(i915); + + return awake; } static void gt_revoke(struct drm_i915_private *i915) @@ -761,20 +765,22 @@ static int gt_reset(struct drm_i915_private *i915, static void reset_finish_engine(struct intel_engine_cs *engine) { engine->reset.finish(engine); - intel_engine_pm_put(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); + + intel_engine_signal_breadcrumbs(engine); } -static void reset_finish(struct drm_i915_private *i915) +static void reset_finish(struct drm_i915_private *i915, + intel_engine_mask_t awake) { struct intel_engine_cs *engine; enum intel_engine_id id; for_each_engine(engine, i915, id) { reset_finish_engine(engine); - intel_engine_signal_breadcrumbs(engine); + if (awake & engine->mask) + intel_engine_pm_put(engine); } - intel_gt_pm_put(i915); } static void nop_submit_request(struct i915_request *request) @@ -798,6 +804,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) { struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; + intel_engine_mask_t awake; enum intel_engine_id id; if (test_bit(I915_WEDGED, &error->flags)) @@ -817,7 +824,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) * rolling the global seqno forward (since this would complete requests * for which we haven't set the fence error to EIO yet). */ - reset_prepare(i915); + awake = reset_prepare(i915); /* Even if the GPU reset fails, it should still stop the engines */ if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) @@ -841,7 +848,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) for_each_engine(engine, i915, id) engine->cancel_requests(engine); - reset_finish(i915); + reset_finish(i915, awake); GEM_TRACE("end\n"); } @@ -988,6 +995,7 @@ void i915_reset(struct drm_i915_private *i915, const char *reason) { struct i915_gpu_error *error = &i915->gpu_error; + intel_engine_mask_t awake; int ret; GEM_TRACE("flags=%lx\n", error->flags); @@ -1004,7 +1012,7 @@ void i915_reset(struct drm_i915_private *i915, dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); error->reset_count++; - reset_prepare(i915); + awake = reset_prepare(i915); if (!intel_has_gpu_reset(i915)) { if (i915_modparams.reset) @@ -1049,7 +1057,7 @@ void i915_reset(struct drm_i915_private *i915, i915_queue_hangcheck(i915); finish: - reset_finish(i915); + reset_finish(i915, awake); unlock: mutex_unlock(&error->wedge_mutex); return; @@ -1100,7 +1108,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); - if (!intel_wakeref_active(&engine->wakeref)) + if (!intel_engine_pm_get_if_awake(engine)) return 0; reset_prepare_engine(engine); @@ -1135,12 +1143,11 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * process to program RING_MODE, HWSP and re-enable submission. */ ret = engine->resume(engine); - if (ret) - goto out; out: intel_engine_cancel_stop_cs(engine); reset_finish_engine(engine); + intel_engine_pm_put(engine); return ret; } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 89da9e7cc1ba..b5c590c9ccba 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -71,13 +71,16 @@ static int igt_atomic_reset(void *arg) goto unlock; for (p = igt_atomic_phases; p->name; p++) { + intel_engine_mask_t awake; + GEM_TRACE("intel_gpu_reset under %s\n", p->name); + awake = reset_prepare(i915); p->critical_section_begin(); reset_prepare(i915); err = intel_gpu_reset(i915, ALL_ENGINES); - reset_finish(i915); p->critical_section_end(); + reset_finish(i915, awake); if (err) { pr_err("intel_gpu_reset failed under %s\n", p->name); -- cgit v1.2.3