summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c699
1 files changed, 379 insertions, 320 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index fbeaec3994e7..7f841dba87b3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -29,11 +29,11 @@
#include <linux/log2.h>
-#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_render_state.h"
+#include "i915_reset.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_workarounds.h"
@@ -43,17 +43,10 @@
*/
#define LEGACY_REQUEST_SIZE 200
-static unsigned int __intel_ring_space(unsigned int head,
- unsigned int tail,
- unsigned int size)
+static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
- /*
- * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
- * same cacheline, the Head Pointer must not be greater than the Tail
- * Pointer."
- */
- GEM_BUG_ON(!is_power_of_2(size));
- return (head - tail - CACHELINE_BYTES) & (size - 1);
+ return (i915_ggtt_offset(engine->status_page.vma) +
+ I915_GEM_HWS_INDEX_ADDR);
}
unsigned int intel_ring_update_space(struct intel_ring *ring)
@@ -133,7 +126,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
cmd = MI_FLUSH;
if (mode & EMIT_INVALIDATE) {
cmd |= MI_EXE_FLUSH;
- if (IS_G4X(rq->i915) || IS_GEN5(rq->i915))
+ if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5))
cmd |= MI_INVALIDATE_ISP;
}
@@ -217,7 +210,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
* really our business. That leaves only stall at scoreboard.
*/
static int
-intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
+gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
{
u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
u32 *cs;
@@ -257,7 +250,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
int ret;
/* Force SNB workarounds for PIPE_CONTROL flushes */
- ret = intel_emit_post_sync_nonzero_flush(rq);
+ ret = gen6_emit_post_sync_nonzero_flush(rq);
if (ret)
return ret;
@@ -300,6 +293,43 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
return 0;
}
+static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ /* First we do the gen6_emit_post_sync_nonzero_flush w/a */
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+ *cs++ = 0;
+ *cs++ = 0;
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = PIPE_CONTROL_QW_WRITE;
+ *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = 0;
+
+ /* Finally we can flush and with it emit the breadcrumb */
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_QW_WRITE |
+ PIPE_CONTROL_CS_STALL);
+ *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = rq->fence.seqno;
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
+ *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = rq->global_seqno;
+
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+
+ return cs;
+}
+
static int
gen7_render_ring_cs_stall_wa(struct i915_request *rq)
{
@@ -379,11 +409,111 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
return 0;
}
-static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_QW_WRITE |
+ PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_CS_STALL);
+ *cs++ = rq->timeline->hwsp_offset;
+ *cs++ = rq->fence.seqno;
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = (PIPE_CONTROL_QW_WRITE |
+ PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_CS_STALL);
+ *cs++ = intel_hws_seqno_address(rq->engine);
+ *cs++ = rq->global_seqno;
+
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+
+ return cs;
+}
+
+static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+ *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = rq->fence.seqno;
+
+ *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = rq->global_seqno;
+
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+
+ return cs;
+}
+
+#define GEN7_XCS_WA 32
+static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ int i;
+
+ GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+ *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = rq->fence.seqno;
+
+ *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = rq->global_seqno;
+
+ for (i = 0; i < GEN7_XCS_WA; i++) {
+ *cs++ = MI_STORE_DWORD_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR;
+ *cs++ = rq->fence.seqno;
+ }
+
+ *cs++ = MI_FLUSH_DW;
+ *cs++ = 0;
+ *cs++ = 0;
+
+ *cs++ = MI_USER_INTERRUPT;
+
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+
+ return cs;
+}
+#undef GEN7_XCS_WA
+
+static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
+{
+ /*
+ * Keep the render interrupt unmasked as this papers over
+ * lost interrupts following a reset.
+ */
+ if (engine->class == RENDER_CLASS) {
+ if (INTEL_GEN(engine->i915) >= 6)
+ mask &= ~BIT(0);
+ else
+ mask &= ~I915_USER_INTERRUPT;
+ }
+
+ intel_engine_set_hwsp_writemask(engine, mask);
+}
+
+static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
{
struct drm_i915_private *dev_priv = engine->i915;
- struct page *page = virt_to_page(engine->status_page.page_addr);
- phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
u32 addr;
addr = lower_32_bits(phys);
@@ -393,15 +523,30 @@ static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
I915_WRITE(HWS_PGA, addr);
}
-static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
+static struct page *status_page(struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
+
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ return sg_page(obj->mm.pages->sgl);
+}
+
+static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+{
+ set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
+ set_hwstam(engine, ~0u);
+}
+
+static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
{
struct drm_i915_private *dev_priv = engine->i915;
- i915_reg_t mmio;
+ i915_reg_t hwsp;
- /* The ring status page addresses are no longer next to the rest of
+ /*
+ * The ring status page addresses are no longer next to the rest of
* the ring registers as of gen7.
*/
- if (IS_GEN7(dev_priv)) {
+ if (IS_GEN(dev_priv, 7)) {
switch (engine->id) {
/*
* No more rings exist on Gen7. Default case is only to shut up
@@ -410,56 +555,55 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
default:
GEM_BUG_ON(engine->id);
case RCS:
- mmio = RENDER_HWS_PGA_GEN7;
+ hwsp = RENDER_HWS_PGA_GEN7;
break;
case BCS:
- mmio = BLT_HWS_PGA_GEN7;
+ hwsp = BLT_HWS_PGA_GEN7;
break;
case VCS:
- mmio = BSD_HWS_PGA_GEN7;
+ hwsp = BSD_HWS_PGA_GEN7;
break;
case VECS:
- mmio = VEBOX_HWS_PGA_GEN7;
+ hwsp = VEBOX_HWS_PGA_GEN7;
break;
}
- } else if (IS_GEN6(dev_priv)) {
- mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
+ } else if (IS_GEN(dev_priv, 6)) {
+ hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
} else {
- mmio = RING_HWS_PGA(engine->mmio_base);
+ hwsp = RING_HWS_PGA(engine->mmio_base);
}
- if (INTEL_GEN(dev_priv) >= 6) {
- u32 mask = ~0u;
+ I915_WRITE(hwsp, offset);
+ POSTING_READ(hwsp);
+}
- /*
- * Keep the render interrupt unmasked as this papers over
- * lost interrupts following a reset.
- */
- if (engine->id == RCS)
- mask &= ~BIT(0);
+static void flush_cs_tlb(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+ i915_reg_t instpm = RING_INSTPM(engine->mmio_base);
- I915_WRITE(RING_HWSTAM(engine->mmio_base), mask);
- }
+ if (!IS_GEN_RANGE(dev_priv, 6, 7))
+ return;
- I915_WRITE(mmio, engine->status_page.ggtt_offset);
- POSTING_READ(mmio);
+ /* ring should be idle before issuing a sync flush*/
+ WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
- /* Flush the TLB for this page */
- if (IS_GEN(dev_priv, 6, 7)) {
- i915_reg_t reg = RING_INSTPM(engine->mmio_base);
+ I915_WRITE(instpm,
+ _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
+ INSTPM_SYNC_FLUSH));
+ if (intel_wait_for_register(dev_priv,
+ instpm, INSTPM_SYNC_FLUSH, 0,
+ 1000))
+ DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
+ engine->name);
+}
- /* ring should be idle before issuing a sync flush*/
- WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
+static void ring_setup_status_page(struct intel_engine_cs *engine)
+{
+ set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
+ set_hwstam(engine, ~0u);
- I915_WRITE(reg,
- _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
- INSTPM_SYNC_FLUSH));
- if (intel_wait_for_register(dev_priv,
- reg, INSTPM_SYNC_FLUSH, 0,
- 1000))
- DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
- engine->name);
- }
+ flush_cs_tlb(engine);
}
static bool stop_ring(struct intel_engine_cs *engine)
@@ -529,17 +673,10 @@ static int init_ring_common(struct intel_engine_cs *engine)
if (HWS_NEEDS_PHYSICAL(dev_priv))
ring_setup_phys_status_page(engine);
else
- intel_ring_setup_status_page(engine);
+ ring_setup_status_page(engine);
intel_engine_reset_breadcrumbs(engine);
- if (HAS_LEGACY_SEMAPHORES(engine->i915)) {
- I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
- I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
- if (HAS_VEBOX(dev_priv))
- I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
- }
-
/* Enforce ordering by reading HEAD register back */
I915_READ_HEAD(engine);
@@ -593,63 +730,87 @@ static int init_ring_common(struct intel_engine_cs *engine)
}
/* Papering over lost _interrupts_ immediately following the restart */
- intel_engine_wakeup(engine);
+ intel_engine_queue_breadcrumbs(engine);
out:
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
return ret;
}
-static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
+static void reset_prepare(struct intel_engine_cs *engine)
{
intel_engine_stop_cs(engine);
-
- if (engine->irq_seqno_barrier)
- engine->irq_seqno_barrier(engine);
-
- return i915_gem_find_active_request(engine);
}
-static void skip_request(struct i915_request *rq)
+static void reset_ring(struct intel_engine_cs *engine, bool stalled)
{
- void *vaddr = rq->ring->vaddr;
+ struct i915_timeline *tl = &engine->timeline;
+ struct i915_request *pos, *rq;
+ unsigned long flags;
u32 head;
- head = rq->infix;
- if (rq->postfix < head) {
- memset32(vaddr + head, MI_NOOP,
- (rq->ring->size - head) / sizeof(u32));
- head = 0;
+ rq = NULL;
+ spin_lock_irqsave(&tl->lock, flags);
+ list_for_each_entry(pos, &tl->requests, link) {
+ if (!i915_request_completed(pos)) {
+ rq = pos;
+ break;
+ }
}
- memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32));
-}
-
-static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq)
-{
- GEM_TRACE("%s request global=%d, current=%d\n",
- engine->name, rq ? rq->global_seqno : 0,
- intel_engine_get_seqno(engine));
+ GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n",
+ engine->name,
+ rq ? rq->global_seqno : 0,
+ intel_engine_get_seqno(engine),
+ yesno(stalled));
/*
- * Try to restore the logical GPU state to match the continuation
- * of the request queue. If we skip the context/PD restore, then
- * the next request may try to execute assuming that its context
- * is valid and loaded on the GPU and so may try to access invalid
- * memory, prompting repeated GPU hangs.
+ * The guilty request will get skipped on a hung engine.
*
- * If the request was guilty, we still restore the logical state
- * in case the next request requires it (e.g. the aliasing ppgtt),
- * but skip over the hung batch.
+ * Users of client default contexts do not rely on logical
+ * state preserved between batches so it is safe to execute
+ * queued requests following the hang. Non default contexts
+ * rely on preserved state, so skipping a batch loses the
+ * evolution of the state and it needs to be considered corrupted.
+ * Executing more queued batches on top of corrupted state is
+ * risky. But we take the risk by trying to advance through
+ * the queued requests in order to make the client behaviour
+ * more predictable around resets, by not throwing away random
+ * amount of batches it has prepared for execution. Sophisticated
+ * clients can use gem_reset_stats_ioctl and dma fence status
+ * (exported via sync_file info ioctl on explicit fences) to observe
+ * when it loses the context state and should rebuild accordingly.
*
- * If the request was innocent, we try to replay the request with
- * the restored context.
+ * The context ban, and ultimately the client ban, mechanism are safety
+ * valves if client submission ends up resulting in nothing more than
+ * subsequent hangs.
*/
+
if (rq) {
- /* If the rq hung, jump to its breadcrumb and skip the batch */
- rq->ring->head = intel_ring_wrap(rq->ring, rq->head);
- if (rq->fence.error == -EIO)
- skip_request(rq);
+ /*
+ * Try to restore the logical GPU state to match the
+ * continuation of the request queue. If we skip the
+ * context/PD restore, then the next request may try to execute
+ * assuming that its context is valid and loaded on the GPU and
+ * so may try to access invalid memory, prompting repeated GPU
+ * hangs.
+ *
+ * If the request was guilty, we still restore the logical
+ * state in case the next request requires it (e.g. the
+ * aliasing ppgtt), but skip over the hung batch.
+ *
+ * If the request was innocent, we try to replay the request
+ * with the restored context.
+ */
+ i915_reset_request(rq, stalled);
+
+ GEM_BUG_ON(rq->ring != engine->buffer);
+ head = rq->head;
+ } else {
+ head = engine->buffer->tail;
}
+ engine->buffer->head = intel_ring_wrap(engine->buffer, head);
+
+ spin_unlock_irqrestore(&tl->lock, flags);
}
static void reset_finish(struct intel_engine_cs *engine)
@@ -679,7 +840,7 @@ static int init_render_ring(struct intel_engine_cs *engine)
return ret;
/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
- if (IS_GEN(dev_priv, 4, 6))
+ if (IS_GEN_RANGE(dev_priv, 4, 6))
I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
/* We need to disable the AsyncFlip performance optimisations in order
@@ -688,22 +849,22 @@ static int init_render_ring(struct intel_engine_cs *engine)
*
* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
*/
- if (IS_GEN(dev_priv, 6, 7))
+ if (IS_GEN_RANGE(dev_priv, 6, 7))
I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
/* Required for the hardware to program scanline values for waiting */
/* WaEnableFlushTlbInvalidationMode:snb */
- if (IS_GEN6(dev_priv))
+ if (IS_GEN(dev_priv, 6))
I915_WRITE(GFX_MODE,
_MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
- if (IS_GEN7(dev_priv))
+ if (IS_GEN(dev_priv, 7))
I915_WRITE(GFX_MODE_GEN7,
_MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
_MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
- if (IS_GEN6(dev_priv)) {
+ if (IS_GEN(dev_priv, 6)) {
/* From the Sandybridge PRM, volume 1 part 3, page 24:
* "If this bit is set, STCunit will have LRA as replacement
* policy. [...] This bit must be reset. LRA replacement
@@ -713,7 +874,7 @@ static int init_render_ring(struct intel_engine_cs *engine)
_MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
}
- if (IS_GEN(dev_priv, 6, 7))
+ if (IS_GEN_RANGE(dev_priv, 6, 7))
I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
if (INTEL_GEN(dev_priv) >= 6)
@@ -722,33 +883,6 @@ static int init_render_ring(struct intel_engine_cs *engine)
return 0;
}
-static u32 *gen6_signal(struct i915_request *rq, u32 *cs)
-{
- struct drm_i915_private *dev_priv = rq->i915;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int num_rings = 0;
-
- for_each_engine(engine, dev_priv, id) {
- i915_reg_t mbox_reg;
-
- if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
- continue;
-
- mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id];
- if (i915_mmio_reg_valid(mbox_reg)) {
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(mbox_reg);
- *cs++ = rq->global_seqno;
- num_rings++;
- }
- }
- if (num_rings & 1)
- *cs++ = MI_NOOP;
-
- return cs;
-}
-
static void cancel_requests(struct intel_engine_cs *engine)
{
struct i915_request *request;
@@ -760,11 +894,10 @@ static void cancel_requests(struct intel_engine_cs *engine)
list_for_each_entry(request, &engine->timeline.requests, link) {
GEM_BUG_ON(!request->global_seqno);
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
- &request->fence.flags))
- continue;
+ if (!i915_request_signaled(request))
+ dma_fence_set_error(&request->fence, -EIO);
- dma_fence_set_error(&request->fence, -EIO);
+ i915_request_mark_complete(request);
}
intel_write_status_page(engine,
@@ -786,94 +919,59 @@ static void i9xx_submit_request(struct i915_request *request)
intel_ring_set_tail(request->ring, request->tail));
}
-static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
+ GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+ *cs++ = MI_FLUSH;
+
+ *cs++ = MI_STORE_DWORD_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR;
+ *cs++ = rq->fence.seqno;
+
*cs++ = MI_STORE_DWORD_INDEX;
- *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
+ *cs++ = I915_GEM_HWS_INDEX_ADDR;
*cs++ = rq->global_seqno;
+
*cs++ = MI_USER_INTERRUPT;
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
-}
-static const int i9xx_emit_breadcrumb_sz = 4;
-
-static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs));
+ return cs;
}
-static int
-gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal)
+#define GEN5_WA_STORES 8 /* must be at least 1! */
+static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
- u32 dw1 = MI_SEMAPHORE_MBOX |
- MI_SEMAPHORE_COMPARE |
- MI_SEMAPHORE_REGISTER;
- u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id];
- u32 *cs;
-
- WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
+ int i;
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
+ GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
- *cs++ = dw1 | wait_mbox;
- /* Throughout all of the GEM code, seqno passed implies our current
- * seqno is >= the last seqno executed. However for hardware the
- * comparison is strictly greater than.
- */
- *cs++ = signal->global_seqno - 1;
- *cs++ = 0;
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
+ *cs++ = MI_FLUSH;
- return 0;
-}
+ *cs++ = MI_STORE_DWORD_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR;
+ *cs++ = rq->fence.seqno;
+
+ BUILD_BUG_ON(GEN5_WA_STORES < 1);
+ for (i = 0; i < GEN5_WA_STORES; i++) {
+ *cs++ = MI_STORE_DWORD_INDEX;
+ *cs++ = I915_GEM_HWS_INDEX_ADDR;
+ *cs++ = rq->global_seqno;
+ }
-static void
-gen5_seqno_barrier(struct intel_engine_cs *engine)
-{
- /* MI_STORE are internally buffered by the GPU and not flushed
- * either by MI_FLUSH or SyncFlush or any other combination of
- * MI commands.
- *
- * "Only the submission of the store operation is guaranteed.
- * The write result will be complete (coherent) some time later
- * (this is practically a finite period but there is no guaranteed
- * latency)."
- *
- * Empirically, we observe that we need a delay of at least 75us to
- * be sure that the seqno write is visible by the CPU.
- */
- usleep_range(125, 250);
-}
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
-static void
-gen6_seqno_barrier(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
- /* Workaround to force correct ordering between irq and seqno writes on
- * ivb (and maybe also on snb) by reading from a CS register (like
- * ACTHD) before reading the status page.
- *
- * Note that this effectively stalls the read by the time it takes to
- * do a memory transaction, which more or less ensures that the write
- * from the GPU has sufficient time to invalidate the CPU cacheline.
- * Alternatively we could delay the interrupt from the CS ring to give
- * the write time to land, but that would incur a delay after every
- * batch i.e. much more frequent than a delay when waiting for the
- * interrupt (with the same net latency).
- *
- * Also note that to prevent whole machine hangs on gen7, we have to
- * take the spinlock to guard against concurrent cacheline access.
- */
- spin_lock_irq(&dev_priv->uncore.lock);
- POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
- spin_unlock_irq(&dev_priv->uncore.lock);
+ return cs;
}
+#undef GEN5_WA_STORES
static void
gen5_irq_enable(struct intel_engine_cs *engine)
@@ -948,6 +1046,10 @@ gen6_irq_enable(struct intel_engine_cs *engine)
I915_WRITE_IMR(engine,
~(engine->irq_enable_mask |
engine->irq_keep_mask));
+
+ /* Flush/delay to ensure the RING_IMR is active before the GT IMR */
+ POSTING_READ_FW(RING_IMR(engine->mmio_base));
+
gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask);
}
@@ -966,6 +1068,10 @@ hsw_vebox_irq_enable(struct intel_engine_cs *engine)
struct drm_i915_private *dev_priv = engine->i915;
I915_WRITE_IMR(engine, ~engine->irq_enable_mask);
+
+ /* Flush/delay to ensure the RING_IMR is active before the GT IMR */
+ POSTING_READ_FW(RING_IMR(engine->mmio_base));
+
gen6_unmask_pm_irq(dev_priv, engine->irq_enable_mask);
}
@@ -1091,6 +1197,10 @@ int intel_ring_pin(struct intel_ring *ring)
GEM_BUG_ON(ring->vaddr);
+ ret = i915_timeline_pin(ring->timeline);
+ if (ret)
+ return ret;
+
flags = PIN_GLOBAL;
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
@@ -1107,28 +1217,32 @@ int intel_ring_pin(struct intel_ring *ring)
else
ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
if (unlikely(ret))
- return ret;
+ goto unpin_timeline;
}
ret = i915_vma_pin(vma, 0, 0, flags);
if (unlikely(ret))
- return ret;
+ goto unpin_timeline;
if (i915_vma_is_map_and_fenceable(vma))
addr = (void __force *)i915_vma_pin_iomap(vma);
else
addr = i915_gem_object_pin_map(vma->obj, map);
- if (IS_ERR(addr))
- goto err;
+ if (IS_ERR(addr)) {
+ ret = PTR_ERR(addr);
+ goto unpin_ring;
+ }
vma->obj->pin_global++;
ring->vaddr = addr;
return 0;
-err:
+unpin_ring:
i915_vma_unpin(vma);
- return PTR_ERR(addr);
+unpin_timeline:
+ i915_timeline_unpin(ring->timeline);
+ return ret;
}
void intel_ring_reset(struct intel_ring *ring, u32 tail)
@@ -1157,6 +1271,8 @@ void intel_ring_unpin(struct intel_ring *ring)
ring->vma->obj->pin_global--;
i915_vma_unpin(ring->vma);
+
+ i915_timeline_unpin(ring->timeline);
}
static struct i915_vma *
@@ -1467,13 +1583,18 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
struct intel_ring *ring;
int err;
- intel_engine_setup_common(engine);
+ err = intel_engine_setup_common(engine);
+ if (err)
+ return err;
- timeline = i915_timeline_create(engine->i915, engine->name);
+ timeline = i915_timeline_create(engine->i915,
+ engine->name,
+ engine->status_page.vma);
if (IS_ERR(timeline)) {
err = PTR_ERR(timeline);
goto err;
}
+ GEM_BUG_ON(timeline->has_initial_breadcrumb);
ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
i915_timeline_put(timeline);
@@ -1493,6 +1614,8 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
if (err)
goto err_unpin;
+ GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
+
return 0;
err_unpin:
@@ -1581,10 +1704,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *engine = rq->engine;
enum intel_engine_id id;
const int num_rings =
- /* Use an extended w/a on gen7 if signalling from other rings */
- (HAS_LEGACY_SEMAPHORES(i915) && IS_GEN7(i915)) ?
- INTEL_INFO(i915)->num_rings - 1 :
- 0;
+ IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_rings - 1 : 0;
bool force_restore = false;
int len;
u32 *cs;
@@ -1597,7 +1717,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
len = 4;
- if (IS_GEN7(i915))
+ if (IS_GEN(i915, 7))
len += 2 + (num_rings ? 4*num_rings + 6 : 0);
if (flags & MI_FORCE_RESTORE) {
GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
@@ -1611,7 +1731,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
return PTR_ERR(cs);
/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
- if (IS_GEN7(i915)) {
+ if (IS_GEN(i915, 7)) {
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
if (num_rings) {
struct intel_engine_cs *signaller;
@@ -1658,7 +1778,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
*/
*cs++ = MI_NOOP;
- if (IS_GEN7(i915)) {
+ if (IS_GEN(i915, 7)) {
if (num_rings) {
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
@@ -1828,18 +1948,21 @@ static int ring_request_alloc(struct i915_request *request)
int ret;
GEM_BUG_ON(!request->hw_context->pin_count);
+ GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
- /* Flush enough space to reduce the likelihood of waiting after
+ /*
+ * Flush enough space to reduce the likelihood of waiting after
* we start building the request - in which case we will just
* have to repeat work.
*/
request->reserved_space += LEGACY_REQUEST_SIZE;
- ret = intel_ring_wait_for_space(request->ring, request->reserved_space);
+ ret = switch_context(request);
if (ret)
return ret;
- ret = switch_context(request);
+ /* Unconditionally invalidate GPU caches and TLBs. */
+ ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
if (ret)
return ret;
@@ -1881,22 +2004,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
return 0;
}
-int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes)
-{
- GEM_BUG_ON(bytes > ring->effective_size);
- if (unlikely(bytes > ring->effective_size - ring->emit))
- bytes += ring->size - ring->emit;
-
- if (unlikely(bytes > ring->space)) {
- int ret = wait_for_space(ring, bytes);
- if (unlikely(ret))
- return ret;
- }
-
- GEM_BUG_ON(ring->space < bytes);
- return 0;
-}
-
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
{
struct intel_ring *ring = rq->ring;
@@ -2129,77 +2236,15 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
}
-static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
- struct intel_engine_cs *engine)
-{
- int i;
-
- if (!HAS_LEGACY_SEMAPHORES(dev_priv))
- return;
-
- GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
- engine->semaphore.sync_to = gen6_ring_sync_to;
- engine->semaphore.signal = gen6_signal;
-
- /*
- * The current semaphore is only applied on pre-gen8
- * platform. And there is no VCS2 ring on the pre-gen8
- * platform. So the semaphore between RCS and VCS2 is
- * initialized as INVALID.
- */
- for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
- static const struct {
- u32 wait_mbox;
- i915_reg_t mbox_reg;
- } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
- [RCS_HW] = {
- [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC },
- [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC },
- [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
- },
- [VCS_HW] = {
- [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC },
- [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC },
- [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
- },
- [BCS_HW] = {
- [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC },
- [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC },
- [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
- },
- [VECS_HW] = {
- [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
- [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
- [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
- },
- };
- u32 wait_mbox;
- i915_reg_t mbox_reg;
-
- if (i == engine->hw_id) {
- wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
- mbox_reg = GEN6_NOSYNC;
- } else {
- wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
- mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
- }
-
- engine->semaphore.mbox.wait[i] = wait_mbox;
- engine->semaphore.mbox.signal[i] = mbox_reg;
- }
-}
-
static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
struct intel_engine_cs *engine)
{
if (INTEL_GEN(dev_priv) >= 6) {
engine->irq_enable = gen6_irq_enable;
engine->irq_disable = gen6_irq_disable;
- engine->irq_seqno_barrier = gen6_seqno_barrier;
} else if (INTEL_GEN(dev_priv) >= 5) {
engine->irq_enable = gen5_irq_enable;
engine->irq_disable = gen5_irq_disable;
- engine->irq_seqno_barrier = gen5_seqno_barrier;
} else if (INTEL_GEN(dev_priv) >= 3) {
engine->irq_enable = i9xx_irq_enable;
engine->irq_disable = i9xx_irq_disable;
@@ -2231,7 +2276,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
intel_ring_init_irq(dev_priv, engine);
- intel_ring_init_semaphores(dev_priv, engine);
engine->init_hw = init_ring_common;
engine->reset.prepare = reset_prepare;
@@ -2241,18 +2285,14 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
engine->context_pin = intel_ring_context_pin;
engine->request_alloc = ring_request_alloc;
- engine->emit_breadcrumb = i9xx_emit_breadcrumb;
- engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
- if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
- int num_rings;
-
- engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
-
- num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
- engine->emit_breadcrumb_sz += num_rings * 3;
- if (num_rings & 1)
- engine->emit_breadcrumb_sz++;
- }
+ /*
+ * Using a global execution timeline; the previous final breadcrumb is
+ * equivalent to our next initial bread so we can elide
+ * engine->emit_init_breadcrumb().
+ */
+ engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
+ if (IS_GEN(dev_priv, 5))
+ engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
engine->set_default_submission = i9xx_set_default_submission;
@@ -2278,12 +2318,15 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
- if (INTEL_GEN(dev_priv) >= 6) {
+ if (INTEL_GEN(dev_priv) >= 7) {
engine->init_context = intel_rcs_ctx_init;
engine->emit_flush = gen7_render_ring_flush;
- if (IS_GEN6(dev_priv))
- engine->emit_flush = gen6_render_ring_flush;
- } else if (IS_GEN5(dev_priv)) {
+ engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
+ } else if (IS_GEN(dev_priv, 6)) {
+ engine->init_context = intel_rcs_ctx_init;
+ engine->emit_flush = gen6_render_ring_flush;
+ engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
+ } else if (IS_GEN(dev_priv, 5)) {
engine->emit_flush = gen4_render_ring_flush;
} else {
if (INTEL_GEN(dev_priv) < 4)
@@ -2313,13 +2356,18 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
if (INTEL_GEN(dev_priv) >= 6) {
/* gen6 bsd needs a special wa for tail updates */
- if (IS_GEN6(dev_priv))
+ if (IS_GEN(dev_priv, 6))
engine->set_default_submission = gen6_bsd_set_default_submission;
engine->emit_flush = gen6_bsd_ring_flush;
engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
+
+ if (IS_GEN(dev_priv, 6))
+ engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
+ else
+ engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
} else {
engine->emit_flush = bsd_ring_flush;
- if (IS_GEN5(dev_priv))
+ if (IS_GEN(dev_priv, 5))
engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
else
engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
@@ -2332,11 +2380,18 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
+ GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
+
intel_ring_default_vfuncs(dev_priv, engine);
engine->emit_flush = gen6_ring_flush;
engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
+ if (IS_GEN(dev_priv, 6))
+ engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
+ else
+ engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+
return intel_init_ring_buffer(engine);
}
@@ -2344,6 +2399,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
+ GEM_BUG_ON(INTEL_GEN(dev_priv) < 7);
+
intel_ring_default_vfuncs(dev_priv, engine);
engine->emit_flush = gen6_ring_flush;
@@ -2351,5 +2408,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
engine->irq_enable = hsw_vebox_irq_enable;
engine->irq_disable = hsw_vebox_irq_disable;
+ engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+
return intel_init_ring_buffer(engine);
}