diff options
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_request.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_request.h | 30 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_reset.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_timeline.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_engine_cs.c | 15 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 31 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.c | 87 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/selftests/i915_timeline.c | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/selftests/mock_engine.c | 19 |
10 files changed, 139 insertions, 60 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 761714448ff3..4e0de22f0166 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2890,7 +2890,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) */ spin_lock_irqsave(&engine->timeline.lock, flags); list_for_each_entry(request, &engine->timeline.requests, link) { - if (__i915_request_completed(request, request->global_seqno)) + if (i915_request_completed(request)) continue; active = request; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a076fd0b7ba6..4d58770e6a8c 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -199,6 +199,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine, spin_unlock(&engine->timeline.lock); spin_lock(&rq->lock); + i915_request_mark_complete(rq); if (!i915_request_signaled(rq)) dma_fence_signal_locked(&rq->fence); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) @@ -621,7 +622,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->ring = ce->ring; rq->timeline = ce->ring->timeline; GEM_BUG_ON(rq->timeline == &engine->timeline); - rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX]; + rq->hwsp_seqno = rq->timeline->hwsp_seqno; spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index ade010fe6e26..96c586d6ff4d 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -289,6 +289,7 @@ long i915_request_wait(struct i915_request *rq, static inline bool i915_request_signaled(const struct i915_request *rq) { + /* The request may live longer than its HWSP, so check flags first! */ return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags); } @@ -340,32 +341,23 @@ static inline u32 hwsp_seqno(const struct i915_request *rq) */ static inline bool i915_request_started(const struct i915_request *rq) { - u32 seqno; - - seqno = i915_request_global_seqno(rq); - if (!seqno) /* not yet submitted to HW */ - return false; + if (i915_request_signaled(rq)) + return true; - return i915_seqno_passed(hwsp_seqno(rq), seqno - 1); -} - -static inline bool -__i915_request_completed(const struct i915_request *rq, u32 seqno) -{ - GEM_BUG_ON(!seqno); - return i915_seqno_passed(hwsp_seqno(rq), seqno) && - seqno == i915_request_global_seqno(rq); + return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); } static inline bool i915_request_completed(const struct i915_request *rq) { - u32 seqno; + if (i915_request_signaled(rq)) + return true; - seqno = i915_request_global_seqno(rq); - if (!seqno) - return false; + return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno); +} - return __i915_request_completed(rq, seqno); +static inline void i915_request_mark_complete(struct i915_request *rq) +{ + rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */ } void i915_retire_requests(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index d2dca85a543d..bd82f9b1043f 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -760,6 +760,7 @@ static void nop_submit_request(struct i915_request *request) spin_lock_irqsave(&request->engine->timeline.lock, flags); __i915_request_submit(request); + i915_request_mark_complete(request); intel_engine_write_global_seqno(request->engine, request->global_seqno); spin_unlock_irqrestore(&request->engine->timeline.lock, flags); } diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index add8fc33cf6e..e4c11414a824 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -270,6 +270,10 @@ int i915_timeline_pin(struct i915_timeline *tl) if (err) goto unpin; + tl->hwsp_offset = + i915_ggtt_offset(tl->hwsp_ggtt) + + offset_in_page(tl->hwsp_offset); + return 0; unpin: diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 515e87846afd..ead9c4371fe1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -660,10 +660,16 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) frame->rq.ring = &frame->ring; frame->rq.timeline = &frame->timeline; + dw = i915_timeline_pin(&frame->timeline); + if (dw < 0) + goto out_timeline; + dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs; - i915_timeline_fini(&frame->timeline); + i915_timeline_unpin(&frame->timeline); +out_timeline: + i915_timeline_fini(&frame->timeline); out_frame: kfree(frame); return dw; @@ -1426,9 +1432,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, char hdr[80]; snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", + "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ", idx, count, - i915_ggtt_offset(rq->ring->vma)); + i915_ggtt_offset(rq->ring->vma), + rq->timeline->hwsp_offset); print_request(m, rq, hdr); } else { drm_printf(m, "\t\tELSP[%d] idle\n", idx); @@ -1538,6 +1545,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->ring->emit); drm_printf(m, "\t\tring->space: 0x%08x\n", rq->ring->space); + drm_printf(m, "\t\tring->hwsp: 0x%08x\n", + rq->timeline->hwsp_offset); print_request_ring(m, rq); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e388f37743a2..fdbb3fe8eac9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -832,10 +832,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); - if (i915_request_signaled(rq)) - continue; + if (!i915_request_signaled(rq)) + dma_fence_set_error(&rq->fence, -EIO); - dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); } /* Flush the queued requests to the timeline list (for retiring). */ @@ -845,9 +845,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) priolist_for_each_request_consume(rq, rn, p, i) { list_del_init(&rq->sched.link); - - dma_fence_set_error(&rq->fence, -EIO); __i915_request_submit(rq); + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); } rb_erase_cached(&p->node, &execlists->queue); @@ -2044,10 +2044,17 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - cs = gen8_emit_ggtt_write(cs, request->global_seqno, + cs = gen8_emit_ggtt_write(cs, + request->fence.seqno, + request->timeline->hwsp_offset); + + cs = gen8_emit_ggtt_write(cs, + request->global_seqno, intel_hws_seqno_address(request->engine)); + *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2056,18 +2063,20 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - /* We're using qword write, seqno should be aligned to 8 bytes. */ - BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); - cs = gen8_emit_ggtt_write_rcs(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine), + request->fence.seqno, + request->timeline->hwsp_offset, PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); + cs = gen8_emit_ggtt_write_rcs(cs, + request->global_seqno, + intel_hws_seqno_address(request->engine), + PIPE_CONTROL_CS_STALL); + *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 174795622eb1..ee3719324e2d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -326,6 +326,11 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); + *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->global_seqno; @@ -427,6 +432,13 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); + *cs++ = rq->timeline->hwsp_offset; + *cs++ = rq->fence.seqno; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL); *cs++ = intel_hws_seqno_address(rq->engine); *cs++ = rq->global_seqno; @@ -441,10 +453,19 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -457,14 +478,21 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; for (i = 0; i < GEN7_XCS_WA; i++) { *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; } *cs++ = MI_FLUSH_DW; @@ -472,7 +500,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -738,7 +765,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) rq = NULL; spin_lock_irqsave(&tl->lock, flags); list_for_each_entry(pos, &tl->requests, link) { - if (!__i915_request_completed(pos, pos->global_seqno)) { + if (!i915_request_completed(pos)) { rq = pos; break; } @@ -880,10 +907,10 @@ static void cancel_requests(struct intel_engine_cs *engine) list_for_each_entry(request, &engine->timeline.requests, link) { GEM_BUG_ON(!request->global_seqno); - if (i915_request_signaled(request)) - continue; + if (!i915_request_signaled(request)) + dma_fence_set_error(&request->fence, -EIO); - dma_fence_set_error(&request->fence, -EIO); + i915_request_mark_complete(request); } intel_write_status_page(engine, @@ -907,14 +934,20 @@ static void i9xx_submit_request(struct i915_request *request) static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + *cs++ = MI_FLUSH; *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + + *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_INDEX_ADDR; *cs++ = rq->global_seqno; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -927,8 +960,15 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + *cs++ = MI_FLUSH; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + BUILD_BUG_ON(GEN5_WA_STORES < 1); for (i = 0; i < GEN5_WA_STORES; i++) { *cs++ = MI_STORE_DWORD_INDEX; @@ -937,6 +977,7 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) } *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -1169,6 +1210,10 @@ int intel_ring_pin(struct intel_ring *ring) GEM_BUG_ON(ring->vaddr); + ret = i915_timeline_pin(ring->timeline); + if (ret) + return ret; + flags = PIN_GLOBAL; /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ @@ -1185,28 +1230,32 @@ int intel_ring_pin(struct intel_ring *ring) else ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); if (unlikely(ret)) - return ret; + goto unpin_timeline; } ret = i915_vma_pin(vma, 0, 0, flags); if (unlikely(ret)) - return ret; + goto unpin_timeline; if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else addr = i915_gem_object_pin_map(vma->obj, map); - if (IS_ERR(addr)) - goto err; + if (IS_ERR(addr)) { + ret = PTR_ERR(addr); + goto unpin_ring; + } vma->obj->pin_global++; ring->vaddr = addr; return 0; -err: +unpin_ring: i915_vma_unpin(vma); - return PTR_ERR(addr); +unpin_timeline: + i915_timeline_unpin(ring->timeline); + return ret; } void intel_ring_reset(struct intel_ring *ring, u32 tail) @@ -1235,6 +1284,8 @@ void intel_ring_unpin(struct intel_ring *ring) ring->vma->obj->pin_global--; i915_vma_unpin(ring->vma); + + i915_timeline_unpin(ring->timeline); } static struct i915_vma * diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index c34340f074cf..12ea69b1a1e5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -440,11 +440,6 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) return 0; } -static u32 hwsp_address(const struct i915_timeline *tl) -{ - return i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset; -} - static struct i915_request * tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) { @@ -463,7 +458,7 @@ tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) if (IS_ERR(rq)) goto out_unpin; - err = emit_ggtt_store_dw(rq, hwsp_address(tl), value); + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); i915_request_add(rq); if (err) rq = ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 919c89fd6ee5..95e890d7f58b 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -30,6 +30,17 @@ struct mock_ring { struct i915_timeline timeline; }; +static void mock_timeline_pin(struct i915_timeline *tl) +{ + tl->pin_count++; +} + +static void mock_timeline_unpin(struct i915_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + tl->pin_count--; +} + static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { const unsigned long sz = PAGE_SIZE / 2; @@ -76,6 +87,8 @@ static void advance(struct mock_request *request) { list_del_init(&request->link); mock_seqno_advance(request->base.engine, request->base.global_seqno); + i915_request_mark_complete(&request->base); + GEM_BUG_ON(!i915_request_completed(&request->base)); } static void hw_delay_complete(struct timer_list *t) @@ -108,6 +121,7 @@ static void hw_delay_complete(struct timer_list *t) static void mock_context_unpin(struct intel_context *ce) { + mock_timeline_unpin(ce->ring->timeline); i915_gem_context_put(ce->gem_context); } @@ -129,6 +143,7 @@ mock_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { struct intel_context *ce = to_intel_context(ctx, engine); + int err = -ENOMEM; if (ce->pin_count++) return ce; @@ -139,13 +154,15 @@ mock_context_pin(struct intel_engine_cs *engine, goto err; } + mock_timeline_pin(ce->ring->timeline); + ce->ops = &mock_context_ops; i915_gem_context_get(ctx); return ce; err: ce->pin_count = 0; - return ERR_PTR(-ENOMEM); + return ERR_PTR(err); } static int mock_request_alloc(struct i915_request *request) |