From 4cd53c0c8b01fc05c3ad5b2acdad02e37d3c2f55 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Dec 2012 16:01:25 +0100 Subject: drm/i915: paper over missed irq issues with force wake voodoo Two things seem to do the trick on my ivb machine here: - prevent the gt from powering down while waiting for seqno notification interrupts by grabbing the force_wake in get_irq (and dropping it in put_irq again). - ordering writes from the ring's CS by reading a CS register, ACTHD seems to work. Only the blt&bsd ring on ivb seem to be massively affected by this, but for paranoia do this dance also on the render ring and on snb (i.e. all gpus with forcewake). Tested with Eric's glCopyPixels loop which without this patch scores a missed irq every few seconds. This patch needs my forcewake rework to use a spinlock instead of dev->struct_mutex. After crawling through docs a lot I've found the following nugget: Internal doc "SNB GT PM Programming Guide", Section 4.3.1: "GT does not generate interrupts while in RC6 (by design)" So it looks like rc6 and irq generation are indeed related. v2: Improve the comment per Eugeni Dodonov's suggestion. v3: Add the documentation snipped. Also restrict the w/a to ivb only for -fixes, as suggested by Keith Packard. Cc: stable@kernel.org Cc: Eric Anholt Cc: Kenneth Graunke Cc: Eugeni Dodonov Tested-by: Eugeni Dodonov Reviewed-by: Eugeni Dodonov Signed-Off-by: Daniel Vetter Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_ringbuffer.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 77e729d4e4f0..fa5702c5da42 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -635,6 +635,19 @@ render_ring_add_request(struct intel_ring_buffer *ring, return 0; } +static u32 +gen6_ring_get_seqno(struct intel_ring_buffer *ring) +{ + struct drm_device *dev = ring->dev; + + /* Workaround to force correct ordering between irq and seqno writes on + * ivb (and maybe also on snb) by reading from a CS register (like + * ACTHD) before reading the status page. */ + if (IS_GEN7(dev)) + intel_ring_get_active_head(ring); + return intel_read_status_page(ring, I915_GEM_HWS_INDEX); +} + static u32 ring_get_seqno(struct intel_ring_buffer *ring) { @@ -811,6 +824,12 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) if (!dev->irq_enabled) return false; + /* It looks like we need to prevent the gt from suspending while waiting + * for an notifiy irq, otherwise irqs seem to get lost on at least the + * blt/bsd rings on ivb. */ + if (IS_GEN7(dev)) + gen6_gt_force_wake_get(dev_priv); + spin_lock(&ring->irq_lock); if (ring->irq_refcount++ == 0) { ring->irq_mask &= ~rflag; @@ -835,6 +854,9 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) ironlake_disable_irq(dev_priv, gflag); } spin_unlock(&ring->irq_lock); + + if (IS_GEN7(dev)) + gen6_gt_force_wake_put(dev_priv); } static bool @@ -1341,7 +1363,7 @@ static const struct intel_ring_buffer gen6_bsd_ring = { .write_tail = gen6_bsd_ring_write_tail, .flush = gen6_ring_flush, .add_request = gen6_add_request, - .get_seqno = ring_get_seqno, + .get_seqno = gen6_ring_get_seqno, .irq_get = gen6_bsd_ring_get_irq, .irq_put = gen6_bsd_ring_put_irq, .dispatch_execbuffer = gen6_ring_dispatch_execbuffer, @@ -1476,7 +1498,7 @@ static const struct intel_ring_buffer gen6_blt_ring = { .write_tail = ring_write_tail, .flush = blt_ring_flush, .add_request = gen6_add_request, - .get_seqno = ring_get_seqno, + .get_seqno = gen6_ring_get_seqno, .irq_get = blt_ring_get_irq, .irq_put = blt_ring_put_irq, .dispatch_execbuffer = gen6_ring_dispatch_execbuffer, @@ -1499,6 +1521,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->flush = gen6_render_ring_flush; ring->irq_get = gen6_render_ring_get_irq; ring->irq_put = gen6_render_ring_put_irq; + ring->get_seqno = gen6_ring_get_seqno; } else if (IS_GEN5(dev)) { ring->add_request = pc_render_add_request; ring->get_seqno = pc_render_get_seqno; -- cgit v1.2.3 From 8f0fc977f58c36e75e205486c1aebb9b8e4263e1 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 19 Jan 2012 21:13:47 -0800 Subject: Revert "drm/i915: Work around gen7 BLT ring synchronization issues." This reverts commit 42ff6572e5a4a7414330a4ca91f0335da67deca9. New forcewake voodoo makes this no longer necessary. Acked-by: Daniel Vetter Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_ringbuffer.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index fa5702c5da42..1ab842c6032e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -804,17 +804,6 @@ ring_add_request(struct intel_ring_buffer *ring, return 0; } -static bool -gen7_blt_ring_get_irq(struct intel_ring_buffer *ring) -{ - /* The BLT ring on IVB appears to have broken synchronization - * between the seqno write and the interrupt, so that the - * interrupt appears first. Returning false here makes - * i915_wait_request() do a polling loop, instead. - */ - return false; -} - static bool gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) { @@ -1600,8 +1589,5 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) *ring = gen6_blt_ring; - if (IS_GEN7(dev)) - ring->irq_get = gen7_blt_ring_get_irq; - return intel_init_ring_buffer(dev, ring); } -- cgit v1.2.3 From 8d79c3490aecfe6e51f0ba6f9780746fb1434954 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 19 Jan 2012 10:50:05 -0800 Subject: drm/i915: Remove the MI_FLUSH_ENABLE setting. We have always been using the wrong bit -- it's bit 12. However, the bit also doesn't do anything -- hardware has always accepted the MI_FLUSH command even when it was specced not to. Given that there is only one MI_FLUSH emitted in all of the driver stack on gen6+ (in i965_video.c of the 2d driver, and it should be using other code to do its flush instead), just remove the MI_FLUSH enable instead of trying to fix it. Signed-off-by: Eric Anholt Reviewed-by: Kenneth Graunke Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 77e729d4e4f0..b3da17af8997 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -399,8 +399,6 @@ static int init_render_ring(struct intel_ring_buffer *ring) if (INTEL_INFO(dev)->gen > 3) { int mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH; - if (IS_GEN6(dev) || IS_GEN7(dev)) - mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE; I915_WRITE(MI_MODE, mode); if (IS_GEN7(dev)) I915_WRITE(GFX_MODE_GEN7, -- cgit v1.2.3 From 96154f2faba540281073243d61108d1705d19c6d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 14 Dec 2011 13:57:00 +0100 Subject: drm/i915: switch ring->id to be a real id ... and add a helpr function for the places where we want a flag. This way we can use ring->id to index into arrays. v2: Resurrect the missing beautification-space Chris Wilson noted. I'm moving this space around because I'll reuse ring_str in the next patch. Reviewed-by: Chris Wilson Reviewed-by: Ben Widawsky Reviewed-by: Eugeni Dodonov Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 9 +++++---- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 14 +++++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 20 ++++++++++---------- 5 files changed, 25 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6c3be86274e1..9c5db4edd685 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -669,9 +669,9 @@ static int i915_ringbuffer_info(struct seq_file *m, void *data) static const char *ring_str(int ring) { switch (ring) { - case RING_RENDER: return " render"; - case RING_BSD: return " bsd"; - case RING_BLT: return " blt"; + case RCS: return "render"; + case VCS: return "bsd"; + case BCS: return "blt"; default: return ""; } } @@ -714,7 +714,7 @@ static void print_error_buffers(struct seq_file *m, seq_printf(m, "%s [%d]:\n", name, count); while (count--) { - seq_printf(m, " %08x %8u %04x %04x %08x%s%s%s%s%s%s", + seq_printf(m, " %08x %8u %04x %04x %08x%s%s%s%s%s%s%s", err->gtt_offset, err->size, err->read_domains, @@ -724,6 +724,7 @@ static void print_error_buffers(struct seq_file *m, tiling_flag(err->tiling), dirty_flag(err->dirty), purgeable_flag(err->purgeable), + err->ring != -1 ? " " : "", ring_str(err->ring), cache_level_str(err->cache_level)); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c649e0f255b4..49b3ebc0e7a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -203,9 +203,9 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj, cd->invalidate_domains |= invalidate_domains; cd->flush_domains |= flush_domains; if (flush_domains & I915_GEM_GPU_DOMAINS) - cd->flush_rings |= obj->ring->id; + cd->flush_rings |= intel_ring_flag(obj->ring); if (invalidate_domains & I915_GEM_GPU_DOMAINS) - cd->flush_rings |= ring->id; + cd->flush_rings |= intel_ring_flag(ring); } struct eb_objects { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 275ab6fecbd8..ab53edb9f294 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -804,7 +804,7 @@ static u32 capture_bo_list(struct drm_i915_error_buffer *err, err->tiling = obj->tiling_mode; err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; - err->ring = obj->ring ? obj->ring->id : 0; + err->ring = obj->ring ? obj->ring->id : -1; err->cache_level = obj->cache_level; if (++i == count) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b3da17af8997..48042f3b0ea6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -729,13 +729,13 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) */ if (IS_GEN7(dev)) { switch (ring->id) { - case RING_RENDER: + case RCS: mmio = RENDER_HWS_PGA_GEN7; break; - case RING_BLT: + case BCS: mmio = BLT_HWS_PGA_GEN7; break; - case RING_BSD: + case VCS: mmio = BSD_HWS_PGA_GEN7; break; } @@ -1199,7 +1199,7 @@ void intel_ring_advance(struct intel_ring_buffer *ring) static const struct intel_ring_buffer render_ring = { .name = "render ring", - .id = RING_RENDER, + .id = RCS, .mmio_base = RENDER_RING_BASE, .size = 32 * PAGE_SIZE, .init = init_render_ring, @@ -1222,7 +1222,7 @@ static const struct intel_ring_buffer render_ring = { static const struct intel_ring_buffer bsd_ring = { .name = "bsd ring", - .id = RING_BSD, + .id = VCS, .mmio_base = BSD_RING_BASE, .size = 32 * PAGE_SIZE, .init = init_ring_common, @@ -1332,7 +1332,7 @@ gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring) /* ring buffer for Video Codec for Gen6+ */ static const struct intel_ring_buffer gen6_bsd_ring = { .name = "gen6 bsd ring", - .id = RING_BSD, + .id = VCS, .mmio_base = GEN6_BSD_RING_BASE, .size = 32 * PAGE_SIZE, .init = init_ring_common, @@ -1467,7 +1467,7 @@ static void blt_ring_cleanup(struct intel_ring_buffer *ring) static const struct intel_ring_buffer gen6_blt_ring = { .name = "blt ring", - .id = RING_BLT, + .id = BCS, .mmio_base = BLT_RING_BASE, .size = 32 * PAGE_SIZE, .init = blt_ring_init, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 68281c96c558..c8b9cc0cd0dc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -1,13 +1,6 @@ #ifndef _INTEL_RINGBUFFER_H_ #define _INTEL_RINGBUFFER_H_ -enum { - RCS = 0x0, - VCS, - BCS, - I915_NUM_RINGS, -}; - struct intel_hw_status_page { u32 __iomem *page_addr; unsigned int gfx_addr; @@ -36,10 +29,11 @@ struct intel_hw_status_page { struct intel_ring_buffer { const char *name; enum intel_ring_id { - RING_RENDER = 0x1, - RING_BSD = 0x2, - RING_BLT = 0x4, + RCS = 0x0, + VCS, + BCS, } id; +#define I915_NUM_RINGS 3 u32 mmio_base; void __iomem *virtual_start; struct drm_device *dev; @@ -119,6 +113,12 @@ struct intel_ring_buffer { void *private; }; +static inline unsigned +intel_ring_flag(struct intel_ring_buffer *ring) +{ + return 1 << ring->id; +} + static inline u32 intel_ring_sync_index(struct intel_ring_buffer *ring, struct intel_ring_buffer *other) -- cgit v1.2.3 From 6a233c78878d8795517d716544d045d5675b3061 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 14 Dec 2011 13:57:07 +0100 Subject: drm/i915/ringbuffer: kill snb blt workaround This was just to facilitate product enablement with pre-production hw. Allows us to kill quite a bit of cruft. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 81 +-------------------------------- 1 file changed, 2 insertions(+), 79 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 48042f3b0ea6..6e80f8368355 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1368,79 +1368,13 @@ blt_ring_put_irq(struct intel_ring_buffer *ring) GEN6_BLITTER_USER_INTERRUPT); } - -/* Workaround for some stepping of SNB, - * each time when BLT engine ring tail moved, - * the first command in the ring to be parsed - * should be MI_BATCH_BUFFER_START - */ -#define NEED_BLT_WORKAROUND(dev) \ - (IS_GEN6(dev) && (dev->pdev->revision < 8)) - -static inline struct drm_i915_gem_object * -to_blt_workaround(struct intel_ring_buffer *ring) -{ - return ring->private; -} - -static int blt_ring_init(struct intel_ring_buffer *ring) -{ - if (NEED_BLT_WORKAROUND(ring->dev)) { - struct drm_i915_gem_object *obj; - u32 *ptr; - int ret; - - obj = i915_gem_alloc_object(ring->dev, 4096); - if (obj == NULL) - return -ENOMEM; - - ret = i915_gem_object_pin(obj, 4096, true); - if (ret) { - drm_gem_object_unreference(&obj->base); - return ret; - } - - ptr = kmap(obj->pages[0]); - *ptr++ = MI_BATCH_BUFFER_END; - *ptr++ = MI_NOOP; - kunmap(obj->pages[0]); - - ret = i915_gem_object_set_to_gtt_domain(obj, false); - if (ret) { - i915_gem_object_unpin(obj); - drm_gem_object_unreference(&obj->base); - return ret; - } - - ring->private = obj; - } - - return init_ring_common(ring); -} - -static int blt_ring_begin(struct intel_ring_buffer *ring, - int num_dwords) -{ - if (ring->private) { - int ret = intel_ring_begin(ring, num_dwords+2); - if (ret) - return ret; - - intel_ring_emit(ring, MI_BATCH_BUFFER_START); - intel_ring_emit(ring, to_blt_workaround(ring)->gtt_offset); - - return 0; - } else - return intel_ring_begin(ring, 4); -} - static int blt_ring_flush(struct intel_ring_buffer *ring, u32 invalidate, u32 flush) { uint32_t cmd; int ret; - ret = blt_ring_begin(ring, 4); + ret = intel_ring_begin(ring, 4); if (ret) return ret; @@ -1455,22 +1389,12 @@ static int blt_ring_flush(struct intel_ring_buffer *ring, return 0; } -static void blt_ring_cleanup(struct intel_ring_buffer *ring) -{ - if (!ring->private) - return; - - i915_gem_object_unpin(ring->private); - drm_gem_object_unreference(ring->private); - ring->private = NULL; -} - static const struct intel_ring_buffer gen6_blt_ring = { .name = "blt ring", .id = BCS, .mmio_base = BLT_RING_BASE, .size = 32 * PAGE_SIZE, - .init = blt_ring_init, + .init = init_ring_common, .write_tail = ring_write_tail, .flush = blt_ring_flush, .add_request = gen6_add_request, @@ -1478,7 +1402,6 @@ static const struct intel_ring_buffer gen6_blt_ring = { .irq_get = blt_ring_get_irq, .irq_put = blt_ring_put_irq, .dispatch_execbuffer = gen6_ring_dispatch_execbuffer, - .cleanup = blt_ring_cleanup, .sync_to = gen6_blt_ring_sync_to, .semaphore_register = {MI_SEMAPHORE_SYNC_BR, MI_SEMAPHORE_SYNC_BV, -- cgit v1.2.3 From 53d227f282eb9fa4c7cdbfd691fa372b7ca8c4c3 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 25 Jan 2012 16:32:49 +0100 Subject: drm/i915: fixup seqno allocation logic for lazy_request Currently we reserve seqnos only when we emit the request to the ring (by bumping dev_priv->next_seqno), but start using it much earlier for ring->oustanding_lazy_request. When 2 threads compete for the gpu and run on two different rings (e.g. ddx on blitter vs. compositor) hilarity ensued, especially when we get constantly interrupted while reserving buffers. Breakage seems to have been introduced in commit 6f392d548658a17600da7faaf8a5df25ee5f01f6 Author: Chris Wilson Date: Sat Aug 7 11:01:22 2010 +0100 drm/i915: Use a common seqno for all rings. This patch fixes up the seqno reservation logic by moving it into i915_gem_next_request_seqno. The ring->add_request functions now superflously still return the new seqno through a pointer, that will be refactored in the next patch. Note that with this change we now unconditionally allocate a seqno, even when ->add_request might fail because the rings are full and the gpu died. But this does not open up a new can of worms because we can already leave behind an outstanding_request_seqno if e.g. the caller gets interrupted with a signal while stalling for the gpu in the eviciton paths. And with the bugfix we only ever have one seqno allocated per ring (and only that ring), so there are no ordering issues with multiple outstanding seqnos on the same ring. v2: Keep i915_gem_get_seqno (but move it to i915_gem.c) to make it clear that we only have one seqno counter for all rings. Suggested by Chris Wilson. v3: As suggested by Chris Wilson use i915_gem_next_request_seqno instead of ring->oustanding_lazy_request to make the follow-up refactoring more clearly correct. Also improve the commit message with issues discussed on irc. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=45181 Tested-by: Nicolas Kalkhof nkalkhof()at()web.de Reviewed-by: Chris Wilson Signed-Off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 7 +------ drivers/gpu/drm/i915/i915_gem.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 24 ++++-------------------- 3 files changed, 28 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 000a9ad17ddd..563d24e7b725 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1177,12 +1177,7 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) return (int32_t)(seq1 - seq2) >= 0; } -static inline u32 -i915_gem_next_request_seqno(struct intel_ring_buffer *ring) -{ - drm_i915_private_t *dev_priv = ring->dev->dev_private; - return ring->outstanding_lazy_request = dev_priv->next_seqno; -} +u32 i915_gem_next_request_seqno(struct intel_ring_buffer *ring); int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj, struct intel_ring_buffer *pipelined); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2b51e9c3ce73..2031cc7eaa3a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1576,6 +1576,28 @@ i915_gem_process_flushing_list(struct intel_ring_buffer *ring, } } +static u32 +i915_gem_get_seqno(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + u32 seqno = dev_priv->next_seqno; + + /* reserve 0 for non-seqno */ + if (++dev_priv->next_seqno == 0) + dev_priv->next_seqno = 1; + + return seqno; +} + +u32 +i915_gem_next_request_seqno(struct intel_ring_buffer *ring) +{ + if (ring->outstanding_lazy_request == 0) + ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev); + + return ring->outstanding_lazy_request; +} + int i915_add_request(struct intel_ring_buffer *ring, struct drm_file *file, @@ -1587,6 +1609,7 @@ i915_add_request(struct intel_ring_buffer *ring, int ret; BUG_ON(request == NULL); + seqno = i915_gem_next_request_seqno(ring); ret = ring->add_request(ring, &seqno); if (ret) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4956f1bff522..8a983b50a791 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -52,20 +52,6 @@ static inline int ring_space(struct intel_ring_buffer *ring) return space; } -static u32 i915_gem_get_seqno(struct drm_device *dev) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - u32 seqno; - - seqno = dev_priv->next_seqno; - - /* reserve 0 for non-seqno */ - if (++dev_priv->next_seqno == 0) - dev_priv->next_seqno = 1; - - return seqno; -} - static int render_ring_flush(struct intel_ring_buffer *ring, u32 invalidate_domains, @@ -465,7 +451,7 @@ gen6_add_request(struct intel_ring_buffer *ring, mbox1_reg = ring->signal_mbox[0]; mbox2_reg = ring->signal_mbox[1]; - *seqno = i915_gem_get_seqno(ring->dev); + *seqno = i915_gem_next_request_seqno(ring); update_mboxes(ring, *seqno, mbox1_reg); update_mboxes(ring, *seqno, mbox2_reg); @@ -563,8 +549,7 @@ static int pc_render_add_request(struct intel_ring_buffer *ring, u32 *result) { - struct drm_device *dev = ring->dev; - u32 seqno = i915_gem_get_seqno(dev); + u32 seqno = i915_gem_next_request_seqno(ring); struct pipe_control *pc = ring->private; u32 scratch_addr = pc->gtt_offset + 128; int ret; @@ -615,8 +600,7 @@ static int render_ring_add_request(struct intel_ring_buffer *ring, u32 *result) { - struct drm_device *dev = ring->dev; - u32 seqno = i915_gem_get_seqno(dev); + u32 seqno = i915_gem_next_request_seqno(ring); int ret; ret = intel_ring_begin(ring, 4); @@ -790,7 +774,7 @@ ring_add_request(struct intel_ring_buffer *ring, if (ret) return ret; - seqno = i915_gem_get_seqno(ring->dev); + seqno = i915_gem_next_request_seqno(ring); intel_ring_emit(ring, MI_STORE_DWORD_INDEX); intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); -- cgit v1.2.3 From 99ffa1629d737295e569267cf5940758139f9ddb Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 25 Jan 2012 14:04:00 +0100 Subject: drm/i915: enable forcewake voodoo also for gen6 We still have reports of missed irqs even on Sandybridge with the HWSTAM workaround in place. Testing by the bug reporter gets rid of them with the forcewake voodoo and no HWSTAM writes. Because I've slightly botched the rebasing I've left out the ACTHD readback which is also required to get IVB working. Seems to still work on the tester's machine, so I think we should go with the more minmal approach on SNB. Especially since I've only found weak evidence for holding forcewake while waiting for an interrupt to arrive, but none for the ACTHD readback. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=45181 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=45332 Tested-by: Nicolas Kalkhof nkalkhof()at()web.de Acked-by: Chris Wilson Signed-Off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 12 ------------ drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++---- 2 files changed, 2 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 063b4577d4c6..bd6168be63d3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1785,18 +1785,6 @@ static void ironlake_irq_preinstall(struct drm_device *dev) I915_WRITE(HWSTAM, 0xeffe); - if (IS_GEN6(dev)) { - /* Workaround stalls observed on Sandy Bridge GPUs by - * making the blitter command streamer generate a - * write to the Hardware Status Page for - * MI_USER_INTERRUPT. This appears to serialize the - * previous seqno write out before the interrupt - * happens. - */ - I915_WRITE(GEN6_BLITTER_HWSTAM, ~GEN6_BLITTER_USER_INTERRUPT); - I915_WRITE(GEN6_BSD_HWSTAM, ~GEN6_BSD_USER_INTERRUPT); - } - /* XXX hotplug from PCH */ I915_WRITE(DEIMR, 0xffffffff); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8a983b50a791..e784ebb8cc27 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -798,8 +798,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) /* It looks like we need to prevent the gt from suspending while waiting * for an notifiy irq, otherwise irqs seem to get lost on at least the * blt/bsd rings on ivb. */ - if (IS_GEN7(dev)) - gen6_gt_force_wake_get(dev_priv); + gen6_gt_force_wake_get(dev_priv); spin_lock(&ring->irq_lock); if (ring->irq_refcount++ == 0) { @@ -826,8 +825,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) } spin_unlock(&ring->irq_lock); - if (IS_GEN7(dev)) - gen6_gt_force_wake_put(dev_priv); + gen6_gt_force_wake_put(dev_priv); } static bool -- cgit v1.2.3 From a71d8d94525e8fd855c0466fb586ae1cb008f3a2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2012 11:25:36 +0000 Subject: drm/i915: Record the tail at each request and use it to estimate the head By recording the location of every request in the ringbuffer, we know that in order to retire the request the GPU must have finished reading it and so the GPU head is now beyond the tail of the request. We can therefore provide a conservative estimate of where the GPU is reading from in order to avoid having to read back the ring buffer registers when polling for space upon starting a new write into the ringbuffer. A secondary effect is that this allows us to convert intel_ring_buffer_wait() to use i915_wait_request() and so consolidate upon the single function to handle the complicated task of waiting upon the GPU. A necessary precaution is that we need to make that wait uninterruptible to match the existing conditions as all the callers of intel_ring_begin() have not been audited to handle ERESTARTSYS correctly. By using a conservative estimate for the head, and always processing all outstanding requests first, we prevent a race condition between using the estimate and direct reads of I915_RING_HEAD which could result in the value of the head going backwards, and the tail overflowing once again. We are also careful to mark any request that we skip over in order to free space in ring as consumed which provides a self-consistency check. Given sufficient abuse, such as a set of unthrottled GPU bound cairo-traces, avoiding the use of I915_RING_HEAD gives a 10-20% boost on Sandy Bridge (i5-2520m): firefox-paintball 18927ms -> 15646ms: 1.21x speedup firefox-fishtank 12563ms -> 11278ms: 1.11x speedup which is a mild consolation for the performance those traces achieved from exploiting the buggy autoreported head. v2: Add a few more comments and make request->tail a conservative estimate as suggested by Daniel Vetter. Signed-off-by: Chris Wilson [danvet: resolve conflicts with retirement defering and the lack of the autoreport head removal (that will go in through -fixes).] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 5 ++ drivers/gpu/drm/i915/i915_gem.c | 17 ++++++- drivers/gpu/drm/i915/intel_ringbuffer.c | 83 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 15 ++++++ 4 files changed, 119 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8e3eb5e282a1..0e4c073fae49 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -944,6 +944,9 @@ struct drm_i915_gem_request { /** GEM sequence number associated with this request. */ uint32_t seqno; + /** Postion in the ringbuffer of the end of the request */ + u32 tail; + /** Time at which this request was emitted, in jiffies. */ unsigned long emitted_jiffies; @@ -1213,6 +1216,8 @@ i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) } void i915_gem_retire_requests(struct drm_device *dev); +void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring); + void i915_gem_reset(struct drm_device *dev); void i915_gem_clflush_object(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2031cc7eaa3a..19a06c280b12 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1605,12 +1605,20 @@ i915_add_request(struct intel_ring_buffer *ring, { drm_i915_private_t *dev_priv = ring->dev->dev_private; uint32_t seqno; + u32 request_ring_position; int was_empty; int ret; BUG_ON(request == NULL); seqno = i915_gem_next_request_seqno(ring); + /* Record the position of the start of the request so that + * should we detect the updated seqno part-way through the + * GPU processing the request, we never over-estimate the + * position of the head. + */ + request_ring_position = intel_ring_get_tail(ring); + ret = ring->add_request(ring, &seqno); if (ret) return ret; @@ -1619,6 +1627,7 @@ i915_add_request(struct intel_ring_buffer *ring, request->seqno = seqno; request->ring = ring; + request->tail = request_ring_position; request->emitted_jiffies = jiffies; was_empty = list_empty(&ring->request_list); list_add_tail(&request->list, &ring->request_list); @@ -1755,7 +1764,7 @@ void i915_gem_reset(struct drm_device *dev) /** * This function clears the request list as sequence numbers are passed. */ -static void +void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) { uint32_t seqno; @@ -1783,6 +1792,12 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) break; trace_i915_gem_request_retire(ring, request->seqno); + /* We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + */ + ring->last_retired_head = request->tail; list_del(&request->list); i915_gem_request_remove_from_client(request); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e784ebb8cc27..ca3972f2c6f5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -583,6 +583,7 @@ pc_render_add_request(struct intel_ring_buffer *ring, PIPE_CONTROL_FLUSH(ring, scratch_addr); scratch_addr += 128; PIPE_CONTROL_FLUSH(ring, scratch_addr); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | @@ -1107,11 +1108,89 @@ static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring) return 0; } +static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + bool was_interruptible; + int ret; + + /* XXX As we have not yet audited all the paths to check that + * they are ready for ERESTARTSYS from intel_ring_begin, do not + * allow us to be interruptible by a signal. + */ + was_interruptible = dev_priv->mm.interruptible; + dev_priv->mm.interruptible = false; + + ret = i915_wait_request(ring, seqno, true); + + dev_priv->mm.interruptible = was_interruptible; + + return ret; +} + +static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n) +{ + struct drm_i915_gem_request *request; + u32 seqno = 0; + int ret; + + i915_gem_retire_requests_ring(ring); + + if (ring->last_retired_head != -1) { + ring->head = ring->last_retired_head; + ring->last_retired_head = -1; + ring->space = ring_space(ring); + if (ring->space >= n) + return 0; + } + + list_for_each_entry(request, &ring->request_list, list) { + int space; + + if (request->tail == -1) + continue; + + space = request->tail - (ring->tail + 8); + if (space < 0) + space += ring->size; + if (space >= n) { + seqno = request->seqno; + break; + } + + /* Consume this request in case we need more space than + * is available and so need to prevent a race between + * updating last_retired_head and direct reads of + * I915_RING_HEAD. It also provides a nice sanity check. + */ + request->tail = -1; + } + + if (seqno == 0) + return -ENOSPC; + + ret = intel_ring_wait_seqno(ring, seqno); + if (ret) + return ret; + + if (WARN_ON(ring->last_retired_head == -1)) + return -ENOSPC; + + ring->head = ring->last_retired_head; + ring->last_retired_head = -1; + ring->space = ring_space(ring); + if (WARN_ON(ring->space < n)) + return -ENOSPC; + + return 0; +} + int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) { struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; unsigned long end; + int ret; u32 head; /* If the reported head position has wrapped or hasn't advanced, @@ -1125,6 +1204,10 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) return 0; } + ret = intel_ring_wait_request(ring, n); + if (ret != -ENOSPC) + return ret; + trace_i915_ring_wait_begin(ring); if (drm_core_check_feature(dev, DRIVER_GEM)) /* With GEM the hangcheck timer should kick us out of the loop, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c8b9cc0cd0dc..bc0365b8fa4d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -46,6 +46,16 @@ struct intel_ring_buffer { int effective_size; struct intel_hw_status_page status_page; + /** We track the position of the requests in the ring buffer, and + * when each is retired we increment last_retired_head as the GPU + * must have finished processing the request and so we know we + * can advance the ringbuffer up to that position. + * + * last_retired_head is set to -1 after the value is consumed so + * we can detect new retirements. + */ + u32 last_retired_head; + spinlock_t irq_lock; u32 irq_refcount; u32 irq_mask; @@ -193,6 +203,11 @@ int intel_init_blt_ring_buffer(struct drm_device *dev); u32 intel_ring_get_active_head(struct intel_ring_buffer *ring); void intel_ring_setup_status_page(struct intel_ring_buffer *ring); +static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring) +{ + return ring->tail; +} + static inline void i915_trace_irq_get(struct intel_ring_buffer *ring, u32 seqno) { if (ring->trace_irq_seqno == 0 && ring->irq_get(ring)) -- cgit v1.2.3 From 5d031e5b633d910f35e6e0abce94d9d842390006 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Feb 2012 13:34:13 +0000 Subject: drm/i915: Remove use of the autoreported ringbuffer HEAD position This is a revert of 6aa56062eaba67adfb247cded244fd877329588d. This was originally introduced to workaround reads of the ringbuffer registers returning 0 on SandyBridge causing hangs due to ringbuffer overflow. The root cause here was reads through the GT powerwell require the forcewake dance, something we only learnt of later. Now it appears that reading the reported head position from the HWS is returning garbage, leading once again to hangs. For example, on q35 the autoreported head reports: [ 217.975608] head now 00010000, actual 00010000 [ 436.725613] head now 00200000, actual 00200000 [ 462.956033] head now 00210000, actual 00210010 [ 485.501409] head now 00400000, actual 00400020 [ 508.064280] head now 00410000, actual 00410000 [ 530.576078] head now 00600000, actual 00600020 [ 553.273489] head now 00610000, actual 00610018 which appears reasonably sane. In contrast, if we look at snb: [ 141.970680] head now 00e10000, actual 00008238 [ 141.974062] head now 02734000, actual 000083c8 [ 141.974425] head now 00e10000, actual 00008488 [ 141.980374] head now 032b5000, actual 000088b8 [ 141.980885] head now 03271000, actual 00008950 [ 142.040628] head now 02101000, actual 00008b40 [ 142.180173] head now 02734000, actual 00009050 [ 142.181090] head now 00000000, actual 00000ae0 [ 142.183737] head now 02734000, actual 00009050 In addition, the automatic reporting of the head position is scheduled to be defeatured in the future. It has no more utility, remove it. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=45492 Reviewed-by: Daniel Vetter Tested-by: Eric Anholt Signed-off-by: Chris Wilson Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/intel_ringbuffer.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1ab842c6032e..536191540b03 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -301,7 +301,7 @@ static int init_ring_common(struct intel_ring_buffer *ring) I915_WRITE_CTL(ring, ((ring->size - PAGE_SIZE) & RING_NR_PAGES) - | RING_REPORT_64K | RING_VALID); + | RING_VALID); /* If the head is still not zero, the ring is dead */ if ((I915_READ_CTL(ring) & RING_VALID) == 0 || @@ -1132,18 +1132,6 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; unsigned long end; - u32 head; - - /* If the reported head position has wrapped or hasn't advanced, - * fallback to the slow and accurate path. - */ - head = intel_read_status_page(ring, 4); - if (head > ring->head) { - ring->head = head; - ring->space = ring_space(ring); - if (ring->space >= n) - return 0; - } trace_i915_ring_wait_begin(ring); if (drm_core_check_feature(dev, DRIVER_GEM)) -- cgit v1.2.3 From 1c7eaac737e4cca24703531ebcb566afc3ed285f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 27 Mar 2012 09:31:24 +0200 Subject: drm/i915: apply CS reg readback trick against missed IRQ on snb Ben Widawsky reported missed IRQ issues and this patch here helps. We have one other missed IRQ report still left on snb, reported by QA: https://bugs.freedesktop.org/show_bug.cgi?id=46145 This is _not_ a regression due to the forcewake voodoo though, it started showing up before that was applied and has been on-and-off for the past few weeks. According to QA this patch does not help. But the missed IRQ is always from the blt ring (despite running piglit, so also render activity expected), so I'm hopefully that this is an issue with the blt ring itself. Tested-by: Ben Widawsky Signed-Off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index fc66af6a9448..e25581a9f60f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -626,7 +626,7 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring) /* Workaround to force correct ordering between irq and seqno writes on * ivb (and maybe also on snb) by reading from a CS register (like * ACTHD) before reading the status page. */ - if (IS_GEN7(dev)) + if (IS_GEN6(dev) || IS_GEN7(dev)) intel_ring_get_active_head(ring); return intel_read_status_page(ring, I915_GEM_HWS_INDEX); } -- cgit v1.2.3 From 27c1cbd06a7620b354cbb363834f3bb8df4f410d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Apr 2012 13:59:46 +0100 Subject: drm/i915/ringbuffer: Exclude last 2 cachlines of ring on 845g The 845g shares the errata with i830 whereby executing a command within 2 cachelines of the end of the ringbuffer may cause a GPU hang. Signed-off-by: Chris Wilson Cc: stable@kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e25581a9f60f..f75806e5bff5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1038,7 +1038,7 @@ int intel_init_ring_buffer(struct drm_device *dev, * of the buffer. */ ring->effective_size = ring->size; - if (IS_I830(ring->dev)) + if (IS_I830(ring->dev) || IS_845G(ring->dev)) ring->effective_size -= 128; return 0; -- cgit v1.2.3