From 403bdd10c815029694046adf5ffde0577cbd2866 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 7 Aug 2014 16:05:39 +0200 Subject: drm/i915: No busy-loop wait_for in the ring init code Doing a 1s wait (tops) with the cpu is a bit excessive. Tune it down like everything else in that code. v2: Also insert the missing space Chris spotted. Cc: Naresh Kumar Kachhi Cc: Chris Wilson Acked-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 16371a444426..117543e58d48 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -476,8 +476,8 @@ static bool stop_ring(struct intel_engine_cs *ring) if (!IS_GEN2(ring->dev)) { I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); - if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { - DRM_ERROR("%s :timed out trying to stop ring\n", ring->name); + if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { + DRM_ERROR("%s : timed out trying to stop ring\n", ring->name); return false; } } -- cgit v1.2.3 From 9bec9b1334d687c0a9fcf3d3a1987a61b4826a45 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Aug 2014 09:21:35 +0100 Subject: drm/i915: Double check ring is idle before declaring the GPU wedged During ring initialisation, sometimes we observe, though not in production hardware, that the idle flag is not set even though the ring is empty. Double check before giving up. Signed-off-by: Chris Wilson Cc: Damien Lespiau Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 117543e58d48..a059b64a0fb2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -478,7 +478,12 @@ static bool stop_ring(struct intel_engine_cs *ring) I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { DRM_ERROR("%s : timed out trying to stop ring\n", ring->name); - return false; + /* Sometimes we observe that the idle flag is not + * set even though the ring is empty. So double + * check before giving up. + */ + if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring)) + return false; } } -- cgit v1.2.3 From 84c2377fcee7a43cd964b62143e9a3714130bb0c Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:15 +0100 Subject: drm/i915/bdw: Allocate ringbuffers for Logical Ring Contexts As we have said a couple of times by now, logical ring contexts have their own ringbuffers: not only the backing pages, but the whole management struct. In a previous version of the series, this was achieved with two separate patches: drm/i915/bdw: Allocate ringbuffer backing objects for default global LRC drm/i915/bdw: Allocate ringbuffer for user-created LRCs Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 38 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++++ 4 files changed, 46 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cbae19bab4bf..eccb8e406e9c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -635,6 +635,7 @@ struct intel_context { /* Execlists */ struct { struct drm_i915_gem_object *state; + struct intel_ringbuffer *ringbuf; } engine[I915_NUM_RINGS]; struct list_head link; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9f30ee80e487..0c80bb1f5420 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -65,7 +65,11 @@ void intel_lr_context_free(struct intel_context *ctx) for (i = 0; i < I915_NUM_RINGS; i++) { struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; + struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf; + if (ctx_obj) { + intel_destroy_ringbuffer_obj(ringbuf); + kfree(ringbuf); i915_gem_object_ggtt_unpin(ctx_obj); drm_gem_object_unreference(&ctx_obj->base); } @@ -99,6 +103,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, struct drm_device *dev = ring->dev; struct drm_i915_gem_object *ctx_obj; uint32_t context_size; + struct intel_ringbuffer *ringbuf; int ret; WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); @@ -119,6 +124,39 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, return ret; } + ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL); + if (!ringbuf) { + DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", + ring->name); + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + ret = -ENOMEM; + return ret; + } + + ringbuf->size = 32 * PAGE_SIZE; + ringbuf->effective_size = ringbuf->size; + ringbuf->head = 0; + ringbuf->tail = 0; + ringbuf->space = ringbuf->size; + ringbuf->last_retired_head = -1; + + /* TODO: For now we put this in the mappable region so that we can reuse + * the existing ringbuffer code which ioremaps it. When we start + * creating many contexts, this will no longer work and we must switch + * to a kmapish interface. + */ + ret = intel_alloc_ringbuffer_obj(dev, ringbuf); + if (ret) { + DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n", + ring->name, ret); + kfree(ringbuf); + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + return ret; + } + + ctx->engine[ring->id].ringbuf = ringbuf; ctx->engine[ring->id].state = ctx_obj; return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a059b64a0fb2..064652034d7e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1519,7 +1519,7 @@ static int init_phys_status_page(struct intel_engine_cs *ring) return 0; } -static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) { if (!ringbuf->obj) return; @@ -1530,8 +1530,8 @@ static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) ringbuf->obj = NULL; } -static int intel_alloc_ringbuffer_obj(struct drm_device *dev, - struct intel_ringbuffer *ringbuf) +int intel_alloc_ringbuffer_obj(struct drm_device *dev, + struct intel_ringbuffer *ringbuf) { struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 70525d0c2c74..669cc7527f9a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -355,6 +355,10 @@ intel_write_status_page(struct intel_engine_cs *ring, #define I915_GEM_HWS_SCRATCH_INDEX 0x30 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf); +int intel_alloc_ringbuffer_obj(struct drm_device *dev, + struct intel_ringbuffer *ringbuf); + void intel_stop_ring_buffer(struct intel_engine_cs *ring); void intel_cleanup_ring_buffer(struct intel_engine_cs *ring); -- cgit v1.2.3 From 0c7dd53b84def4fbbba907bef3d32a5171b617a5 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 11 Aug 2014 16:17:44 +0200 Subject: drm/i915/bdw: Add a context and an engine pointers to the ringbuffer Any given ringbuffer is unequivocally tied to one context and one engine. By setting the appropriate pointers to them, the ringbuffer struct holds all the infromation you might need to submit a workload for processing, Execlists style. v2: Drop ring->ctx since that looks terribly ill-defined for legacy ringbuffer submission. Signed-off-by: Oscar Mateo (v1) Acked-by: Damien Lespiau (v2) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 3 files changed, 4 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0c80bb1f5420..8f2d14da6228 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -134,6 +134,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, return ret; } + ringbuf->ring = ring; ringbuf->size = 32 * PAGE_SIZE; ringbuf->effective_size = ringbuf->size; ringbuf->head = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 064652034d7e..c35f956ed6a0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1594,6 +1594,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); ringbuf->size = 32 * PAGE_SIZE; + ringbuf->ring = ring; memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); init_waitqueue_head(&ring->irq_queue); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 669cc7527f9a..fe9d9d9d3598 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -90,6 +90,8 @@ struct intel_ringbuffer { struct drm_i915_gem_object *obj; void __iomem *virtual_start; + struct intel_engine_cs *ring; + u32 head; u32 tail; int space; -- cgit v1.2.3 From 48d823878d64f93163f5a949623346748bbce1b4 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:23 +0100 Subject: drm/i915/bdw: Generic logical ring init and cleanup Allocate and populate the default LRC for every ring, call gen-specific init/cleanup, init/fini the command parser and set the status page (now inside the LRC object). These are things all engines/rings have in common. Stopping the ring before cleanup and initializing the seqnos is left as a TODO task (we need more infrastructure in place before we can achieve this). v2: Check the ringbuffer backing obj for ring_is_initialized, instead of the context backing obj (similar, but not exactly the same). Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 4 --- drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 17 +++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +--- 4 files changed, 70 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index bcb41002aa13..7a08f3e9e1ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -445,10 +445,6 @@ int i915_gem_context_init(struct drm_device *dev) /* NB: RCS will hold a ref for all rings */ ring->default_context = ctx; - - /* FIXME: we really only want to do this for initialized rings */ - if (i915.enable_execlists) - intel_lr_context_deferred_create(ctx, ring); } DRM_DEBUG_DRIVER("%s context support initialized\n", diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9c2ff8f11c90..ed7a4ff3bbd2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -110,12 +110,60 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { - /* TODO */ + if (!intel_ring_initialized(ring)) + return; + + /* TODO: make sure the ring is stopped */ + ring->preallocated_lazy_request = NULL; + ring->outstanding_lazy_seqno = 0; + + if (ring->cleanup) + ring->cleanup(ring); + + i915_cmd_parser_fini_ring(ring); + + if (ring->status_page.obj) { + kunmap(sg_page(ring->status_page.obj->pages->sgl)); + ring->status_page.obj = NULL; + } } static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring) { - /* TODO */ + int ret; + struct intel_context *dctx = ring->default_context; + struct drm_i915_gem_object *dctx_obj; + + /* Intentionally left blank. */ + ring->buffer = NULL; + + ring->dev = dev; + INIT_LIST_HEAD(&ring->active_list); + INIT_LIST_HEAD(&ring->request_list); + init_waitqueue_head(&ring->irq_queue); + + ret = intel_lr_context_deferred_create(dctx, ring); + if (ret) + return ret; + + /* The status page is offset 0 from the context object in LRCs. */ + dctx_obj = dctx->engine[ring->id].state; + ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj); + ring->status_page.page_addr = kmap(sg_page(dctx_obj->pages->sgl)); + if (ring->status_page.page_addr == NULL) + return -ENOMEM; + ring->status_page.obj = dctx_obj; + + ret = i915_cmd_parser_init_ring(ring); + if (ret) + return ret; + + if (ring->init) { + ret = ring->init(ring); + if (ret) + return ret; + } + return 0; } @@ -399,6 +447,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, int ret; WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); + if (ctx->engine[ring->id].state) + return 0; context_size = round_up(get_lr_context_size(ring), 4096); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c35f956ed6a0..e4b97f5c5797 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -40,6 +40,23 @@ */ #define CACHELINE_BYTES 64 +bool +intel_ring_initialized(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + + if (!dev) + return false; + + if (i915.enable_execlists) { + struct intel_context *dctx = ring->default_context; + struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf; + + return ringbuf->obj; + } else + return ring->buffer && ring->buffer->obj; +} + static inline int __ring_space(int head, int tail, int size) { int space = head - (tail + I915_RING_FREE_SPACE); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index fe9d9d9d3598..fbe54ef6a9a1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -289,11 +289,7 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; -static inline bool -intel_ring_initialized(struct intel_engine_cs *ring) -{ - return ring->buffer && ring->buffer->obj; -} +bool intel_ring_initialized(struct intel_engine_cs *ring); static inline unsigned intel_ring_flag(struct intel_engine_cs *ring) -- cgit v1.2.3 From 9b1136d505b1de5478e11b59ca59cf8ce2a33217 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:24 +0100 Subject: drm/i915/bdw: GEN-specific logical ring init Logical rings do not need most of the initialization their legacy ringbuffer counterparts do: we just need the pipe control object for the render ring, enable Execlists on the hardware and a few workarounds. v2: Squash with: "drm/i915: Extract pipe control fini & make init outside accesible". Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Make checkpatch happy.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 34 +++++++++++++-------- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ 3 files changed, 78 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ed7a4ff3bbd2..1a1f5f98f05b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -108,6 +108,49 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) /* TODO */ } +static int gen8_init_common_ring(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + I915_WRITE(RING_MODE_GEN7(ring), + _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | + _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); + POSTING_READ(RING_MODE_GEN7(ring)); + DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name); + + memset(&ring->hangcheck, 0, sizeof(ring->hangcheck)); + + return 0; +} + +static int gen8_init_render_ring(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = gen8_init_common_ring(ring); + if (ret) + return ret; + + /* We need to disable the AsyncFlip performance optimisations in order + * to use MI_WAIT_FOR_EVENT within the CS. It should already be + * programmed to '1' on all products. + * + * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv + */ + I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); + + ret = intel_init_pipe_control(ring); + if (ret) + return ret; + + I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); + + return ret; +} + void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) @@ -178,6 +221,9 @@ static int logical_render_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + ring->init = gen8_init_render_ring; + ring->cleanup = intel_fini_pipe_control; + return logical_ring_init(dev, ring); } @@ -192,6 +238,8 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } @@ -206,6 +254,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } @@ -220,6 +270,8 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } @@ -234,6 +286,8 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e4b97f5c5797..dab5e7c79036 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -597,8 +597,25 @@ out: return ret; } -static int -init_pipe_control(struct intel_engine_cs *ring) +void +intel_fini_pipe_control(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + + if (ring->scratch.obj == NULL) + return; + + if (INTEL_INFO(dev)->gen >= 5) { + kunmap(sg_page(ring->scratch.obj->pages->sgl)); + i915_gem_object_ggtt_unpin(ring->scratch.obj); + } + + drm_gem_object_unreference(&ring->scratch.obj->base); + ring->scratch.obj = NULL; +} + +int +intel_init_pipe_control(struct intel_engine_cs *ring) { int ret; @@ -673,7 +690,7 @@ static int init_render_ring(struct intel_engine_cs *ring) _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); if (INTEL_INFO(dev)->gen >= 5) { - ret = init_pipe_control(ring); + ret = intel_init_pipe_control(ring); if (ret) return ret; } @@ -708,16 +725,7 @@ static void render_ring_cleanup(struct intel_engine_cs *ring) dev_priv->semaphore_obj = NULL; } - if (ring->scratch.obj == NULL) - return; - - if (INTEL_INFO(dev)->gen >= 5) { - kunmap(sg_page(ring->scratch.obj->pages->sgl)); - i915_gem_object_ggtt_unpin(ring->scratch.obj); - } - - drm_gem_object_unreference(&ring->scratch.obj->base); - ring->scratch.obj = NULL; + intel_fini_pipe_control(ring); } static int gen8_rcs_signal(struct intel_engine_cs *signaller, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index fbe54ef6a9a1..677df0d7be48 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -381,6 +381,9 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno); int intel_ring_flush_all_caches(struct intel_engine_cs *ring); int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring); +void intel_fini_pipe_control(struct intel_engine_cs *ring); +int intel_init_pipe_control(struct intel_engine_cs *ring); + int intel_init_render_ring_buffer(struct drm_device *dev); int intel_init_bsd_ring_buffer(struct drm_device *dev); int intel_init_bsd2_ring_buffer(struct drm_device *dev); -- cgit v1.2.3 From 82e104cc266c6da30a30fc5028b2f0236c669cd7 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:26 +0100 Subject: drm/i915/bdw: New logical ring submission mechanism Well, new-ish: if all this code looks familiar, that's because it's a clone of the existing submission mechanism (with some modifications here and there to adapt it to LRCs and Execlists). And why did we do this instead of reusing code, one might wonder? Well, there are some fears that the differences are big enough that they will end up breaking all platforms. Also, Execlists offer several advantages, like control over when the GPU is done with a given workload, that can help simplify the submission mechanism, no doubt. I am interested in getting Execlists to work first and foremost, but in the future this parallel submission mechanism will help us to fine tune the mechanism without affecting old gens. v2: Pass the ringbuffer only (whenever possible). Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Appease checkpatch. Again. And drop the legacy sarea gunk that somehow crept in.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 189 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 13 +++ drivers/gpu/drm/i915/intel_ringbuffer.c | 22 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 + 4 files changed, 217 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c9518c6261de..31025847d680 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -108,6 +108,195 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) /* TODO */ } +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) +{ + intel_logical_ring_advance(ringbuf); + + if (intel_ring_stopped(ringbuf->ring)) + return; + + /* TODO: how to submit a context to the ELSP is not here yet */ +} + +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring) +{ + if (ring->outstanding_lazy_seqno) + return 0; + + if (ring->preallocated_lazy_request == NULL) { + struct drm_i915_gem_request *request; + + request = kmalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; + + ring->preallocated_lazy_request = request; + } + + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); +} + +static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, + int bytes) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_i915_gem_request *request; + u32 seqno = 0; + int ret; + + if (ringbuf->last_retired_head != -1) { + ringbuf->head = ringbuf->last_retired_head; + ringbuf->last_retired_head = -1; + + ringbuf->space = intel_ring_space(ringbuf); + if (ringbuf->space >= bytes) + return 0; + } + + list_for_each_entry(request, &ring->request_list, list) { + if (__intel_ring_space(request->tail, ringbuf->tail, + ringbuf->size) >= bytes) { + seqno = request->seqno; + break; + } + } + + if (seqno == 0) + return -ENOSPC; + + ret = i915_wait_seqno(ring, seqno); + if (ret) + return ret; + + /* TODO: make sure we update the right ringbuffer's last_retired_head + * when retiring requests */ + i915_gem_retire_requests_ring(ring); + ringbuf->head = ringbuf->last_retired_head; + ringbuf->last_retired_head = -1; + + ringbuf->space = intel_ring_space(ringbuf); + return 0; +} + +static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, + int bytes) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long end; + int ret; + + ret = logical_ring_wait_request(ringbuf, bytes); + if (ret != -ENOSPC) + return ret; + + /* Force the context submission in case we have been skipping it */ + intel_logical_ring_advance_and_submit(ringbuf); + + /* With GEM the hangcheck timer should kick us out of the loop, + * leaving it early runs the risk of corrupting GEM state (due + * to running on almost untested codepaths). But on resume + * timers don't work yet, so prevent a complete hang in that + * case by choosing an insanely large timeout. */ + end = jiffies + 60 * HZ; + + do { + ringbuf->head = I915_READ_HEAD(ring); + ringbuf->space = intel_ring_space(ringbuf); + if (ringbuf->space >= bytes) { + ret = 0; + break; + } + + msleep(1); + + if (dev_priv->mm.interruptible && signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + ret = i915_gem_check_wedge(&dev_priv->gpu_error, + dev_priv->mm.interruptible); + if (ret) + break; + + if (time_after(jiffies, end)) { + ret = -EBUSY; + break; + } + } while (1); + + return ret; +} + +static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf) +{ + uint32_t __iomem *virt; + int rem = ringbuf->size - ringbuf->tail; + + if (ringbuf->space < rem) { + int ret = logical_ring_wait_for_space(ringbuf, rem); + + if (ret) + return ret; + } + + virt = ringbuf->virtual_start + ringbuf->tail; + rem /= 4; + while (rem--) + iowrite32(MI_NOOP, virt++); + + ringbuf->tail = 0; + ringbuf->space = intel_ring_space(ringbuf); + + return 0; +} + +static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) +{ + int ret; + + if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) { + ret = logical_ring_wrap_buffer(ringbuf); + if (unlikely(ret)) + return ret; + } + + if (unlikely(ringbuf->space < bytes)) { + ret = logical_ring_wait_for_space(ringbuf, bytes); + if (unlikely(ret)) + return ret; + } + + return 0; +} + +int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = i915_gem_check_wedge(&dev_priv->gpu_error, + dev_priv->mm.interruptible); + if (ret) + return ret; + + ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t)); + if (ret) + return ret; + + /* Preallocate the olr before touching the ring */ + ret = logical_ring_alloc_seqno(ring); + if (ret) + return ret; + + ringbuf->space -= num_dwords * sizeof(uint32_t); + return 0; +} + static int gen8_init_common_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index bf0eff4e9f08..4e032875c1fd 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -29,6 +29,19 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring); void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int intel_logical_rings_init(struct drm_device *dev); +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); +static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) +{ + ringbuf->tail &= ringbuf->size - 1; +} +static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, + u32 data) +{ + iowrite32(data, ringbuf->virtual_start + ringbuf->tail); + ringbuf->tail += 4; +} +int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords); + /* Logical Ring Contexts */ void intel_lr_context_free(struct intel_context *ctx); int intel_lr_context_deferred_create(struct intel_context *ctx, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index dab5e7c79036..0bfa018fab20 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -57,7 +57,7 @@ intel_ring_initialized(struct intel_engine_cs *ring) return ring->buffer && ring->buffer->obj; } -static inline int __ring_space(int head, int tail, int size) +int __intel_ring_space(int head, int tail, int size) { int space = head - (tail + I915_RING_FREE_SPACE); if (space < 0) @@ -65,12 +65,13 @@ static inline int __ring_space(int head, int tail, int size) return space; } -static inline int ring_space(struct intel_ringbuffer *ringbuf) +int intel_ring_space(struct intel_ringbuffer *ringbuf) { - return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size); + return __intel_ring_space(ringbuf->head & HEAD_ADDR, + ringbuf->tail, ringbuf->size); } -static bool intel_ring_stopped(struct intel_engine_cs *ring) +bool intel_ring_stopped(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring->dev->dev_private; return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring); @@ -585,7 +586,7 @@ static int init_ring_common(struct intel_engine_cs *ring) else { ringbuf->head = I915_READ_HEAD(ring); ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); ringbuf->last_retired_head = -1; } @@ -1702,13 +1703,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n) ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); if (ringbuf->space >= n) return 0; } list_for_each_entry(request, &ring->request_list, list) { - if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) { + if (__intel_ring_space(request->tail, ringbuf->tail, + ringbuf->size) >= n) { seqno = request->seqno; break; } @@ -1725,7 +1727,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n) ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); return 0; } @@ -1754,7 +1756,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n) trace_i915_ring_wait_begin(ring); do { ringbuf->head = I915_READ_HEAD(ring); - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); if (ringbuf->space >= n) { ret = 0; break; @@ -1806,7 +1808,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring) iowrite32(MI_NOOP, virt++); ringbuf->tail = 0; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 677df0d7be48..81bad364e36d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -374,6 +374,9 @@ static inline void intel_ring_advance(struct intel_engine_cs *ring) struct intel_ringbuffer *ringbuf = ring->buffer; ringbuf->tail &= ringbuf->size - 1; } +int __intel_ring_space(int head, int tail, int size); +int intel_ring_space(struct intel_ringbuffer *ringbuf); +bool intel_ring_stopped(struct intel_engine_cs *ring); void __intel_ring_advance(struct intel_engine_cs *ring); int __must_check intel_ring_idle(struct intel_engine_cs *ring); -- cgit v1.2.3 From 4712274c362b7730a1c6e01c9a51a6d46f5b7f43 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:28 +0100 Subject: drm/i915/bdw: GEN-specific logical ring emit flush Same as the legacy-style ring->flush. v2: The BSD invalidate bit still exists in GEN8! Add it for the VCS rings (but still consolidate the blt and bsd ring flushes into one). This was noticed by Brad Volkin. v3: The command for BSD and for other rings is slightly different: get it exactly the same as in gen6_ring_flush + gen6_bsd_ring_flush Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 85 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 7 --- drivers/gpu/drm/i915/intel_ringbuffer.h | 10 ++++ 3 files changed, 95 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 94f8b4087642..a88fa6e9360b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -340,6 +340,86 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, + u32 invalidate_domains, + u32 unused) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t cmd; + int ret; + + ret = intel_logical_ring_begin(ringbuf, 4); + if (ret) + return ret; + + cmd = MI_FLUSH_DW + 1; + + if (ring == &dev_priv->ring[VCS]) { + if (invalidate_domains & I915_GEM_GPU_DOMAINS) + cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD | + MI_FLUSH_DW_STORE_INDEX | + MI_FLUSH_DW_OP_STOREDW; + } else { + if (invalidate_domains & I915_GEM_DOMAIN_RENDER) + cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX | + MI_FLUSH_DW_OP_STOREDW; + } + + intel_logical_ring_emit(ringbuf, cmd); + intel_logical_ring_emit(ringbuf, + I915_GEM_HWS_SCRATCH_ADDR | + MI_FLUSH_DW_USE_GTT); + intel_logical_ring_emit(ringbuf, 0); /* upper addr */ + intel_logical_ring_emit(ringbuf, 0); /* value */ + intel_logical_ring_advance(ringbuf); + + return 0; +} + +static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, + u32 invalidate_domains, + u32 flush_domains) +{ + struct intel_engine_cs *ring = ringbuf->ring; + u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 flags = 0; + int ret; + + flags |= PIPE_CONTROL_CS_STALL; + + if (flush_domains) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + } + + if (invalidate_domains) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + } + + ret = intel_logical_ring_begin(ringbuf, 6); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); + intel_logical_ring_emit(ringbuf, flags); + intel_logical_ring_emit(ringbuf, scratch_addr); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_advance(ringbuf); + + return 0; +} + static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) { return intel_read_status_page(ring, I915_GEM_HWS_INDEX); @@ -451,6 +531,7 @@ static int logical_render_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush_render; return logical_ring_init(dev, ring); } @@ -470,6 +551,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -489,6 +571,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -508,6 +591,7 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -527,6 +611,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0bfa018fab20..4236014c1cda 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,13 +33,6 @@ #include "i915_trace.h" #include "intel_drv.h" -/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, - * but keeps the logic simple. Indeed, the whole purpose of this macro is just - * to give some inclination as to some of the magic values used in the various - * workarounds! - */ -#define CACHELINE_BYTES 64 - bool intel_ring_initialized(struct intel_engine_cs *ring) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 467885159a80..e497837c7724 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,13 @@ #define I915_CMD_HASH_ORDER 9 +/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, + * but keeps the logic simple. Indeed, the whole purpose of this macro is just + * to give some inclination as to some of the magic values used in the various + * workarounds! + */ +#define CACHELINE_BYTES 64 + /* * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use" * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use" @@ -218,6 +225,9 @@ struct intel_engine_cs { /* Execlists */ int (*emit_request)(struct intel_ringbuffer *ringbuf); + int (*emit_flush)(struct intel_ringbuffer *ringbuf, + u32 invalidate_domains, + u32 flush_domains); /** * List of objects currently involved in rendering from the -- cgit v1.2.3 From 896ab1a5d54269b463a24194c2e4a369103b46d8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:51 +0200 Subject: drm/i915: Fix up checks for aliasing ppgtt A subsequent patch will no longer initialize the aliasing ppgtt if we have full ppgtt enabled, since we simply don't need that any more. Unfortunately a few places check for the aliasing ppgtt instead of checking for ppgtt in general. Fix them up. One special case are the gtt offset and size macros, which have some code to remap the aliasing ppgtt to the global gtt. The aliasing ppgtt is _not_ a logical address space, so passing that in as the vm is plain and simple a bug. So just WARN about it and carry on - we have a gracefully fall-through anyway if we can't find the vma. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +--- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 8 ++------ drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +--- 4 files changed, 5 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index dea99d92fb4a..c45856bcc8b9 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -842,8 +842,6 @@ finish: */ bool i915_needs_cmd_parser(struct intel_engine_cs *ring) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; - if (!ring->needs_cmd_parser) return false; @@ -852,7 +850,7 @@ bool i915_needs_cmd_parser(struct intel_engine_cs *ring) * disabled. That will cause all of the parser's PPGTT checks to * fail. For now, disable parsing when PPGTT is off. */ - if (!dev_priv->mm.aliasing_ppgtt) + if (USES_PPGTT(ring->dev)) return false; return (i915.enable_cmd_parser == 1); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 1763fbf34e1d..895f9f2f35ea 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -999,7 +999,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = HAS_WT(dev); break; case I915_PARAM_HAS_ALIASING_PPGTT: - value = dev_priv->mm.aliasing_ppgtt || USES_FULL_PPGTT(dev); + value = USES_PPGTT(dev); break; case I915_PARAM_HAS_WAIT_TIMEOUT: value = 1; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c8404a439502..6a7795097017 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5106,9 +5106,7 @@ unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o, struct drm_i915_private *dev_priv = o->base.dev->dev_private; struct i915_vma *vma; - if (!dev_priv->mm.aliasing_ppgtt || - vm == &dev_priv->mm.aliasing_ppgtt->base) - vm = &dev_priv->gtt.base; + WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); list_for_each_entry(vma, &o->vma_list, vma_link) { if (vma->vm == vm) @@ -5149,9 +5147,7 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, struct drm_i915_private *dev_priv = o->base.dev->dev_private; struct i915_vma *vma; - if (!dev_priv->mm.aliasing_ppgtt || - vm == &dev_priv->mm.aliasing_ppgtt->base) - vm = &dev_priv->gtt.base; + WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); BUG_ON(list_empty(&o->vma_list)); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4236014c1cda..13543f8528c2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2006,9 +2006,7 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, u64 offset, u32 len, unsigned flags) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; - bool ppgtt = dev_priv->mm.aliasing_ppgtt != NULL && - !(flags & I915_DISPATCH_SECURE); + bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE); int ret; ret = intel_ring_begin(ring, 4); -- cgit v1.2.3 From cc9130be805d955f0e06642e57741dd9df1fbc86 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:42 +0100 Subject: drm/i915/bdw: Make sure gpu reset still works with Execlists If we reset a ring after a hang, we have to make sure that we clear out all queued Execlists requests. v2: The ring is, at this point, already being correctly re-programmed for Execlists, and the hangcheck counters cleared. v3: Daniel suggests to drop the "if (execlists)" because the Execlists queue should be empty in legacy mode (which is true, if we do the INIT_LIST_HEAD). v4: Do the pending intel_runtime_pm_put Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 12 ++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 2 files changed, 13 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 32fa1e9eb844..aa4103bdd352 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2551,6 +2551,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, i915_gem_free_request(request); } + while (!list_empty(&ring->execlist_queue)) { + struct intel_ctx_submit_request *submit_req; + + submit_req = list_first_entry(&ring->execlist_queue, + struct intel_ctx_submit_request, + execlist_link); + list_del(&submit_req->execlist_link); + intel_runtime_pm_put(dev_priv); + i915_gem_context_unreference(submit_req->ctx); + kfree(submit_req); + } + /* These may not have been flush before the reset, do so now */ kfree(ring->preallocated_lazy_request); ring->preallocated_lazy_request = NULL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 13543f8528c2..4fb1ec95ec08 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1612,6 +1612,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, ring->dev = dev; INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); + INIT_LIST_HEAD(&ring->execlist_queue); ringbuf->size = 32 * PAGE_SIZE; ringbuf->ring = ring; memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); -- cgit v1.2.3 From c5ad011d7d256ecbe173324029e992817194d2b0 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 4 Aug 2014 03:51:38 -0700 Subject: drm/i915: FBC flush nuke for BDW According to spec FBC on BDW and HSW are identical without any gaps. So let's copy the nuke and let FBC really start compressing stuff. Without this patch we can verify with false color that nothing is being compressed. With the nuke in place and false color it is possible to see false color debugs. Unfortunatelly on some rings like BCS on BDW we have to avoid Bits 22:18 on LRIs due to a high risk of hung. So, when using Blt ring for frontbuffer rend cache would never been cleaned and FBC would stop compressing buffer. One alternative is to cache clean on software frontbuffer tracking. v2: Fix rebase conflict. v3: Do not clean cache on BCS ring. Instead use sw frontbuffer tracking. Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 3 +++ drivers/gpu/drm/i915/intel_pm.c | 10 ++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 9 ++++++++- 4 files changed, 22 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5957db463e27..f13f30d65172 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2772,6 +2772,7 @@ extern void intel_modeset_setup_hw_state(struct drm_device *dev, extern void i915_redisable_vga(struct drm_device *dev); extern void i915_redisable_vga_power_on(struct drm_device *dev); extern bool intel_fbc_enabled(struct drm_device *dev); +extern void gen8_fbc_sw_flush(struct drm_device *dev, u32 value); extern void intel_disable_fbc(struct drm_device *dev); extern bool ironlake_set_drps(struct drm_device *dev, u8 val); extern void intel_init_pch_refclk(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 794aa5e611eb..cbd159163722 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9082,6 +9082,9 @@ void intel_frontbuffer_flush(struct drm_device *dev, intel_mark_fb_busy(dev, frontbuffer_bits, NULL); intel_edp_psr_flush(dev, frontbuffer_bits); + + if (IS_GEN8(dev)) + gen8_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN); } /** diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2253f878adf4..b9edfd426a19 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -345,6 +345,16 @@ bool intel_fbc_enabled(struct drm_device *dev) return dev_priv->display.fbc_enabled(dev); } +void gen8_fbc_sw_flush(struct drm_device *dev, u32 value) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (!IS_GEN8(dev)) + return; + + I915_WRITE(MSG_FBC_REND_STATE, value); +} + static void intel_fbc_work_fn(struct work_struct *__work) { struct intel_fbc_work *work = diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4fb1ec95ec08..de7654623acc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -444,7 +444,14 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, return ret; } - return gen8_emit_pipe_control(ring, flags, scratch_addr); + ret = gen8_emit_pipe_control(ring, flags, scratch_addr); + if (ret) + return ret; + + if (!invalidate_domains && flush_domains) + return gen7_ring_fbc_flush(ring, FBC_REND_NUKE); + + return 0; } static void ring_write_tail(struct intel_engine_cs *ring, -- cgit v1.2.3 From 86d7f23842f1bce3ab5e8c8d0c676112bbc4c99b Mon Sep 17 00:00:00 2001 From: Arun Siluvery Date: Tue, 26 Aug 2014 14:44:50 +0100 Subject: drm/i915/bdw: Apply workarounds in render ring init function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For BDW workarounds are currently initialized in init_clock_gating() but they are lost during reset, suspend/resume etc; this patch moves the WAs that are part of register state context to render ring init fn otherwise default context ends up with incorrect values as they don't get initialized until init_clock_gating fn. v2: Add workarounds to golden render state This method has its own issues, first of all this is different for each gen and it is generated using a tool so adding new workaround and mainitaining them across gens is not a straightforward process. v3: Use LRIs to emit these workarounds (Ville) Instead of modifying the golden render state the same LRIs are emitted from within the driver. v4: Use abstract name when exporting gen specific routines (Chris) For: VIZ-4092 Signed-off-by: Arun Siluvery Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 6 +++ drivers/gpu/drm/i915/intel_pm.c | 48 -------------------- drivers/gpu/drm/i915/intel_ringbuffer.c | 79 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + 4 files changed, 87 insertions(+), 48 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 62ee178b1edb..a5221d8f1580 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -628,6 +628,12 @@ done: ring->last_context = to; if (uninitialized) { + if (ring->init_context) { + ret = ring->init_context(ring); + if (ret) + DRM_ERROR("ring init context: %d\n", ret); + } + ret = i915_gem_render_state_init(ring); if (ret) DRM_ERROR("init render state: %d\n", ret); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b9edfd426a19..718023859686 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5536,37 +5536,12 @@ static void broadwell_init_clock_gating(struct drm_device *dev) /* FIXME(BDW): Check all the w/a, some might only apply to * pre-production hw. */ - /* WaDisablePartialInstShootdown:bdw */ - I915_WRITE(GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); - - /* WaDisableThreadStallDopClockGating:bdw */ - /* FIXME: Unclear whether we really need this on production bdw. */ - I915_WRITE(GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); - /* - * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for - * pre-production hardware - */ - I915_WRITE(HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS)); - I915_WRITE(HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); I915_WRITE(_3D_CHICKEN3, _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2))); - I915_WRITE(COMMON_SLICE_CHICKEN2, - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); - - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); - - /* WaDisableDopClockGating:bdw May not be needed for production */ - I915_WRITE(GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -5582,31 +5557,12 @@ static void broadwell_init_clock_gating(struct drm_device *dev) BDW_DPRS_MASK_VBLANK_SRD); } - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - I915_WRITE(HDC_CHICKEN0, - I915_READ(HDC_CHICKEN0) | - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); - /* WaVSRefCountFullforceMissDisable:bdw */ /* WaDSRefCountFullforceMissDisable:bdw */ I915_WRITE(GEN7_FF_THREAD_MODE, I915_READ(GEN7_FF_THREAD_MODE) & ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); - I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); @@ -5614,10 +5570,6 @@ static void broadwell_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); - /* Wa4x4STCOptimizationDisable:bdw */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); - lpt_init_clock_gating(dev); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index de7654623acc..1d5bfdb4fe97 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -657,6 +657,84 @@ err: return ret; } +static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, + u32 addr, u32 value) +{ + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, addr); + intel_ring_emit(ring, value); +} + +static int gen8_init_workarounds(struct intel_engine_cs *ring) +{ + int ret; + + /* + * workarounds applied in this fn are part of register state context, + * they need to be re-initialized followed by gpu reset, suspend/resume, + * module reload. + */ + + /* + * update the number of dwords required based on the + * actual number of workarounds applied + */ + ret = intel_ring_begin(ring, 24); + if (ret) + return ret; + + /* WaDisablePartialInstShootdown:bdw */ + /* WaDisableThreadStallDopClockGating:bdw */ + /* FIXME: Unclear whether we really need this on production bdw. */ + intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE + | STALL_DOP_GATING_DISABLE)); + + /* WaDisableDopClockGating:bdw May not be needed for production */ + intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + + /* + * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for + * pre-production hardware + */ + intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, + _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS + | GEN8_SAMPLER_POWER_BYPASS_DIS)); + + intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1, + _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); + + intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2, + _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); + + /* Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + intel_ring_emit_wa(ring, HDC_CHICKEN0, + _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); + + /* Wa4x4STCOptimizationDisable:bdw */ + intel_ring_emit_wa(ring, CACHE_MODE_1, + _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + intel_ring_emit_wa(ring, GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + + intel_ring_advance(ring); + + return 0; +} + static int init_render_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; @@ -2143,6 +2221,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) dev_priv->semaphore_obj = obj; } } + ring->init_context = gen8_init_workarounds; ring->add_request = gen6_add_request; ring->flush = gen8_render_ring_flush; ring->irq_get = gen8_ring_get_irq; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9cbf7b0ebc99..96479c89f4bd 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -148,6 +148,8 @@ struct intel_engine_cs { int (*init)(struct intel_engine_cs *ring); + int (*init_context)(struct intel_engine_cs *ring); + void (*write_tail)(struct intel_engine_cs *ring, u32 value); int __must_check (*flush)(struct intel_engine_cs *ring, -- cgit v1.2.3 From 888b59951ed5ac450fe3ddd7b3937d905b9bafbc Mon Sep 17 00:00:00 2001 From: Arun Siluvery Date: Tue, 26 Aug 2014 14:44:51 +0100 Subject: drm/i915/bdw: Export workaround data to debugfs The workarounds that are applied are exported to a debugfs file; this is used to verify their state after the test case (reset or suspend/resume etc). This patch is only required to support i-g-t. Signed-off-by: Arun Siluvery Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 40 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 14 ++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++++++++++++++++++ 3 files changed, 77 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9d93af126f9c..1467cc1a47a9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2628,6 +2628,45 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) return 0; } +static int intel_wa_registers(struct seq_file *m, void *unused) +{ + int i; + int ret; + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + if (!IS_BROADWELL(dev)) { + DRM_DEBUG_DRIVER("Workaround table not available !!\n"); + return -EINVAL; + } + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + intel_runtime_pm_get(dev_priv); + + seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa_regs); + for (i = 0; i < dev_priv->num_wa_regs; ++i) { + u32 addr, mask; + + addr = dev_priv->intel_wa_regs[i].addr; + mask = dev_priv->intel_wa_regs[i].mask; + dev_priv->intel_wa_regs[i].value = I915_READ(addr) | mask; + if (dev_priv->intel_wa_regs[i].addr) + seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n", + dev_priv->intel_wa_regs[i].addr, + dev_priv->intel_wa_regs[i].value, + dev_priv->intel_wa_regs[i].mask); + } + + intel_runtime_pm_put(dev_priv); + mutex_unlock(&dev->struct_mutex); + + return 0; +} + struct pipe_crc_info { const char *name; struct drm_device *dev; @@ -4159,6 +4198,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_semaphore_status", i915_semaphore_status, 0}, {"i915_shared_dplls_info", i915_shared_dplls_info, 0}, {"i915_dp_mst_info", i915_dp_mst_info, 0}, + {"intel_wa_registers", intel_wa_registers, 0} }; #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f13f30d65172..95820652e153 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1556,6 +1556,20 @@ struct drm_i915_private { struct intel_shared_dpll shared_dplls[I915_NUM_PLLS]; int dpio_phy_iosf_port[I915_NUM_PHYS_VLV]; + /* + * workarounds are currently applied at different places and + * changes are being done to consolidate them so exact count is + * not clear at this point, use a max value for now. + */ +#define I915_MAX_WA_REGS 16 + struct { + u32 addr; + u32 value; + /* bitmask representing WA bits */ + u32 mask; + } intel_wa_regs[I915_MAX_WA_REGS]; + u32 num_wa_regs; + /* Reclocking support */ bool render_reclock_avail; bool lvds_downclock_avail; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1d5bfdb4fe97..14b517d61b4e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -660,20 +660,40 @@ err: static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, u32 addr, u32 value) { + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + if (dev_priv->num_wa_regs > I915_MAX_WA_REGS) + return; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(ring, addr); intel_ring_emit(ring, value); + + dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr; + dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = (value) & 0xFFFF; + /* value is updated with the status of remaining bits of this + * register when it is read from debugfs file + */ + dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value; + dev_priv->num_wa_regs++; + + return; } static int gen8_init_workarounds(struct intel_engine_cs *ring) { int ret; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; /* * workarounds applied in this fn are part of register state context, * they need to be re-initialized followed by gpu reset, suspend/resume, * module reload. */ + dev_priv->num_wa_regs = 0; + memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); /* * update the number of dwords required based on the @@ -732,6 +752,9 @@ static int gen8_init_workarounds(struct intel_engine_cs *ring) intel_ring_advance(ring); + DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n", + dev_priv->num_wa_regs); + return 0; } -- cgit v1.2.3 From 00e1e623e62cd8452e28633182b91ddcbb70cc7c Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 27 Aug 2014 17:33:12 +0300 Subject: drm/i915: Init some CHV workarounds via LRIs in ring->init_context() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow the BDW example and apply the workarounds touching registers which are saved in the context image through LRIs in the new ring->init_context() hook. This makes Mesa much happier and eg. glxgears doesn't hang after the first frame. Cc: Arun Siluvery Signed-off-by: Ville Syrjälä [danvet: Add missing wa table initialization to avoid a functional conflict with Arun's wa table debugfs support.] Reviewed-by: "Barbalho, Rafael" Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 14 ---------- drivers/gpu/drm/i915/intel_ringbuffer.c | 46 +++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6bf533100dad..376cc2c8751a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6011,14 +6011,6 @@ static void cherryview_init_clock_gating(struct drm_device *dev) I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); - /* WaDisablePartialInstShootdown:chv */ - I915_WRITE(GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); - - /* WaDisableThreadStallDopClockGating:chv */ - I915_WRITE(GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); - /* WaVSRefCountFullforceMissDisable:chv */ /* WaDSRefCountFullforceMissDisable:chv */ I915_WRITE(GEN7_FF_THREAD_MODE, @@ -6037,10 +6029,6 @@ static void cherryview_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); - /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ - I915_WRITE(HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); - /* WaDisableGunitClockGating:chv (pre-production hw) */ I915_WRITE(VLV_GUNIT_CLOCK_GATE, I915_READ(VLV_GUNIT_CLOCK_GATE) | GINT_DIS); @@ -6050,8 +6038,6 @@ static void cherryview_init_clock_gating(struct drm_device *dev) _MASKED_BIT_ENABLE(GEN8_FF_DOP_CLOCK_GATE_DISABLE)); /* WaDisableDopClockGating:chv (pre-production hw) */ - I915_WRITE(GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 14b517d61b4e..12135ef36060 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -681,7 +681,7 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, return; } -static int gen8_init_workarounds(struct intel_engine_cs *ring) +static int bdw_init_workarounds(struct intel_engine_cs *ring) { int ret; struct drm_device *dev = ring->dev; @@ -758,6 +758,45 @@ static int gen8_init_workarounds(struct intel_engine_cs *ring) return 0; } +static int chv_init_workarounds(struct intel_engine_cs *ring) +{ + int ret; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + /* + * workarounds applied in this fn are part of register state context, + * they need to be re-initialized followed by gpu reset, suspend/resume, + * module reload. + */ + dev_priv->num_wa_regs = 0; + memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); + + ret = intel_ring_begin(ring, 12); + if (ret) + return ret; + + /* WaDisablePartialInstShootdown:chv */ + intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); + + /* WaDisableThreadStallDopClockGating:chv */ + intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); + + /* WaDisableDopClockGating:chv (pre-production hw) */ + intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + + /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ + intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, + _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); + + intel_ring_advance(ring); + + return 0; +} + static int init_render_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; @@ -2244,7 +2283,10 @@ int intel_init_render_ring_buffer(struct drm_device *dev) dev_priv->semaphore_obj = obj; } } - ring->init_context = gen8_init_workarounds; + if (IS_CHERRYVIEW(dev)) + ring->init_context = chv_init_workarounds; + else + ring->init_context = bdw_init_workarounds; ring->add_request = gen6_add_request; ring->flush = gen8_render_ring_flush; ring->irq_get = gen8_ring_get_irq; -- cgit v1.2.3 From 55820e1e840def3802fc366607f2b25e31036ab1 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 30 Aug 2014 16:51:00 +0100 Subject: drm/i915: Don't overrun the intel_wa_regs array When entering intel_ring_emit_wa() with num_wa_regs equal to I915_MAX_WA_REGS, we end up indexing the intel_wa_regs array beyond its allocation. Fix the check then. Signed-off-by: Damien Lespiau Reviewed-by: Arun Siluvery Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 12135ef36060..96618c0c5085 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -663,7 +663,7 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - if (dev_priv->num_wa_regs > I915_MAX_WA_REGS) + if (dev_priv->num_wa_regs >= I915_MAX_WA_REGS) return; intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); -- cgit v1.2.3 From 04ad2dc7116347a4219b13935c7569ceaab95155 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 30 Aug 2014 16:51:01 +0100 Subject: drm/i915: Don't silently discard workarounds If we happen to emit more than I915_MAX_WA_REGS workarounds, we will currently discard them, not even emit the LRI. Not really what we want, so warn loudly. Signed-off-by: Damien Lespiau Reviewed-by: Arun Siluvery Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 96618c0c5085..5ef0f994b0bb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -663,7 +663,7 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - if (dev_priv->num_wa_regs >= I915_MAX_WA_REGS) + if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS)) return; intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); -- cgit v1.2.3 From b07ba1dc78a251fc02992a35b0fd8757029566e4 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 30 Aug 2014 16:51:02 +0100 Subject: drm/i915: Remove unneeded brackets Signed-off-by: Damien Lespiau Reviewed-by: Arun Siluvery Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5ef0f994b0bb..f8aadc3c2124 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -671,7 +671,7 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, intel_ring_emit(ring, value); dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr; - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = (value) & 0xFFFF; + dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF; /* value is updated with the status of remaining bits of this * register when it is read from debugfs file */ -- cgit v1.2.3 From 95468892fdfeef6d1004b524e35957629efdbe00 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 7 Aug 2014 15:39:54 +0100 Subject: drm/i915: Reset the HEAD pointer for the ring after writing START MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ville found an old w/a documented for g4x that suggested that we need to reset the HEAD after writing START. This is a useful fixup for some of the g4x ring initialisation woes, but as usual, not all. v2: Do the rewrite unconditionally anyway References: https://bugs.freedesktop.org/show_bug.cgi?id=76554 Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f8aadc3c2124..85fc2b1a124a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -563,6 +563,14 @@ static int init_ring_common(struct intel_engine_cs *ring) * also enforces ordering), otherwise the hw might lose the new ring * register values. */ I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj)); + + /* WaClearRingBufHeadRegAtInit:ctg,elk */ + if (I915_READ_HEAD(ring)) + DRM_DEBUG("%s initialization failed [head=%08x], fudging\n", + ring->name, I915_READ_HEAD(ring)); + I915_WRITE_HEAD(ring, 0); + (void)I915_READ_HEAD(ring); + I915_WRITE_CTL(ring, ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); -- cgit v1.2.3 From 770722585639bc2da683e72f610d5f614298e415 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 10 Sep 2014 12:18:27 +0100 Subject: drm/i915: HSW always use GGTT selector for secure batches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gen6 and earlier conflate address space selection (ppgtt vs ggtt) with the security bit (i.e. only privileged batches were allowed to run from ggtt). From Haswell only, you are able to select the security bit separate from the address space - and we always requested to use ppgtt. This breaks the golden render state batch execution with full-ppgtt as that is only present in the global GTT and more generally any secure batch that is not colocated in the ppgtt and ggtt. So we need to disable the use of the ppgtt selector bit for secure batches, or else we hang immediately upon boot and thence after every GPU reset... v2: Only HSW differentiates between secure dispatch and ggtt, so simply ignore the differentiation and always use secure==ggtt. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä [danvet: Rectify commit message as noted by Chris.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 109de2eeb9a8..25795f2efdcb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2203,8 +2203,9 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, return ret; intel_ring_emit(ring, - MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW | - (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW)); + MI_BATCH_BUFFER_START | + (flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); -- cgit v1.2.3 From 7cd512f1520f85bf8e45f75b82fece58f0265cec Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 15 Sep 2014 11:38:57 +0200 Subject: drm/i915: Fix irq checks in ring->irq_get/put functions Yet another place that wasn't properly transformed when implementing SOix. While at it convert the checks to WARN_ON on gen5+ (since we don't have UMS potentially doing stupid things on those platforms). And also add the corresponding checks to the put functions (again with a WARN_ON) for gen5+. v2: Drop the WARNINGS in the irq_put functions (including the existing one for vebox), Chris convinced me that they're not that terribly useful. v3: Don't forget about execlist code. Cc: Imre Deak Cc: Jesse Barnes Cc: "Volkin, Bradley D" Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index bafd38b5703e..803fc38664c4 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1063,7 +1063,7 @@ static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring) struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; - if (!dev->irq_enabled) + if (WARN_ON(!intel_irqs_enabled(dev_priv))) return false; spin_lock_irqsave(&dev_priv->irq_lock, flags); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 25795f2efdcb..922d6bc1a1b3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1197,7 +1197,7 @@ gen5_ring_get_irq(struct intel_engine_cs *ring) struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; - if (!dev->irq_enabled) + if (WARN_ON(!intel_irqs_enabled(dev_priv))) return false; spin_lock_irqsave(&dev_priv->irq_lock, flags); @@ -1228,7 +1228,7 @@ i9xx_ring_get_irq(struct intel_engine_cs *ring) struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; - if (!dev->irq_enabled) + if (!intel_irqs_enabled(dev_priv)) return false; spin_lock_irqsave(&dev_priv->irq_lock, flags); @@ -1265,7 +1265,7 @@ i8xx_ring_get_irq(struct intel_engine_cs *ring) struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; - if (!dev->irq_enabled) + if (!intel_irqs_enabled(dev_priv)) return false; spin_lock_irqsave(&dev_priv->irq_lock, flags); @@ -1399,8 +1399,8 @@ gen6_ring_get_irq(struct intel_engine_cs *ring) struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; - if (!dev->irq_enabled) - return false; + if (WARN_ON(!intel_irqs_enabled(dev_priv))) + return false; spin_lock_irqsave(&dev_priv->irq_lock, flags); if (ring->irq_refcount++ == 0) { @@ -1442,7 +1442,7 @@ hsw_vebox_get_irq(struct intel_engine_cs *ring) struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; - if (!dev->irq_enabled) + if (WARN_ON(!intel_irqs_enabled(dev_priv))) return false; spin_lock_irqsave(&dev_priv->irq_lock, flags); @@ -1462,9 +1462,6 @@ hsw_vebox_put_irq(struct intel_engine_cs *ring) struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; - if (!dev->irq_enabled) - return; - spin_lock_irqsave(&dev_priv->irq_lock, flags); if (--ring->irq_refcount == 0) { I915_WRITE_IMR(ring, ~0); @@ -1480,7 +1477,7 @@ gen8_ring_get_irq(struct intel_engine_cs *ring) struct drm_i915_private *dev_priv = dev->dev_private; unsigned long flags; - if (!dev->irq_enabled) + if (WARN_ON(!intel_irqs_enabled(dev_priv))) return false; spin_lock_irqsave(&dev_priv->irq_lock, flags); -- cgit v1.2.3 From d37cf5f7e1b315585940a735a8508d955ffc0f16 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 19 Sep 2014 20:05:26 +0300 Subject: drm/i915/bdw: Cleanup pre prod workarounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit as these have been fixed in production hw and hurt performance if applied. v2: adjust requested ring space (Ville) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83482 Tested-by: zhoujian Signed-off-by: Mika Kuoppala Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 25795f2efdcb..6dc981f0671e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -707,7 +707,7 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) * update the number of dwords required based on the * actual number of workarounds applied */ - ret = intel_ring_begin(ring, 24); + ret = intel_ring_begin(ring, 18); if (ret) return ret; @@ -722,19 +722,8 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); - /* - * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for - * pre-production hardware - */ intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS - | GEN8_SAMPLER_POWER_BYPASS_DIS)); - - intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); - - intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2, - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); + _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); /* Use Force Non-Coherent whenever executing a 3D context. This is a * workaround for for a possible hang in the unlikely event a TLB -- cgit v1.2.3 From fbdcb06880bf414afafd4053d0d9906725f8b117 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 13 Feb 2013 15:27:34 +0000 Subject: drm/i915/skl: don't set the AsyncFlip performance mode for Gen9+ The following sets the AsyncFlip performance mode for everything above Gen6: commit 4790cb36b3eede8fb0cca529dc1d31b9936fa24b Author: Chris Wilson Date: Sun Jan 20 16:11:20 2013 +0000 drm/i915: Disable AsyncFlip performance optimisations Starting from Gen9 the MI_MODE register layout changes and doesn't include the above bit. Reviewed-by: Thomas Wood Signed-off-by: Imre Deak Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 109de2eeb9a8..a6a64aee8f74 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -823,7 +823,7 @@ static int init_render_ring(struct intel_engine_cs *ring) * * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv */ - if (INTEL_INFO(dev)->gen >= 6) + if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 9) I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); /* Required for the hardware to program scanline values for waiting */ -- cgit v1.2.3 From 1d73c2a8f218be3e8b6aa884740fc67110660b54 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 24 Sep 2014 19:50:59 -0400 Subject: drm/i915: Minimize the huge amount of unecessary fbc sw cache clean. The sw cache clean on BDW is a tempoorary workaround because we cannot set cache clean on blt ring with risk of hungs. So we are doing the cache clean on sw. However we are doing much more than needed. Not only when using blt ring. So, with this extra w/a we minimize the ammount of cache cleans and call it only on same cases that it was being called on gen7. The traditional FBC Cache clean happens over LRI on BLT ring when there is a frontbuffer touch happening. frontbuffer tracking set fbc_dirty variable to let BLT flush that it must clean FBC cache. fbc.need_sw_cache_clean works in the opposite information direction of ring->fbc_dirty telling software on frontbuffer tracking to perform the cache clean on sw side. v2: Clean it a little bit and fully check for Broadwell instead of gen8. v3: Rebase after frontbuffer organization. v4: Wiggle confused me. So fixing v3! Cc: Daniel Vetter Cc: Paulo Zanoni Reviewed-by: Paulo Zanoni Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 10 +++++++++- drivers/gpu/drm/i915/intel_frontbuffer.c | 6 ++++-- drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 9 +++++++-- 4 files changed, 21 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 999bd57cab65..cccb7767e837 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -666,6 +666,14 @@ struct i915_fbc { * possible. */ bool enabled; + /* On gen8 some rings cannont perform fbc clean operation so for now + * we are doing this on SW with mmio. + * This variable works in the opposite information direction + * of ring->fbc_dirty telling software on frontbuffer tracking + * to perform the cache clean on sw side. + */ + bool need_sw_cache_clean; + struct intel_fbc_work { struct delayed_work work; struct drm_crtc *crtc; @@ -2825,7 +2833,7 @@ extern void intel_modeset_setup_hw_state(struct drm_device *dev, extern void i915_redisable_vga(struct drm_device *dev); extern void i915_redisable_vga_power_on(struct drm_device *dev); extern bool intel_fbc_enabled(struct drm_device *dev); -extern void gen8_fbc_sw_flush(struct drm_device *dev, u32 value); +extern void bdw_fbc_sw_flush(struct drm_device *dev, u32 value); extern void intel_disable_fbc(struct drm_device *dev); extern bool ironlake_set_drps(struct drm_device *dev, u8 val); extern void intel_init_pch_refclk(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index f74744c091cb..7eb74a62117f 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -189,8 +189,10 @@ void intel_frontbuffer_flush(struct drm_device *dev, * needs to be reworked into a proper frontbuffer tracking scheme like * psr employs. */ - if (IS_BROADWELL(dev)) - gen8_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN); + if (dev_priv->fbc.need_sw_cache_clean) { + dev_priv->fbc.need_sw_cache_clean = false; + bdw_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN); + } } /** diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6b416201240f..011892d5356e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -356,7 +356,7 @@ bool intel_fbc_enabled(struct drm_device *dev) return dev_priv->fbc.enabled; } -void gen8_fbc_sw_flush(struct drm_device *dev, u32 value) +void bdw_fbc_sw_flush(struct drm_device *dev, u32 value) { struct drm_i915_private *dev_priv = dev->dev_private; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 922d6bc1a1b3..620a89dc868b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2237,6 +2237,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, u32 invalidate, u32 flush) { struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; uint32_t cmd; int ret; @@ -2267,8 +2268,12 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, } intel_ring_advance(ring); - if (IS_GEN7(dev) && !invalidate && flush) - return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN); + if (!invalidate && flush) { + if (IS_GEN7(dev)) + return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN); + else if (IS_BROADWELL(dev)) + dev_priv->fbc.need_sw_cache_clean = true; + } return 0; } -- cgit v1.2.3 From da09654d777c361006f6ea3452f8de4a374d5783 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 19 Sep 2014 20:16:27 -0400 Subject: drm/i915/bdw: WaDisableFenceDestinationToSLM This WA affect BDW GT3 pre-production steppings. Signed-off-by: Rodrigo Vivi Reviewed-by: Mika Kuoppala [danvet: Don't mention steppings ...] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ad8179b40d19..124ea60c1386 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4836,6 +4836,7 @@ enum punit_power_well { /* GEN8 chicken */ #define HDC_CHICKEN0 0x7300 #define HDC_FORCE_NON_COHERENT (1<<4) +#define HDC_FENCE_DEST_SLM_DISABLE (1<<14) /* WaCatErrorRejectionIssue */ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG 0x9030 diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 620a89dc868b..c21aaad55982 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -740,8 +740,12 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) * workaround for for a possible hang in the unlikely event a TLB * invalidation occurs during a PSD flush. */ + /* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */ intel_ring_emit_wa(ring, HDC_CHICKEN0, - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); + _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT | + (IS_BDW_GT3(dev) ? + HDC_FENCE_DEST_SLM_DISABLE : 0) + )); /* Wa4x4STCOptimizationDisable:bdw */ intel_ring_emit_wa(ring, CACHE_MODE_1, -- cgit v1.2.3 From 101b376d358e2f724db5e0ac4d207079b16c4754 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 9 Oct 2014 07:11:47 -0700 Subject: drm/i915/bdw: Remove BDW preproduction W/As until C stepping. Let's clean this a bit v2: Rebase after other Mika's patch that removed some BDW production workarounds. v3: Removed stepping info. Reviewed-by: Mika Kuoppala Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 10 ---------- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 ++--- 2 files changed, 2 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index daa99e7e805b..23d331884944 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5654,16 +5654,6 @@ static void broadwell_init_clock_gating(struct drm_device *dev) I915_WRITE(WM2_LP_ILK, 0); I915_WRITE(WM1_LP_ILK, 0); - /* FIXME(BDW): Check all the w/a, some might only apply to - * pre-production hw. */ - - - I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); - - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2))); - - /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7c0c28c65cb4..5ebe46a05a05 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -712,13 +712,12 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) return ret; /* WaDisablePartialInstShootdown:bdw */ - /* WaDisableThreadStallDopClockGating:bdw */ - /* FIXME: Unclear whether we really need this on production bdw. */ + /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE | STALL_DOP_GATING_DISABLE)); - /* WaDisableDopClockGating:bdw May not be needed for production */ + /* WaDisableDopClockGating:bdw */ intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); -- cgit v1.2.3 From 7225342ab501befdb64bcec76ded41f5897c0855 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 7 Oct 2014 17:21:26 +0300 Subject: drm/i915: Build workaround list in ring initialization If we build the workaround list in ring initialization and decouple it from the actual writing of values, we gain the ability to decide where and how we want to apply the values. The advantage of this will become more clear when we need to initialize workarounds on older gens where it is not possible to write all the registers through ring LRIs. v2: rebase on newest bdw workarounds Cc: Arun Siluvery Cc: Damien Lespiau Signed-off-by: Mika Kuoppala Reviewed-by: Arun Siluvery [danvet: Resolve tiny conflict in comments and ocd alignments a bit.] [danvet2: Remove bogus force_wake_get call spotted by Paulo and QA.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 18 ++-- drivers/gpu/drm/i915/i915_drv.h | 28 ++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 184 ++++++++++++++++++-------------- 3 files changed, 128 insertions(+), 102 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index da4036d0bab9..62c111b1f0d8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2655,18 +2655,18 @@ static int i915_wa_registers(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); - seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa_regs); - for (i = 0; i < dev_priv->num_wa_regs; ++i) { + seq_printf(m, "Workarounds applied: %d\n", dev_priv->workarounds.count); + for (i = 0; i < dev_priv->workarounds.count; ++i) { u32 addr, mask; - addr = dev_priv->intel_wa_regs[i].addr; - mask = dev_priv->intel_wa_regs[i].mask; - dev_priv->intel_wa_regs[i].value = I915_READ(addr) | mask; - if (dev_priv->intel_wa_regs[i].addr) + addr = dev_priv->workarounds.reg[i].addr; + mask = dev_priv->workarounds.reg[i].mask; + dev_priv->workarounds.reg[i].value = I915_READ(addr) | mask; + if (dev_priv->workarounds.reg[i].addr) seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n", - dev_priv->intel_wa_regs[i].addr, - dev_priv->intel_wa_regs[i].value, - dev_priv->intel_wa_regs[i].mask); + dev_priv->workarounds.reg[i].addr, + dev_priv->workarounds.reg[i].value, + dev_priv->workarounds.reg[i].mask); } intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9f3d68903caa..d1905b38fc95 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1448,6 +1448,20 @@ struct i915_frontbuffer_tracking { unsigned flip_bits; }; +struct i915_wa_reg { + u32 addr; + u32 value; + /* bitmask representing WA bits */ + u32 mask; +}; + +#define I915_MAX_WA_REGS 16 + +struct i915_workarounds { + struct i915_wa_reg reg[I915_MAX_WA_REGS]; + u32 count; +}; + struct drm_i915_private { struct drm_device *dev; struct kmem_cache *slab; @@ -1590,19 +1604,7 @@ struct drm_i915_private { struct intel_shared_dpll shared_dplls[I915_NUM_PLLS]; int dpio_phy_iosf_port[I915_NUM_PHYS_VLV]; - /* - * workarounds are currently applied at different places and - * changes are being done to consolidate them so exact count is - * not clear at this point, use a max value for now. - */ -#define I915_MAX_WA_REGS 16 - struct { - u32 addr; - u32 value; - /* bitmask representing WA bits */ - u32 mask; - } intel_wa_regs[I915_MAX_WA_REGS]; - u32 num_wa_regs; + struct i915_workarounds workarounds; /* Reclocking support */ bool render_reclock_avail; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5ebe46a05a05..5f935d4dfb6a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -665,79 +665,107 @@ err: return ret; } -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, - u32 addr, u32 value) +static int intel_ring_workarounds_emit(struct intel_engine_cs *ring) { + int ret, i; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_workarounds *w = &dev_priv->workarounds; - if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS)) - return; + if (WARN_ON(w->count == 0)) + return 0; - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit(ring, addr); - intel_ring_emit(ring, value); + ring->gpu_caches_dirty = true; + ret = intel_ring_flush_all_caches(ring); + if (ret) + return ret; - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr; - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF; - /* value is updated with the status of remaining bits of this - * register when it is read from debugfs file - */ - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value; - dev_priv->num_wa_regs++; + ret = intel_ring_begin(ring, w->count * 3); + if (ret) + return ret; + + for (i = 0; i < w->count; i++) { + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, w->reg[i].addr); + intel_ring_emit(ring, w->reg[i].value); + } + + intel_ring_advance(ring); + + ring->gpu_caches_dirty = true; + ret = intel_ring_flush_all_caches(ring); + if (ret) + return ret; + + DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); - return; + return 0; +} + +static int wa_add(struct drm_i915_private *dev_priv, + const u32 addr, const u32 val, const u32 mask) +{ + const u32 idx = dev_priv->workarounds.count; + + if (WARN_ON(idx >= I915_MAX_WA_REGS)) + return -ENOSPC; + + dev_priv->workarounds.reg[idx].addr = addr; + dev_priv->workarounds.reg[idx].value = val; + dev_priv->workarounds.reg[idx].mask = mask; + + dev_priv->workarounds.count++; + + return 0; } +#define WA_REG(addr, val, mask) { \ + const int r = wa_add(dev_priv, (addr), (val), (mask)); \ + if (r) \ + return r; \ + } + +#define WA_SET_BIT_MASKED(addr, mask) \ + WA_REG(addr, _MASKED_BIT_ENABLE(mask), (mask) & 0xffff) + +#define WA_CLR_BIT_MASKED(addr, mask) \ + WA_REG(addr, _MASKED_BIT_DISABLE(mask), (mask) & 0xffff) + +#define WA_SET_BIT(addr, mask) WA_REG(addr, I915_READ(addr) | (mask), mask) +#define WA_CLR_BIT(addr, mask) WA_REG(addr, I915_READ(addr) & ~(mask), mask) + +#define WA_WRITE(addr, val) WA_REG(addr, val, 0xffffffff) + static int bdw_init_workarounds(struct intel_engine_cs *ring) { - int ret; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - /* - * workarounds applied in this fn are part of register state context, - * they need to be re-initialized followed by gpu reset, suspend/resume, - * module reload. - */ - dev_priv->num_wa_regs = 0; - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); - - /* - * update the number of dwords required based on the - * actual number of workarounds applied - */ - ret = intel_ring_begin(ring, 18); - if (ret) - return ret; - /* WaDisablePartialInstShootdown:bdw */ /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE - | STALL_DOP_GATING_DISABLE)); + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE | + STALL_DOP_GATING_DISABLE); /* WaDisableDopClockGating:bdw */ - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); /* Use Force Non-Coherent whenever executing a 3D context. This is a * workaround for for a possible hang in the unlikely event a TLB * invalidation occurs during a PSD flush. */ /* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */ - intel_ring_emit_wa(ring, HDC_CHICKEN0, - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT | - (IS_BDW_GT3(dev) ? - HDC_FENCE_DEST_SLM_DISABLE : 0) - )); + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT | + (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); /* Wa4x4STCOptimizationDisable:bdw */ - intel_ring_emit_wa(ring, CACHE_MODE_1, - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); + WA_SET_BIT_MASKED(CACHE_MODE_1, + GEN8_4x4_STC_OPTIMIZATION_DISABLE); /* * BSpec recommends 8x4 when MSAA is used, @@ -747,52 +775,50 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ - intel_ring_emit_wa(ring, GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); - - intel_ring_advance(ring); - - DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n", - dev_priv->num_wa_regs); + WA_SET_BIT_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); return 0; } static int chv_init_workarounds(struct intel_engine_cs *ring) { - int ret; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - /* - * workarounds applied in this fn are part of register state context, - * they need to be re-initialized followed by gpu reset, suspend/resume, - * module reload. - */ - dev_priv->num_wa_regs = 0; - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); - - ret = intel_ring_begin(ring, 12); - if (ret) - return ret; - /* WaDisablePartialInstShootdown:chv */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* WaDisableThreadStallDopClockGating:chv */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); /* WaDisableDopClockGating:chv (pre-production hw) */ - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); - intel_ring_advance(ring); + return 0; +} + +static int init_workarounds_ring(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + WARN_ON(ring->id != RCS); + + dev_priv->workarounds.count = 0; + + if (IS_BROADWELL(dev)) + return bdw_init_workarounds(ring); + + if (IS_CHERRYVIEW(dev)) + return chv_init_workarounds(ring); return 0; } @@ -852,7 +878,7 @@ static int init_render_ring(struct intel_engine_cs *ring) if (HAS_L3_DPF(dev)) I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); - return ret; + return init_workarounds_ring(ring); } static void render_ring_cleanup(struct intel_engine_cs *ring) @@ -2298,10 +2324,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev) dev_priv->semaphore_obj = obj; } } - if (IS_CHERRYVIEW(dev)) - ring->init_context = chv_init_workarounds; - else - ring->init_context = bdw_init_workarounds; + + ring->init_context = intel_ring_workarounds_emit; ring->add_request = gen6_add_request; ring->flush = gen8_render_ring_flush; ring->irq_get = gen8_ring_get_irq; -- cgit v1.2.3 From 22a916aaa187946e8df724ab7838a0c13b45a9f4 Mon Sep 17 00:00:00 2001 From: Arun Siluvery Date: Wed, 22 Oct 2014 18:59:52 +0100 Subject: drm/i915: Emit even number of dwords when emitting LRIs The number of DWords should be even when doing ring emits as command sequences require QWord alignment. There was some discussion about the maximum length of the MI_LRI command. Quoting Mika "I did some test with bdw: "The maximum is 128 writes, resulting the 8 bit length field of the command being 0xff, thus following the spec. The 128'th write went through. "Perhaps the max command length is then less in older gens? "Perhaps WARN_ON(x > 128) in MI_LOAD_REGISTER_IMM would be in place but one needs minor tweak to command parser a bit also then. #define I915_MAX_WA_REGS 16 keeps us safe for now atleast." Ville commented that on pre-gen6 the length field seems to be restricted to 0x3f though. So for all cases we should be ok. v2: user LRI variant that can write multiple regs in one go (Damien). We can simply insert one NOP at the end instead of one per register write. Cc: Mika Kuoppala Signed-off-by: Arun Siluvery Reviewed-by: Damien Lespiau [danvet: Add a summary of the MI_LRI length discussion.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5f935d4dfb6a..603148e6dbc3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -680,15 +680,16 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring) if (ret) return ret; - ret = intel_ring_begin(ring, w->count * 3); + ret = intel_ring_begin(ring, (w->count * 2 + 2)); if (ret) return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); for (i = 0; i < w->count; i++) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(ring, w->reg[i].addr); intel_ring_emit(ring, w->reg[i].value); } + intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); -- cgit v1.2.3 From 6402c330a62685c77c32d1bdfa882759c9f8e8a7 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 31 Oct 2014 12:00:26 +0000 Subject: drm/i915: Fix null pointer dereference in ring cleanup code If a ring failed to initialise for any reason then the error path would try to clean up all rings including those that had not yet been allocated. The ring clean up code did a check that the ring was valid before starting its work. Unfortunately, that was after it had already dereferenced the ring to obtain a dev_private pointer. Signed-off-by: John Harrison Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 4 +++- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 803fc38664c4..d7525bd6d810 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1214,11 +1214,13 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) */ void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct drm_i915_private *dev_priv; if (!intel_ring_initialized(ring)) return; + dev_priv = ring->dev->dev_private; + intel_logical_ring_stop(ring); WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); ring->preallocated_lazy_request = NULL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a8f72e8d64e3..f457146ff6a4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1845,12 +1845,15 @@ error: void intel_cleanup_ring_buffer(struct intel_engine_cs *ring) { - struct drm_i915_private *dev_priv = to_i915(ring->dev); - struct intel_ringbuffer *ringbuf = ring->buffer; + struct drm_i915_private *dev_priv; + struct intel_ringbuffer *ringbuf; if (!intel_ring_initialized(ring)) return; + dev_priv = to_i915(ring->dev); + ringbuf = ring->buffer; + intel_stop_ring_buffer(ring); WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0); -- cgit v1.2.3 From 3e470eaaee5c064045a88ecc7a9cf75105bb52d8 Mon Sep 17 00:00:00 2001 From: Arun Siluvery Date: Tue, 28 Oct 2014 18:33:12 +0000 Subject: drm/i915/chv: Remove pre-production workarounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -WaDisableDopClockGating:chv -WaDisableSamplerPowerBypass:chv -WaDisableGunitClockGating:chv -WaDisableFfDopClockGating:chv -WaDisableDopClockGating:chv v2: Remove pre-production WA instead of restricting them based on revision id (Ville) For: VIZ-4090 Signed-off-by: Arun Siluvery Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 12 ------------ drivers/gpu/drm/i915/intel_ringbuffer.c | 8 -------- 2 files changed, 20 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 300d7e503f96..5764936e3a22 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6958,18 +6958,6 @@ static void cherryview_init_clock_gating(struct drm_device *dev) /* WaDisableSDEUnitClockGating:chv */ I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); - - /* WaDisableGunitClockGating:chv (pre-production hw) */ - I915_WRITE(VLV_GUNIT_CLOCK_GATE, I915_READ(VLV_GUNIT_CLOCK_GATE) | - GINT_DIS); - - /* WaDisableFfDopClockGating:chv (pre-production hw) */ - I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, - _MASKED_BIT_ENABLE(GEN8_FF_DOP_CLOCK_GATE_DISABLE)); - - /* WaDisableDopClockGating:chv (pre-production hw) */ - I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | - GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); } static void g4x_init_clock_gating(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f457146ff6a4..70fcf2aa0ca2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -795,14 +795,6 @@ static int chv_init_workarounds(struct intel_engine_cs *ring) WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - /* WaDisableDopClockGating:chv (pre-production hw) */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); - - /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - return 0; } -- cgit v1.2.3 From 605f143320b637846674299c323b1893c93d8494 Mon Sep 17 00:00:00 2001 From: Arun Siluvery Date: Tue, 28 Oct 2014 18:33:13 +0000 Subject: drm/i915/chv: Combine GEN8_ROW_CHICKEN w/a MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WaDisablePartialInstShootdown:chv and WaDisableThreadStallDopClockGating:chv are related to the same register so combine them. Signed-off-by: Arun Siluvery Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 70fcf2aa0ca2..1df79a952291 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -788,12 +788,10 @@ static int chv_init_workarounds(struct intel_engine_cs *ring) struct drm_i915_private *dev_priv = dev->dev_private; /* WaDisablePartialInstShootdown:chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - /* WaDisableThreadStallDopClockGating:chv */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE | + STALL_DOP_GATING_DISABLE); return 0; } -- cgit v1.2.3 From 952890098a14043fe7acc5f595c6306c69baf40d Mon Sep 17 00:00:00 2001 From: Arun Siluvery Date: Tue, 28 Oct 2014 18:33:14 +0000 Subject: drm/i915/chv: Add new workarounds for chv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit +WaForceEnableNonCoherent:chv +WaHdcDisableFetchWhenMasked:chv For: VIZ-4090 Signed-off-by: Arun Siluvery Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d43fa0e627f8..97d3479ad92e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5131,6 +5131,7 @@ enum punit_power_well { /* GEN8 chicken */ #define HDC_CHICKEN0 0x7300 #define HDC_FORCE_NON_COHERENT (1<<4) +#define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11) #define HDC_FENCE_DEST_SLM_DISABLE (1<<14) /* WaCatErrorRejectionIssue */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1df79a952291..a09aae70e579 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -793,6 +793,16 @@ static int chv_init_workarounds(struct intel_engine_cs *ring) PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE | STALL_DOP_GATING_DISABLE); + /* Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + /* WaForceEnableNonCoherent:chv */ + /* WaHdcDisableFetchWhenMasked:chv */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT | + HDC_DONOT_FETCH_MEM_WHEN_MASKED); + return 0; } -- cgit v1.2.3 From 771b9a532483cc45df19823b8dfaa0cecfd45836 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Tue, 11 Nov 2014 16:47:33 +0000 Subject: drm/i915: Initialize workarounds in logical ring mode too Following the legacy ring submission example, update the ring->init_context() hook to support the execlist submission mode. v2: update to use the new workaround macros and cleanup unused code. This takes care of both bdw and chv workarounds. v2.1: Add missing call to init_context() during deferred context creation. v3: Split init_context (emit) in legacy/lrc modes. For lrc, get the ringbuf from the context (Mika/Daniel). v4: Merge init_context interfaces back, the legacy mode only needs the ring, but the lrc mode needs the ring and context (Mika). Issue: VIZ-4092 Issue: GMIN-3475 Change-Id: Ie3d093b2542ab0e2a44b90460533e2f979788d6c Cc: Deepak S Cc: Mika Kuoppala Cc: Daniel Vetter Signed-off-by: Michel Thierry Signed-off-by: Arun Siluvery Reviewed-by: Mika Kuoppala [danvet: Align function paramater lists properly.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 47 ++++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +++- 4 files changed, 54 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 1fb00008623d..d17ff435f276 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -635,7 +635,7 @@ done: if (uninitialized) { if (ring->init_context) { - ret = ring->init_context(ring); + ret = ring->init_context(ring, to); if (ret) DRM_ERROR("ring init context: %d\n", ret); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6025ac754c37..2a1a71933420 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -989,6 +989,44 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) return 0; } +static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring, + struct intel_context *ctx) +{ + int ret, i; + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_workarounds *w = &dev_priv->workarounds; + + if (WARN_ON(w->count == 0)) + return 0; + + ring->gpu_caches_dirty = true; + ret = logical_ring_flush_all_caches(ringbuf); + if (ret) + return ret; + + ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count)); + for (i = 0; i < w->count; i++) { + intel_logical_ring_emit(ringbuf, w->reg[i].addr); + intel_logical_ring_emit(ringbuf, w->reg[i].value); + } + intel_logical_ring_emit(ringbuf, MI_NOOP); + + intel_logical_ring_advance(ringbuf); + + ring->gpu_caches_dirty = true; + ret = logical_ring_flush_all_caches(ringbuf); + if (ret) + return ret; + + return 0; +} + static int gen8_init_common_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; @@ -1032,7 +1070,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - return ret; + return init_workarounds_ring(ring); } static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, @@ -1282,6 +1320,7 @@ static int logical_render_ring_init(struct drm_device *dev) ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; ring->init = gen8_init_render_ring; + ring->init_context = intel_logical_ring_workarounds_emit; ring->cleanup = intel_fini_pipe_control; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; @@ -1763,6 +1802,12 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, } if (ring->id == RCS && !ctx->rcs_initialized) { + if (ring->init_context) { + ret = ring->init_context(ring, ctx); + if (ret) + DRM_ERROR("ring init context: %d\n", ret); + } + ret = intel_lr_context_render_state_init(ring, ctx); if (ret) { DRM_ERROR("Init render state failed: %d\n", ret); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a09aae70e579..ae092589ea0c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -665,7 +665,8 @@ err: return ret; } -static int intel_ring_workarounds_emit(struct intel_engine_cs *ring) +static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, + struct intel_context *ctx) { int ret, i; struct drm_device *dev = ring->dev; @@ -806,7 +807,7 @@ static int chv_init_workarounds(struct intel_engine_cs *ring) return 0; } -static int init_workarounds_ring(struct intel_engine_cs *ring) +int init_workarounds_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 96479c89f4bd..aab2e2f90a74 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -148,7 +148,8 @@ struct intel_engine_cs { int (*init)(struct intel_engine_cs *ring); - int (*init_context)(struct intel_engine_cs *ring); + int (*init_context)(struct intel_engine_cs *ring, + struct intel_context *ctx); void (*write_tail)(struct intel_engine_cs *ring, u32 value); @@ -424,6 +425,8 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev); u64 intel_ring_get_active_head(struct intel_engine_cs *ring); void intel_ring_setup_status_page(struct intel_engine_cs *ring); +int init_workarounds_ring(struct intel_engine_cs *ring); + static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf) { return ringbuf->tail; -- cgit v1.2.3 From 7ba717cf365d79f2b284e508205ec3d4a05fc41b Mon Sep 17 00:00:00 2001 From: Thomas Daniel Date: Thu, 13 Nov 2014 10:28:56 +0000 Subject: drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand Same as with the context, pinning to GGTT regardless is harmful (it badly fragments the GGTT and can even exhaust it). Unfortunately, this case is also more complex than the previous one because we need to map and access the ringbuffer in several places along the execbuffer path (and we cannot make do by leaving the default ringbuffer pinned, as before). Also, the context object itself contains a pointer to the ringbuffer address that we have to keep updated if we are going to allow the ringbuffer to move around. v2: Same as with the context pinning, we cannot really do it during an interrupt. Also, pin the default ringbuffers objects regardless (makes error capture a lot easier). v3: Rebased. Take a pin reference of the ringbuffer for each item in the execlist request queue because the hardware may still be using the ringbuffer after the MI_USER_INTERRUPT to notify the seqno update is executed. The ringbuffer must remain pinned until the context save is complete. No longer pin and unpin ringbuffer in populate_lr_context() - this transient address is meaningless and the pinning can cause a sleep while atomic. v4: Moved ringbuffer pin and unpin into the lr_context_pin functions. Downgraded pinning check BUG_ONs to WARN_ONs. v5: Reinstated WARN_ONs for unexpected execlist states. Removed unused variable. Issue: VIZ-4277 Signed-off-by: Oscar Mateo Signed-off-by: Thomas Daniel Reviewed-by: Akash Goel Reviewed-by: Deepak S Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 102 +++++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_ringbuffer.c | 85 +++++++++++++++----------- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 + 3 files changed, 128 insertions(+), 62 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9560e634c9b8..e588376227ea 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -203,6 +203,9 @@ enum { }; #define GEN8_CTX_ID_SHIFT 32 +static int intel_lr_context_pin(struct intel_engine_cs *ring, + struct intel_context *ctx); + /** * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists * @dev: DRM device. @@ -354,7 +357,9 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); } -static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail) +static int execlists_update_context(struct drm_i915_gem_object *ctx_obj, + struct drm_i915_gem_object *ring_obj, + u32 tail) { struct page *page; uint32_t *reg_state; @@ -363,6 +368,7 @@ static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tai reg_state = kmap_atomic(page); reg_state[CTX_RING_TAIL+1] = tail; + reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj); kunmap_atomic(reg_state); @@ -373,21 +379,25 @@ static void execlists_submit_contexts(struct intel_engine_cs *ring, struct intel_context *to0, u32 tail0, struct intel_context *to1, u32 tail1) { - struct drm_i915_gem_object *ctx_obj0; + struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state; + struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf; struct drm_i915_gem_object *ctx_obj1 = NULL; + struct intel_ringbuffer *ringbuf1 = NULL; - ctx_obj0 = to0->engine[ring->id].state; BUG_ON(!ctx_obj0); WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0)); + WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj)); - execlists_ctx_write_tail(ctx_obj0, tail0); + execlists_update_context(ctx_obj0, ringbuf0->obj, tail0); if (to1) { + ringbuf1 = to1->engine[ring->id].ringbuf; ctx_obj1 = to1->engine[ring->id].state; BUG_ON(!ctx_obj1); WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1)); + WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj)); - execlists_ctx_write_tail(ctx_obj1, tail1); + execlists_update_context(ctx_obj1, ringbuf1->obj, tail1); } execlists_elsp_write(ring, ctx_obj0, ctx_obj1); @@ -537,6 +547,10 @@ static int execlists_context_queue(struct intel_engine_cs *ring, return -ENOMEM; req->ctx = to; i915_gem_context_reference(req->ctx); + + if (to != ring->default_context) + intel_lr_context_pin(ring, to); + req->ring = ring; req->tail = tail; @@ -557,7 +571,7 @@ static int execlists_context_queue(struct intel_engine_cs *ring, if (to == tail_req->ctx) { WARN(tail_req->elsp_submitted != 0, - "More than 2 already-submitted reqs queued\n"); + "More than 2 already-submitted reqs queued\n"); list_del(&tail_req->execlist_link); list_add_tail(&tail_req->execlist_link, &ring->execlist_retired_req_list); @@ -745,6 +759,12 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring) spin_unlock_irqrestore(&ring->execlist_lock, flags); list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) { + struct intel_context *ctx = req->ctx; + struct drm_i915_gem_object *ctx_obj = + ctx->engine[ring->id].state; + + if (ctx_obj && (ctx != ring->default_context)) + intel_lr_context_unpin(ring, ctx); intel_runtime_pm_put(dev_priv); i915_gem_context_unreference(req->ctx); list_del(&req->execlist_link); @@ -816,6 +836,7 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring, struct intel_context *ctx) { struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; int ret = 0; WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex)); @@ -823,9 +844,20 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring, ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0); if (ret) - ctx->engine[ring->id].unpin_count = 0; + goto reset_unpin_count; + + ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf); + if (ret) + goto unpin_ctx_obj; } + return ret; + +unpin_ctx_obj: + i915_gem_object_ggtt_unpin(ctx_obj); +reset_unpin_count: + ctx->engine[ring->id].unpin_count = 0; + return ret; } @@ -833,11 +865,14 @@ void intel_lr_context_unpin(struct intel_engine_cs *ring, struct intel_context *ctx) { struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; if (ctx_obj) { WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex)); - if (--ctx->engine[ring->id].unpin_count == 0) + if (--ctx->engine[ring->id].unpin_count == 0) { + intel_unpin_ringbuffer_obj(ringbuf); i915_gem_object_ggtt_unpin(ctx_obj); + } } } @@ -1595,7 +1630,6 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o { struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_gem_object *ring_obj = ringbuf->obj; struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; struct page *page; uint32_t *reg_state; @@ -1641,7 +1675,9 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base); reg_state[CTX_RING_TAIL+1] = 0; reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base); - reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj); + /* Ring buffer start address is not known until the buffer is pinned. + * It is written to the context image in execlists_update_context() + */ reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base); reg_state[CTX_RING_BUFFER_CONTROL+1] = ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID; @@ -1723,10 +1759,12 @@ void intel_lr_context_free(struct intel_context *ctx) ctx->engine[i].ringbuf; struct intel_engine_cs *ring = ringbuf->ring; + if (ctx == ring->default_context) { + intel_unpin_ringbuffer_obj(ringbuf); + i915_gem_object_ggtt_unpin(ctx_obj); + } intel_destroy_ringbuffer_obj(ringbuf); kfree(ringbuf); - if (ctx == ring->default_context) - i915_gem_object_ggtt_unpin(ctx_obj); drm_gem_object_unreference(&ctx_obj->base); } } @@ -1823,11 +1861,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, if (!ringbuf) { DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", ring->name); - if (is_global_default_ctx) - i915_gem_object_ggtt_unpin(ctx_obj); - drm_gem_object_unreference(&ctx_obj->base); ret = -ENOMEM; - return ret; + goto error_unpin_ctx; } ringbuf->ring = ring; @@ -1840,22 +1875,30 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, ringbuf->space = ringbuf->size; ringbuf->last_retired_head = -1; - /* TODO: For now we put this in the mappable region so that we can reuse - * the existing ringbuffer code which ioremaps it. When we start - * creating many contexts, this will no longer work and we must switch - * to a kmapish interface. - */ - ret = intel_alloc_ringbuffer_obj(dev, ringbuf); - if (ret) { - DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n", + if (ringbuf->obj == NULL) { + ret = intel_alloc_ringbuffer_obj(dev, ringbuf); + if (ret) { + DRM_DEBUG_DRIVER( + "Failed to allocate ringbuffer obj %s: %d\n", ring->name, ret); - goto error; + goto error_free_rbuf; + } + + if (is_global_default_ctx) { + ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf); + if (ret) { + DRM_ERROR( + "Failed to pin and map ringbuffer %s: %d\n", + ring->name, ret); + goto error_destroy_rbuf; + } + } + } ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf); if (ret) { DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); - intel_destroy_ringbuffer_obj(ringbuf); goto error; } @@ -1877,7 +1920,6 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, DRM_ERROR("Init render state failed: %d\n", ret); ctx->engine[ring->id].ringbuf = NULL; ctx->engine[ring->id].state = NULL; - intel_destroy_ringbuffer_obj(ringbuf); goto error; } ctx->rcs_initialized = true; @@ -1886,7 +1928,13 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, return 0; error: + if (is_global_default_ctx) + intel_unpin_ringbuffer_obj(ringbuf); +error_destroy_rbuf: + intel_destroy_ringbuffer_obj(ringbuf); +error_free_rbuf: kfree(ringbuf); +error_unpin_ctx: if (is_global_default_ctx) i915_gem_object_ggtt_unpin(ctx_obj); drm_gem_object_unreference(&ctx_obj->base); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ae092589ea0c..0a4f35e735c3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1722,13 +1722,42 @@ static int init_phys_status_page(struct intel_engine_cs *ring) return 0; } -void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) +void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf) { - if (!ringbuf->obj) - return; - iounmap(ringbuf->virtual_start); + ringbuf->virtual_start = NULL; i915_gem_object_ggtt_unpin(ringbuf->obj); +} + +int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev, + struct intel_ringbuffer *ringbuf) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_gem_object *obj = ringbuf->obj; + int ret; + + ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE); + if (ret) + return ret; + + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) { + i915_gem_object_ggtt_unpin(obj); + return ret; + } + + ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base + + i915_gem_obj_ggtt_offset(obj), ringbuf->size); + if (ringbuf->virtual_start == NULL) { + i915_gem_object_ggtt_unpin(obj); + return -EINVAL; + } + + return 0; +} + +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) +{ drm_gem_object_unreference(&ringbuf->obj->base); ringbuf->obj = NULL; } @@ -1736,12 +1765,7 @@ void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) int intel_alloc_ringbuffer_obj(struct drm_device *dev, struct intel_ringbuffer *ringbuf) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; - int ret; - - if (ringbuf->obj) - return 0; obj = NULL; if (!HAS_LLC(dev)) @@ -1754,30 +1778,9 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev, /* mark ring buffers as read-only from GPU side by default */ obj->gt_ro = 1; - ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE); - if (ret) - goto err_unref; - - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - goto err_unpin; - - ringbuf->virtual_start = - ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj), - ringbuf->size); - if (ringbuf->virtual_start == NULL) { - ret = -EINVAL; - goto err_unpin; - } - ringbuf->obj = obj; - return 0; -err_unpin: - i915_gem_object_ggtt_unpin(obj); -err_unref: - drm_gem_object_unreference(&obj->base); - return ret; + return 0; } static int intel_init_ring_buffer(struct drm_device *dev, @@ -1814,10 +1817,21 @@ static int intel_init_ring_buffer(struct drm_device *dev, goto error; } - ret = intel_alloc_ringbuffer_obj(dev, ringbuf); - if (ret) { - DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret); - goto error; + if (ringbuf->obj == NULL) { + ret = intel_alloc_ringbuffer_obj(dev, ringbuf); + if (ret) { + DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", + ring->name, ret); + goto error; + } + + ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf); + if (ret) { + DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", + ring->name, ret); + intel_destroy_ringbuffer_obj(ringbuf); + goto error; + } } /* Workaround an erratum on the i830 which causes a hang if @@ -1858,6 +1872,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring) intel_stop_ring_buffer(ring); WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0); + intel_unpin_ringbuffer_obj(ringbuf); intel_destroy_ringbuffer_obj(ringbuf); ring->preallocated_lazy_request = NULL; ring->outstanding_lazy_seqno = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 85156567044b..9eb3188595a6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -383,6 +383,9 @@ intel_write_status_page(struct intel_engine_cs *ring, #define I915_GEM_HWS_SCRATCH_INDEX 0x30 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf); +int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev, + struct intel_ringbuffer *ringbuf); void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf); int intel_alloc_ringbuffer_obj(struct drm_device *dev, struct intel_ringbuffer *ringbuf); -- cgit v1.2.3 From 5c6c600354adac5f95fd41b178b084ac0182e14c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 6 Sep 2014 10:28:27 +0100 Subject: drm/i915: Remove DRI1 ring accessors and API With the deprecation of UMS, and by association DRI1, we have a tough choice when updating the ring access routines. We either rewrite the DRI1 routines blindly without testing (so likely to be broken) or take the liberty of declaring them no longer supported and remove them entirely. This takes the latter approach. v2: Also remove the DRI1 sarea updates Signed-off-by: Chris Wilson [danvet: Fix rebase conflicts.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 931 +---------------------------- drivers/gpu/drm/i915/i915_drv.c | 2 - drivers/gpu/drm/i915/i915_drv.h | 27 - drivers/gpu/drm/i915/i915_gem.c | 4 - drivers/gpu/drm/i915/i915_gem_execbuffer.c | 43 +- drivers/gpu/drm/i915/i915_irq.c | 6 - drivers/gpu/drm/i915/intel_display.c | 35 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 104 +--- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 - 9 files changed, 62 insertions(+), 1093 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 56f62112411a..d1562281e607 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -50,883 +50,58 @@ #include #include -#define LP_RING(d) (&((struct drm_i915_private *)(d))->ring[RCS]) - -#define BEGIN_LP_RING(n) \ - intel_ring_begin(LP_RING(dev_priv), (n)) - -#define OUT_RING(x) \ - intel_ring_emit(LP_RING(dev_priv), x) - -#define ADVANCE_LP_RING() \ - __intel_ring_advance(LP_RING(dev_priv)) - -/** - * Lock test for when it's just for synchronization of ring access. - * - * In that case, we don't need to do it when GEM is initialized as nobody else - * has access to the ring. - */ -#define RING_LOCK_TEST_WITH_RETURN(dev, file) do { \ - if (LP_RING(dev->dev_private)->buffer->obj == NULL) \ - LOCK_TEST_WITH_RETURN(dev, file); \ -} while (0) - -static inline u32 -intel_read_legacy_status_page(struct drm_i915_private *dev_priv, int reg) -{ - if (I915_NEED_GFX_HWS(dev_priv->dev)) - return ioread32(dev_priv->dri1.gfx_hws_cpu_addr + reg); - else - return intel_read_status_page(LP_RING(dev_priv), reg); -} - -#define READ_HWSP(dev_priv, reg) intel_read_legacy_status_page(dev_priv, reg) -#define READ_BREADCRUMB(dev_priv) READ_HWSP(dev_priv, I915_BREADCRUMB_INDEX) -#define I915_BREADCRUMB_INDEX 0x21 - -void i915_update_dri1_breadcrumb(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_master_private *master_priv; - - /* - * The dri breadcrumb update races against the drm master disappearing. - * Instead of trying to fix this (this is by far not the only ums issue) - * just don't do the update in kms mode. - */ - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return; - - if (dev->primary->master) { - master_priv = dev->primary->master->driver_priv; - if (master_priv->sarea_priv) - master_priv->sarea_priv->last_dispatch = - READ_BREADCRUMB(dev_priv); - } -} - -static void i915_write_hws_pga(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u32 addr; - - addr = dev_priv->status_page_dmah->busaddr; - if (INTEL_INFO(dev)->gen >= 4) - addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0; - I915_WRITE(HWS_PGA, addr); -} - -/** - * Frees the hardware status page, whether it's a physical address or a virtual - * address set up by the X Server. - */ -static void i915_free_hws(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring = LP_RING(dev_priv); - - if (dev_priv->status_page_dmah) { - drm_pci_free(dev, dev_priv->status_page_dmah); - dev_priv->status_page_dmah = NULL; - } - - if (ring->status_page.gfx_addr) { - ring->status_page.gfx_addr = 0; - iounmap(dev_priv->dri1.gfx_hws_cpu_addr); - } - - /* Need to rewrite hardware status page */ - I915_WRITE(HWS_PGA, 0x1ffff000); -} - -void i915_kernel_lost_context(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_master_private *master_priv; - struct intel_engine_cs *ring = LP_RING(dev_priv); - struct intel_ringbuffer *ringbuf = ring->buffer; - - /* - * We should never lose context on the ring with modesetting - * as we don't expose it to userspace - */ - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return; - - ringbuf->head = I915_READ_HEAD(ring) & HEAD_ADDR; - ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR; - ringbuf->space = ringbuf->head - (ringbuf->tail + I915_RING_FREE_SPACE); - if (ringbuf->space < 0) - ringbuf->space += ringbuf->size; - - if (!dev->primary->master) - return; - - master_priv = dev->primary->master->driver_priv; - if (ringbuf->head == ringbuf->tail && master_priv->sarea_priv) - master_priv->sarea_priv->perf_boxes |= I915_BOX_RING_EMPTY; -} - -static int i915_dma_cleanup(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int i; - - /* Make sure interrupts are disabled here because the uninstall ioctl - * may not have been called from userspace and after dev_private - * is freed, it's too late. - */ - if (dev->irq_enabled) - drm_irq_uninstall(dev); - - mutex_lock(&dev->struct_mutex); - for (i = 0; i < I915_NUM_RINGS; i++) - intel_cleanup_ring_buffer(&dev_priv->ring[i]); - mutex_unlock(&dev->struct_mutex); - - /* Clear the HWS virtual address at teardown */ - if (I915_NEED_GFX_HWS(dev)) - i915_free_hws(dev); - - return 0; -} - -static int i915_initialize(struct drm_device *dev, drm_i915_init_t *init) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; - int ret; - - master_priv->sarea = drm_legacy_getsarea(dev); - if (master_priv->sarea) { - master_priv->sarea_priv = (drm_i915_sarea_t *) - ((u8 *)master_priv->sarea->handle + init->sarea_priv_offset); - } else { - DRM_DEBUG_DRIVER("sarea not found assuming DRI2 userspace\n"); - } - - if (init->ring_size != 0) { - if (LP_RING(dev_priv)->buffer->obj != NULL) { - i915_dma_cleanup(dev); - DRM_ERROR("Client tried to initialize ringbuffer in " - "GEM mode\n"); - return -EINVAL; - } - - ret = intel_render_ring_init_dri(dev, - init->ring_start, - init->ring_size); - if (ret) { - i915_dma_cleanup(dev); - return ret; - } - } - - dev_priv->dri1.cpp = init->cpp; - dev_priv->dri1.back_offset = init->back_offset; - dev_priv->dri1.front_offset = init->front_offset; - dev_priv->dri1.current_page = 0; - if (master_priv->sarea_priv) - master_priv->sarea_priv->pf_current_page = 0; - - /* Allow hardware batchbuffers unless told otherwise. - */ - dev_priv->dri1.allow_batchbuffer = 1; - - return 0; -} - -static int i915_dma_resume(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring = LP_RING(dev_priv); - - DRM_DEBUG_DRIVER("%s\n", __func__); - - if (ring->buffer->virtual_start == NULL) { - DRM_ERROR("can not ioremap virtual address for" - " ring buffer\n"); - return -ENOMEM; - } - - /* Program Hardware Status Page */ - if (!ring->status_page.page_addr) { - DRM_ERROR("Can not find hardware status page\n"); - return -EINVAL; - } - DRM_DEBUG_DRIVER("hw status page @ %p\n", - ring->status_page.page_addr); - if (ring->status_page.gfx_addr != 0) - intel_ring_setup_status_page(ring); - else - i915_write_hws_pga(dev); - - DRM_DEBUG_DRIVER("Enabled hardware status page\n"); - - return 0; -} - static int i915_dma_init(struct drm_device *dev, void *data, struct drm_file *file_priv) { - drm_i915_init_t *init = data; - int retcode = 0; - - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return -ENODEV; - - switch (init->func) { - case I915_INIT_DMA: - retcode = i915_initialize(dev, init); - break; - case I915_CLEANUP_DMA: - retcode = i915_dma_cleanup(dev); - break; - case I915_RESUME_DMA: - retcode = i915_dma_resume(dev); - break; - default: - retcode = -EINVAL; - break; - } - - return retcode; -} - -/* Implement basically the same security restrictions as hardware does - * for MI_BATCH_NON_SECURE. These can be made stricter at any time. - * - * Most of the calculations below involve calculating the size of a - * particular instruction. It's important to get the size right as - * that tells us where the next instruction to check is. Any illegal - * instruction detected will be given a size of zero, which is a - * signal to abort the rest of the buffer. - */ -static int validate_cmd(int cmd) -{ - switch (((cmd >> 29) & 0x7)) { - case 0x0: - switch ((cmd >> 23) & 0x3f) { - case 0x0: - return 1; /* MI_NOOP */ - case 0x4: - return 1; /* MI_FLUSH */ - default: - return 0; /* disallow everything else */ - } - break; - case 0x1: - return 0; /* reserved */ - case 0x2: - return (cmd & 0xff) + 2; /* 2d commands */ - case 0x3: - if (((cmd >> 24) & 0x1f) <= 0x18) - return 1; - - switch ((cmd >> 24) & 0x1f) { - case 0x1c: - return 1; - case 0x1d: - switch ((cmd >> 16) & 0xff) { - case 0x3: - return (cmd & 0x1f) + 2; - case 0x4: - return (cmd & 0xf) + 2; - default: - return (cmd & 0xffff) + 2; - } - case 0x1e: - if (cmd & (1 << 23)) - return (cmd & 0xffff) + 1; - else - return 1; - case 0x1f: - if ((cmd & (1 << 23)) == 0) /* inline vertices */ - return (cmd & 0x1ffff) + 2; - else if (cmd & (1 << 17)) /* indirect random */ - if ((cmd & 0xffff) == 0) - return 0; /* unknown length, too hard */ - else - return (((cmd & 0xffff) + 1) / 2) + 1; - else - return 2; /* indirect sequential */ - default: - return 0; - } - default: - return 0; - } - - return 0; -} - -static int i915_emit_cmds(struct drm_device *dev, int *buffer, int dwords) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int i, ret; - - if ((dwords+1) * sizeof(int) >= LP_RING(dev_priv)->buffer->size - 8) - return -EINVAL; - - for (i = 0; i < dwords;) { - int sz = validate_cmd(buffer[i]); - - if (sz == 0 || i + sz > dwords) - return -EINVAL; - i += sz; - } - - ret = BEGIN_LP_RING((dwords+1)&~1); - if (ret) - return ret; - - for (i = 0; i < dwords; i++) - OUT_RING(buffer[i]); - if (dwords & 1) - OUT_RING(0); - - ADVANCE_LP_RING(); - - return 0; -} - -int -i915_emit_box(struct drm_device *dev, - struct drm_clip_rect *box, - int DR1, int DR4) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int ret; - - if (box->y2 <= box->y1 || box->x2 <= box->x1 || - box->y2 <= 0 || box->x2 <= 0) { - DRM_ERROR("Bad box %d,%d..%d,%d\n", - box->x1, box->y1, box->x2, box->y2); - return -EINVAL; - } - - if (INTEL_INFO(dev)->gen >= 4) { - ret = BEGIN_LP_RING(4); - if (ret) - return ret; - - OUT_RING(GFX_OP_DRAWRECT_INFO_I965); - OUT_RING((box->x1 & 0xffff) | (box->y1 << 16)); - OUT_RING(((box->x2 - 1) & 0xffff) | ((box->y2 - 1) << 16)); - OUT_RING(DR4); - } else { - ret = BEGIN_LP_RING(6); - if (ret) - return ret; - - OUT_RING(GFX_OP_DRAWRECT_INFO); - OUT_RING(DR1); - OUT_RING((box->x1 & 0xffff) | (box->y1 << 16)); - OUT_RING(((box->x2 - 1) & 0xffff) | ((box->y2 - 1) << 16)); - OUT_RING(DR4); - OUT_RING(0); - } - ADVANCE_LP_RING(); - - return 0; -} - -/* XXX: Emitting the counter should really be moved to part of the IRQ - * emit. For now, do it in both places: - */ - -static void i915_emit_breadcrumb(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; - - dev_priv->dri1.counter++; - if (dev_priv->dri1.counter > 0x7FFFFFFFUL) - dev_priv->dri1.counter = 0; - if (master_priv->sarea_priv) - master_priv->sarea_priv->last_enqueue = dev_priv->dri1.counter; - - if (BEGIN_LP_RING(4) == 0) { - OUT_RING(MI_STORE_DWORD_INDEX); - OUT_RING(I915_BREADCRUMB_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - OUT_RING(dev_priv->dri1.counter); - OUT_RING(0); - ADVANCE_LP_RING(); - } -} - -static int i915_dispatch_cmdbuffer(struct drm_device *dev, - drm_i915_cmdbuffer_t *cmd, - struct drm_clip_rect *cliprects, - void *cmdbuf) -{ - int nbox = cmd->num_cliprects; - int i = 0, count, ret; - - if (cmd->sz & 0x3) { - DRM_ERROR("alignment"); - return -EINVAL; - } - - i915_kernel_lost_context(dev); - - count = nbox ? nbox : 1; - - for (i = 0; i < count; i++) { - if (i < nbox) { - ret = i915_emit_box(dev, &cliprects[i], - cmd->DR1, cmd->DR4); - if (ret) - return ret; - } - - ret = i915_emit_cmds(dev, cmdbuf, cmd->sz / 4); - if (ret) - return ret; - } - - i915_emit_breadcrumb(dev); - return 0; -} - -static int i915_dispatch_batchbuffer(struct drm_device *dev, - drm_i915_batchbuffer_t *batch, - struct drm_clip_rect *cliprects) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int nbox = batch->num_cliprects; - int i, count, ret; - - if ((batch->start | batch->used) & 0x7) { - DRM_ERROR("alignment"); - return -EINVAL; - } - - i915_kernel_lost_context(dev); - - count = nbox ? nbox : 1; - for (i = 0; i < count; i++) { - if (i < nbox) { - ret = i915_emit_box(dev, &cliprects[i], - batch->DR1, batch->DR4); - if (ret) - return ret; - } - - if (!IS_I830(dev) && !IS_845G(dev)) { - ret = BEGIN_LP_RING(2); - if (ret) - return ret; - - if (INTEL_INFO(dev)->gen >= 4) { - OUT_RING(MI_BATCH_BUFFER_START | (2 << 6) | MI_BATCH_NON_SECURE_I965); - OUT_RING(batch->start); - } else { - OUT_RING(MI_BATCH_BUFFER_START | (2 << 6)); - OUT_RING(batch->start | MI_BATCH_NON_SECURE); - } - } else { - ret = BEGIN_LP_RING(4); - if (ret) - return ret; - - OUT_RING(MI_BATCH_BUFFER); - OUT_RING(batch->start | MI_BATCH_NON_SECURE); - OUT_RING(batch->start + batch->used - 4); - OUT_RING(0); - } - ADVANCE_LP_RING(); - } - - - if (IS_G4X(dev) || IS_GEN5(dev)) { - if (BEGIN_LP_RING(2) == 0) { - OUT_RING(MI_FLUSH | MI_NO_WRITE_FLUSH | MI_INVALIDATE_ISP); - OUT_RING(MI_NOOP); - ADVANCE_LP_RING(); - } - } - - i915_emit_breadcrumb(dev); - return 0; -} - -static int i915_dispatch_flip(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_master_private *master_priv = - dev->primary->master->driver_priv; - int ret; - - if (!master_priv->sarea_priv) - return -EINVAL; - - DRM_DEBUG_DRIVER("%s: page=%d pfCurrentPage=%d\n", - __func__, - dev_priv->dri1.current_page, - master_priv->sarea_priv->pf_current_page); - - i915_kernel_lost_context(dev); - - ret = BEGIN_LP_RING(10); - if (ret) - return ret; - - OUT_RING(MI_FLUSH | MI_READ_FLUSH); - OUT_RING(0); - - OUT_RING(CMD_OP_DISPLAYBUFFER_INFO | ASYNC_FLIP); - OUT_RING(0); - if (dev_priv->dri1.current_page == 0) { - OUT_RING(dev_priv->dri1.back_offset); - dev_priv->dri1.current_page = 1; - } else { - OUT_RING(dev_priv->dri1.front_offset); - dev_priv->dri1.current_page = 0; - } - OUT_RING(0); - - OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PLANE_A_FLIP); - OUT_RING(0); - - ADVANCE_LP_RING(); - - master_priv->sarea_priv->last_enqueue = dev_priv->dri1.counter++; - - if (BEGIN_LP_RING(4) == 0) { - OUT_RING(MI_STORE_DWORD_INDEX); - OUT_RING(I915_BREADCRUMB_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - OUT_RING(dev_priv->dri1.counter); - OUT_RING(0); - ADVANCE_LP_RING(); - } - - master_priv->sarea_priv->pf_current_page = dev_priv->dri1.current_page; - return 0; -} - -static int i915_quiescent(struct drm_device *dev) -{ - i915_kernel_lost_context(dev); - return intel_ring_idle(LP_RING(dev->dev_private)); + return -ENODEV; } static int i915_flush_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - int ret; - - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return -ENODEV; - - RING_LOCK_TEST_WITH_RETURN(dev, file_priv); - - mutex_lock(&dev->struct_mutex); - ret = i915_quiescent(dev); - mutex_unlock(&dev->struct_mutex); - - return ret; + return -ENODEV; } static int i915_batchbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_master_private *master_priv; - drm_i915_sarea_t *sarea_priv; - drm_i915_batchbuffer_t *batch = data; - int ret; - struct drm_clip_rect *cliprects = NULL; - - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return -ENODEV; - - master_priv = dev->primary->master->driver_priv; - sarea_priv = (drm_i915_sarea_t *) master_priv->sarea_priv; - - if (!dev_priv->dri1.allow_batchbuffer) { - DRM_ERROR("Batchbuffer ioctl disabled\n"); - return -EINVAL; - } - - DRM_DEBUG_DRIVER("i915 batchbuffer, start %x used %d cliprects %d\n", - batch->start, batch->used, batch->num_cliprects); - - RING_LOCK_TEST_WITH_RETURN(dev, file_priv); - - if (batch->num_cliprects < 0) - return -EINVAL; - - if (batch->num_cliprects) { - cliprects = kcalloc(batch->num_cliprects, - sizeof(*cliprects), - GFP_KERNEL); - if (cliprects == NULL) - return -ENOMEM; - - ret = copy_from_user(cliprects, batch->cliprects, - batch->num_cliprects * - sizeof(struct drm_clip_rect)); - if (ret != 0) { - ret = -EFAULT; - goto fail_free; - } - } - - mutex_lock(&dev->struct_mutex); - ret = i915_dispatch_batchbuffer(dev, batch, cliprects); - mutex_unlock(&dev->struct_mutex); - - if (sarea_priv) - sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); - -fail_free: - kfree(cliprects); - - return ret; + return -ENODEV; } static int i915_cmdbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_master_private *master_priv; - drm_i915_sarea_t *sarea_priv; - drm_i915_cmdbuffer_t *cmdbuf = data; - struct drm_clip_rect *cliprects = NULL; - void *batch_data; - int ret; - - DRM_DEBUG_DRIVER("i915 cmdbuffer, buf %p sz %d cliprects %d\n", - cmdbuf->buf, cmdbuf->sz, cmdbuf->num_cliprects); - - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return -ENODEV; - - master_priv = dev->primary->master->driver_priv; - sarea_priv = (drm_i915_sarea_t *) master_priv->sarea_priv; - - RING_LOCK_TEST_WITH_RETURN(dev, file_priv); - - if (cmdbuf->num_cliprects < 0) - return -EINVAL; - - batch_data = kmalloc(cmdbuf->sz, GFP_KERNEL); - if (batch_data == NULL) - return -ENOMEM; - - ret = copy_from_user(batch_data, cmdbuf->buf, cmdbuf->sz); - if (ret != 0) { - ret = -EFAULT; - goto fail_batch_free; - } - - if (cmdbuf->num_cliprects) { - cliprects = kcalloc(cmdbuf->num_cliprects, - sizeof(*cliprects), GFP_KERNEL); - if (cliprects == NULL) { - ret = -ENOMEM; - goto fail_batch_free; - } - - ret = copy_from_user(cliprects, cmdbuf->cliprects, - cmdbuf->num_cliprects * - sizeof(struct drm_clip_rect)); - if (ret != 0) { - ret = -EFAULT; - goto fail_clip_free; - } - } - - mutex_lock(&dev->struct_mutex); - ret = i915_dispatch_cmdbuffer(dev, cmdbuf, cliprects, batch_data); - mutex_unlock(&dev->struct_mutex); - if (ret) { - DRM_ERROR("i915_dispatch_cmdbuffer failed\n"); - goto fail_clip_free; - } - - if (sarea_priv) - sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); - -fail_clip_free: - kfree(cliprects); -fail_batch_free: - kfree(batch_data); - - return ret; -} - -static int i915_emit_irq(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; - - i915_kernel_lost_context(dev); - - DRM_DEBUG_DRIVER("\n"); - - dev_priv->dri1.counter++; - if (dev_priv->dri1.counter > 0x7FFFFFFFUL) - dev_priv->dri1.counter = 1; - if (master_priv->sarea_priv) - master_priv->sarea_priv->last_enqueue = dev_priv->dri1.counter; - - if (BEGIN_LP_RING(4) == 0) { - OUT_RING(MI_STORE_DWORD_INDEX); - OUT_RING(I915_BREADCRUMB_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - OUT_RING(dev_priv->dri1.counter); - OUT_RING(MI_USER_INTERRUPT); - ADVANCE_LP_RING(); - } - - return dev_priv->dri1.counter; -} - -static int i915_wait_irq(struct drm_device *dev, int irq_nr) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; - int ret = 0; - struct intel_engine_cs *ring = LP_RING(dev_priv); - - DRM_DEBUG_DRIVER("irq_nr=%d breadcrumb=%d\n", irq_nr, - READ_BREADCRUMB(dev_priv)); - - if (READ_BREADCRUMB(dev_priv) >= irq_nr) { - if (master_priv->sarea_priv) - master_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); - return 0; - } - - if (master_priv->sarea_priv) - master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT; - - if (ring->irq_get(ring)) { - DRM_WAIT_ON(ret, ring->irq_queue, 3 * HZ, - READ_BREADCRUMB(dev_priv) >= irq_nr); - ring->irq_put(ring); - } else if (wait_for(READ_BREADCRUMB(dev_priv) >= irq_nr, 3000)) - ret = -EBUSY; - - if (ret == -EBUSY) { - DRM_ERROR("EBUSY -- rec: %d emitted: %d\n", - READ_BREADCRUMB(dev_priv), (int)dev_priv->dri1.counter); - } - - return ret; + return -ENODEV; } -/* Needs the lock as it touches the ring. - */ static int i915_irq_emit(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - drm_i915_irq_emit_t *emit = data; - int result; - - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return -ENODEV; - - if (!dev_priv || !LP_RING(dev_priv)->buffer->virtual_start) { - DRM_ERROR("called with no initialization\n"); - return -EINVAL; - } - - RING_LOCK_TEST_WITH_RETURN(dev, file_priv); - - mutex_lock(&dev->struct_mutex); - result = i915_emit_irq(dev); - mutex_unlock(&dev->struct_mutex); - - if (copy_to_user(emit->irq_seq, &result, sizeof(int))) { - DRM_ERROR("copy_to_user\n"); - return -EFAULT; - } - - return 0; + return -ENODEV; } -/* Doesn't need the hardware lock. - */ static int i915_irq_wait(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - drm_i915_irq_wait_t *irqwait = data; - - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return -ENODEV; - - if (!dev_priv) { - DRM_ERROR("called with no initialization\n"); - return -EINVAL; - } - - return i915_wait_irq(dev, irqwait->irq_seq); + return -ENODEV; } static int i915_vblank_pipe_get(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - drm_i915_vblank_pipe_t *pipe = data; - - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return -ENODEV; - - if (!dev_priv) { - DRM_ERROR("called with no initialization\n"); - return -EINVAL; - } - - pipe->pipe = DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B; - - return 0; + return -ENODEV; } -/** - * Schedule buffer swap at given vertical blank. - */ static int i915_vblank_swap(struct drm_device *dev, void *data, struct drm_file *file_priv) { - /* The delayed swap mechanism was fundamentally racy, and has been - * removed. The model was that the client requested a delayed flip/swap - * from the kernel, then waited for vblank before continuing to perform - * rendering. The problem was that the kernel might wake the client - * up before it dispatched the vblank swap (since the lock has to be - * held while touching the ringbuffer), in which case the client would - * clear and start the next frame before the swap occurred, and - * flicker would occur in addition to likely missing the vblank. - * - * In the absence of this ioctl, userland falls back to a correct path - * of waiting for a vblank, then dispatching the swap on its own. - * Context switching to userland and back is plenty fast enough for - * meeting the requirements of vblank swapping. - */ - return -EINVAL; + return -ENODEV; } static int i915_flip_bufs(struct drm_device *dev, void *data, struct drm_file *file_priv) { - int ret; - - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return -ENODEV; - - DRM_DEBUG_DRIVER("%s\n", __func__); - - RING_LOCK_TEST_WITH_RETURN(dev, file_priv); - - mutex_lock(&dev->struct_mutex); - ret = i915_dispatch_flip(dev); - mutex_unlock(&dev->struct_mutex); - - return ret; + return -ENODEV; } static int i915_getparam(struct drm_device *dev, void *data, @@ -943,14 +118,11 @@ static int i915_getparam(struct drm_device *dev, void *data, switch (param->param) { case I915_PARAM_IRQ_ACTIVE: - value = dev->pdev->irq ? 1 : 0; - break; + return -ENODEV; case I915_PARAM_ALLOW_BATCHBUFFER: - value = dev_priv->dri1.allow_batchbuffer ? 1 : 0; - break; + return -ENODEV; case I915_PARAM_LAST_DISPATCH: - value = READ_BREADCRUMB(dev_priv); - break; + return -ENODEV; case I915_PARAM_CHIPSET_ID: value = dev->pdev->device; break; @@ -1056,12 +228,10 @@ static int i915_setparam(struct drm_device *dev, void *data, switch (param->param) { case I915_SETPARAM_USE_MI_BATCHBUFFER_START: - break; case I915_SETPARAM_TEX_LRU_LOG_GRANULARITY: - break; case I915_SETPARAM_ALLOW_BATCHBUFFER: - dev_priv->dri1.allow_batchbuffer = param->value ? 1 : 0; - break; + return -ENODEV; + case I915_SETPARAM_NUM_USED_FENCES: if (param->value > dev_priv->num_fence_regs || param->value < 0) @@ -1081,49 +251,7 @@ static int i915_setparam(struct drm_device *dev, void *data, static int i915_set_status_page(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - drm_i915_hws_addr_t *hws = data; - struct intel_engine_cs *ring; - - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return -ENODEV; - - if (!I915_NEED_GFX_HWS(dev)) - return -EINVAL; - - if (!dev_priv) { - DRM_ERROR("called with no initialization\n"); - return -EINVAL; - } - - if (drm_core_check_feature(dev, DRIVER_MODESET)) { - WARN(1, "tried to set status page when mode setting active\n"); - return 0; - } - - DRM_DEBUG_DRIVER("set status page addr 0x%08x\n", (u32)hws->addr); - - ring = LP_RING(dev_priv); - ring->status_page.gfx_addr = hws->addr & (0x1ffff<<12); - - dev_priv->dri1.gfx_hws_cpu_addr = - ioremap_wc(dev_priv->gtt.mappable_base + hws->addr, 4096); - if (dev_priv->dri1.gfx_hws_cpu_addr == NULL) { - i915_dma_cleanup(dev); - ring->status_page.gfx_addr = 0; - DRM_ERROR("can not ioremap virtual address for" - " G33 hw status page\n"); - return -ENOMEM; - } - - memset_io(dev_priv->dri1.gfx_hws_cpu_addr, 0, PAGE_SIZE); - I915_WRITE(HWS_PGA, ring->status_page.gfx_addr); - - DRM_DEBUG_DRIVER("load hws HWS_PGA with gfx mem 0x%x\n", - ring->status_page.gfx_addr); - DRM_DEBUG_DRIVER("load hws at %p\n", - ring->status_page.page_addr); - return 0; + return -ENODEV; } static int i915_get_bridge_dev(struct drm_device *dev) @@ -1401,30 +529,6 @@ out: return ret; } -int i915_master_create(struct drm_device *dev, struct drm_master *master) -{ - struct drm_i915_master_private *master_priv; - - master_priv = kzalloc(sizeof(*master_priv), GFP_KERNEL); - if (!master_priv) - return -ENOMEM; - - master->driver_priv = master_priv; - return 0; -} - -void i915_master_destroy(struct drm_device *dev, struct drm_master *master) -{ - struct drm_i915_master_private *master_priv = master->driver_priv; - - if (!master_priv) - return; - - kfree(master_priv); - - master->driver_priv = NULL; -} - #if IS_ENABLED(CONFIG_FB) static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv) { @@ -1899,9 +1003,6 @@ int i915_driver_unload(struct drm_device *dev) i915_gem_context_fini(dev); mutex_unlock(&dev->struct_mutex); i915_gem_cleanup_stolen(dev); - - if (!I915_NEED_GFX_HWS(dev)) - i915_free_hws(dev); } intel_teardown_gmbus(dev); @@ -1966,8 +1067,6 @@ void i915_driver_lastclose(struct drm_device *dev) } i915_gem_lastclose(dev); - - i915_dma_cleanup(dev); } void i915_driver_preclose(struct drm_device *dev, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 19558d61b032..c9f248acbd69 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1573,8 +1573,6 @@ static struct drm_driver driver = { .resume = i915_resume_legacy, .device_is_agp = i915_driver_device_is_agp, - .master_create = i915_master_create, - .master_destroy = i915_master_destroy, #if defined(CONFIG_DEBUG_FS) .debugfs_init = i915_debugfs_init, .debugfs_cleanup = i915_debugfs_cleanup, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1097d20a4f35..100296475fb4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -327,12 +327,6 @@ struct intel_opregion { struct intel_overlay; struct intel_overlay_error_state; -struct drm_local_map; - -struct drm_i915_master_private { - struct drm_local_map *sarea; - struct _drm_i915_sarea *sarea_priv; -}; #define I915_FENCE_REG_NONE -1 #define I915_MAX_NUM_FENCES 32 /* 32 fences + sign bit for FENCE_REG_NONE */ @@ -1127,19 +1121,6 @@ struct i915_power_domains { struct i915_power_well *power_wells; }; -struct i915_dri1_state { - unsigned allow_batchbuffer : 1; - u32 __iomem *gfx_hws_cpu_addr; - - unsigned int cpp; - int back_offset; - int front_offset; - int current_page; - int page_flipping; - - uint32_t counter; -}; - struct i915_ums_state { /** * Flag if the X Server, and thus DRM, is not currently in @@ -1787,9 +1768,6 @@ struct drm_i915_private { uint32_t bios_vgacntr; - /* Old dri1 support infrastructure, beware the dragons ya fools entering - * here! */ - struct i915_dri1_state dri1; /* Old ums support infrastructure, same warning applies. */ struct i915_ums_state ums; @@ -2351,8 +2329,6 @@ struct i915_params { extern struct i915_params i915 __read_mostly; /* i915_dma.c */ -void i915_update_dri1_breadcrumb(struct drm_device *dev); -extern void i915_kernel_lost_context(struct drm_device * dev); extern int i915_driver_load(struct drm_device *, unsigned long flags); extern int i915_driver_unload(struct drm_device *); extern int i915_driver_open(struct drm_device *dev, struct drm_file *file); @@ -2366,9 +2342,6 @@ extern int i915_driver_device_is_agp(struct drm_device * dev); extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); #endif -extern int i915_emit_box(struct drm_device *dev, - struct drm_clip_rect *box, - int DR1, int DR4); extern int intel_gpu_reset(struct drm_device *dev); extern int i915_reset(struct drm_device *dev); extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7f95d8ff28af..7985f7b28325 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4664,7 +4664,6 @@ i915_gem_suspend(struct drm_device *dev) if (!drm_core_check_feature(dev, DRIVER_MODESET)) i915_gem_evict_everything(dev); - i915_kernel_lost_context(dev); i915_gem_stop_ringbuffers(dev); /* Hack! Don't let anybody do execbuf while we don't control the chip. @@ -4963,9 +4962,6 @@ int i915_gem_init(struct drm_device *dev) } mutex_unlock(&dev->struct_mutex); - /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */ - if (!drm_core_check_feature(dev, DRIVER_MODESET)) - dev_priv->dri1.allow_batchbuffer = 1; return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e1ed85a6dc6d..b16eee061990 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1020,6 +1020,47 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } +static int +i915_emit_box(struct intel_engine_cs *ring, + struct drm_clip_rect *box, + int DR1, int DR4) +{ + int ret; + + if (box->y2 <= box->y1 || box->x2 <= box->x1 || + box->y2 <= 0 || box->x2 <= 0) { + DRM_ERROR("Bad box %d,%d..%d,%d\n", + box->x1, box->y1, box->x2, box->y2); + return -EINVAL; + } + + if (INTEL_INFO(ring->dev)->gen >= 4) { + ret = intel_ring_begin(ring, 4); + if (ret) + return ret; + + intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO_I965); + intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16); + intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16); + intel_ring_emit(ring, DR4); + } else { + ret = intel_ring_begin(ring, 6); + if (ret) + return ret; + + intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO); + intel_ring_emit(ring, DR1); + intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16); + intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16); + intel_ring_emit(ring, DR4); + intel_ring_emit(ring, 0); + } + intel_ring_advance(ring); + + return 0; +} + + int i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, @@ -1148,7 +1189,7 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, exec_len = args->batch_len; if (cliprects) { for (i = 0; i < args->num_cliprects; i++) { - ret = i915_emit_box(dev, &cliprects[i], + ret = i915_emit_box(ring, &cliprects[i], args->DR1, args->DR4); if (ret) goto error; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 283756fe48d3..8d169e152d1e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3764,8 +3764,6 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) I915_WRITE16(IIR, iir & ~flip_mask); new_iir = I915_READ16(IIR); /* Flush posted writes */ - i915_update_dri1_breadcrumb(dev); - if (iir & I915_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[RCS]); @@ -4002,8 +4000,6 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) iir = new_iir; } while (iir & ~flip_mask); - i915_update_dri1_breadcrumb(dev); - return ret; } @@ -4231,8 +4227,6 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) iir = new_iir; } - i915_update_dri1_breadcrumb(dev); - return ret; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6060b71d9301..9c983e4c33bd 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5185,36 +5185,6 @@ static void i9xx_crtc_off(struct drm_crtc *crtc) { } -static void intel_crtc_update_sarea(struct drm_crtc *crtc, - bool enabled) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_master_private *master_priv; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; - - if (!dev->primary->master) - return; - - master_priv = dev->primary->master->driver_priv; - if (!master_priv->sarea_priv) - return; - - switch (pipe) { - case 0: - master_priv->sarea_priv->pipeA_w = enabled ? crtc->mode.hdisplay : 0; - master_priv->sarea_priv->pipeA_h = enabled ? crtc->mode.vdisplay : 0; - break; - case 1: - master_priv->sarea_priv->pipeB_w = enabled ? crtc->mode.hdisplay : 0; - master_priv->sarea_priv->pipeB_h = enabled ? crtc->mode.vdisplay : 0; - break; - default: - DRM_ERROR("Can't update pipe %c in SAREA\n", pipe_name(pipe)); - break; - } -} - /* Master function to enable/disable CRTC and corresponding power wells */ void intel_crtc_control(struct drm_crtc *crtc, bool enable) { @@ -5258,8 +5228,6 @@ void intel_crtc_update_dpms(struct drm_crtc *crtc) enable |= intel_encoder->connectors_active; intel_crtc_control(crtc, enable); - - intel_crtc_update_sarea(crtc, enable); } static void intel_crtc_disable(struct drm_crtc *crtc) @@ -5274,7 +5242,6 @@ static void intel_crtc_disable(struct drm_crtc *crtc) WARN_ON(!crtc->enabled); dev_priv->display.crtc_disable(crtc); - intel_crtc_update_sarea(crtc, false); dev_priv->display.off(crtc); if (crtc->primary->fb) { @@ -8369,7 +8336,7 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, uint32_t width, uint32_t height) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; unsigned old_width; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0a4f35e735c3..1d01b51ff058 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -589,14 +589,10 @@ static int init_ring_common(struct intel_engine_cs *ring) goto out; } - if (!drm_core_check_feature(ring->dev, DRIVER_MODESET)) - i915_kernel_lost_context(ring->dev); - else { - ringbuf->head = I915_READ_HEAD(ring); - ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR; - ringbuf->space = intel_ring_space(ringbuf); - ringbuf->last_retired_head = -1; - } + ringbuf->head = I915_READ_HEAD(ring); + ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR; + ringbuf->space = intel_ring_space(ringbuf); + ringbuf->last_retired_head = -1; memset(&ring->hangcheck, 0, sizeof(ring->hangcheck)); @@ -1958,13 +1954,6 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n) break; } - if (!drm_core_check_feature(dev, DRIVER_MODESET) && - dev->primary->master) { - struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; - if (master_priv->sarea_priv) - master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT; - } - msleep(1); if (dev_priv->mm.interruptible && signal_pending(current)) { @@ -2455,91 +2444,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) return intel_init_ring_buffer(dev, ring); } -int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring = &dev_priv->ring[RCS]; - struct intel_ringbuffer *ringbuf = ring->buffer; - int ret; - - if (ringbuf == NULL) { - ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL); - if (!ringbuf) - return -ENOMEM; - ring->buffer = ringbuf; - } - - ring->name = "render ring"; - ring->id = RCS; - ring->mmio_base = RENDER_RING_BASE; - - if (INTEL_INFO(dev)->gen >= 6) { - /* non-kms not supported on gen6+ */ - ret = -ENODEV; - goto err_ringbuf; - } - - /* Note: gem is not supported on gen5/ilk without kms (the corresponding - * gem_init ioctl returns with -ENODEV). Hence we do not need to set up - * the special gen5 functions. */ - ring->add_request = i9xx_add_request; - if (INTEL_INFO(dev)->gen < 4) - ring->flush = gen2_render_ring_flush; - else - ring->flush = gen4_render_ring_flush; - ring->get_seqno = ring_get_seqno; - ring->set_seqno = ring_set_seqno; - if (IS_GEN2(dev)) { - ring->irq_get = i8xx_ring_get_irq; - ring->irq_put = i8xx_ring_put_irq; - } else { - ring->irq_get = i9xx_ring_get_irq; - ring->irq_put = i9xx_ring_put_irq; - } - ring->irq_enable_mask = I915_USER_INTERRUPT; - ring->write_tail = ring_write_tail; - if (INTEL_INFO(dev)->gen >= 4) - ring->dispatch_execbuffer = i965_dispatch_execbuffer; - else if (IS_I830(dev) || IS_845G(dev)) - ring->dispatch_execbuffer = i830_dispatch_execbuffer; - else - ring->dispatch_execbuffer = i915_dispatch_execbuffer; - ring->init = init_render_ring; - ring->cleanup = render_ring_cleanup; - - ring->dev = dev; - INIT_LIST_HEAD(&ring->active_list); - INIT_LIST_HEAD(&ring->request_list); - - ringbuf->size = size; - ringbuf->effective_size = ringbuf->size; - if (IS_I830(ring->dev) || IS_845G(ring->dev)) - ringbuf->effective_size -= 2 * CACHELINE_BYTES; - - ringbuf->virtual_start = ioremap_wc(start, size); - if (ringbuf->virtual_start == NULL) { - DRM_ERROR("can not ioremap virtual address for" - " ring buffer\n"); - ret = -ENOMEM; - goto err_ringbuf; - } - - if (!I915_NEED_GFX_HWS(dev)) { - ret = init_phys_status_page(ring); - if (ret) - goto err_vstart; - } - - return 0; - -err_vstart: - iounmap(ringbuf->virtual_start); -err_ringbuf: - kfree(ringbuf); - ring->buffer = NULL; - return ret; -} - int intel_init_bsd_ring_buffer(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9eb3188595a6..fe426cff598b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -448,7 +448,4 @@ static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno) ring->trace_irq_seqno = seqno; } -/* DRI warts */ -int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size); - #endif /* _INTEL_RINGBUFFER_H_ */ -- cgit v1.2.3 From 98533251b0bbfa5f24c502b9ab2f01ccb25c26b8 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 8 Dec 2014 17:33:51 +0000 Subject: drm/i915/bdw: Fix the write setting up the WIZ hashing mode I was playing with clang and oh surprise! a warning trigerred by -Wshift-overflow (gcc doesn't have this one): WA_SET_BIT_MASKED(GEN7_GT_MODE, GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); drivers/gpu/drm/i915/intel_ringbuffer.c:786:2: warning: signed shift result (0x28002000000) requires 43 bits to represent, but 'int' only has 32 bits [-Wshift-overflow] WA_SET_BIT_MASKED(GEN7_GT_MODE, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/intel_ringbuffer.c:737:15: note: expanded from macro 'WA_SET_BIT_MASKED' WA_REG(addr, _MASKED_BIT_ENABLE(mask), (mask) & 0xffff) Turned out GEN6_WIZ_HASHING_MASK was already shifted by 16, and we were trying to shift it a bit more. The other thing is that it's not the usual case of setting WA bits here, we need to have separate mask and value. To fix this, I've introduced a new _MASKED_FIELD() macro that takes both the (unshifted) mask and the desired value and the rest of the patch ripples through from it. This bug was introduced when reworking the WA emission in: Commit 7225342ab501befdb64bcec76ded41f5897c0855 Author: Mika Kuoppala Date: Tue Oct 7 17:21:26 2014 +0300 drm/i915: Build workaround list in ring initialization v2: Invert the order of the mask and value arguments (Daniel Vetter) Rewrite _MASKED_BIT_ENABLE() and _MASKED_BIT_DISABLE() with _MASKED_FIELD() (Jani Nikula) Make sure we only evaluate 'a' once in _MASKED_BIT_ENABLE() (Dave Gordon) Add check to ensure the value is within the mask boundaries (Chris Wilson) v3: Ensure the the value and mask are 16 bits (Dave Gordon) Cc: Mika Kuoppala Cc: Arun Siluvery Signed-off-by: Damien Lespiau Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 17 ++++++++++++++--- drivers/gpu/drm/i915/intel_pm.c | 6 +++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++++++-- 3 files changed, 23 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 544675895c8d..b607bbe55261 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -32,8 +32,19 @@ #define _PIPE3(pipe, a, b, c) ((pipe) == PIPE_A ? (a) : \ (pipe) == PIPE_B ? (b) : (c)) -#define _MASKED_BIT_ENABLE(a) (((a) << 16) | (a)) -#define _MASKED_BIT_DISABLE(a) ((a) << 16) +#define _MASKED_FIELD(mask, value) ({ \ + if (__builtin_constant_p(mask)) \ + BUILD_BUG_ON_MSG(((mask) & 0xffff0000), "Incorrect mask"); \ + if (__builtin_constant_p(value)) \ + BUILD_BUG_ON_MSG((value) & 0xffff0000, "Incorrect value"); \ + if (__builtin_constant_p(mask) && __builtin_constant_p(value)) \ + BUILD_BUG_ON_MSG((value) & ~(mask), \ + "Incorrect value for mask"); \ + (mask) << 16 | (value); }) +#define _MASKED_BIT_ENABLE(a) ({ typeof(a) _a = (a); _MASKED_FIELD(_a, _a); }) +#define _MASKED_BIT_DISABLE(a) (_MASKED_FIELD((a), 0)) + + /* PCI config space */ @@ -1282,7 +1293,7 @@ enum punit_power_well { #define GEN6_WIZ_HASHING_8x8 GEN6_WIZ_HASHING(0, 0) #define GEN6_WIZ_HASHING_8x4 GEN6_WIZ_HASHING(0, 1) #define GEN6_WIZ_HASHING_16x4 GEN6_WIZ_HASHING(1, 0) -#define GEN6_WIZ_HASHING_MASK (GEN6_WIZ_HASHING(1, 1) << 16) +#define GEN6_WIZ_HASHING_MASK GEN6_WIZ_HASHING(1, 1) #define GEN6_TD_FOUR_ROW_DISPATCH_DISABLE (1 << 5) #define GFX_MODE 0x02520 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9af0af49382e..1f4b56e273c8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6508,7 +6508,7 @@ static void gen6_init_clock_gating(struct drm_device *dev) * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ I915_WRITE(GEN6_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); ilk_init_lp_watermarks(dev); @@ -6706,7 +6706,7 @@ static void haswell_init_clock_gating(struct drm_device *dev) * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ I915_WRITE(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -6803,7 +6803,7 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ I915_WRITE(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); snpcr &= ~GEN6_MBC_SNPCR_MASK; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1d01b51ff058..28db934b2359 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -729,6 +729,9 @@ static int wa_add(struct drm_i915_private *dev_priv, #define WA_CLR_BIT_MASKED(addr, mask) \ WA_REG(addr, _MASKED_BIT_DISABLE(mask), (mask) & 0xffff) +#define WA_SET_FIELD_MASKED(addr, mask, value) \ + WA_REG(addr, _MASKED_FIELD(mask, value), mask) + #define WA_SET_BIT(addr, mask) WA_REG(addr, I915_READ(addr) | (mask), mask) #define WA_CLR_BIT(addr, mask) WA_REG(addr, I915_READ(addr) & ~(mask), mask) @@ -773,8 +776,9 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ - WA_SET_BIT_MASKED(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); return 0; } -- cgit v1.2.3 From cf4b0de6a3f6e1814c45206a8b175d09b265bb16 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 8 Dec 2014 17:35:37 +0000 Subject: drm/i915: Invert the mask and val arguments in wa_add() and WA_REG() While trying to unify the order of those arguments throughout the driver, Daniel noticed what we were inverting them in this part of the code. Suggested-by: Daniel Vetter Cc: Daniel Vetter Signed-off-by: Damien Lespiau Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_ringbuffer.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 28db934b2359..ef05af02763a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -701,7 +701,7 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, } static int wa_add(struct drm_i915_private *dev_priv, - const u32 addr, const u32 val, const u32 mask) + const u32 addr, const u32 mask, const u32 val) { const u32 idx = dev_priv->workarounds.count; @@ -717,25 +717,25 @@ static int wa_add(struct drm_i915_private *dev_priv, return 0; } -#define WA_REG(addr, val, mask) { \ - const int r = wa_add(dev_priv, (addr), (val), (mask)); \ +#define WA_REG(addr, mask, val) { \ + const int r = wa_add(dev_priv, (addr), (mask), (val)); \ if (r) \ return r; \ } #define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, _MASKED_BIT_ENABLE(mask), (mask) & 0xffff) + WA_REG(addr, (mask) & 0xffff, _MASKED_BIT_ENABLE(mask)) #define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, _MASKED_BIT_DISABLE(mask), (mask) & 0xffff) + WA_REG(addr, (mask) & 0xffff, _MASKED_BIT_DISABLE(mask)) #define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, _MASKED_FIELD(mask, value), mask) + WA_REG(addr, mask, _MASKED_FIELD(mask, value)) -#define WA_SET_BIT(addr, mask) WA_REG(addr, I915_READ(addr) | (mask), mask) -#define WA_CLR_BIT(addr, mask) WA_REG(addr, I915_READ(addr) & ~(mask), mask) +#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) +#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) -#define WA_WRITE(addr, val) WA_REG(addr, val, 0xffffffff) +#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) static int bdw_init_workarounds(struct intel_engine_cs *ring) { -- cgit v1.2.3 From 26459343e0fac1ea4cb5192c490e3e519eed74dd Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 8 Dec 2014 17:35:38 +0000 Subject: drm/i915: Remove '& 0xffff' from the mask given to WA_REG() We may be hidding bugs by doing that, so let remove it and have the actual mask value shine through, for better or worse. Signed-off-by: Damien Lespiau Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ef05af02763a..9f445e9a75d1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -724,10 +724,10 @@ static int wa_add(struct drm_i915_private *dev_priv, } #define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask) & 0xffff, _MASKED_BIT_ENABLE(mask)) + WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) #define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask) & 0xffff, _MASKED_BIT_DISABLE(mask)) + WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) #define WA_SET_FIELD_MASKED(addr, mask, value) \ WA_REG(addr, mask, _MASKED_FIELD(mask, value)) -- cgit v1.2.3 From 148b83d0815a3778c8949e6a97cb798cbaa0efb3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 16 Dec 2014 08:44:31 +0000 Subject: drm/i915: Invalidate media caches on gen7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the gen7 pipe control there is an extra bit to flush the media caches, so let's set it during cache invalidation flushes. v2: Rename to MEDIA_STATE_CLEAR to be more inline with spec. Cc: Simon Farnsworth Cc: Ville Syrjälä Cc: Daniel Vetter Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index eefdc238f70b..58009558cc94 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -395,6 +395,7 @@ #define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) +#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9f445e9a75d1..3d6bc8d56e67 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -362,6 +362,7 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; /* * TLB invalidate requires a post-sync write. */ -- cgit v1.2.3 From add284a3a2481e759d6bec35f6444c32c8ddc383 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 16 Dec 2014 08:44:32 +0000 Subject: drm/i915: Force the CS stall for invalidate flushes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to act as a full command barrier by itself, we need to tell the pipecontrol to actually stall the command streamer while the flush runs. We require the full command barrier before operations like MI_SET_CONTEXT, which currently rely on a prior invalidate flush. References: https://bugs.freedesktop.org/show_bug.cgi?id=83677 Cc: Simon Farnsworth Cc: Daniel Vetter Cc: Ville Syrjälä Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3d6bc8d56e67..c7bc93d28d84 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -369,6 +369,8 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, flags |= PIPE_CONTROL_QW_WRITE; flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; + /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache * invalidate bit set. */ -- cgit v1.2.3