Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
 -rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 351
 1 file changed, 220 insertions(+), 131 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e5b3c6dbd467..441e2502b889 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -317,29 +317,6 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
return 0;
}
-static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
-{
- int ret;
-
- if (!ring->fbc_dirty)
- return 0;
-
- ret = intel_ring_begin(ring, 6);
- if (ret)
- return ret;
- /* WaFbcNukeOn3DBlt:ivb/hsw */
- intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
- intel_ring_emit(ring, MSG_FBC_REND_STATE);
- intel_ring_emit(ring, value);
- intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
- intel_ring_emit(ring, MSG_FBC_REND_STATE);
- intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
- intel_ring_advance(ring);
-
- ring->fbc_dirty = false;
- return 0;
-}
-
static int
gen7_render_ring_flush(struct intel_engine_cs *ring,
u32 invalidate_domains, u32 flush_domains)
@@ -398,9 +375,6 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
intel_ring_emit(ring, 0);
intel_ring_advance(ring);
- if (!invalidate_domains && flush_domains)
- return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
-
return 0;
}
@@ -458,14 +432,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
return ret;
}
- ret = gen8_emit_pipe_control(ring, flags, scratch_addr);
- if (ret)
- return ret;
-
- if (!invalidate_domains && flush_domains)
- return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
-
- return 0;
+ return gen8_emit_pipe_control(ring, flags, scratch_addr);
}
static void ring_write_tail(struct intel_engine_cs *ring,
@@ -502,6 +469,68 @@ static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
I915_WRITE(HWS_PGA, addr);
}
+static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
+{
+ struct drm_device *dev = ring->dev;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
+ u32 mmio = 0;
+
+ /* The ring status page addresses are no longer next to the rest of
+ * the ring registers as of gen7.
+ */
+ if (IS_GEN7(dev)) {
+ switch (ring->id) {
+ case RCS:
+ mmio = RENDER_HWS_PGA_GEN7;
+ break;
+ case BCS:
+ mmio = BLT_HWS_PGA_GEN7;
+ break;
+ /*
+ * VCS2 doesn't actually exist on Gen7; it is listed here
+ * only to silence gcc's switch check warning.
+ */
+ case VCS2:
+ case VCS:
+ mmio = BSD_HWS_PGA_GEN7;
+ break;
+ case VECS:
+ mmio = VEBOX_HWS_PGA_GEN7;
+ break;
+ }
+ } else if (IS_GEN6(ring->dev)) {
+ mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
+ } else {
+ /* XXX: gen8 returns to sanity */
+ mmio = RING_HWS_PGA(ring->mmio_base);
+ }
+
+ I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
+ POSTING_READ(mmio);
+
+ /*
+ * Flush the TLB for this page
+ *
+ * FIXME: These two bits have disappeared on gen8, so a question
+ * arises: do we still need this and if so how should we go about
+ * invalidating the TLB?
+ */
+ if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
+ u32 reg = RING_INSTPM(ring->mmio_base);
+
+ /* ring should be idle before issuing a sync flush */
+ WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
+
+ I915_WRITE(reg,
+ _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
+ INSTPM_SYNC_FLUSH));
+ if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
+ 1000))
+ DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
+ ring->name);
+ }
+}
+
static bool stop_ring(struct intel_engine_cs *ring)
{
struct drm_i915_private *dev_priv = to_i915(ring->dev);
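A note on the register writes used throughout this patch: _MASKED_BIT_ENABLE() and the WA_SET_BIT_MASKED()/WA_CLR_BIT_MASKED() helpers target the GPU's "masked" registers, where bits 31:16 act as a per-bit write-enable mask and bits 15:0 carry the new values, so a single write updates only the named bits. A minimal sketch of that composition (standalone C, not the kernel's own macro definitions):

	/* Build a masked-register value: upper halfword = write-enable mask,
	 * lower halfword = the bit values to apply. Assumes the bits fit in
	 * the low 16 bits, as the kernel macros do. */
	#include <stdint.h>

	static inline uint32_t masked_bit_enable(uint32_t bits)
	{
		return (bits << 16) | bits;	/* set the named bits */
	}

	static inline uint32_t masked_bit_disable(uint32_t bits)
	{
		return bits << 16;		/* clear the named bits */
	}

	/* The INSTPM sync-flush write in the hunk above then amounts to:
	 *   I915_WRITE(reg, masked_bit_enable(INSTPM_TLB_INVALIDATE |
	 *                                     INSTPM_SYNC_FLUSH));
	 * leaving every other INSTPM bit unchanged. */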
@@ -788,12 +817,14 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
* workaround for a possible hang in the unlikely event a TLB
* invalidation occurs during a PSD flush.
*/
- /* WaForceEnableNonCoherent:bdw */
- /* WaHdcDisableFetchWhenMasked:bdw */
- /* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */
WA_SET_BIT_MASKED(HDC_CHICKEN0,
+ /* WaForceEnableNonCoherent:bdw */
HDC_FORCE_NON_COHERENT |
+ /* WaForceContextSaveRestoreNonCoherent:bdw */
+ HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+ /* WaHdcDisableFetchWhenMasked:bdw */
HDC_DONOT_FETCH_MEM_WHEN_MASKED |
+ /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
(IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
@@ -870,9 +901,132 @@ static int chv_init_workarounds(struct intel_engine_cs *ring)
GEN6_WIZ_HASHING_MASK,
GEN6_WIZ_HASHING_16x4);
+ if (INTEL_REVID(dev) == SKL_REVID_C0 ||
+ INTEL_REVID(dev) == SKL_REVID_D0)
+ /* WaBarrierPerformanceFixDisable:skl */
+ WA_SET_BIT_MASKED(HDC_CHICKEN0,
+ HDC_FENCE_DEST_SLM_DISABLE |
+ HDC_BARRIER_PERFORMANCE_DISABLE);
+
return 0;
}
+static int gen9_init_workarounds(struct intel_engine_cs *ring)
+{
+ struct drm_device *dev = ring->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ /* WaDisablePartialInstShootdown:skl */
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+ PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+
+ /* Syncing dependencies between camera and graphics */
+ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+ GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
+
+ if (INTEL_REVID(dev) == SKL_REVID_A0 ||
+ INTEL_REVID(dev) == SKL_REVID_B0) {
+ /* WaDisableDgMirrorFixInHalfSliceChicken5:skl */
+ WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
+ GEN9_DG_MIRROR_FIX_ENABLE);
+ }
+
+ if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) {
+ /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl */
+ WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
+ GEN9_RHWO_OPTIMIZATION_DISABLE);
+ WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN0,
+ DISABLE_PIXEL_MASK_CAMMING);
+ }
+
+ if (INTEL_REVID(dev) >= SKL_REVID_C0) {
+ /* WaEnableYV12BugFixInHalfSliceChicken7:skl */
+ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+ GEN9_ENABLE_YV12_BUGFIX);
+ }
+
+ if (INTEL_REVID(dev) <= SKL_REVID_D0) {
+ /*
+ * Use Force Non-Coherent whenever executing a 3D context. This
+ * is a workaround for a possible hang in the unlikely event
+ * a TLB invalidation occurs during a PSD flush.
+ */
+ /* WaForceEnableNonCoherent:skl */
+ WA_SET_BIT_MASKED(HDC_CHICKEN0,
+ HDC_FORCE_NON_COHERENT);
+ }
+
+ /* Wa4x4STCOptimizationDisable:skl */
+ WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
+
+ /* WaDisablePartialResolveInVc:skl */
+ WA_SET_BIT_MASKED(CACHE_MODE_1, GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
+
+ /* WaCcsTlbPrefetchDisable:skl */
+ WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
+ GEN9_CCS_TLB_PREFETCH_ENABLE);
+
+ return 0;
+}
+
+static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
+{
+ struct drm_device *dev = ring->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ u8 vals[3] = { 0, 0, 0 };
+ unsigned int i;
+
+ for (i = 0; i < 3; i++) {
+ u8 ss;
+
+ /*
+ * Only consider slices where one, and only one, subslice has 7
+ * EUs
+ */
+ if (hweight8(dev_priv->info.subslice_7eu[i]) != 1)
+ continue;
+
+ /*
+ * subslice_7eu[i] != 0 (because of the check above) and
+ * ss_max == 4 (maximum number of subslices possible per slice)
+ *
+ * -> 0 <= ss <= 3;
+ */
+ ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
+ vals[i] = 3 - ss;
+ }
+
+ if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
+ return 0;
+
+ /* Tune IZ hashing. See intel_device_info_runtime_init() */
+ WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+ GEN9_IZ_HASHING_MASK(2) |
+ GEN9_IZ_HASHING_MASK(1) |
+ GEN9_IZ_HASHING_MASK(0),
+ GEN9_IZ_HASHING(2, vals[2]) |
+ GEN9_IZ_HASHING(1, vals[1]) |
+ GEN9_IZ_HASHING(0, vals[0]));
+
+ return 0;
+}
+
+static int skl_init_workarounds(struct intel_engine_cs *ring)
+{
+ struct drm_device *dev = ring->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ gen9_init_workarounds(ring);
+
+ /* WaDisablePowerCompilerClockGating:skl */
+ if (INTEL_REVID(dev) == SKL_REVID_B0)
+ WA_SET_BIT_MASKED(HIZ_CHICKEN,
+ BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
+
+ return skl_tune_iz_hashing(ring);
+}
+
int init_workarounds_ring(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
@@ -888,6 +1042,11 @@ int init_workarounds_ring(struct intel_engine_cs *ring)
if (IS_CHERRYVIEW(dev))
return chv_init_workarounds(ring);
+ if (IS_SKYLAKE(dev))
+ return skl_init_workarounds(ring);
+ else if (IS_GEN9(dev))
+ return gen9_init_workarounds(ring);
+
return 0;
}
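The arithmetic in skl_tune_iz_hashing() above is easier to see with concrete numbers. The sketch below reproduces the mapping outside the driver, using __builtin_popcount()/__builtin_ffs() in place of the kernel's hweight8()/ffs() and a made-up subslice_7eu[] fuse mask; only slices with exactly one 7-EU subslice get a non-default IZ hashing value of 3 - ss:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical fuse data: slice 0 has its 7-EU subslice at bit 2,
	 * slice 1 has none, slice 2 has two (so both are left alone). */
	const uint8_t subslice_7eu[3] = { 0x04, 0x00, 0x03 };

	for (int i = 0; i < 3; i++) {
		if (__builtin_popcount(subslice_7eu[i]) != 1) {
			printf("slice %d: default hashing (val 0)\n", i);
			continue;
		}
		int ss = __builtin_ffs(subslice_7eu[i]) - 1;	/* 0..3 */
		printf("slice %d: ss=%d -> val %d\n", i, ss, 3 - ss);
	}
	return 0;	/* prints: slice 0 -> val 1, slices 1 and 2 default */
}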
@@ -1386,68 +1545,6 @@ i8xx_ring_put_irq(struct intel_engine_cs *ring)
spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}
-void intel_ring_setup_status_page(struct intel_engine_cs *ring)
-{
- struct drm_device *dev = ring->dev;
- struct drm_i915_private *dev_priv = ring->dev->dev_private;
- u32 mmio = 0;
-
- /* The ring status page addresses are no longer next to the rest of
- * the ring registers as of gen7.
- */
- if (IS_GEN7(dev)) {
- switch (ring->id) {
- case RCS:
- mmio = RENDER_HWS_PGA_GEN7;
- break;
- case BCS:
- mmio = BLT_HWS_PGA_GEN7;
- break;
- /*
- * VCS2 actually doesn't exist on Gen7. Only shut up
- * gcc switch check warning
- */
- case VCS2:
- case VCS:
- mmio = BSD_HWS_PGA_GEN7;
- break;
- case VECS:
- mmio = VEBOX_HWS_PGA_GEN7;
- break;
- }
- } else if (IS_GEN6(ring->dev)) {
- mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
- } else {
- /* XXX: gen8 returns to sanity */
- mmio = RING_HWS_PGA(ring->mmio_base);
- }
-
- I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
- POSTING_READ(mmio);
-
- /*
- * Flush the TLB for this page
- *
- * FIXME: These two bits have disappeared on gen8, so a question
- * arises: do we still need this and if so how should we go about
- * invalidating the TLB?
- */
- if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
- u32 reg = RING_INSTPM(ring->mmio_base);
-
- /* ring should be idle before issuing a sync flush*/
- WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
-
- I915_WRITE(reg,
- _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
- INSTPM_SYNC_FLUSH));
- if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
- 1000))
- DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
- ring->name);
- }
-}
-
static int
bsd_ring_flush(struct intel_engine_cs *ring,
u32 invalidate_domains,
@@ -1611,7 +1708,7 @@ gen8_ring_put_irq(struct intel_engine_cs *ring)
static int
i965_dispatch_execbuffer(struct intel_engine_cs *ring,
u64 offset, u32 length,
- unsigned flags)
+ unsigned dispatch_flags)
{
int ret;
@@ -1622,7 +1719,8 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring,
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
MI_BATCH_GTT |
- (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
+ (dispatch_flags & I915_DISPATCH_SECURE ?
+ 0 : MI_BATCH_NON_SECURE_I965));
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
@@ -1635,8 +1733,8 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring,
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
static int
i830_dispatch_execbuffer(struct intel_engine_cs *ring,
- u64 offset, u32 len,
- unsigned flags)
+ u64 offset, u32 len,
+ unsigned dispatch_flags)
{
u32 cs_offset = ring->scratch.gtt_offset;
int ret;
@@ -1654,7 +1752,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
- if ((flags & I915_DISPATCH_PINNED) == 0) {
+ if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
if (len > I830_BATCH_LIMIT)
return -ENOSPC;
@@ -1686,7 +1784,8 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
return ret;
intel_ring_emit(ring, MI_BATCH_BUFFER);
- intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+ intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
+ 0 : MI_BATCH_NON_SECURE));
intel_ring_emit(ring, offset + len - 8);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
@@ -1697,7 +1796,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
static int
i915_dispatch_execbuffer(struct intel_engine_cs *ring,
u64 offset, u32 len,
- unsigned flags)
+ unsigned dispatch_flags)
{
int ret;
@@ -1706,7 +1805,8 @@ i915_dispatch_execbuffer(struct intel_engine_cs *ring,
return ret;
intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
- intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+ intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
+ 0 : MI_BATCH_NON_SECURE));
intel_ring_advance(ring);
return 0;
@@ -2097,6 +2197,7 @@ intel_ring_alloc_request(struct intel_engine_cs *ring)
kref_init(&request->ref);
request->ring = ring;
+ request->ringbuf = ring->buffer;
request->uniq = dev_private->request_uniq++;
ret = i915_gem_get_seqno(ring->dev, &request->seqno);
@@ -2273,9 +2374,10 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
static int
gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
u64 offset, u32 len,
- unsigned flags)
+ unsigned dispatch_flags)
{
- bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
+ bool ppgtt = USES_PPGTT(ring->dev) &&
+ !(dispatch_flags & I915_DISPATCH_SECURE);
int ret;
ret = intel_ring_begin(ring, 4);
@@ -2294,8 +2396,8 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
static int
hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
- u64 offset, u32 len,
- unsigned flags)
+ u64 offset, u32 len,
+ unsigned dispatch_flags)
{
int ret;
@@ -2305,7 +2407,7 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
- (flags & I915_DISPATCH_SECURE ?
+ (dispatch_flags & I915_DISPATCH_SECURE ?
0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
@@ -2317,7 +2419,7 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
static int
gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
u64 offset, u32 len,
- unsigned flags)
+ unsigned dispatch_flags)
{
int ret;
@@ -2327,7 +2429,8 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
- (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
+ (dispatch_flags & I915_DISPATCH_SECURE ?
+ 0 : MI_BATCH_NON_SECURE_I965));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
@@ -2341,7 +2444,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
u32 invalidate, u32 flush)
{
struct drm_device *dev = ring->dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t cmd;
int ret;
@@ -2350,7 +2452,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
return ret;
cmd = MI_FLUSH_DW;
- if (INTEL_INFO(ring->dev)->gen >= 8)
+ if (INTEL_INFO(dev)->gen >= 8)
cmd += 1;
/* We always require a command barrier so that subsequent
@@ -2370,7 +2472,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
cmd |= MI_INVALIDATE_TLB;
intel_ring_emit(ring, cmd);
intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
- if (INTEL_INFO(ring->dev)->gen >= 8) {
+ if (INTEL_INFO(dev)->gen >= 8) {
intel_ring_emit(ring, 0); /* upper addr */
intel_ring_emit(ring, 0); /* value */
} else {
@@ -2379,13 +2481,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
}
intel_ring_advance(ring);
- if (!invalidate && flush) {
- if (IS_GEN7(dev))
- return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
- else if (IS_BROADWELL(dev))
- dev_priv->fbc.need_sw_cache_clean = true;
- }
-
return 0;
}
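One detail of the hunk above worth spelling out: the cmd += 1 on gen8 grows the MI_FLUSH_DW length field because the post-sync address becomes 64-bit there, so the packet carries one more meaningful dword. Read off the emits, the dwords written are roughly as follows (my reading of the code above, not taken from the hardware spec):

/*
 *   gen6/gen7                           gen8
 *   DW0  cmd (MI_FLUSH_DW | flags)      DW0  cmd, length field + 1
 *   DW1  scratch addr | USE_GTT         DW1  scratch addr | USE_GTT
 *   DW2  0 (post-sync value)            DW2  0 (upper 32 bits of addr)
 *   DW3  MI_NOOP (padding)              DW3  0 (post-sync value)
 */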
@@ -2612,19 +2707,13 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
}
/**
- * Initialize the second BSD ring for Broadwell GT3.
- * It is noted that this only exists on Broadwell GT3.
+ * Initialize the second BSD ring (e.g. Broadwell GT3, Skylake GT3)
*/
int intel_init_bsd2_ring_buffer(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
- if ((INTEL_INFO(dev)->gen != 8)) {
- DRM_ERROR("No dual-BSD ring on non-BDW machine\n");
- return -EINVAL;
- }
-
ring->name = "bsd2 ring";
ring->id = VCS2;