diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_pm.c | 572 |
1 files changed, 304 insertions, 268 deletions
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8375054ba27d..07f663cd2d1c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -43,6 +43,7 @@ #include "i915_fixed.h" #include "i915_irq.h" #include "i915_trace.h" +#include "display/intel_bw.h" #include "intel_pm.h" #include "intel_sideband.h" #include "../../../platform/x86/intel_ips.h" @@ -3637,10 +3638,6 @@ static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) static bool intel_has_sagv(struct drm_i915_private *dev_priv) { - /* HACK! */ - if (IS_GEN(dev_priv, 12)) - return false; - return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) && dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED; } @@ -3757,42 +3754,120 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) return 0; } -bool intel_can_enable_sagv(struct intel_atomic_state *state) +void intel_sagv_pre_plane_update(struct intel_atomic_state *state) { - struct drm_device *dev = state->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc; - struct intel_plane *plane; - struct intel_crtc_state *crtc_state; - enum pipe pipe; - int level, latency; + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + const struct intel_bw_state *new_bw_state; + const struct intel_bw_state *old_bw_state; + u32 new_mask = 0; + /* + * Just return if we can't control SAGV or don't have it. + * This is different from situation when we have SAGV but just can't + * afford it due to DBuf limitation - in case if SAGV is completely + * disabled in a BIOS, we are not even allowed to send a PCode request, + * as it will throw an error. So have to check it here. + */ if (!intel_has_sagv(dev_priv)) - return false; + return; + + new_bw_state = intel_atomic_get_new_bw_state(state); + if (!new_bw_state) + return; + + if (INTEL_GEN(dev_priv) < 11 && !intel_can_enable_sagv(dev_priv, new_bw_state)) { + intel_disable_sagv(dev_priv); + return; + } + old_bw_state = intel_atomic_get_old_bw_state(state); /* - * If there are no active CRTCs, no additional checks need be performed + * Nothing to mask */ - if (hweight8(state->active_pipes) == 0) - return true; + if (new_bw_state->qgv_points_mask == old_bw_state->qgv_points_mask) + return; + + new_mask = old_bw_state->qgv_points_mask | new_bw_state->qgv_points_mask; /* - * SKL+ workaround: bspec recommends we disable SAGV when we have - * more then one pipe enabled + * If new mask is zero - means there is nothing to mask, + * we can only unmask, which should be done in unmask. */ - if (hweight8(state->active_pipes) > 1) + if (!new_mask) + return; + + /* + * Restrict required qgv points before updating the configuration. + * According to BSpec we can't mask and unmask qgv points at the same + * time. Also masking should be done before updating the configuration + * and unmasking afterwards. + */ + icl_pcode_restrict_qgv_points(dev_priv, new_mask); +} + +void intel_sagv_post_plane_update(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + const struct intel_bw_state *new_bw_state; + const struct intel_bw_state *old_bw_state; + u32 new_mask = 0; + + /* + * Just return if we can't control SAGV or don't have it. + * This is different from situation when we have SAGV but just can't + * afford it due to DBuf limitation - in case if SAGV is completely + * disabled in a BIOS, we are not even allowed to send a PCode request, + * as it will throw an error. So have to check it here. + */ + if (!intel_has_sagv(dev_priv)) + return; + + new_bw_state = intel_atomic_get_new_bw_state(state); + if (!new_bw_state) + return; + + if (INTEL_GEN(dev_priv) < 11 && intel_can_enable_sagv(dev_priv, new_bw_state)) { + intel_enable_sagv(dev_priv); + return; + } + + old_bw_state = intel_atomic_get_old_bw_state(state); + /* + * Nothing to unmask + */ + if (new_bw_state->qgv_points_mask == old_bw_state->qgv_points_mask) + return; + + new_mask = new_bw_state->qgv_points_mask; + + /* + * Allow required qgv points after updating the configuration. + * According to BSpec we can't mask and unmask qgv points at the same + * time. Also masking should be done before updating the configuration + * and unmasking afterwards. + */ + icl_pcode_restrict_qgv_points(dev_priv, new_mask); +} + +static bool skl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_plane *plane; + const struct intel_plane_state *plane_state; + int level, latency; + + if (!intel_has_sagv(dev_priv)) return false; - /* Since we're now guaranteed to only have one active CRTC... */ - pipe = ffs(state->active_pipes) - 1; - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - crtc_state = to_intel_crtc_state(crtc->base.state); + if (!crtc_state->hw.active) + return true; if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) return false; - for_each_intel_plane_on_crtc(dev, crtc, plane) { - struct skl_plane_wm *wm = + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + const struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane->id]; /* Skip this plane if it's not enabled */ @@ -3807,7 +3882,7 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state) latency = dev_priv->wm.skl_latency[level]; if (skl_needs_memory_bw_wa(dev_priv) && - plane->base.state->fb->modifier == + plane_state->uapi.fb->modifier == I915_FORMAT_MOD_X_TILED) latency += 15; @@ -3823,6 +3898,112 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state) return true; } +static bool tgl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + enum plane_id plane_id; + + if (!crtc_state->hw.active) + return true; + + for_each_plane_id_on_crtc(crtc, plane_id) { + const struct skl_ddb_entry *plane_alloc = + &crtc_state->wm.skl.plane_ddb_y[plane_id]; + const struct skl_plane_wm *wm = + &crtc_state->wm.skl.optimal.planes[plane_id]; + + if (skl_ddb_entry_size(plane_alloc) < wm->sagv_wm0.min_ddb_alloc) + return false; + } + + return true; +} + +static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (INTEL_GEN(dev_priv) >= 12) + return tgl_crtc_can_enable_sagv(crtc_state); + else + return skl_crtc_can_enable_sagv(crtc_state); +} + +bool intel_can_enable_sagv(struct drm_i915_private *dev_priv, + const struct intel_bw_state *bw_state) +{ + if (INTEL_GEN(dev_priv) < 11 && + bw_state->active_pipes && !is_power_of_2(bw_state->active_pipes)) + return false; + + return bw_state->pipe_sagv_reject == 0; +} + +static int intel_compute_sagv_mask(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + int ret; + struct intel_crtc *crtc; + struct intel_crtc_state *new_crtc_state; + struct intel_bw_state *new_bw_state = NULL; + const struct intel_bw_state *old_bw_state = NULL; + int i; + + for_each_new_intel_crtc_in_state(state, crtc, + new_crtc_state, i) { + new_bw_state = intel_atomic_get_bw_state(state); + if (IS_ERR(new_bw_state)) + return PTR_ERR(new_bw_state); + + old_bw_state = intel_atomic_get_old_bw_state(state); + + if (intel_crtc_can_enable_sagv(new_crtc_state)) + new_bw_state->pipe_sagv_reject &= ~BIT(crtc->pipe); + else + new_bw_state->pipe_sagv_reject |= BIT(crtc->pipe); + } + + if (!new_bw_state) + return 0; + + new_bw_state->active_pipes = + intel_calc_active_pipes(state, old_bw_state->active_pipes); + + if (new_bw_state->active_pipes != old_bw_state->active_pipes) { + ret = intel_atomic_lock_global_state(&new_bw_state->base); + if (ret) + return ret; + } + + for_each_new_intel_crtc_in_state(state, crtc, + new_crtc_state, i) { + struct skl_pipe_wm *pipe_wm = &new_crtc_state->wm.skl.optimal; + + /* + * We store use_sagv_wm in the crtc state rather than relying on + * that bw state since we have no convenient way to get at the + * latter from the plane commit hooks (especially in the legacy + * cursor case) + */ + pipe_wm->use_sagv_wm = INTEL_GEN(dev_priv) >= 12 && + intel_can_enable_sagv(dev_priv, new_bw_state); + } + + if (intel_can_enable_sagv(dev_priv, new_bw_state) != + intel_can_enable_sagv(dev_priv, old_bw_state)) { + ret = intel_atomic_serialize_global_state(&new_bw_state->base); + if (ret) + return ret; + } else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) { + ret = intel_atomic_lock_global_state(&new_bw_state->base); + if (ret) + return ret; + } + + return 0; +} + /* * Calculate initial DBuf slice offset, based on slice size * and mask(i.e if slice size is 1024 and second slice is enabled @@ -4016,6 +4197,7 @@ static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state, int color_plane); static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, int level, + unsigned int latency, const struct skl_wm_params *wp, const struct skl_wm_level *result_prev, struct skl_wm_level *result /* out */); @@ -4038,7 +4220,9 @@ skl_cursor_allocation(const struct intel_crtc_state *crtc_state, drm_WARN_ON(&dev_priv->drm, ret); for (level = 0; level <= max_level; level++) { - skl_compute_plane_wm(crtc_state, level, &wp, &wm, &wm); + unsigned int latency = dev_priv->wm.skl_latency[level]; + + skl_compute_plane_wm(crtc_state, level, latency, &wp, &wm, &wm); if (wm.min_ddb_alloc == U16_MAX) break; @@ -4544,6 +4728,20 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, return total_data_rate; } +static const struct skl_wm_level * +skl_plane_wm_level(const struct intel_crtc_state *crtc_state, + enum plane_id plane_id, + int level) +{ + const struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; + const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; + + if (level == 0 && pipe_wm->use_sagv_wm) + return &wm->sagv_wm0; + + return &wm->wm[level]; +} + static int skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state) { @@ -4580,7 +4778,6 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state) plane_data_rate, uv_plane_data_rate); - skl_ddb_get_pipe_allocation_limits(dev_priv, crtc_state, total_data_rate, alloc, &num_active); alloc_size = skl_ddb_entry_size(alloc); @@ -4780,7 +4977,7 @@ skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate, wm_intermediate_val = latency * pixel_rate * cpp; ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size); - if (INTEL_GEN(dev_priv) >= 10) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) ret = add_fixed16_u32(ret, 1); return ret; @@ -4915,18 +5112,19 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, wp->y_min_scanlines, wp->dbuf_block_size); - if (INTEL_GEN(dev_priv) >= 10) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) interm_pbpl++; wp->plane_blocks_per_line = div_fixed16(interm_pbpl, wp->y_min_scanlines); - } else if (wp->x_tiled && IS_GEN(dev_priv, 9)) { - interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, - wp->dbuf_block_size); - wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } else { interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, - wp->dbuf_block_size) + 1; + wp->dbuf_block_size); + + if (!wp->x_tiled || + INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + interm_pbpl++; + wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } @@ -4972,12 +5170,12 @@ static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level) static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, int level, + unsigned int latency, const struct skl_wm_params *wp, const struct skl_wm_level *result_prev, struct skl_wm_level *result /* out */) { struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); - u32 latency = dev_priv->wm.skl_latency[level]; uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t selected_result; u32 res_blocks, res_lines, min_ddb_alloc = 0; @@ -4992,7 +5190,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, * WaIncreaseLatencyIPCEnabled: kbl,cfl * Display WA #1141: kbl,cfl */ - if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) || + if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) && dev_priv->ipc_enabled) latency += 4; @@ -5106,14 +5304,29 @@ skl_compute_wm_levels(const struct intel_crtc_state *crtc_state, for (level = 0; level <= max_level; level++) { struct skl_wm_level *result = &levels[level]; + unsigned int latency = dev_priv->wm.skl_latency[level]; - skl_compute_plane_wm(crtc_state, level, wm_params, - result_prev, result); + skl_compute_plane_wm(crtc_state, level, latency, + wm_params, result_prev, result); result_prev = result; } } +static void tgl_compute_sagv_wm(const struct intel_crtc_state *crtc_state, + const struct skl_wm_params *wm_params, + struct skl_plane_wm *plane_wm) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + struct skl_wm_level *sagv_wm = &plane_wm->sagv_wm0; + struct skl_wm_level *levels = plane_wm->wm; + unsigned int latency = dev_priv->wm.skl_latency[0] + dev_priv->sagv_block_time_us; + + skl_compute_plane_wm(crtc_state, 0, latency, + wm_params, &levels[0], + sagv_wm); +} + static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state, const struct skl_wm_params *wp, struct skl_plane_wm *wm) @@ -5166,10 +5379,6 @@ static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state, trans_offset_b; } else { res_blocks = wm0_sel_res_b + trans_offset_b; - - /* WA BUG:1938466 add one block for non y-tile planes */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0)) - res_blocks += 1; } /* @@ -5185,6 +5394,8 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, enum plane_id plane_id, int color_plane) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id]; struct skl_wm_params wm_params; int ret; @@ -5195,6 +5406,10 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state, return ret; skl_compute_wm_levels(crtc_state, &wm_params, wm->wm); + + if (INTEL_GEN(dev_priv) >= 12) + tgl_compute_sagv_wm(crtc_state, &wm_params, wm); + skl_compute_transition_wm(crtc_state, &wm_params, wm); return 0; @@ -5354,8 +5569,12 @@ void skl_write_plane_wm(struct intel_plane *plane, &crtc_state->wm.skl.plane_ddb_uv[plane_id]; for (level = 0; level <= max_level; level++) { + const struct skl_wm_level *wm_level; + + wm_level = skl_plane_wm_level(crtc_state, plane_id, level); + skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level), - &wm->wm[level]); + wm_level); } skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id), &wm->trans_wm); @@ -5388,8 +5607,12 @@ void skl_write_cursor_wm(struct intel_plane *plane, &crtc_state->wm.skl.plane_ddb_y[plane_id]; for (level = 0; level <= max_level; level++) { + const struct skl_wm_level *wm_level; + + wm_level = skl_plane_wm_level(crtc_state, plane_id, level); + skl_write_wm_level(dev_priv, CUR_WM(pipe, level), - &wm->wm[level]); + wm_level); } skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm); @@ -5424,8 +5647,8 @@ static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv, return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm); } -static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, - const struct skl_ddb_entry *b) +static bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, + const struct skl_ddb_entry *b) { return a->start < b->end && b->start < a->end; } @@ -5553,23 +5776,25 @@ skl_print_wm_changes(struct intel_atomic_state *state) continue; drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm" - " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n", + "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm" + " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm\n", plane->base.base.id, plane->base.name, enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en), enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en), enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en), enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en), enast(old_wm->trans_wm.plane_en), + enast(old_wm->sagv_wm0.plane_en), enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en), enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en), enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en), enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en), - enast(new_wm->trans_wm.plane_en)); + enast(new_wm->trans_wm.plane_en), + enast(new_wm->sagv_wm0.plane_en)); drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d" - " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n", + "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d" + " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n", plane->base.base.id, plane->base.name, enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l, enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l, @@ -5580,6 +5805,7 @@ skl_print_wm_changes(struct intel_atomic_state *state) enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l, enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l, enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l, + enast(old_wm->sagv_wm0.ignore_lines), old_wm->sagv_wm0.plane_res_l, enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l, enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l, @@ -5589,37 +5815,42 @@ skl_print_wm_changes(struct intel_atomic_state *state) enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l, enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l, enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l, - enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l); + enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l, + enast(new_wm->sagv_wm0.ignore_lines), new_wm->sagv_wm0.plane_res_l); drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", + "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", plane->base.base.id, plane->base.name, old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b, old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b, old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b, old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b, old_wm->trans_wm.plane_res_b, + old_wm->sagv_wm0.plane_res_b, new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b, new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b, new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b, new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b, - new_wm->trans_wm.plane_res_b); + new_wm->trans_wm.plane_res_b, + new_wm->sagv_wm0.plane_res_b); drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", + "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", plane->base.base.id, plane->base.name, old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc, old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc, old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc, old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc, old_wm->trans_wm.min_ddb_alloc, + old_wm->sagv_wm0.min_ddb_alloc, new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc, new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc, new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc, new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc, - new_wm->trans_wm.min_ddb_alloc); + new_wm->trans_wm.min_ddb_alloc, + new_wm->sagv_wm0.min_ddb_alloc); } } } @@ -5780,6 +6011,10 @@ skl_compute_wm(struct intel_atomic_state *state) if (ret) return ret; + ret = intel_compute_sagv_mask(state); + if (ret) + return ret; + /* * skl_compute_ddb() will have adjusted the final watermarks * based on how much ddb is available. Now we can actually @@ -5876,8 +6111,7 @@ static void ilk_optimize_watermarks(struct intel_atomic_state *state, mutex_unlock(&dev_priv->wm.wm_mutex); } -static inline void skl_wm_level_from_reg_val(u32 val, - struct skl_wm_level *level) +static void skl_wm_level_from_reg_val(u32 val, struct skl_wm_level *level) { level->plane_en = val & PLANE_WM_EN; level->ignore_lines = val & PLANE_WM_IGNORE_LINES; @@ -5909,6 +6143,9 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, skl_wm_level_from_reg_val(val, &wm->wm[level]); } + if (INTEL_GEN(dev_priv) >= 12) + wm->sagv_wm0 = wm->wm[0]; + if (plane_id != PLANE_CURSOR) val = I915_READ(PLANE_WM_TRANS(pipe, plane_id)); else @@ -6593,16 +6830,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(ILK_DISPLAY_CHICKEN2, I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); - I915_WRITE(_3D_CHICKEN2, - _3D_CHICKEN2_WM_READ_PIPELINED << 16 | - _3D_CHICKEN2_WM_READ_PIPELINED); - - /* WaDisableRenderCachePipelinedFlush:ilk */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:ilk */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); g4x_disable_trickle_feed(dev_priv); @@ -6665,27 +6892,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); - /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ - I915_WRITE(_3D_CHICKEN, - _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); - - /* WaDisable_RenderCache_OperationalFlush:snb */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* - * BSpec recoomends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN6_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); - I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | GEN6_BLBUNIT_CLOCK_GATE_DISABLE | @@ -6708,18 +6914,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | GEN6_RCCUNIT_CLOCK_GATE_DISABLE); - /* WaStripsFansDisableFastClipPerformanceFix:snb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); - - /* - * Bspec says: - * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and - * 3DSTATE_SF number of SF output attributes is more than 16." - */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); - /* * According to the spec the following bits should be * set in order to enable memory self-refresh and fbc: @@ -6749,24 +6943,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) gen6_check_mch_setup(dev_priv); } -static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) -{ - u32 reg = I915_READ(GEN7_FF_THREAD_MODE); - - /* - * WaVSThreadDispatchOverride:ivb,vlv - * - * This actually overrides the dispatch - * mode for all thread types. - */ - reg &= ~GEN7_FF_SCHED_MASK; - reg |= GEN7_FF_TS_SCHED_HW; - reg |= GEN7_FF_VS_SCHED_HW; - reg |= GEN7_FF_DS_SCHED_HW; - - I915_WRITE(GEN7_FF_THREAD_MODE, reg); -} - static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) { /* @@ -6850,6 +7026,10 @@ static void tgl_init_clock_gating(struct drm_i915_private *dev_priv) if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0)) I915_WRITE(GEN9_CLKGATE_DIS_3, I915_READ(GEN9_CLKGATE_DIS_3) | TGL_VRH_GATING_DIS); + + /* Wa_14011059788:tgl */ + intel_uncore_rmw(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN, + 0, DFR_DISABLE); } static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) @@ -6882,9 +7062,6 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE); /* ReadHitWriteOnlyDisable:cnl */ val |= RCCUNIT_CLKGATE_DIS; - /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) - val |= SARBUNIT_CLKGATE_DIS; I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); /* Wa_2201832410:cnl */ @@ -6992,45 +7169,10 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) { - /* L3 caching of data atomics doesn't work -- disable it. */ - I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); - I915_WRITE(HSW_ROW_CHICKEN3, - _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); - /* This is required by WaCatErrorRejectionIssue:hsw */ I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - - /* WaVSRefCountFullforceMissDisable:hsw */ - I915_WRITE(GEN7_FF_THREAD_MODE, - I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); - - /* WaDisable_RenderCache_OperationalFlush:hsw */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - - /* WaDisable4x2SubspanOptimization:hsw */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - /* WaSampleCChickenBitEnable:hsw */ - I915_WRITE(HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE)); + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -7044,32 +7186,11 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); - /* WaDisableEarlyCull:ivb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - /* WaDisableBackToBackFlipFix:ivb */ I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); - /* WaDisablePSDDualDispatchEnable:ivb */ - if (IS_IVB_GT1(dev_priv)) - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); - - /* WaDisable_RenderCache_OperationalFlush:ivb */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ - I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, - GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); - - /* WaApplyL3ControlAndL3ChickenMode:ivb */ - I915_WRITE(GEN7_L3CNTLREG1, - GEN7_WA_FOR_GEN7_L3_CONTROL); - I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, - GEN7_WA_L3_CHICKEN_MODE); if (IS_IVB_GT1(dev_priv)) I915_WRITE(GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); @@ -7081,10 +7202,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); } - /* WaForceL3Serialization:ivb */ - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); - /* * According to the spec, bit 13 (RCZUNIT) must be set on IVB. * This implements the WaDisableRCZUnitClockGating:ivb workaround. @@ -7099,29 +7216,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) g4x_disable_trickle_feed(dev_priv); - gen7_setup_fixed_func_scheduler(dev_priv); - - if (0) { /* causes HiZ corruption on ivb:gt1 */ - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - } - - /* WaDisable4x2SubspanOptimization:ivb */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); snpcr &= ~GEN6_MBC_SNPCR_MASK; snpcr |= GEN6_MBC_SNPCR_MED; @@ -7135,28 +7229,11 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) { - /* WaDisableEarlyCull:vlv */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - /* WaDisableBackToBackFlipFix:vlv */ I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); - /* WaPsdDispatchEnable:vlv */ - /* WaDisablePSDDualDispatchEnable:vlv */ - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); - - /* WaDisable_RenderCache_OperationalFlush:vlv */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* WaForceL3Serialization:vlv */ - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); - /* WaDisableDopClockGating:vlv */ I915_WRITE(GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); @@ -7166,8 +7243,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - gen7_setup_fixed_func_scheduler(dev_priv); - /* * According to the spec, bit 13 (RCZUNIT) must be set on IVB. * This implements the WaDisableRCZUnitClockGating:vlv workaround. @@ -7182,30 +7257,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); /* - * BSpec says this must be set, even though - * WaDisable4x2SubspanOptimization isn't listed for VLV. - */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - /* - * WaIncreaseL3CreditsForVLVB0:vlv - * This is the hardware default actually. - */ - I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); - - /* * WaDisableVLVClockGating_VBIIssue:vlv * Disable clock gating on th GCFG unit to prevent a delay * in the reporting of vblank events. @@ -7257,13 +7308,6 @@ static void g4x_init_clock_gating(struct drm_i915_private *dev_priv) dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; I915_WRITE(DSPCLK_GATE_D, dspclk_gate); - /* WaDisableRenderCachePipelinedFlush */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:g4x */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - g4x_disable_trickle_feed(dev_priv); } @@ -7279,11 +7323,6 @@ static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(uncore, MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:gen4 */ - intel_uncore_write(uncore, - CACHE_MODE_0, - _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } static void i965g_init_clock_gating(struct drm_i915_private *dev_priv) @@ -7296,9 +7335,6 @@ static void i965g_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(RENCLK_GATE_D2, 0); I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:gen4 */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } static void gen3_init_clock_gating(struct drm_i915_private *dev_priv) |