From f4d4097a036ab1ec6bc8154160c49ab9a0a92895 Mon Sep 17 00:00:00 2001 From: Krzysztof Karas Date: Tue, 9 Sep 2025 12:09:28 +0000 Subject: drm/i915/gem: Avoid accessing uninitialized context in emit_rpcs_query() Following the error path in that function may lead to use of an uninitialized struct i915_gem_ww_ctx object, so move the call to i915_gem_ww_ctx_init() a bit earlier. Cc: Maarten Lankhorst Signed-off-by: Krzysztof Karas Reviewed-by: Sebastian Brzezinka Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/casutxyfjv7o4ivadvbich2sq2dt22btc5wcke55r56ptgxx2h@lv7hnxrqw5rq --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index eb0158e43417..1330c0b431a7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -962,13 +962,14 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (IS_ERR(rpcs)) return PTR_ERR(rpcs); + i915_gem_ww_ctx_init(&ww, false); + batch = i915_vma_instance(rpcs, ce->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto err_put; } - i915_gem_ww_ctx_init(&ww, false); retry: err = i915_gem_object_lock(obj, &ww); if (!err) -- cgit v1.2.3 From 820e067b94729f4b93a01555a3d7d775510021aa Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Wed, 3 Sep 2025 10:08:21 -0700 Subject: drm/i915/display: Use DISPLAY_VER over GRAPHICS_VER The checks in plane_has_modifier() should check against the display version instead of the graphics version. Bspec: 67165, 70815 Signed-off-by: Matt Atwood Reviewed-by: Juha-Pekka Heikkila Link: https://lore.kernel.org/r/20250903170821.310143-1-matthew.s.atwood@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/i915/display/intel_fb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 22a4a1575d22..69237dabdae8 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -564,11 +564,11 @@ static bool plane_has_modifier(struct intel_display *display, return false; if (md->modifier == I915_FORMAT_MOD_4_TILED_BMG_CCS && - (GRAPHICS_VER(i915) < 20 || !display->platform.dgfx)) + (DISPLAY_VER(display) < 14 || !display->platform.dgfx)) return false; if (md->modifier == I915_FORMAT_MOD_4_TILED_LNL_CCS && - (GRAPHICS_VER(i915) < 20 || display->platform.dgfx)) + (DISPLAY_VER(display) < 20 || display->platform.dgfx)) return false; return true; -- cgit v1.2.3 From c8cf6e3cc27c52cfe42d246e96f6e83266b9a0db Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:40 +0300 Subject: drm/i915: do cck get/put inside vlv_get_hpll_vco() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move towards VLV/CHV clock interfaces that handle sideband get/put inside them instead of at the caller. We'll need to move the calls outside of the existing get/put.
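As a minimal sketch of the calling pattern this change moves toward (the caller below is hypothetical; the helper names are taken from the patch):

    /* Before: callers bracket the helper with the CCK sideband get/put. */
    vlv_cck_get(drm);
    vco = vlv_get_hpll_vco(drm);    /* reads CCK_FUSE_REG */
    vlv_cck_put(drm);

    /* After: the helper takes and releases the CCK sideband internally,
     * so a caller simply does: */
    vco = vlv_get_hpll_vco(drm);
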
Suggested-by: Ville Syrjälä Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/1a6553f54619275aa05512421e19115a71cd3eb0.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 3 ++- drivers/gpu/drm/i915/display/intel_display.c | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 9725eebe5706..c54c7fd93f97 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -608,9 +608,10 @@ static void vlv_get_cdclk(struct intel_display *display, { u32 val; + cdclk_config->vco = vlv_get_hpll_vco(display->drm); + vlv_iosf_sb_get(display->drm, BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); - cdclk_config->vco = vlv_get_hpll_vco(display->drm); cdclk_config->cdclk = vlv_get_cck_clock(display->drm, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, cdclk_config->vco); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 5dca7f96b425..f5208583235d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -146,10 +146,14 @@ int vlv_get_hpll_vco(struct drm_device *drm) { int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; + vlv_cck_get(drm); + /* Obtain SKU information */ hpll_freq = vlv_cck_read(drm, CCK_FUSE_REG) & CCK_FUSE_HPLL_FREQ_MASK; + vlv_cck_put(drm); + return vco_freq[hpll_freq] * 1000; } @@ -175,11 +179,11 @@ int vlv_get_cck_clock_hpll(struct drm_device *drm, struct drm_i915_private *dev_priv = to_i915(drm); int hpll; - vlv_cck_get(drm); - if (dev_priv->hpll_freq == 0) dev_priv->hpll_freq = vlv_get_hpll_vco(drm); + vlv_cck_get(drm); + hpll = vlv_get_cck_clock(drm, name, reg, dev_priv->hpll_freq); vlv_cck_put(drm); -- cgit v1.2.3 From 7d112811787f03030f301975af4079a1efe7cdcc Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:41 +0300 Subject: drm/i915: do cck get/put inside vlv_get_cck_clock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move towards VLV/CHV clock interfaces that handle sideband get/put inside them instead of at the caller. With this, we can switch to the simpler vlv_punit_get()/vlv_punit_put() in vlv_get_cdclk(). We'll need to move vlv_init_gpll_ref_freq() outside of the existing get/put in vlv_rps_init() and chv_rps_init(). 
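The reordering in vlv_rps_init() and chv_rps_init() follows from the same rule; a sketch of the intent (the ordering mirrors the diff below, the comments are interpretation):

    /* vlv_init_gpll_ref_freq() ends up in vlv_get_cck_clock(), which now
     * does its own cck get/put, so call it before - not inside - the
     * caller-held sideband section. */
    vlv_init_gpll_ref_freq(rps);

    vlv_iosf_sb_get(&i915->drm,
                    BIT(VLV_IOSF_SB_PUNIT) |
                    BIT(VLV_IOSF_SB_NC) |
                    BIT(VLV_IOSF_SB_CCK));
    /* ... reads that still rely on the caller-held sideband ... */
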
Suggested-by: Ville Syrjälä Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/480b654b6c736a03343dfd17eb130c39fd82c637.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 8 ++------ drivers/gpu/drm/i915/display/intel_display.c | 7 +++---- drivers/gpu/drm/i915/gt/intel_rps.c | 8 ++++---- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index c54c7fd93f97..bf4e975ac41c 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -609,17 +609,13 @@ static void vlv_get_cdclk(struct intel_display *display, u32 val; cdclk_config->vco = vlv_get_hpll_vco(display->drm); - - vlv_iosf_sb_get(display->drm, BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); - cdclk_config->cdclk = vlv_get_cck_clock(display->drm, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, cdclk_config->vco); + vlv_punit_get(display->drm); val = vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM); - - vlv_iosf_sb_put(display->drm, - BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); + vlv_punit_put(display->drm); if (display->platform.valleyview) cdclk_config->voltage_level = (val & DSPFREQGUAR_MASK) >> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index f5208583235d..aef136a1be25 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -163,7 +163,10 @@ int vlv_get_cck_clock(struct drm_device *drm, u32 val; int divider; + vlv_cck_get(drm); val = vlv_cck_read(drm, reg); + vlv_cck_put(drm); + divider = val & CCK_FREQUENCY_VALUES; drm_WARN(drm, (val & CCK_FREQUENCY_STATUS) != @@ -182,12 +185,8 @@ int vlv_get_cck_clock_hpll(struct drm_device *drm, if (dev_priv->hpll_freq == 0) dev_priv->hpll_freq = vlv_get_hpll_vco(drm); - vlv_cck_get(drm); - hpll = vlv_get_cck_clock(drm, name, reg, dev_priv->hpll_freq); - vlv_cck_put(drm); - return hpll; } diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 4da94098bd3e..afc934b7f5bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1703,13 +1703,13 @@ static void vlv_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + vlv_init_gpll_ref_freq(rps); + vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT) | BIT(VLV_IOSF_SB_NC) | BIT(VLV_IOSF_SB_CCK)); - vlv_init_gpll_ref_freq(rps); - rps->max_freq = vlv_rps_max_freq(rps); rps->rp0_freq = rps->max_freq; drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", @@ -1737,13 +1737,13 @@ static void chv_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + vlv_init_gpll_ref_freq(rps); + vlv_iosf_sb_get(&i915->drm, BIT(VLV_IOSF_SB_PUNIT) | BIT(VLV_IOSF_SB_NC) | BIT(VLV_IOSF_SB_CCK)); - vlv_init_gpll_ref_freq(rps); - rps->max_freq = chv_rps_max_freq(rps); rps->rp0_freq = rps->max_freq; drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", -- cgit v1.2.3 From 01c46fcef51f980219cfcd4ee8b3f2688ffcf509 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:42 +0300 Subject: drm/i915: add vlv_clock_get_gpll() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a vlv_clock_get_gpll() helper to hide the details from the callers. 
Reviewed-by: Mika Kahola Reviewed-by: Michał Grzelak Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/2589396fa14388d7709d2b01f1d32f9f38dab11a.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 8 ++++++++ drivers/gpu/drm/i915/display/intel_display.h | 1 + drivers/gpu/drm/i915/gt/intel_rps.c | 5 +---- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index aef136a1be25..4599c2f37682 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -190,6 +190,14 @@ int vlv_get_cck_clock_hpll(struct drm_device *drm, return hpll; } +int vlv_clock_get_gpll(struct drm_device *drm) +{ + struct drm_i915_private *i915 = to_i915(drm); + + return vlv_get_cck_clock(drm, "GPLL ref", CCK_GPLL_CLOCK_CONTROL, + i915->czclk_freq); +} + void intel_update_czclk(struct intel_display *display) { struct drm_i915_private *dev_priv = to_i915(display->drm); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 37e2ab301a80..7ae899b8787a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -440,6 +440,7 @@ int vlv_get_cck_clock(struct drm_device *drm, const char *name, u32 reg, int ref_freq); int vlv_get_cck_clock_hpll(struct drm_device *drm, const char *name, u32 reg); +int vlv_clock_get_gpll(struct drm_device *drm); bool intel_has_pending_fb_unpin(struct intel_display *display); void intel_encoder_destroy(struct drm_encoder *encoder); struct drm_display_mode * diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index afc934b7f5bc..f19353f04228 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1690,10 +1690,7 @@ static void vlv_init_gpll_ref_freq(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); - rps->gpll_ref_freq = - vlv_get_cck_clock(&i915->drm, "GPLL ref", - CCK_GPLL_CLOCK_CONTROL, - i915->czclk_freq); + rps->gpll_ref_freq = vlv_clock_get_gpll(&i915->drm); drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n", rps->gpll_ref_freq); -- cgit v1.2.3 From 8c2833ff1df3e2e39a513b756cc06f2fd43d6a02 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:43 +0300 Subject: drm/i915: add vlv_clock_get_czclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add vlv_clock_get_czclk() helper to avoid looking at i915->czclk_freq directly. 
Reviewed-by: Mika Kahola Reviewed-by: Michał Grzelak Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/4885f6e486a31c773a3bfebd6936670234e57bd0.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 3 +-- drivers/gpu/drm/i915/display/intel_display.c | 20 ++++++++++++++------ drivers/gpu/drm/i915/display/intel_display.h | 1 + drivers/gpu/drm/i915/gt/intel_rc6.c | 3 ++- drivers/gpu/drm/i915/gt/intel_rps.c | 3 ++- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index bf4e975ac41c..3025951eac28 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -627,7 +627,6 @@ static void vlv_get_cdclk(struct intel_display *display, static void vlv_program_pfi_credits(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); unsigned int credits, default_credits; if (display->platform.cherryview) @@ -635,7 +634,7 @@ static void vlv_program_pfi_credits(struct intel_display *display) else default_credits = PFI_CREDIT(8); - if (display->cdclk.hw.cdclk >= dev_priv->czclk_freq) { + if (display->cdclk.hw.cdclk >= vlv_clock_get_czclk(display->drm)) { /* CHV suggested value is 31 or 63 */ if (display->platform.cherryview) credits = PFI_CREDIT_63; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4599c2f37682..eca9f2508e9d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -190,25 +190,33 @@ int vlv_get_cck_clock_hpll(struct drm_device *drm, return hpll; } -int vlv_clock_get_gpll(struct drm_device *drm) +int vlv_clock_get_czclk(struct drm_device *drm) { struct drm_i915_private *i915 = to_i915(drm); + if (!i915->czclk_freq) + i915->czclk_freq = vlv_get_cck_clock_hpll(drm, "czclk", + CCK_CZ_CLOCK_CONTROL); + + return i915->czclk_freq; +} + +int vlv_clock_get_gpll(struct drm_device *drm) +{ return vlv_get_cck_clock(drm, "GPLL ref", CCK_GPLL_CLOCK_CONTROL, - i915->czclk_freq); + vlv_clock_get_czclk(drm)); } void intel_update_czclk(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); + int czclk_freq; if (!display->platform.valleyview && !display->platform.cherryview) return; - dev_priv->czclk_freq = vlv_get_cck_clock_hpll(display->drm, "czclk", - CCK_CZ_CLOCK_CONTROL); + czclk_freq = vlv_clock_get_czclk(display->drm); - drm_dbg_kms(display->drm, "CZ clock rate: %d kHz\n", dev_priv->czclk_freq); + drm_dbg_kms(display->drm, "CZ clock rate: %d kHz\n", czclk_freq); } static bool is_hdr_mode(const struct intel_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 7ae899b8787a..811066a9e69d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -440,6 +440,7 @@ int vlv_get_cck_clock(struct drm_device *drm, const char *name, u32 reg, int ref_freq); int vlv_get_cck_clock_hpll(struct drm_device *drm, const char *name, u32 reg); +int vlv_clock_get_czclk(struct drm_device *drm); int vlv_clock_get_gpll(struct drm_device *drm); bool intel_has_pending_fb_unpin(struct intel_display *display); void intel_encoder_destroy(struct drm_encoder *encoder); diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index bf38cc5fe872..ef8b2fd2ae69 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -6,6 +6,7 @@ #include #include +#include "display/intel_display.h" #include "gem/i915_gem_region.h" #include "i915_drv.h" #include "i915_reg.h" @@ -802,7 +803,7 @@ u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, enum intel_rc6_res_type id) /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { mul = 1000000; - div = i915->czclk_freq; + div = vlv_clock_get_czclk(&i915->drm); overflow_hw = BIT_ULL(40); time_hw = vlv_residency_raw(uncore, reg); } else { diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index f19353f04228..db9cfd2b2b89 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1777,6 +1777,7 @@ static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir) { + struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); const struct intel_rps_ei *prev = &rps->ei; struct intel_rps_ei now; @@ -1793,7 +1794,7 @@ static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir) time = ktime_us_delta(now.ktime, prev->ktime); - time *= rps_to_i915(rps)->czclk_freq; + time *= vlv_clock_get_czclk(&i915->drm); /* Workload can be split between render + media, * e.g. SwapBuffers being blitted in X after being rendered in -- cgit v1.2.3 From 9c2f7992551f4addd4a6343d9b58b39ad014e6dc Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:44 +0300 Subject: drm/i915: add vlv_clock_get_hrawclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add vlv_clock_get_hrawclk() helper to hide the details from the callers. 
Reviewed-by: Michał Grzelak Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/ad3c3d0baf16eb0ef3a0ac3edfbab327c564e743.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 9 +-------- drivers/gpu/drm/i915/display/intel_display.c | 6 ++++++ drivers/gpu/drm/i915/display/intel_display.h | 1 + 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 3025951eac28..45ac378922ff 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -3561,13 +3561,6 @@ static int pch_rawclk(struct intel_display *display) return (intel_de_read(display, PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; } -static int vlv_hrawclk(struct intel_display *display) -{ - /* RAWCLK_FREQ_VLV register updated from power well code */ - return vlv_get_cck_clock_hpll(display->drm, "hrawclk", - CCK_DISPLAY_REF_CLOCK_CONTROL); -} - static int i9xx_hrawclk(struct intel_display *display) { struct drm_i915_private *i915 = to_i915(display->drm); @@ -3601,7 +3594,7 @@ u32 intel_read_rawclk(struct intel_display *display) else if (HAS_PCH_SPLIT(display)) freq = pch_rawclk(display); else if (display->platform.valleyview || display->platform.cherryview) - freq = vlv_hrawclk(display); + freq = vlv_clock_get_hrawclk(display->drm); else if (DISPLAY_VER(display) >= 3) freq = i9xx_hrawclk(display); else diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index eca9f2508e9d..487870811d3a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -190,6 +190,12 @@ int vlv_get_cck_clock_hpll(struct drm_device *drm, return hpll; } +int vlv_clock_get_hrawclk(struct drm_device *drm) +{ + /* RAWCLK_FREQ_VLV register updated from power well code */ + return vlv_get_cck_clock_hpll(drm, "hrawclk", CCK_DISPLAY_REF_CLOCK_CONTROL); +} + int vlv_clock_get_czclk(struct drm_device *drm) { struct drm_i915_private *i915 = to_i915(drm); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 811066a9e69d..dbfb4b4aee4e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -440,6 +440,7 @@ int vlv_get_cck_clock(struct drm_device *drm, const char *name, u32 reg, int ref_freq); int vlv_get_cck_clock_hpll(struct drm_device *drm, const char *name, u32 reg); +int vlv_clock_get_hrawclk(struct drm_device *drm); int vlv_clock_get_czclk(struct drm_device *drm); int vlv_clock_get_gpll(struct drm_device *drm); bool intel_has_pending_fb_unpin(struct intel_display *display); -- cgit v1.2.3 From ffbc0de5d3bca69700196fe090d45002c72f881e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:45 +0300 Subject: drm/i915: make vlv_get_cck_clock_hpll() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vlv_get_cck_clock_hpll() is no longer used outside of intel_display.c, make it static. 
Reviewed-by: Michał Grzelak Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/0a778d82e2be112b0cd37cd3329103a764967a1d.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 4 ++-- drivers/gpu/drm/i915/display/intel_display.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 487870811d3a..9ba6f439205a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -176,8 +176,8 @@ int vlv_get_cck_clock(struct drm_device *drm, return DIV_ROUND_CLOSEST(ref_freq << 1, divider + 1); } -int vlv_get_cck_clock_hpll(struct drm_device *drm, - const char *name, u32 reg) +static int vlv_get_cck_clock_hpll(struct drm_device *drm, + const char *name, u32 reg) { struct drm_i915_private *dev_priv = to_i915(drm); int hpll; diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index dbfb4b4aee4e..5c9b57e94a65 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -438,8 +438,6 @@ void i830_disable_pipe(struct intel_display *display, enum pipe pipe); int vlv_get_hpll_vco(struct drm_device *drm); int vlv_get_cck_clock(struct drm_device *drm, const char *name, u32 reg, int ref_freq); -int vlv_get_cck_clock_hpll(struct drm_device *drm, - const char *name, u32 reg); int vlv_clock_get_hrawclk(struct drm_device *drm); int vlv_clock_get_czclk(struct drm_device *drm); int vlv_clock_get_gpll(struct drm_device *drm); -- cgit v1.2.3 From d451c5bff573681a277e2b77678c38441c8ec285 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:46 +0300 Subject: drm/i915: add vlv_clock_get_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add vlv_clock_get_cdclk() helper to hide the details from the callers. For now, this means running vlv_get_hpll_vco() twice in vlv_get_cdclk(), but this will be improved later. 
v2: Rebase Reviewed-by: Michał Grzelak Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/fc93ccf998300048432d18ce7e8690bd54e1e18d.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 4 +--- drivers/gpu/drm/i915/display/intel_display.c | 6 ++++++ drivers/gpu/drm/i915/display/intel_display.h | 1 + 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 45ac378922ff..6ea44bec4da5 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -609,9 +609,7 @@ static void vlv_get_cdclk(struct intel_display *display, u32 val; cdclk_config->vco = vlv_get_hpll_vco(display->drm); - cdclk_config->cdclk = vlv_get_cck_clock(display->drm, "cdclk", - CCK_DISPLAY_CLOCK_CONTROL, - cdclk_config->vco); + cdclk_config->cdclk = vlv_clock_get_cdclk(display->drm); vlv_punit_get(display->drm); val = vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 9ba6f439205a..4a19a3ce060c 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -207,6 +207,12 @@ int vlv_clock_get_czclk(struct drm_device *drm) return i915->czclk_freq; } +int vlv_clock_get_cdclk(struct drm_device *drm) +{ + return vlv_get_cck_clock(drm, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, + vlv_get_hpll_vco(drm)); +} + int vlv_clock_get_gpll(struct drm_device *drm) { return vlv_get_cck_clock(drm, "GPLL ref", CCK_GPLL_CLOCK_CONTROL, diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 5c9b57e94a65..9fdbc4ad5391 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -440,6 +440,7 @@ int vlv_get_cck_clock(struct drm_device *drm, const char *name, u32 reg, int ref_freq); int vlv_clock_get_hrawclk(struct drm_device *drm); int vlv_clock_get_czclk(struct drm_device *drm); +int vlv_clock_get_cdclk(struct drm_device *drm); int vlv_clock_get_gpll(struct drm_device *drm); bool intel_has_pending_fb_unpin(struct intel_display *display); void intel_encoder_destroy(struct drm_encoder *encoder); -- cgit v1.2.3 From a6767dbba64cbe200e620ba72f1bc96a0a06fbc8 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:47 +0300 Subject: drm/i915: make vlv_get_cck_clock() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vlv_get_cck_clock() is no longer used outside of intel_display.c, make it static. 
Reviewed-by: Michał Grzelak Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/bac1fe98d9d458ef30e973f680342b69a6cde4d6.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 4 ++-- drivers/gpu/drm/i915/display/intel_display.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4a19a3ce060c..462771435c4b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -157,8 +157,8 @@ int vlv_get_hpll_vco(struct drm_device *drm) return vco_freq[hpll_freq] * 1000; } -int vlv_get_cck_clock(struct drm_device *drm, - const char *name, u32 reg, int ref_freq) +static int vlv_get_cck_clock(struct drm_device *drm, + const char *name, u32 reg, int ref_freq) { u32 val; int divider; diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 9fdbc4ad5391..57b06cad314b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -436,8 +436,6 @@ void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state); void i830_enable_pipe(struct intel_display *display, enum pipe pipe); void i830_disable_pipe(struct intel_display *display, enum pipe pipe); int vlv_get_hpll_vco(struct drm_device *drm); -int vlv_get_cck_clock(struct drm_device *drm, - const char *name, u32 reg, int ref_freq); int vlv_clock_get_hrawclk(struct drm_device *drm); int vlv_clock_get_czclk(struct drm_device *drm); int vlv_clock_get_cdclk(struct drm_device *drm); -- cgit v1.2.3 From f6b784c44aa3a843596bb3143f6a9da025413d09 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:48 +0300 Subject: drm/i915: rename vlv_get_hpll_vco() to vlv_clock_get_hpll_vco() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow the new vlv_clock_*() naming pattern for all the related VLV clock functions. 
v2: Rebase Reviewed-by: Michał Grzelak Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/75ac6b1cda2cb0afe3171250c4d5ba1ff81df877.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 2 +- drivers/gpu/drm/i915/display/intel_display.c | 6 +++--- drivers/gpu/drm/i915/display/intel_display.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 6ea44bec4da5..ea1e6d964764 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -608,7 +608,7 @@ static void vlv_get_cdclk(struct intel_display *display, { u32 val; - cdclk_config->vco = vlv_get_hpll_vco(display->drm); + cdclk_config->vco = vlv_clock_get_hpll_vco(display->drm); cdclk_config->cdclk = vlv_clock_get_cdclk(display->drm); vlv_punit_get(display->drm); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 462771435c4b..022f32ffd697 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -142,7 +142,7 @@ static void bdw_set_pipe_misc(struct intel_dsb *dsb, const struct intel_crtc_state *crtc_state); /* returns HPLL frequency in kHz */ -int vlv_get_hpll_vco(struct drm_device *drm) +int vlv_clock_get_hpll_vco(struct drm_device *drm) { int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; @@ -183,7 +183,7 @@ static int vlv_get_cck_clock_hpll(struct drm_device *drm, int hpll; if (dev_priv->hpll_freq == 0) - dev_priv->hpll_freq = vlv_get_hpll_vco(drm); + dev_priv->hpll_freq = vlv_clock_get_hpll_vco(drm); hpll = vlv_get_cck_clock(drm, name, reg, dev_priv->hpll_freq); @@ -210,7 +210,7 @@ int vlv_clock_get_czclk(struct drm_device *drm) int vlv_clock_get_cdclk(struct drm_device *drm) { return vlv_get_cck_clock(drm, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, - vlv_get_hpll_vco(drm)); + vlv_clock_get_hpll_vco(drm)); } int vlv_clock_get_gpll(struct drm_device *drm) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 57b06cad314b..5c406b276a76 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -435,7 +435,7 @@ void intel_enable_transcoder(const struct intel_crtc_state *new_crtc_state); void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state); void i830_enable_pipe(struct intel_display *display, enum pipe pipe); void i830_disable_pipe(struct intel_display *display, enum pipe pipe); -int vlv_get_hpll_vco(struct drm_device *drm); +int vlv_clock_get_hpll_vco(struct drm_device *drm); int vlv_clock_get_hrawclk(struct drm_device *drm); int vlv_clock_get_czclk(struct drm_device *drm); int vlv_clock_get_cdclk(struct drm_device *drm); -- cgit v1.2.3 From a6e8325b862cf7db005b565424c59e277ac55539 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:49 +0300 Subject: drm/i915: cache the results in vlv_clock_get_hpll_vco() and use it more MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use vlv_clock_get_hpll_vco() helper more to avoid looking at i915->hpll_freq directly. Cache and return the cached results to avoid repeated lookups. 
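Reduced to its core, the caching introduced here is lazy initialization keyed on a zero value (a sketch only; read_hpll_from_fuses() is a hypothetical stand-in for the real CCK fuse access):

    int vlv_clock_get_hpll_vco(struct drm_device *drm)
    {
            struct drm_i915_private *i915 = to_i915(drm);

            /* 0 doubles as "not read yet": the first call pays for the
             * sideband access, every later call returns the cached value. */
            if (!i915->hpll_freq)
                    i915->hpll_freq = read_hpll_from_fuses(drm);

            return i915->hpll_freq;
    }
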
v2: Rebase Reviewed-by: Michał Grzelak # v1 Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/14695618682d8d8fad1adc485de7a122c8e1494a.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 10 +++------- drivers/gpu/drm/i915/display/intel_display.c | 27 +++++++++++---------------- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index ea1e6d964764..e77efa0f33ed 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -563,8 +563,7 @@ static void hsw_get_cdclk(struct intel_display *display, static int vlv_calc_cdclk(struct intel_display *display, int min_cdclk) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - int freq_320 = (dev_priv->hpll_freq << 1) % 320000 != 0 ? + int freq_320 = (vlv_clock_get_hpll_vco(display->drm) << 1) % 320000 != 0 ? 333333 : 320000; /* @@ -584,8 +583,6 @@ static int vlv_calc_cdclk(struct intel_display *display, int min_cdclk) static u8 vlv_calc_voltage_level(struct intel_display *display, int cdclk) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (display->platform.valleyview) { if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ return 2; @@ -599,7 +596,7 @@ static u8 vlv_calc_voltage_level(struct intel_display *display, int cdclk) * hardware has shown that we just need to write the desired * CCK divider into the Punit register. */ - return DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; + return DIV_ROUND_CLOSEST(vlv_clock_get_hpll_vco(display->drm) << 1, cdclk) - 1; } } @@ -664,7 +661,6 @@ static void vlv_set_cdclk(struct intel_display *display, const struct intel_cdclk_config *cdclk_config, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); int cdclk = cdclk_config->cdclk; u32 val, cmd = cdclk_config->voltage_level; intel_wakeref_t wakeref; @@ -709,7 +705,7 @@ static void vlv_set_cdclk(struct intel_display *display, if (cdclk == 400000) { u32 divider; - divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, + divider = DIV_ROUND_CLOSEST(vlv_clock_get_hpll_vco(display->drm) << 1, cdclk) - 1; /* adjust cdclk divider */ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 022f32ffd697..7b5379262a37 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -144,17 +144,20 @@ static void bdw_set_pipe_misc(struct intel_dsb *dsb, /* returns HPLL frequency in kHz */ int vlv_clock_get_hpll_vco(struct drm_device *drm) { + struct drm_i915_private *i915 = to_i915(drm); int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; - vlv_cck_get(drm); - - /* Obtain SKU information */ - hpll_freq = vlv_cck_read(drm, CCK_FUSE_REG) & - CCK_FUSE_HPLL_FREQ_MASK; + if (!i915->hpll_freq) { + vlv_cck_get(drm); + /* Obtain SKU information */ + hpll_freq = vlv_cck_read(drm, CCK_FUSE_REG) & + CCK_FUSE_HPLL_FREQ_MASK; + vlv_cck_put(drm); - vlv_cck_put(drm); + i915->hpll_freq = vco_freq[hpll_freq] * 1000; + } - return vco_freq[hpll_freq] * 1000; + return i915->hpll_freq; } static int vlv_get_cck_clock(struct drm_device *drm, @@ -179,15 +182,7 @@ static int vlv_get_cck_clock(struct drm_device *drm, static int vlv_get_cck_clock_hpll(struct drm_device *drm, const char *name, u32 reg) { - struct drm_i915_private *dev_priv = to_i915(drm); - int hpll; - - if (dev_priv->hpll_freq == 0) - 
dev_priv->hpll_freq = vlv_clock_get_hpll_vco(drm); - - hpll = vlv_get_cck_clock(drm, name, reg, dev_priv->hpll_freq); - - return hpll; + return vlv_get_cck_clock(drm, name, reg, vlv_clock_get_hpll_vco(drm)); } int vlv_clock_get_hrawclk(struct drm_device *drm) -- cgit v1.2.3 From 73383c3062e895125abe76fa3cbb8febc2deb73b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:50 +0300 Subject: drm/i915: remove vlv_get_cck_clock_hpll() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function has become so trivial it's no longer necessary. Inline it at the call sites. Reviewed-by: Michał Grzelak # v1 Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/1e5ef7a14cdf42048a03719cff380fee6c3016e0.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 7b5379262a37..8f200593053e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -179,16 +179,11 @@ static int vlv_get_cck_clock(struct drm_device *drm, return DIV_ROUND_CLOSEST(ref_freq << 1, divider + 1); } -static int vlv_get_cck_clock_hpll(struct drm_device *drm, - const char *name, u32 reg) -{ - return vlv_get_cck_clock(drm, name, reg, vlv_clock_get_hpll_vco(drm)); -} - int vlv_clock_get_hrawclk(struct drm_device *drm) { /* RAWCLK_FREQ_VLV register updated from power well code */ - return vlv_get_cck_clock_hpll(drm, "hrawclk", CCK_DISPLAY_REF_CLOCK_CONTROL); + return vlv_get_cck_clock(drm, "hrawclk", CCK_DISPLAY_REF_CLOCK_CONTROL, + vlv_clock_get_hpll_vco(drm)); } int vlv_clock_get_czclk(struct drm_device *drm) @@ -196,8 +191,8 @@ int vlv_clock_get_czclk(struct drm_device *drm) struct drm_i915_private *i915 = to_i915(drm); if (!i915->czclk_freq) - i915->czclk_freq = vlv_get_cck_clock_hpll(drm, "czclk", - CCK_CZ_CLOCK_CONTROL); + i915->czclk_freq = vlv_get_cck_clock(drm, "czclk", CCK_CZ_CLOCK_CONTROL, + vlv_clock_get_hpll_vco(drm)); return i915->czclk_freq; } -- cgit v1.2.3 From e3aae3e40190dcd4a49d927ed10c798c05a5627f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:51 +0300 Subject: drm/i915: remove intel_update_czclk() as unnecessary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With vlv_clock_get_czclk() caching the result on first use, we no longer need a separate initializer. Remove intel_update_czclk() as unnecessary. Log the CZCLK in vlv_clock_get_czclk() instead. 
v2: Rebase Reviewed-by: Michał Grzelak # v1 Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/3f90b5e67258f485db09b6f48381682cbd96153f.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 16 +++------------- drivers/gpu/drm/i915/display/intel_display.h | 1 - drivers/gpu/drm/i915/display/intel_display_driver.c | 1 - 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 8f200593053e..fb1882494543 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -190,9 +190,11 @@ int vlv_clock_get_czclk(struct drm_device *drm) { struct drm_i915_private *i915 = to_i915(drm); - if (!i915->czclk_freq) + if (!i915->czclk_freq) { i915->czclk_freq = vlv_get_cck_clock(drm, "czclk", CCK_CZ_CLOCK_CONTROL, vlv_clock_get_hpll_vco(drm)); + drm_dbg_kms(drm, "CZ clock rate: %d kHz\n", i915->czclk_freq); + } return i915->czclk_freq; } @@ -209,18 +211,6 @@ int vlv_clock_get_gpll(struct drm_device *drm) vlv_clock_get_czclk(drm)); } -void intel_update_czclk(struct intel_display *display) -{ - int czclk_freq; - - if (!display->platform.valleyview && !display->platform.cherryview) - return; - - czclk_freq = vlv_clock_get_czclk(display->drm); - - drm_dbg_kms(display->drm, "CZ clock rate: %d kHz\n", czclk_freq); -} - static bool is_hdr_mode(const struct intel_crtc_state *crtc_state) { return (crtc_state->active_planes & diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 5c406b276a76..54961cb656c3 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -528,7 +528,6 @@ void intel_init_display_hooks(struct intel_display *display); void intel_setup_outputs(struct intel_display *display); int intel_initial_commit(struct intel_display *display); void intel_panel_sanitize_ssc(struct intel_display *display); -void intel_update_czclk(struct intel_display *display); enum drm_mode_status intel_mode_valid(struct drm_device *dev, const struct drm_display_mode *mode); int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index cf1c14412abe..f84a0b26b7a6 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -482,7 +482,6 @@ int intel_display_driver_probe_nogem(struct intel_display *display) intel_dpll_init(display); intel_fdi_pll_freq_update(display); - intel_update_czclk(display); intel_display_driver_init_hw(display); intel_dpll_update_ref_clks(display); -- cgit v1.2.3 From b478f2035c59fa81c4bdc0ddefbf692b2797676a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:52 +0300 Subject: drm/i915: log HPLL frequency similar to CZCLK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With vlv_clock_get_czclk() logging the CZ clock rate when first cached, do the same for HPLL VCO. 
v2: Rebase Reviewed-by: Michał Grzelak Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/bfc3082f90cf9f74aa40308e10f20da824b1db55.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index fb1882494543..b49661b4e959 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -155,6 +155,8 @@ int vlv_clock_get_hpll_vco(struct drm_device *drm) vlv_cck_put(drm); i915->hpll_freq = vco_freq[hpll_freq] * 1000; + + drm_dbg_kms(drm, "HPLL frequency: %d kHz\n", i915->hpll_freq); } return i915->hpll_freq; -- cgit v1.2.3 From 869d0e96398dca38862b3263244415a05d8a4cf1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:53 +0300 Subject: drm/i915: move hpll and czclk caching under display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Perhaps not the ideal place, but better than having to have the fields in both struct drm_i915_private and struct xe_device. v2: Rebase Reviewed-by: Michał Grzelak # v1 Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/cbca9b13f2235a624a21bf7617ffe763e25c848c.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 22 +++++++++++----------- drivers/gpu/drm/i915/display/intel_display_core.h | 5 +++++ drivers/gpu/drm/i915/i915_drv.h | 3 --- drivers/gpu/drm/xe/xe_device_types.h | 6 ------ 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b49661b4e959..02f50d0f370a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -144,22 +144,22 @@ static void bdw_set_pipe_misc(struct intel_dsb *dsb, /* returns HPLL frequency in kHz */ int vlv_clock_get_hpll_vco(struct drm_device *drm) { - struct drm_i915_private *i915 = to_i915(drm); + struct intel_display *display = to_intel_display(drm); int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; - if (!i915->hpll_freq) { + if (!display->vlv_clock.hpll_freq) { vlv_cck_get(drm); /* Obtain SKU information */ hpll_freq = vlv_cck_read(drm, CCK_FUSE_REG) & CCK_FUSE_HPLL_FREQ_MASK; vlv_cck_put(drm); - i915->hpll_freq = vco_freq[hpll_freq] * 1000; + display->vlv_clock.hpll_freq = vco_freq[hpll_freq] * 1000; - drm_dbg_kms(drm, "HPLL frequency: %d kHz\n", i915->hpll_freq); + drm_dbg_kms(drm, "HPLL frequency: %d kHz\n", display->vlv_clock.hpll_freq); } - return i915->hpll_freq; + return display->vlv_clock.hpll_freq; } static int vlv_get_cck_clock(struct drm_device *drm, @@ -190,15 +190,15 @@ int vlv_clock_get_hrawclk(struct drm_device *drm) int vlv_clock_get_czclk(struct drm_device *drm) { - struct drm_i915_private *i915 = to_i915(drm); + struct intel_display *display = to_intel_display(drm); - if (!i915->czclk_freq) { - i915->czclk_freq = vlv_get_cck_clock(drm, "czclk", CCK_CZ_CLOCK_CONTROL, - vlv_clock_get_hpll_vco(drm)); - drm_dbg_kms(drm, "CZ clock rate: %d kHz\n", i915->czclk_freq); + if (!display->vlv_clock.czclk_freq) { + display->vlv_clock.czclk_freq = vlv_get_cck_clock(drm, "czclk", CCK_CZ_CLOCK_CONTROL, + vlv_clock_get_hpll_vco(drm)); + drm_dbg_kms(drm, "CZ clock rate: %d kHz\n", display->vlv_clock.czclk_freq); } - return i915->czclk_freq; + return display->vlv_clock.czclk_freq; } int 
vlv_clock_get_cdclk(struct drm_device *drm) diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index 8c226406c5cd..791021a4e3bb 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -567,6 +567,11 @@ struct intel_display { u32 bxt_phy_grc; } state; + struct { + unsigned int hpll_freq; + unsigned int czclk_freq; + } vlv_clock; + struct { /* ordered wq for modesets */ struct workqueue_struct *modeset; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6a768aad8edd..37970d8db255 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -239,9 +239,6 @@ struct drm_i915_private { bool preserve_bios_swizzle; - unsigned int hpll_freq; - unsigned int czclk_freq; - /** * wq - Driver workqueue for GEM. * diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 092004d14db2..4353256452aa 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -617,12 +617,6 @@ struct xe_device { struct intel_uncore { spinlock_t lock; } uncore; - - /* only to allow build, not used functionally */ - struct { - unsigned int hpll_freq; - unsigned int czclk_freq; - }; #endif }; -- cgit v1.2.3 From 5615e78e813ee50609b94cbd6a376e29af01814f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 12 Sep 2025 17:48:54 +0300 Subject: drm/i915: split out vlv_clock.[ch] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the VLV clock related functions to their own file. v2: Rebase Reviewed-by: Michał Grzelak # v1 Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/0bc4a930f3e364c4fc37479f56bf07ccee854fcc.1757688216.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/display/intel_cdclk.c | 1 + drivers/gpu/drm/i915/display/intel_display.c | 74 ------------------------- drivers/gpu/drm/i915/display/intel_display.h | 5 -- drivers/gpu/drm/i915/display/vlv_clock.c | 81 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/display/vlv_clock.h | 38 +++++++++++++ drivers/gpu/drm/i915/gt/intel_rc6.c | 2 +- drivers/gpu/drm/i915/gt/intel_rps.c | 2 +- 8 files changed, 123 insertions(+), 81 deletions(-) create mode 100644 drivers/gpu/drm/i915/display/vlv_clock.c create mode 100644 drivers/gpu/drm/i915/display/vlv_clock.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e58c0c158b3a..78a45a6681df 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -300,6 +300,7 @@ i915-y += \ display/skl_scaler.o \ display/skl_universal_plane.o \ display/skl_watermark.o \ + display/vlv_clock.o \ display/vlv_sideband.o i915-$(CONFIG_ACPI) += \ display/intel_acpi.o \ diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index e77efa0f33ed..b54b1006aeb0 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -49,6 +49,7 @@ #include "intel_vdsc.h" #include "skl_watermark.h" #include "skl_watermark_regs.h" +#include "vlv_clock.h" #include "vlv_dsi.h" #include "vlv_sideband.h" diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 02f50d0f370a..a743d1339550 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -129,11 +129,9 @@ 
#include "skl_scaler.h" #include "skl_universal_plane.h" #include "skl_watermark.h" -#include "vlv_dpio_phy_regs.h" #include "vlv_dsi.h" #include "vlv_dsi_pll.h" #include "vlv_dsi_regs.h" -#include "vlv_sideband.h" static void intel_set_transcoder_timings(const struct intel_crtc_state *crtc_state); static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state); @@ -141,78 +139,6 @@ static void hsw_set_transconf(const struct intel_crtc_state *crtc_state); static void bdw_set_pipe_misc(struct intel_dsb *dsb, const struct intel_crtc_state *crtc_state); -/* returns HPLL frequency in kHz */ -int vlv_clock_get_hpll_vco(struct drm_device *drm) -{ - struct intel_display *display = to_intel_display(drm); - int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; - - if (!display->vlv_clock.hpll_freq) { - vlv_cck_get(drm); - /* Obtain SKU information */ - hpll_freq = vlv_cck_read(drm, CCK_FUSE_REG) & - CCK_FUSE_HPLL_FREQ_MASK; - vlv_cck_put(drm); - - display->vlv_clock.hpll_freq = vco_freq[hpll_freq] * 1000; - - drm_dbg_kms(drm, "HPLL frequency: %d kHz\n", display->vlv_clock.hpll_freq); - } - - return display->vlv_clock.hpll_freq; -} - -static int vlv_get_cck_clock(struct drm_device *drm, - const char *name, u32 reg, int ref_freq) -{ - u32 val; - int divider; - - vlv_cck_get(drm); - val = vlv_cck_read(drm, reg); - vlv_cck_put(drm); - - divider = val & CCK_FREQUENCY_VALUES; - - drm_WARN(drm, (val & CCK_FREQUENCY_STATUS) != - (divider << CCK_FREQUENCY_STATUS_SHIFT), - "%s change in progress\n", name); - - return DIV_ROUND_CLOSEST(ref_freq << 1, divider + 1); -} - -int vlv_clock_get_hrawclk(struct drm_device *drm) -{ - /* RAWCLK_FREQ_VLV register updated from power well code */ - return vlv_get_cck_clock(drm, "hrawclk", CCK_DISPLAY_REF_CLOCK_CONTROL, - vlv_clock_get_hpll_vco(drm)); -} - -int vlv_clock_get_czclk(struct drm_device *drm) -{ - struct intel_display *display = to_intel_display(drm); - - if (!display->vlv_clock.czclk_freq) { - display->vlv_clock.czclk_freq = vlv_get_cck_clock(drm, "czclk", CCK_CZ_CLOCK_CONTROL, - vlv_clock_get_hpll_vco(drm)); - drm_dbg_kms(drm, "CZ clock rate: %d kHz\n", display->vlv_clock.czclk_freq); - } - - return display->vlv_clock.czclk_freq; -} - -int vlv_clock_get_cdclk(struct drm_device *drm) -{ - return vlv_get_cck_clock(drm, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, - vlv_clock_get_hpll_vco(drm)); -} - -int vlv_clock_get_gpll(struct drm_device *drm) -{ - return vlv_get_cck_clock(drm, "GPLL ref", CCK_GPLL_CLOCK_CONTROL, - vlv_clock_get_czclk(drm)); -} - static bool is_hdr_mode(const struct intel_crtc_state *crtc_state) { return (crtc_state->active_planes & diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 54961cb656c3..9a9a44b61f7f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -435,11 +435,6 @@ void intel_enable_transcoder(const struct intel_crtc_state *new_crtc_state); void intel_disable_transcoder(const struct intel_crtc_state *old_crtc_state); void i830_enable_pipe(struct intel_display *display, enum pipe pipe); void i830_disable_pipe(struct intel_display *display, enum pipe pipe); -int vlv_clock_get_hpll_vco(struct drm_device *drm); -int vlv_clock_get_hrawclk(struct drm_device *drm); -int vlv_clock_get_czclk(struct drm_device *drm); -int vlv_clock_get_cdclk(struct drm_device *drm); -int vlv_clock_get_gpll(struct drm_device *drm); bool intel_has_pending_fb_unpin(struct intel_display *display); void intel_encoder_destroy(struct 
drm_encoder *encoder); struct drm_display_mode * diff --git a/drivers/gpu/drm/i915/display/vlv_clock.c b/drivers/gpu/drm/i915/display/vlv_clock.c new file mode 100644 index 000000000000..2c55083d8fdb --- /dev/null +++ b/drivers/gpu/drm/i915/display/vlv_clock.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include + +#include "intel_display_core.h" +#include "intel_display_types.h" +#include "vlv_clock.h" +#include "vlv_sideband.h" + +/* returns HPLL frequency in kHz */ +int vlv_clock_get_hpll_vco(struct drm_device *drm) +{ + struct intel_display *display = to_intel_display(drm); + int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; + + if (!display->vlv_clock.hpll_freq) { + vlv_cck_get(drm); + /* Obtain SKU information */ + hpll_freq = vlv_cck_read(drm, CCK_FUSE_REG) & + CCK_FUSE_HPLL_FREQ_MASK; + vlv_cck_put(drm); + + display->vlv_clock.hpll_freq = vco_freq[hpll_freq] * 1000; + + drm_dbg_kms(drm, "HPLL frequency: %d kHz\n", display->vlv_clock.hpll_freq); + } + + return display->vlv_clock.hpll_freq; +} + +static int vlv_get_cck_clock(struct drm_device *drm, + const char *name, u32 reg, int ref_freq) +{ + u32 val; + int divider; + + vlv_cck_get(drm); + val = vlv_cck_read(drm, reg); + vlv_cck_put(drm); + + divider = val & CCK_FREQUENCY_VALUES; + + drm_WARN(drm, (val & CCK_FREQUENCY_STATUS) != + (divider << CCK_FREQUENCY_STATUS_SHIFT), + "%s change in progress\n", name); + + return DIV_ROUND_CLOSEST(ref_freq << 1, divider + 1); +} + +int vlv_clock_get_hrawclk(struct drm_device *drm) +{ + /* RAWCLK_FREQ_VLV register updated from power well code */ + return vlv_get_cck_clock(drm, "hrawclk", CCK_DISPLAY_REF_CLOCK_CONTROL, + vlv_clock_get_hpll_vco(drm)); +} + +int vlv_clock_get_czclk(struct drm_device *drm) +{ + struct intel_display *display = to_intel_display(drm); + + if (!display->vlv_clock.czclk_freq) { + display->vlv_clock.czclk_freq = vlv_get_cck_clock(drm, "czclk", CCK_CZ_CLOCK_CONTROL, + vlv_clock_get_hpll_vco(drm)); + drm_dbg_kms(drm, "CZ clock rate: %d kHz\n", display->vlv_clock.czclk_freq); + } + + return display->vlv_clock.czclk_freq; +} + +int vlv_clock_get_cdclk(struct drm_device *drm) +{ + return vlv_get_cck_clock(drm, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, + vlv_clock_get_hpll_vco(drm)); +} + +int vlv_clock_get_gpll(struct drm_device *drm) +{ + return vlv_get_cck_clock(drm, "GPLL ref", CCK_GPLL_CLOCK_CONTROL, + vlv_clock_get_czclk(drm)); +} diff --git a/drivers/gpu/drm/i915/display/vlv_clock.h b/drivers/gpu/drm/i915/display/vlv_clock.h new file mode 100644 index 000000000000..5742ed3c628d --- /dev/null +++ b/drivers/gpu/drm/i915/display/vlv_clock.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __VLV_CLOCK_H__ +#define __VLV_CLOCK_H__ + +struct drm_device; + +#ifdef I915 +int vlv_clock_get_hpll_vco(struct drm_device *drm); +int vlv_clock_get_hrawclk(struct drm_device *drm); +int vlv_clock_get_czclk(struct drm_device *drm); +int vlv_clock_get_cdclk(struct drm_device *drm); +int vlv_clock_get_gpll(struct drm_device *drm); +#else +static inline int vlv_clock_get_hpll_vco(struct drm_device *drm) +{ + return 0; +} +static inline int vlv_clock_get_hrawclk(struct drm_device *drm) +{ + return 0; +} +static inline int vlv_clock_get_czclk(struct drm_device *drm) +{ + return 0; +} +static inline int vlv_clock_get_cdclk(struct drm_device *drm) +{ + return 0; +} +static inline int vlv_clock_get_gpll(struct drm_device *drm) +{ + return 0; +} +#endif + +#endif /* __VLV_CLOCK_H__ */ diff 
--git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index ef8b2fd2ae69..932f9f1b06b2 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -6,7 +6,7 @@ #include #include -#include "display/intel_display.h" +#include "display/vlv_clock.h" #include "gem/i915_gem_region.h" #include "i915_drv.h" #include "i915_reg.h" diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index db9cfd2b2b89..b01c837ab646 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -7,8 +7,8 @@ #include -#include "display/intel_display.h" #include "display/intel_display_rps.h" +#include "display/vlv_clock.h" #include "soc/intel_dram.h" #include "i915_drv.h" -- cgit v1.2.3 From fcf2af765c1e4f47f2fc8aa8ce7d368fc5728f46 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Fri, 12 Sep 2025 09:40:35 +0300 Subject: drm/i915/alpm: Remove error handling from get_lfps_cycle_min_max_time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The getter for LFPS cycle min/max times unnecessarily checks for a faulty port clock value. This doesn't make sense, as an erroneous port clock value would already have been noticed at this point. Remove the check and always use 140/800 ns when the port clock is >= 540000. Signed-off-by: Jouni Högander Reviewed-by: Mika Kahola Link: https://lore.kernel.org/r/20250912064035.335329-1-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_alpm.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index ed7a7ed486b5..749119cc0b28 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c @@ -58,43 +58,32 @@ static int get_silence_period_symbols(const struct intel_crtc_state *crtc_state) 1000 / 1000; } -static int get_lfps_cycle_min_max_time(const struct intel_crtc_state *crtc_state, - int *min, int *max) +static void get_lfps_cycle_min_max_time(const struct intel_crtc_state *crtc_state, + int *min, int *max) { if (crtc_state->port_clock < 540000) { *min = 65 * LFPS_CYCLE_COUNT; *max = 75 * LFPS_CYCLE_COUNT; - } else if (crtc_state->port_clock <= 810000) { + } else { *min = 140; *max = 800; - } else { - *min = *max = -1; - return -1; } - - return 0; } static int get_lfps_cycle_time(const struct intel_crtc_state *crtc_state) { - int tlfps_cycle_min, tlfps_cycle_max, ret; + int tlfps_cycle_min, tlfps_cycle_max; - ret = get_lfps_cycle_min_max_time(crtc_state, &tlfps_cycle_min, - &tlfps_cycle_max); - if (ret) - return ret; + get_lfps_cycle_min_max_time(crtc_state, &tlfps_cycle_min, + &tlfps_cycle_max); return tlfps_cycle_min + (tlfps_cycle_max - tlfps_cycle_min) / 2; } static int get_lfps_half_cycle_clocks(const struct intel_crtc_state *crtc_state) { - int lfps_cycle_time = get_lfps_cycle_time(crtc_state); - - if (lfps_cycle_time < 0) - return -1; - - return lfps_cycle_time * crtc_state->port_clock / 1000 / 1000 / (2 * LFPS_CYCLE_COUNT); + return get_lfps_cycle_time(crtc_state) * crtc_state->port_clock / 1000 / + 1000 / (2 * LFPS_CYCLE_COUNT); } /* @@ -146,8 +135,6 @@ _lnl_compute_aux_less_alpm_params(struct intel_dp *intel_dp, silence_period = get_silence_period_symbols(crtc_state); lfps_half_cycle = get_lfps_half_cycle_clocks(crtc_state); - if (lfps_half_cycle < 0) - return false; if (aux_less_wake_lines > ALPM_CTL_AUX_LESS_WAKE_TIME_MASK || silence_period > 
PORT_ALPM_CTL_SILENCE_PERIOD_MASK || -- cgit v1.2.3 From 7a356ee5cf6dad1c7d305eea261a03a925454ed6 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 17 Sep 2025 16:52:00 +0300 Subject: drm/i915: add note on VLV/CHV hpll_freq and czclk_freq caching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The caching at the initial read is a bit fragile in case, say, a further refactoring starts reading the frequencies at a time where it's not possible. Add a note about it. Suggested-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250917135200.1932903-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/vlv_clock.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/display/vlv_clock.c b/drivers/gpu/drm/i915/display/vlv_clock.c index 2c55083d8fdb..42c2837b32c1 100644 --- a/drivers/gpu/drm/i915/display/vlv_clock.c +++ b/drivers/gpu/drm/i915/display/vlv_clock.c @@ -8,6 +8,13 @@ #include "vlv_clock.h" #include "vlv_sideband.h" +/* + * FIXME: The caching of hpll_freq and czclk_freq relies on the first calls + * occurring at a time when they can actually be read. This appears to be the + * case, but is somewhat fragile. Make the initialization explicit at a point + * where they can be reliably read. + */ + /* returns HPLL frequency in kHz */ int vlv_clock_get_hpll_vco(struct drm_device *drm) { -- cgit v1.2.3 From e296a2266c572a7537e638b0dbbfc66d11df46f9 Mon Sep 17 00:00:00 2001 From: Taotao Chen Date: Fri, 22 Aug 2025 03:06:59 +0000 Subject: drm/i915: set O_LARGEFILE in __create_shmem() Without O_LARGEFILE, file->f_op->write_iter calls generic_write_check_limits(), which enforces a 2GB (MAX_NON_LFS) limit, causing -EFBIG on large writes. In shmem_pwrite(), this error is later masked as -EIO due to the error handling order, leading to igt failures like gen9_exec_parse(bb-large). Set O_LARGEFILE in __create_shmem() to prevent -EFBIG on large writes. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202508081029.343192ec-lkp@intel.com Fixes: 048832a3f400 ("drm/i915: Refactor shmem_pwrite() to use kiocb and write_iter") Signed-off-by: Taotao Chen Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250822030651.28099-1-chentaotao@didiglobal.com --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index e3d188455f67..b9dae15c1d16 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -514,6 +514,13 @@ static int __create_shmem(struct drm_i915_private *i915, if (IS_ERR(filp)) return PTR_ERR(filp); + /* + * Prevent -EFBIG by allowing large writes beyond MAX_NON_LFS on shmem + * objects by setting O_LARGEFILE. + */ + if (force_o_largefile()) + filp->f_flags |= O_LARGEFILE; + obj->filp = filp; return 0; } -- cgit v1.2.3 From 6fa6c7a50e465c32a075d3e0341bcd4f0fe0bb47 Mon Sep 17 00:00:00 2001 From: Taotao Chen Date: Fri, 22 Aug 2025 03:07:04 +0000 Subject: drm/i915: Fix incorrect error handling in shmem_pwrite() shmem_pwrite() currently checks for short writes before negative error codes, which can overwrite real errors (e.g., -EFBIG) with -EIO. Reorder the checks to return negative errors first, then handle short writes. 
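For example, a write that trips the MAX_NON_LFS limit makes ->write_iter() return -EFBIG; since -EFBIG != size, the old ordering converted that into -EIO before the negative-error check ever ran. A minimal sketch of the corrected ordering (condensed from the diff below):

	written = file->f_op->write_iter(&kiocb, &iter);
	if (written < 0)
		return written;	/* propagate the real error, e.g. -EFBIG */
	if (written != size)
		return -EIO;	/* genuine short write */
	return 0;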
Signed-off-by: Taotao Chen Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250822030651.28099-2-chentaotao@didiglobal.com --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index b9dae15c1d16..26dda55a07ff 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -441,11 +441,20 @@ shmem_pwrite(struct drm_i915_gem_object *obj, written = file->f_op->write_iter(&kiocb, &iter); BUG_ON(written == -EIOCBQUEUED); - if (written != size) - return -EIO; - + /* + * First, check if write_iter returned a negative error. + * If the write failed, return the real error code immediately. + * This prevents it from being overwritten by the short write check below. + */ if (written < 0) return written; + /* + * Check for a short write (written bytes != requested size). + * Even if some data was written, return -EIO to indicate that the + * write was not fully completed. + */ + if (written != size) + return -EIO; return 0; } -- cgit v1.2.3 From f80fb921747b46d3e65887099977e457fba7256a Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Tue, 16 Sep 2025 17:43:19 +0000 Subject: drm/i915/gvt: Remove unnecessary check in reg_is_mmio The reg >= 0 check in reg_is_mmio is unnecessary because reg is always greater than zero in all current use cases. This is obvious when checking 'offset' by itself (as offset is defined as an unsigned integer), but it's also true for the offset + bytes - 1 use case in intel_vgpu_emulate_mmio_read because bytes > 0. Signed-off-by: Jonathan Cavitt Reviewed-by: Andi Shyti Reviewed-by: Zhenyu Wang Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250916174317.76521-5-jonathan.cavitt@intel.com --- drivers/gpu/drm/i915/gvt/mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index da1135fa7cda..c60d1e548800 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -58,7 +58,7 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) } #define reg_is_mmio(gvt, reg) \ - (reg >= 0 && reg < gvt->device_info.mmio_size) + (reg < gvt->device_info.mmio_size) #define reg_is_gtt(gvt, reg) \ (reg >= gvt->device_info.gtt_start_offset \ -- cgit v1.2.3 From 96e556ef5ced51eec140db4bd89204c9322959cc Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Tue, 16 Sep 2025 17:43:20 +0000 Subject: drm/i915/gvt: Fix intel_vgpu_gpa_to_mmio_offset kernel docs intel_vgpu_gpa_to_mmio_offset states that it returns 'Zero on success, negative error code if failed' in the kernel docs. This is false. The function actually returns 'The MMIO offset of the given GPA'. Correct the docs. 
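To spell out the implication for callers, a hypothetical sketch (illustrative only, not taken from the driver): the return value is data, not a status, so it should be range-checked rather than tested for zero/negative:

	offset = intel_vgpu_gpa_to_mmio_offset(vgpu, gpa);
	/* any offset inside the MMIO window is a valid result */
	if (!reg_is_mmio(gvt, offset + bytes - 1))
		return -EINVAL;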
Signed-off-by: Jonathan Cavitt Reviewed-by: Andi Shyti Reviewed-by: Zhenyu Wang Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250916174317.76521-6-jonathan.cavitt@intel.com --- drivers/gpu/drm/i915/gvt/mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index c60d1e548800..db5cd65100fe 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -49,7 +49,7 @@ * @gpa: guest physical address * * Returns: - * Zero on success, negative error code if failed + * The MMIO offset of the given GPA */ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) { -- cgit v1.2.3 From 089b5773c219b9bd5e3c7ac72d6ad933dd050f4d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 12 Sep 2025 16:59:26 +0300 Subject: drm/i915: Defeature DRRS on LNL+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DRRS has been defeatured on LNL+. Adjust HAS_DOUBLE_BUFFERED_M_N() to match. Note that the M/N registers still appear to be double buffered under the hood but the double buffer update point is now documented to be just the last register write to the M/N registers, so it no longer happens synchronously with the vblank/MSA transmission. We should perhaps rename HAS_DOUBLE_BUFFERED_M_N() to more accurately reflect reality, but couldn't come up with a decent name right now... Bspec: 68917 HSD: 14016007525 Cc: Ankit Nautiyal Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250912135926.18910-1-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_display_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index f329f1beafef..1f091fbcd0ec 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -155,7 +155,7 @@ struct intel_display_platforms { #define HAS_DISPLAY(__display) (DISPLAY_RUNTIME_INFO(__display)->pipe_mask != 0) #define HAS_DMC(__display) (DISPLAY_RUNTIME_INFO(__display)->has_dmc) #define HAS_DMC_WAKELOCK(__display) (DISPLAY_VER(__display) >= 20) -#define HAS_DOUBLE_BUFFERED_M_N(__display) (DISPLAY_VER(__display) >= 9 || (__display)->platform.broadwell) +#define HAS_DOUBLE_BUFFERED_M_N(__display) (IS_DISPLAY_VER((__display), 9, 14) || (__display)->platform.broadwell) #define HAS_DOUBLE_BUFFERED_LUT(__display) (DISPLAY_VER(__display) >= 30) #define HAS_DOUBLE_WIDE(__display) (DISPLAY_VER(__display) < 4) #define HAS_DP20(__display) ((__display)->platform.dg2 || DISPLAY_VER(__display) >= 14) -- cgit v1.2.3 From a0f8dd08a55b6a5c456c0124dc9a978aa65b651c Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 17 Sep 2025 23:34:42 +0300 Subject: drm/i915/vrr: Extract helpers to convert between guardband and pipeline_full values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'd like to move towards a world where we can more or less pretend that the ICL/TGL VRR hardware works the same way as ADL+. To that end, extract some helpers to convert between the guardband and pipeline_full representations. 
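The two representations differ only by framestart_delay plus the one extra scanline the hardware imposes; a quick sketch of the relation, with framestart_delay == 1 picked purely as an example:

	/* pipeline_full = guardband - framestart_delay - 1 */
	/* guardband = pipeline_full + framestart_delay + 1 */
	/* e.g. framestart_delay == 1: guardband 12 <-> pipeline_full 10 */

The two helpers are exact inverses of each other, which later patches rely on when converting back and forth.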
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250917203446.14374-2-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 3eed37f271b0..5fee85b0bc99 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -119,6 +119,20 @@ static int intel_vrr_vmin_flipline(const struct intel_crtc_state *crtc_state) return crtc_state->vrr.vmin + intel_vrr_flipline_offset(display); } +static int intel_vrr_guardband_to_pipeline_full(const struct intel_crtc_state *crtc_state, + int guardband) +{ + /* hardware imposes one extra scanline somewhere */ + return guardband - crtc_state->framestart_delay - 1; +} + +static int intel_vrr_pipeline_full_to_guardband(const struct intel_crtc_state *crtc_state, + int pipeline_full) +{ + /* hardware imposes one extra scanline somewhere */ + return pipeline_full + crtc_state->framestart_delay + 1; +} + /* * Without VRR registers get latched at: * vblank_start @@ -142,8 +156,8 @@ static int intel_vrr_vblank_exit_length(const struct intel_crtc_state *crtc_stat if (DISPLAY_VER(display) >= 13) return crtc_state->vrr.guardband; else - /* hardware imposes one extra scanline somewhere */ - return crtc_state->vrr.pipeline_full + crtc_state->framestart_delay + 1; + return intel_vrr_pipeline_full_to_guardband(crtc_state, + crtc_state->vrr.pipeline_full); } int intel_vrr_vmin_vtotal(const struct intel_crtc_state *crtc_state) @@ -417,18 +431,18 @@ void intel_vrr_compute_config_late(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + int guardband; if (!intel_vrr_possible(crtc_state)) return; + guardband = crtc_state->vrr.vmin - adjusted_mode->crtc_vblank_start; + if (DISPLAY_VER(display) >= 13) { - crtc_state->vrr.guardband = - crtc_state->vrr.vmin - adjusted_mode->crtc_vblank_start; + crtc_state->vrr.guardband = guardband; } else { - /* hardware imposes one extra scanline somewhere */ crtc_state->vrr.pipeline_full = - min(255, crtc_state->vrr.vmin - adjusted_mode->crtc_vblank_start - - crtc_state->framestart_delay - 1); + min(255, intel_vrr_guardband_to_pipeline_full(crtc_state, guardband)); /* * vmin/vmax/flipline also need to be adjusted by -- cgit v1.2.3 From 6559ca4a42d7976497fcf49e4c4fad9639fc1f76 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 17 Sep 2025 23:34:43 +0300 Subject: drm/i915/vrr: Readout framestart_delay earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to pretend that ICL/TGL VRR hardware has a similar guardband as on ADL+ we'll need access to framestart_delay already during intel_vrr_get_config(). Hoist the framestart_delay to an earlier point to make that possible. 
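The resulting readout order in hsw_get_pipe_config() is then, roughly:

	/* sketch of the new ordering */
	/* 1. framestart_delay from CHICKEN_TRANS (DSI: assume 1) */
	/* 2. intel_get_transcoder_timings() and the rest of the readout,
	 *    so intel_vrr_get_config() can consume framestart_delay
	 */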
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250917203446.14374-3-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_display.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a743d1339550..c7d85fd38890 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3891,6 +3891,15 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc, intel_joiner_get_config(pipe_config); intel_dsc_get_config(pipe_config); + if (!transcoder_is_dsi(pipe_config->cpu_transcoder)) { + tmp = intel_de_read(display, CHICKEN_TRANS(display, pipe_config->cpu_transcoder)); + + pipe_config->framestart_delay = REG_FIELD_GET(HSW_FRAME_START_DELAY_MASK, tmp) + 1; + } else { + /* no idea if this is correct */ + pipe_config->framestart_delay = 1; + } + if (!transcoder_is_dsi(pipe_config->cpu_transcoder) || DISPLAY_VER(display) >= 11) intel_get_transcoder_timings(crtc, pipe_config); @@ -3942,15 +3951,6 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc, pipe_config->pixel_multiplier = 1; } - if (!transcoder_is_dsi(pipe_config->cpu_transcoder)) { - tmp = intel_de_read(display, CHICKEN_TRANS(display, pipe_config->cpu_transcoder)); - - pipe_config->framestart_delay = REG_FIELD_GET(HSW_FRAME_START_DELAY_MASK, tmp) + 1; - } else { - /* no idea if this is correct */ - pipe_config->framestart_delay = 1; - } - out: intel_display_power_put_all_in_set(display, &crtc->hw_readout_power_domains); -- cgit v1.2.3 From 291ddb993ac967a519cdb5394fd06871b9fa74bf Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 17 Sep 2025 23:34:44 +0300 Subject: drm/i915/vrr: Store guardband in crtc state even for icl/tgl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While ICL/TGL VRR hardware doesn't have a register for the guardband value, our lives will be simpler if we pretend that it does. Start by computing the guardband the same as on ADL+ and storing it in the state, and only then convert it into the corresponding pipeline_full value that the hardware can consume. During readout we do the opposite. I was debating whether to completely remove pipeline_full from the crtc state, but decided to keep it for now. Mainly because we check it in vrr_params_changed() and simply checking the guardband instead isn't 100% equivalent; theoretically, framestart_delay may have changed in the opposite direction to pipeline_full, keeping the derived guardband value unchanged. One solution would be to also check framestart_delay, but that feels a bit leaky abstraction-wise. Also note that we don't currently handle the maximum limit of 255 scanlines for the pipeline_full in a very nice way. The actual position of the delayed vblank will move because of that clamping, and so some of our code may get confused. But fixing this shall wait for now. 
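To put hypothetical numbers on that limit, assuming framestart_delay == 1:

	/* max representable guardband:
	 * intel_vrr_pipeline_full_to_guardband(crtc_state, 255) == 257,
	 * so e.g. a computed guardband of 300 is silently clamped to 257
	 * and the delayed vblank moves by 43 scanlines
	 */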
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250917203446.14374-4-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_display.c | 1 + drivers/gpu/drm/i915/display/intel_vrr.c | 36 +++++++++++++++------------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c7d85fd38890..f4124c79bc83 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3891,6 +3891,7 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc, intel_joiner_get_config(pipe_config); intel_dsc_get_config(pipe_config); + /* intel_vrr_get_config() depends on .framestart_delay */ if (!transcoder_is_dsi(pipe_config->cpu_transcoder)) { tmp = intel_de_read(display, CHICKEN_TRANS(display, pipe_config->cpu_transcoder)); diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 5fee85b0bc99..9cdcc2558ead 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -151,13 +151,7 @@ static int intel_vrr_pipeline_full_to_guardband(const struct intel_crtc_state *c */ static int intel_vrr_vblank_exit_length(const struct intel_crtc_state *crtc_state) { - struct intel_display *display = to_intel_display(crtc_state); - - if (DISPLAY_VER(display) >= 13) - return crtc_state->vrr.guardband; - else - return intel_vrr_pipeline_full_to_guardband(crtc_state, - crtc_state->vrr.pipeline_full); + return crtc_state->vrr.guardband; } int intel_vrr_vmin_vtotal(const struct intel_crtc_state *crtc_state) @@ -431,18 +425,22 @@ void intel_vrr_compute_config_late(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - int guardband; if (!intel_vrr_possible(crtc_state)) return; - guardband = crtc_state->vrr.vmin - adjusted_mode->crtc_vblank_start; + crtc_state->vrr.guardband = + crtc_state->vrr.vmin - adjusted_mode->crtc_vblank_start; + + if (DISPLAY_VER(display) < 13) { + /* FIXME handle the limit in a proper way */ + crtc_state->vrr.guardband = + min(crtc_state->vrr.guardband, + intel_vrr_pipeline_full_to_guardband(crtc_state, 255)); - if (DISPLAY_VER(display) >= 13) { - crtc_state->vrr.guardband = guardband; - } else { crtc_state->vrr.pipeline_full = - min(255, intel_vrr_guardband_to_pipeline_full(crtc_state, guardband)); + intel_vrr_guardband_to_pipeline_full(crtc_state, + crtc_state->vrr.guardband); /* * vmin/vmax/flipline also need to be adjusted by @@ -734,14 +732,20 @@ void intel_vrr_get_config(struct intel_crtc_state *crtc_state) TRANS_CMRR_M_HI(display, cpu_transcoder)); } - if (DISPLAY_VER(display) >= 13) + if (DISPLAY_VER(display) >= 13) { crtc_state->vrr.guardband = REG_FIELD_GET(XELPD_VRR_CTL_VRR_GUARDBAND_MASK, trans_vrr_ctl); - else - if (trans_vrr_ctl & VRR_CTL_PIPELINE_FULL_OVERRIDE) + } else { + if (trans_vrr_ctl & VRR_CTL_PIPELINE_FULL_OVERRIDE) { crtc_state->vrr.pipeline_full = REG_FIELD_GET(VRR_CTL_PIPELINE_FULL_MASK, trans_vrr_ctl); + crtc_state->vrr.guardband = + intel_vrr_pipeline_full_to_guardband(crtc_state, + crtc_state->vrr.pipeline_full); + } + } + if (trans_vrr_ctl & VRR_CTL_FLIP_LINE_EN) { crtc_state->vrr.flipline = intel_de_read(display, TRANS_VRR_FLIPLINE(display, cpu_transcoder)) + 1; -- cgit v1.2.3 From 1e2266dc68ae3d8c5017f07d2002a2ba79549f01 Mon Sep 17 00:00:00 
2001 From: Ville Syrjälä Date: Wed, 17 Sep 2025 23:34:45 +0300 Subject: drm/i915/vrr: Annotate some functions with "hw" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_vrr_fixed_rr_*() return values that have had the TGL SCL adjustment applied to them. So we should indicate that these values are only really useful when fed to the hardware. Add a "_hw_" indicator to the function names to reflect that fact. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250917203446.14374-5-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 9cdcc2558ead..71fb64c92165 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -263,7 +263,7 @@ void intel_vrr_compute_vrr_timings(struct intel_crtc_state *crtc_state) * Vtotal value. */ static -int intel_vrr_fixed_rr_vtotal(const struct intel_crtc_state *crtc_state) +int intel_vrr_fixed_rr_hw_vtotal(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); int crtc_vtotal = crtc_state->hw.adjusted_mode.crtc_vtotal; @@ -276,24 +276,24 @@ int intel_vrr_fixed_rr_vtotal(const struct intel_crtc_state *crtc_state) } static -int intel_vrr_fixed_rr_vmax(const struct intel_crtc_state *crtc_state) +int intel_vrr_fixed_rr_hw_vmax(const struct intel_crtc_state *crtc_state) { - return intel_vrr_fixed_rr_vtotal(crtc_state); + return intel_vrr_fixed_rr_hw_vtotal(crtc_state); } static -int intel_vrr_fixed_rr_vmin(const struct intel_crtc_state *crtc_state) +int intel_vrr_fixed_rr_hw_vmin(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - return intel_vrr_fixed_rr_vtotal(crtc_state) - + return intel_vrr_fixed_rr_hw_vtotal(crtc_state) - intel_vrr_flipline_offset(display); } static -int intel_vrr_fixed_rr_flipline(const struct intel_crtc_state *crtc_state) +int intel_vrr_fixed_rr_hw_flipline(const struct intel_crtc_state *crtc_state) { - return intel_vrr_fixed_rr_vtotal(crtc_state); + return intel_vrr_fixed_rr_hw_vtotal(crtc_state); } void intel_vrr_set_fixed_rr_timings(const struct intel_crtc_state *crtc_state) @@ -305,11 +305,11 @@ void intel_vrr_set_fixed_rr_timings(const struct intel_crtc_state *crtc_state) return; intel_de_write(display, TRANS_VRR_VMIN(display, cpu_transcoder), - intel_vrr_fixed_rr_vmin(crtc_state) - 1); + intel_vrr_fixed_rr_hw_vmin(crtc_state) - 1); intel_de_write(display, TRANS_VRR_VMAX(display, cpu_transcoder), - intel_vrr_fixed_rr_vmax(crtc_state) - 1); + intel_vrr_fixed_rr_hw_vmax(crtc_state) - 1); intel_de_write(display, TRANS_VRR_FLIPLINE(display, cpu_transcoder), - intel_vrr_fixed_rr_flipline(crtc_state) - 1); + intel_vrr_fixed_rr_hw_flipline(crtc_state) - 1); } static -- cgit v1.2.3 From a58b9e3d6e6777fc52040fcd93a46d107f87c7ec Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 17 Sep 2025 23:34:46 +0300 Subject: drm/i915/vrr: Move the TGL SCL mangling of vmin/vmax/flipline deeper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently our crtc_state->vrr.{vmin.vmax,flipline} are mangled on TGL to account for the SCL delay (the hardware requires this mangling or the actual vtotals will become incorrect). 
Unfortunately this means that one can't simply use these values directly in many places, and instead we always have to go through functions that undo the damage first. This is all rather fragile. Simplify our lives a bit by hiding this mangling deeper inside the low-level VRR code, leaving the numbers stored in the crtc state something that humans can actually use. This does introduce a dependency, as intel_vrr_get_config() will now need to know the SCL value, which is read out in intel_get_transcoder_timings(). I suppose we could simply duplicate the SCL readout in both places should this become a real hindrance. For now just leave a note around the intel_get_transcoder_timings() call to remind us. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250917203446.14374-6-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_display.c | 4 ++ drivers/gpu/drm/i915/display/intel_vrr.c | 76 ++++++++++++++++------------ 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index f4124c79bc83..18b9baa96241 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3901,6 +3901,10 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc, pipe_config->framestart_delay = 1; } + /* + * intel_vrr_get_config() depends on TRANS_SET_CONTEXT_LATENCY + * readout done by intel_get_transcoder_timings(). + */ if (!transcoder_is_dsi(pipe_config->cpu_transcoder) || DISPLAY_VER(display) >= 11) intel_get_transcoder_timings(crtc, pipe_config); diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 71fb64c92165..71a985d00604 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -156,25 +156,13 @@ static int intel_vrr_vblank_exit_length(const struct intel_crtc_stat int intel_vrr_vmin_vtotal(const struct intel_crtc_state *crtc_state) { - struct intel_display *display = to_intel_display(crtc_state); - /* Min vblank actually determined by flipline */ - if (DISPLAY_VER(display) >= 13) - return intel_vrr_vmin_flipline(crtc_state); - else - return intel_vrr_vmin_flipline(crtc_state) + - intel_vrr_real_vblank_delay(crtc_state); + return intel_vrr_vmin_flipline(crtc_state); } int intel_vrr_vmax_vtotal(const struct intel_crtc_state *crtc_state) { - struct intel_display *display = to_intel_display(crtc_state); - - if (DISPLAY_VER(display) >= 13) - return crtc_state->vrr.vmax; - else - return crtc_state->vrr.vmax + - intel_vrr_real_vblank_delay(crtc_state); + return crtc_state->vrr.vmax; } int intel_vrr_vmin_vblank_start(const struct intel_crtc_state *crtc_state) @@ -258,6 +246,21 @@ void intel_vrr_compute_vrr_timings(struct intel_crtc_state *crtc_state) crtc_state->mode_flags |= I915_MODE_FLAG_VRR; } +static int intel_vrr_hw_value(const struct intel_crtc_state *crtc_state, + int value) +{ + struct intel_display *display = to_intel_display(crtc_state); + + /* + * On TGL vmin/vmax/flipline also need to be + * adjusted by the SCL to maintain correct vtotals. + */ + if (DISPLAY_VER(display) >= 13) + return value; + else + return value - intel_vrr_real_vblank_delay(crtc_state); +} + /* * For fixed refresh rate mode Vmin, Vmax and Flipline all are set to * Vtotal value. 
@@ -265,14 +268,7 @@ void intel_vrr_compute_vrr_timings(struct intel_crtc_state *crtc_state) static int intel_vrr_fixed_rr_hw_vtotal(const struct intel_crtc_state *crtc_state) { - struct intel_display *display = to_intel_display(crtc_state); - int crtc_vtotal = crtc_state->hw.adjusted_mode.crtc_vtotal; - - if (DISPLAY_VER(display) >= 13) - return crtc_vtotal; - else - return crtc_vtotal - - intel_vrr_real_vblank_delay(crtc_state); + return intel_vrr_hw_value(crtc_state, crtc_state->hw.adjusted_mode.crtc_vtotal); } static @@ -441,14 +437,6 @@ void intel_vrr_compute_config_late(struct intel_crtc_state *crtc_state) crtc_state->vrr.pipeline_full = intel_vrr_guardband_to_pipeline_full(crtc_state, crtc_state->vrr.guardband); - - /* - * vmin/vmax/flipline also need to be adjusted by - * the vblank delay to maintain correct vtotals. - */ - crtc_state->vrr.vmin -= intel_vrr_real_vblank_delay(crtc_state); - crtc_state->vrr.vmax -= intel_vrr_real_vblank_delay(crtc_state); - crtc_state->vrr.flipline -= intel_vrr_real_vblank_delay(crtc_state); } } @@ -607,6 +595,21 @@ void intel_vrr_set_db_point_and_transmission_line(const struct intel_crtc_state EMP_AS_SDP_DB_TL(crtc_state->vrr.vsync_start)); } +static int intel_vrr_hw_vmin(const struct intel_crtc_state *crtc_state) +{ + return intel_vrr_hw_value(crtc_state, crtc_state->vrr.vmin); +} + +static int intel_vrr_hw_vmax(const struct intel_crtc_state *crtc_state) +{ + return intel_vrr_hw_value(crtc_state, crtc_state->vrr.vmax); +} + +static int intel_vrr_hw_flipline(const struct intel_crtc_state *crtc_state) +{ + return intel_vrr_hw_value(crtc_state, crtc_state->vrr.flipline); +} + void intel_vrr_enable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); @@ -616,11 +619,11 @@ void intel_vrr_enable(const struct intel_crtc_state *crtc_state) return; intel_de_write(display, TRANS_VRR_VMIN(display, cpu_transcoder), - crtc_state->vrr.vmin - 1); + intel_vrr_hw_vmin(crtc_state) - 1); intel_de_write(display, TRANS_VRR_VMAX(display, cpu_transcoder), - crtc_state->vrr.vmax - 1); + intel_vrr_hw_vmax(crtc_state) - 1); intel_de_write(display, TRANS_VRR_FLIPLINE(display, cpu_transcoder), - crtc_state->vrr.flipline - 1); + intel_vrr_hw_flipline(crtc_state) - 1); intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), TRANS_PUSH_EN); @@ -754,6 +757,13 @@ void intel_vrr_get_config(struct intel_crtc_state *crtc_state) crtc_state->vrr.vmin = intel_de_read(display, TRANS_VRR_VMIN(display, cpu_transcoder)) + 1; + if (DISPLAY_VER(display) < 13) { + /* undo what intel_vrr_hw_value() does when writing the values */ + crtc_state->vrr.flipline += intel_vrr_real_vblank_delay(crtc_state); + crtc_state->vrr.vmax += intel_vrr_real_vblank_delay(crtc_state); + crtc_state->vrr.vmin += intel_vrr_real_vblank_delay(crtc_state); + } + /* * For platforms that always use VRR Timing Generator, the VTOTAL.Vtotal * bits are not filled. Since for these platforms TRAN_VMIN is always -- cgit v1.2.3 From 4a36b339a14ae6f2a366125e3d64f0c165193293 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 11:40:51 +0300 Subject: drm/xe/fbdev: use the same 64-byte stride alignment as i915 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For reasons unknown, xe uses XE_PAGE_SIZE alignment for stride. Presumably it's just a confusion between stride alignment and bo allocation size alignment. Switch to 64 byte alignment to, uh, align with i915. 
This will also be helpful in deduplicating and unifying the xe and i915 framebuffer allocation. Link: https://lore.kernel.org/r/aLqsC87Ol_zCXOkN@intel.com Suggested-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/7f4972104de8b179d5724ae83892ee294d3f3fd3.1758184771.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index d96ba2b51065..7dd581da7b79 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -33,7 +33,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, mode_cmd.height = sizes->surface_height; mode_cmd.pitches[0] = ALIGN(mode_cmd.width * - DIV_ROUND_UP(sizes->surface_bpp, 8), XE_PAGE_SIZE); + DIV_ROUND_UP(sizes->surface_bpp, 8), 64); mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); -- cgit v1.2.3 From 6979d2c80c2a5b1f04157c4d6eb038bb32861cfa Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 11:40:52 +0300 Subject: drm/i915/fbdev: make intel_framebuffer_create() error return handling explicit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's sketchy to pass error pointers via to_intel_framebuffer(). It probably works as long as struct intel_framebuffer embeds struct drm_framebuffer at offset 0, but be explicit about it. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/17631db227d527d6c67f5d6b67adec1ff8dc6f8d.1758184771.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index 210aee9ae88b..b9dfd00a7d05 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -67,9 +67,16 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, mode_cmd.pixel_format, mode_cmd.modifier[0]), &mode_cmd); + if (IS_ERR(fb)) { + i915_gem_object_put(obj); + goto err; + } + i915_gem_object_put(obj); return to_intel_framebuffer(fb); +err: + return ERR_CAST(fb); } int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, -- cgit v1.2.3 From 9e5cf822a207ee8c9856024c047abaccb4d185e5 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 11:40:53 +0300 Subject: drm/{i915, xe}/fbdev: pass struct drm_device to intel_fbdev_fb_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function doesn't actually need struct drm_fb_helper for anything, just pass struct drm_device. 
Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/16360584f80cdc5ee35fd94cfd92fd3955588dfd.1758184771.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 10 +++++----- drivers/gpu/drm/i915/display/intel_fbdev_fb.h | 4 ++-- drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 7 +++---- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 7c4709d58aa3..46c6de5f6088 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -237,7 +237,7 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, if (!fb || drm_WARN_ON(display->drm, !intel_fb_bo(&fb->base))) { drm_dbg_kms(display->drm, "no BIOS fb, allocating a new one\n"); - fb = intel_fbdev_fb_alloc(helper, sizes); + fb = intel_fbdev_fb_alloc(display->drm, sizes); if (IS_ERR(fb)) return PTR_ERR(fb); } else { diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index b9dfd00a7d05..4de13d1a4c7a 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -13,11 +13,11 @@ #include "intel_fb.h" #include "intel_fbdev_fb.h" -struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, +struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, struct drm_fb_helper_surface_size *sizes) { - struct intel_display *display = to_intel_display(helper->dev); - struct drm_i915_private *dev_priv = to_i915(display->drm); + struct intel_display *display = to_intel_display(drm); + struct drm_i915_private *dev_priv = to_i915(drm); struct drm_framebuffer *fb; struct drm_mode_fb_cmd2 mode_cmd = {}; struct drm_i915_gem_object *obj; @@ -58,12 +58,12 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, } if (IS_ERR(obj)) { - drm_err(display->drm, "failed to allocate framebuffer (%pe)\n", obj); + drm_err(drm, "failed to allocate framebuffer (%pe)\n", obj); return ERR_PTR(-ENOMEM); } fb = intel_framebuffer_create(intel_bo_to_drm_bo(obj), - drm_get_format_info(display->drm, + drm_get_format_info(drm, mode_cmd.pixel_format, mode_cmd.modifier[0]), &mode_cmd); diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h index cb7957272715..668ae355f5e5 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h @@ -6,14 +6,14 @@ #ifndef __INTEL_FBDEV_FB_H__ #define __INTEL_FBDEV_FB_H__ -struct drm_fb_helper; +struct drm_device; struct drm_fb_helper_surface_size; struct drm_gem_object; struct fb_info; struct i915_vma; struct intel_display; -struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, +struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, struct drm_fb_helper_surface_size *sizes); int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, struct drm_gem_object *obj, struct i915_vma *vma); diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 7dd581da7b79..b9e855c54304 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -15,12 +15,11 @@ #include -struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, +struct intel_framebuffer *intel_fbdev_fb_alloc(struct 
drm_device *drm, struct drm_fb_helper_surface_size *sizes) { struct drm_framebuffer *fb; - struct drm_device *dev = helper->dev; - struct xe_device *xe = to_xe_device(dev); + struct xe_device *xe = to_xe_device(drm); struct drm_mode_fb_cmd2 mode_cmd = {}; struct xe_bo *obj; int size; @@ -67,7 +66,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, } fb = intel_framebuffer_create(&obj->ttm.base, - drm_get_format_info(dev, + drm_get_format_info(drm, mode_cmd.pixel_format, mode_cmd.modifier[0]), &mode_cmd); -- cgit v1.2.3 From f9ff39f940f5ddd1d4ffcff602de7206aa1ff05d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 11:40:54 +0300 Subject: drm/{i915, xe}/fbdev: deduplicate struct drm_mode_fb_cmd2 init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull struct drm_mode_fb_cmd2 initialization out of the driver dependent code into shared display code. v2: Rebase on xe stride alignment change Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/e922e47bfd39f9c5777f869ff23c23309ebbb380.1758184771.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbdev.c | 32 ++++++++++++++++++++++++++- drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 25 +++++---------------- drivers/gpu/drm/i915/display/intel_fbdev_fb.h | 4 ++-- drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 25 +++++---------------- 4 files changed, 45 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 46c6de5f6088..e46c08762b84 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -207,6 +207,35 @@ static const struct drm_fb_helper_funcs intel_fb_helper_funcs = { .fb_set_suspend = intelfb_set_suspend, }; +static void intel_fbdev_fill_mode_cmd(struct drm_fb_helper_surface_size *sizes, + struct drm_mode_fb_cmd2 *mode_cmd) +{ + /* we don't do packed 24bpp */ + if (sizes->surface_bpp == 24) + sizes->surface_bpp = 32; + + mode_cmd->width = sizes->surface_width; + mode_cmd->height = sizes->surface_height; + + mode_cmd->pitches[0] = ALIGN(mode_cmd->width * DIV_ROUND_UP(sizes->surface_bpp, 8), 64); + mode_cmd->pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, + sizes->surface_depth); +} + +static struct intel_framebuffer * +__intel_fbdev_fb_alloc(struct intel_display *display, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_mode_fb_cmd2 mode_cmd = {}; + struct intel_framebuffer *fb; + + intel_fbdev_fill_mode_cmd(sizes, &mode_cmd); + + fb = intel_fbdev_fb_alloc(display->drm, &mode_cmd); + + return fb; +} + int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) { @@ -237,7 +266,8 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, if (!fb || drm_WARN_ON(display->drm, !intel_fb_bo(&fb->base))) { drm_dbg_kms(display->drm, "no BIOS fb, allocating a new one\n"); - fb = intel_fbdev_fb_alloc(display->drm, sizes); + + fb = __intel_fbdev_fb_alloc(display, sizes); if (IS_ERR(fb)) return PTR_ERR(fb); } else { diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index 4de13d1a4c7a..685612e6afc5 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -3,7 +3,7 @@ * Copyright © 2023 Intel Corporation */ -#include +#include #include "gem/i915_gem_lmem.h" @@ -14,28 +14,15 @@ #include 
"intel_fbdev_fb.h" struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, - struct drm_fb_helper_surface_size *sizes) + struct drm_mode_fb_cmd2 *mode_cmd) { struct intel_display *display = to_intel_display(drm); struct drm_i915_private *dev_priv = to_i915(drm); struct drm_framebuffer *fb; - struct drm_mode_fb_cmd2 mode_cmd = {}; struct drm_i915_gem_object *obj; int size; - /* we don't do packed 24bpp */ - if (sizes->surface_bpp == 24) - sizes->surface_bpp = 32; - - mode_cmd.width = sizes->surface_width; - mode_cmd.height = sizes->surface_height; - - mode_cmd.pitches[0] = ALIGN(mode_cmd.width * - DIV_ROUND_UP(sizes->surface_bpp, 8), 64); - mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, - sizes->surface_depth); - - size = mode_cmd.pitches[0] * mode_cmd.height; + size = mode_cmd->pitches[0] * mode_cmd->height; size = PAGE_ALIGN(size); obj = ERR_PTR(-ENODEV); @@ -64,9 +51,9 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, fb = intel_framebuffer_create(intel_bo_to_drm_bo(obj), drm_get_format_info(drm, - mode_cmd.pixel_format, - mode_cmd.modifier[0]), - &mode_cmd); + mode_cmd->pixel_format, + mode_cmd->modifier[0]), + mode_cmd); if (IS_ERR(fb)) { i915_gem_object_put(obj); goto err; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h index 668ae355f5e5..83454ffbf79c 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h @@ -7,14 +7,14 @@ #define __INTEL_FBDEV_FB_H__ struct drm_device; -struct drm_fb_helper_surface_size; struct drm_gem_object; +struct drm_mode_fb_cmd2; struct fb_info; struct i915_vma; struct intel_display; struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, - struct drm_fb_helper_surface_size *sizes); + struct drm_mode_fb_cmd2 *mode_cmd); int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, struct drm_gem_object *obj, struct i915_vma *vma); diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index b9e855c54304..9c9fb34b3407 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -3,7 +3,7 @@ * Copyright © 2023 Intel Corporation */ -#include +#include #include "intel_display_core.h" #include "intel_display_types.h" @@ -16,27 +16,14 @@ #include struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, - struct drm_fb_helper_surface_size *sizes) + struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_framebuffer *fb; struct xe_device *xe = to_xe_device(drm); - struct drm_mode_fb_cmd2 mode_cmd = {}; struct xe_bo *obj; int size; - /* we don't do packed 24bpp */ - if (sizes->surface_bpp == 24) - sizes->surface_bpp = 32; - - mode_cmd.width = sizes->surface_width; - mode_cmd.height = sizes->surface_height; - - mode_cmd.pitches[0] = ALIGN(mode_cmd.width * - DIV_ROUND_UP(sizes->surface_bpp, 8), 64); - mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, - sizes->surface_depth); - - size = mode_cmd.pitches[0] * mode_cmd.height; + size = mode_cmd->pitches[0] * mode_cmd->height; size = PAGE_ALIGN(size); obj = ERR_PTR(-ENODEV); @@ -67,9 +54,9 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, fb = intel_framebuffer_create(&obj->ttm.base, drm_get_format_info(drm, - mode_cmd.pixel_format, - mode_cmd.modifier[0]), - &mode_cmd); + mode_cmd->pixel_format, + mode_cmd->modifier[0]), + mode_cmd); if (IS_ERR(fb)) { 
xe_bo_unpin_map_no_vm(obj); goto err; -- cgit v1.2.3 From 7326099d71243ba31ef486931279d9573482292b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 11:40:55 +0300 Subject: drm/i915/fbdev: abstract bo creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separate fbdev bo creation into a separate function intel_fbdev_fb_bo_create(). Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/cb3999ceae43d56e075c28a1f4581169ce457ab0.1758184771.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 33 +++++++++++++++++++-------- drivers/gpu/drm/i915/display/intel_fbdev_fb.h | 1 + 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index 685612e6afc5..bfd05fd34348 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -13,17 +13,11 @@ #include "intel_fb.h" #include "intel_fbdev_fb.h" -struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, - struct drm_mode_fb_cmd2 *mode_cmd) +struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size) { struct intel_display *display = to_intel_display(drm); struct drm_i915_private *dev_priv = to_i915(drm); - struct drm_framebuffer *fb; struct drm_i915_gem_object *obj; - int size; - - size = mode_cmd->pitches[0] * mode_cmd->height; - size = PAGE_ALIGN(size); obj = ERR_PTR(-ENODEV); if (HAS_LMEM(dev_priv)) { @@ -49,17 +43,36 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, return ERR_PTR(-ENOMEM); } - fb = intel_framebuffer_create(intel_bo_to_drm_bo(obj), + return &obj->base; +} + +struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, + struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct drm_framebuffer *fb; + struct drm_gem_object *obj; + int size; + + size = mode_cmd->pitches[0] * mode_cmd->height; + size = PAGE_ALIGN(size); + + obj = intel_fbdev_fb_bo_create(drm, size); + if (IS_ERR(obj)) { + fb = ERR_CAST(obj); + goto err; + } + + fb = intel_framebuffer_create(obj, drm_get_format_info(drm, mode_cmd->pixel_format, mode_cmd->modifier[0]), mode_cmd); if (IS_ERR(fb)) { - i915_gem_object_put(obj); + drm_gem_object_put(obj); goto err; } - i915_gem_object_put(obj); + drm_gem_object_put(obj); return to_intel_framebuffer(fb); err: diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h index 83454ffbf79c..6a4ba40d5831 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h @@ -13,6 +13,7 @@ struct fb_info; struct i915_vma; struct intel_display; +struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size); struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, struct drm_mode_fb_cmd2 *mode_cmd); int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, -- cgit v1.2.3 From ec4dae5e9b2cbdadcc9e22c0472f8a2749674355 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 11:40:56 +0300 Subject: drm/xe/fbdev: abstract bo creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separate fbdev bo creation into a separate function intel_fbdev_fb_bo_create(). 
Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/74dfb9f3e6e05a93d54a8ab534e4384145b52571.1758184771.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 33 ++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 9c9fb34b3407..3ae9d41cb62c 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -15,16 +15,11 @@ #include -struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, - struct drm_mode_fb_cmd2 *mode_cmd) +struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size) { - struct drm_framebuffer *fb; struct xe_device *xe = to_xe_device(drm); struct xe_bo *obj; - int size; - size = mode_cmd->pitches[0] * mode_cmd->height; - size = PAGE_ALIGN(size); obj = ERR_PTR(-ENODEV); if (!IS_DGFX(xe) && !XE_GT_WA(xe_root_mmio_gt(xe), 22019338487_display)) { @@ -48,21 +43,39 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, if (IS_ERR(obj)) { drm_err(&xe->drm, "failed to allocate framebuffer (%pe)\n", obj); - fb = ERR_PTR(-ENOMEM); + return ERR_PTR(-ENOMEM); + } + + return &obj->ttm.base; +} + +struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, + struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct drm_framebuffer *fb; + struct drm_gem_object *obj; + int size; + + size = mode_cmd->pitches[0] * mode_cmd->height; + size = PAGE_ALIGN(size); + + obj = intel_fbdev_fb_bo_create(drm, size); + if (IS_ERR(obj)) { + fb = ERR_CAST(obj); goto err; } - fb = intel_framebuffer_create(&obj->ttm.base, + fb = intel_framebuffer_create(obj, drm_get_format_info(drm, mode_cmd->pixel_format, mode_cmd->modifier[0]), mode_cmd); if (IS_ERR(fb)) { - xe_bo_unpin_map_no_vm(obj); + xe_bo_unpin_map_no_vm(gem_to_xe_bo(obj)); goto err; } - drm_gem_object_put(&obj->ttm.base); + drm_gem_object_put(obj); return to_intel_framebuffer(fb); -- cgit v1.2.3 From a170c6ca8ba84c3a901796814e4b1d07157f0620 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 11:40:57 +0300 Subject: drm/{i915, xe}/fbdev: add intel_fbdev_fb_bo_destroy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915 and xe do different things on the failure path; i915 calls drm_gem_object_put() while xe calls xe_bo_unpin_map_no_vm(). Add a helper to enable further refactoring. 
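A rough sketch of the error path the two drivers can then share, with the divergent cleanup hidden behind the new helper (the fb/obj/info names follow the surrounding code):

	fb = intel_framebuffer_create(obj, info, mode_cmd);
	if (IS_ERR(fb)) {
		/* i915: drm_gem_object_put(); xe: xe_bo_unpin_map_no_vm() */
		intel_fbdev_fb_bo_destroy(obj);
		return ERR_CAST(fb);
	}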
v2: Call drm_gem_object_put() on intel_fbdev_fb_bo_destroy() Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/22bc3c3158f5a22ab258ada8684766fdf75fefec.1758184771.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 7 ++++++- drivers/gpu/drm/i915/display/intel_fbdev_fb.h | 1 + drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 7 ++++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index bfd05fd34348..a7dab8cd3aa2 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -46,6 +46,11 @@ struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size return &obj->base; } +void intel_fbdev_fb_bo_destroy(struct drm_gem_object *obj) +{ + drm_gem_object_put(obj); +} + struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, struct drm_mode_fb_cmd2 *mode_cmd) { @@ -68,7 +73,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, mode_cmd->modifier[0]), mode_cmd); if (IS_ERR(fb)) { - drm_gem_object_put(obj); + intel_fbdev_fb_bo_destroy(obj); goto err; } diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h index 6a4ba40d5831..8b6214b0ad0e 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h @@ -14,6 +14,7 @@ struct i915_vma; struct intel_display; struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size); +void intel_fbdev_fb_bo_destroy(struct drm_gem_object *obj); struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, struct drm_mode_fb_cmd2 *mode_cmd); int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 3ae9d41cb62c..e1043617c3ae 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -49,6 +49,11 @@ struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size return &obj->ttm.base; } +void intel_fbdev_fb_bo_destroy(struct drm_gem_object *obj) +{ + xe_bo_unpin_map_no_vm(gem_to_xe_bo(obj)); +} + struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, struct drm_mode_fb_cmd2 *mode_cmd) { @@ -71,7 +76,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, mode_cmd->modifier[0]), mode_cmd); if (IS_ERR(fb)) { - xe_bo_unpin_map_no_vm(gem_to_xe_bo(obj)); + intel_fbdev_fb_bo_destroy(obj); goto err; } -- cgit v1.2.3 From f379035fdf89eaa7e411ab589b3a8845bc2d5481 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 11:40:58 +0300 Subject: drm/{i915,xe}/fbdev: deduplicate fbdev creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the bo creation helper in place, we can lift intel_framebuffer_create() part to common code. 
Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/7289deac730a877ab1bfcc467f9d063fdccf3930.1758184771.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbdev.c | 31 +++++++++++++++++++++--- drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 34 -------------------------- drivers/gpu/drm/i915/display/intel_fbdev_fb.h | 2 -- drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 35 --------------------------- 4 files changed, 28 insertions(+), 74 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index e46c08762b84..4bc9a053ca40 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -227,13 +227,38 @@ __intel_fbdev_fb_alloc(struct intel_display *display, struct drm_fb_helper_surface_size *sizes) { struct drm_mode_fb_cmd2 mode_cmd = {}; - struct intel_framebuffer *fb; + struct drm_framebuffer *fb; + struct drm_gem_object *obj; + int size; intel_fbdev_fill_mode_cmd(sizes, &mode_cmd); - fb = intel_fbdev_fb_alloc(display->drm, &mode_cmd); + size = mode_cmd.pitches[0] * mode_cmd.height; + size = PAGE_ALIGN(size); + + obj = intel_fbdev_fb_bo_create(display->drm, size); + if (IS_ERR(obj)) { + fb = ERR_CAST(obj); + goto err; + } + + fb = intel_framebuffer_create(obj, + drm_get_format_info(display->drm, + mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd); + if (IS_ERR(fb)) { + intel_fbdev_fb_bo_destroy(obj); + goto err; + } + + drm_gem_object_put(obj); + + return to_intel_framebuffer(fb); + +err: + return ERR_CAST(fb); - return fb; } int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index a7dab8cd3aa2..c802a4b2bfc7 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -10,7 +10,6 @@ #include "i915_drv.h" #include "intel_display_core.h" #include "intel_display_types.h" -#include "intel_fb.h" #include "intel_fbdev_fb.h" struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size) @@ -51,39 +50,6 @@ void intel_fbdev_fb_bo_destroy(struct drm_gem_object *obj) drm_gem_object_put(obj); } -struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, - struct drm_mode_fb_cmd2 *mode_cmd) -{ - struct drm_framebuffer *fb; - struct drm_gem_object *obj; - int size; - - size = mode_cmd->pitches[0] * mode_cmd->height; - size = PAGE_ALIGN(size); - - obj = intel_fbdev_fb_bo_create(drm, size); - if (IS_ERR(obj)) { - fb = ERR_CAST(obj); - goto err; - } - - fb = intel_framebuffer_create(obj, - drm_get_format_info(drm, - mode_cmd->pixel_format, - mode_cmd->modifier[0]), - mode_cmd); - if (IS_ERR(fb)) { - intel_fbdev_fb_bo_destroy(obj); - goto err; - } - - drm_gem_object_put(obj); - - return to_intel_framebuffer(fb); -err: - return ERR_CAST(fb); -} - int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, struct drm_gem_object *_obj, struct i915_vma *vma) { diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h index 8b6214b0ad0e..3b7b59d664b5 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h @@ -15,8 +15,6 @@ struct intel_display; struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size); void intel_fbdev_fb_bo_destroy(struct drm_gem_object *obj); -struct intel_framebuffer 
*intel_fbdev_fb_alloc(struct drm_device *drm, - struct drm_mode_fb_cmd2 *mode_cmd); int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, struct drm_gem_object *obj, struct i915_vma *vma); diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index e1043617c3ae..c23da0dd5336 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -7,7 +7,6 @@ #include "intel_display_core.h" #include "intel_display_types.h" -#include "intel_fb.h" #include "intel_fbdev_fb.h" #include "xe_bo.h" #include "xe_ttm_stolen_mgr.h" @@ -54,40 +53,6 @@ void intel_fbdev_fb_bo_destroy(struct drm_gem_object *obj) xe_bo_unpin_map_no_vm(gem_to_xe_bo(obj)); } -struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_device *drm, - struct drm_mode_fb_cmd2 *mode_cmd) -{ - struct drm_framebuffer *fb; - struct drm_gem_object *obj; - int size; - - size = mode_cmd->pitches[0] * mode_cmd->height; - size = PAGE_ALIGN(size); - - obj = intel_fbdev_fb_bo_create(drm, size); - if (IS_ERR(obj)) { - fb = ERR_CAST(obj); - goto err; - } - - fb = intel_framebuffer_create(obj, - drm_get_format_info(drm, - mode_cmd->pixel_format, - mode_cmd->modifier[0]), - mode_cmd); - if (IS_ERR(fb)) { - intel_fbdev_fb_bo_destroy(obj); - goto err; - } - - drm_gem_object_put(obj); - - return to_intel_framebuffer(fb); - -err: - return ERR_CAST(fb); -} - int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, struct drm_gem_object *_obj, struct i915_vma *vma) { -- cgit v1.2.3 From 5c3a68857ddb1d9544f90d2ae7f5b45b16822917 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 11:40:59 +0300 Subject: drm/{i915, xe}/fbdev: pass struct drm_device to intel_fbdev_fb_fill_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This code is in fact driver core rather than display specific. Pass struct drm_device instead of struct intel_display. 
Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/1f633154f5f3106f55d7525a711bf347f5635ea7.1758184771.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 6 +++--- drivers/gpu/drm/i915/display/intel_fbdev_fb.h | 3 +-- drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 6 ++---- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 4bc9a053ca40..3fbdf75415cc 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -332,7 +332,7 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, obj = intel_fb_bo(&fb->base); - ret = intel_fbdev_fb_fill_info(display, info, obj, vma); + ret = intel_fbdev_fb_fill_info(display->drm, info, obj, vma); if (ret) goto out_unpin; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index c802a4b2bfc7..8af409bff0f0 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -50,10 +50,10 @@ void intel_fbdev_fb_bo_destroy(struct drm_gem_object *obj) drm_gem_object_put(obj); } -int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, +int intel_fbdev_fb_fill_info(struct drm_device *drm, struct fb_info *info, struct drm_gem_object *_obj, struct i915_vma *vma) { - struct drm_i915_private *i915 = to_i915(display->drm); + struct drm_i915_private *i915 = to_i915(drm); struct drm_i915_gem_object *obj = to_intel_bo(_obj); struct i915_gem_ww_ctx ww; void __iomem *vaddr; @@ -85,7 +85,7 @@ int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info vaddr = i915_vma_pin_iomap(vma); if (IS_ERR(vaddr)) { - drm_err(display->drm, + drm_err(drm, "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr); ret = PTR_ERR(vaddr); continue; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h index 3b7b59d664b5..1fa44ed28543 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h @@ -11,11 +11,10 @@ struct drm_gem_object; struct drm_mode_fb_cmd2; struct fb_info; struct i915_vma; -struct intel_display; struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size); void intel_fbdev_fb_bo_destroy(struct drm_gem_object *obj); -int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, +int intel_fbdev_fb_fill_info(struct drm_device *drm, struct fb_info *info, struct drm_gem_object *obj, struct i915_vma *vma); #endif diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index c23da0dd5336..3dfab0c2b827 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -5,8 +5,6 @@ #include -#include "intel_display_core.h" -#include "intel_display_types.h" #include "intel_fbdev_fb.h" #include "xe_bo.h" #include "xe_ttm_stolen_mgr.h" @@ -53,11 +51,11 @@ void intel_fbdev_fb_bo_destroy(struct drm_gem_object *obj) xe_bo_unpin_map_no_vm(gem_to_xe_bo(obj)); } -int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, +int intel_fbdev_fb_fill_info(struct drm_device *drm, struct fb_info *info, struct drm_gem_object *_obj, struct i915_vma *vma) { struct xe_bo *obj = gem_to_xe_bo(_obj); - struct pci_dev *pdev = 
to_pci_dev(display->drm->dev); + struct pci_dev *pdev = to_pci_dev(drm->dev); if (!(obj->flags & XE_BO_FLAG_SYSTEM)) { if (obj->flags & XE_BO_FLAG_STOLEN) -- cgit v1.2.3 From ffce45f241833084f526d44666e9de677bd70f2b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 11:41:00 +0300 Subject: drm/i915/fbdev: drop dependency on display in i915 specific code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This code is in fact i915 driver core rather than display specific. Stop using struct intel_display, and drop the dependency on display headers. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/a2faad2b47c63ea773a96b2885fb759602374264.1758184771.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index 8af409bff0f0..56b145841473 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -8,13 +8,10 @@ #include "gem/i915_gem_lmem.h" #include "i915_drv.h" -#include "intel_display_core.h" -#include "intel_display_types.h" #include "intel_fbdev_fb.h" struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size) { - struct intel_display *display = to_intel_display(drm); struct drm_i915_private *dev_priv = to_i915(drm); struct drm_i915_gem_object *obj; @@ -31,7 +28,7 @@ struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size * * Also skip stolen on MTL as Wa_22018444074 mitigation. */ - if (!display->platform.meteorlake && size * 2 < dev_priv->dsm.usable_size) + if (!IS_METEORLAKE(dev_priv) && size * 2 < dev_priv->dsm.usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); if (IS_ERR(obj)) obj = i915_gem_object_create_shmem(dev_priv, size); -- cgit v1.2.3 From 96b8ccbe7ffa5bb49caf82167ea54a7755f06f3d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 15:25:44 +0300 Subject: drm/i915/irq: use a dedicated IMR cache for VLV/CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are three groups of platforms using i915->irq_mask independently: gen 2-4, VLV/CHV, and gen 5-7. The VLV/CHV usage is purely limited to display. Move its irq_mask usage to struct intel_display as vlv_imr_mask for VLV/CHV. vlv_imr_mask could be put inside a union with de_irq_mask[], but keep them separate to avoid accidental aliasing of the values. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/cef6dee8d0b02ff76180c5879f3056e102947a57.1758198300.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_core.h | 5 +++++ drivers/gpu/drm/i915/display/intel_display_irq.c | 11 ++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index 791021a4e3bb..48a707557c29 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -475,6 +475,11 @@ struct intel_display { struct work_struct vblank_notify_work; + /* + * Cached value of VLV/CHV IMR to avoid reads in updating the + * bitfield. 
+ */ + u32 vlv_imr_mask; u32 de_irq_mask[I915_MAX_PIPES]; u32 pipestat_irq_mask[I915_MAX_PIPES]; } irq; diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index 123e054affbe..df718670546b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -1865,8 +1865,6 @@ void vlv_display_error_irq_handler(struct intel_display *display, static void _vlv_display_irq_reset(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (display->platform.cherryview) intel_de_write(display, DPINVGTT, DPINVGTT_STATUS_MASK_CHV); else @@ -1881,7 +1879,7 @@ static void _vlv_display_irq_reset(struct intel_display *display) i9xx_pipestat_irq_reset(display); intel_display_irq_regs_reset(display, VLV_IRQ_REGS); - dev_priv->irq_mask = ~0u; + display->irq.vlv_imr_mask = ~0u; } void vlv_display_irq_reset(struct intel_display *display) @@ -1939,7 +1937,6 @@ static u32 vlv_error_mask(void) static void _vlv_display_irq_postinstall(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 pipestat_mask; u32 enable_mask; enum pipe pipe; @@ -1973,11 +1970,11 @@ static void _vlv_display_irq_postinstall(struct intel_display *display) enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT | I915_LPE_PIPE_C_INTERRUPT; - drm_WARN_ON(display->drm, dev_priv->irq_mask != ~0u); + drm_WARN_ON(display->drm, display->irq.vlv_imr_mask != ~0u); - dev_priv->irq_mask = ~enable_mask; + display->irq.vlv_imr_mask = ~enable_mask; - intel_display_irq_regs_init(display, VLV_IRQ_REGS, dev_priv->irq_mask, enable_mask); + intel_display_irq_regs_init(display, VLV_IRQ_REGS, display->irq.vlv_imr_mask, enable_mask); } void vlv_display_irq_postinstall(struct intel_display *display) -- cgit v1.2.3 From f2c6777dd9f733f76fb6658d62a301ed57f4cd61 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 15:25:45 +0300 Subject: drm/i915/irq: use a dedicated IMR cache for gen 5-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are three groups of platforms using i915->irq_mask independently: gen 2-4, VLV/CHV, and gen 5-7. The gen 5-7 usage is primarily limited to display. Move its irq_mask usage to struct intel_display as ilk_de_imr_mask for gen 5-7. ilk_de_imr_mask could be put inside a union with vlv_imr_mask and de_irq_mask[], but keep them separate to avoid accidental aliasing of the values. With this, we can also drop the irq_mask member from struct xe_device. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/adf60e74b890d52dd20ab4673111ae2063d33b49.1758198300.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_core.h | 5 +++++ drivers/gpu/drm/i915/display/intel_display_irq.c | 14 ++++++-------- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 3 --- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index 48a707557c29..4a52bbe327b7 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -480,6 +480,11 @@ struct intel_display { * bitfield. */ u32 vlv_imr_mask; + /* + * Cached value of gen 5-7 DE IMR to avoid reads in updating the + * bitfield.
+ */ + u32 ilk_de_imr_mask; u32 de_irq_mask[I915_MAX_PIPES]; u32 pipestat_irq_mask[I915_MAX_PIPES]; } irq; diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index df718670546b..f4ba9b08e044 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -140,14 +140,14 @@ void ilk_update_display_irq(struct intel_display *display, lockdep_assert_held(&display->irq.lock); drm_WARN_ON(display->drm, enabled_irq_mask & ~interrupt_mask); - new_val = dev_priv->irq_mask; + new_val = display->irq.ilk_de_imr_mask; new_val &= ~interrupt_mask; new_val |= (~enabled_irq_mask & interrupt_mask); - if (new_val != dev_priv->irq_mask && + if (new_val != display->irq.ilk_de_imr_mask && !drm_WARN_ON(display->drm, !intel_irqs_enabled(dev_priv))) { - dev_priv->irq_mask = new_val; - intel_de_write(display, DEIMR, dev_priv->irq_mask); + display->irq.ilk_de_imr_mask = new_val; + intel_de_write(display, DEIMR, display->irq.ilk_de_imr_mask); intel_de_posting_read(display, DEIMR); } } @@ -2180,8 +2180,6 @@ out: void ilk_de_irq_postinstall(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - u32 display_mask, extra_mask; if (DISPLAY_VER(display) >= 7) { @@ -2213,11 +2211,11 @@ void ilk_de_irq_postinstall(struct intel_display *display) if (display->platform.ironlake && display->platform.mobile) extra_mask |= DE_PCU_EVENT; - i915->irq_mask = ~display_mask; + display->irq.ilk_de_imr_mask = ~display_mask; ibx_irq_postinstall(display); - intel_display_irq_regs_init(display, DE_IRQ_REGS, i915->irq_mask, + intel_display_irq_regs_init(display, DE_IRQ_REGS, display->irq.ilk_de_imr_mask, display_mask | extra_mask); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8d5da222a187..e5fdfd51b549 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -659,7 +659,7 @@ static void ilk_irq_reset(struct drm_i915_private *dev_priv) struct intel_uncore *uncore = &dev_priv->uncore; gen2_irq_reset(uncore, DE_IRQ_REGS); - dev_priv->irq_mask = ~0u; + display->irq.ilk_de_imr_mask = ~0u; if (GRAPHICS_VER(dev_priv) == 7) intel_uncore_write(uncore, GEN7_ERR_INT, 0xffffffff); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 4353256452aa..79d3a59762d9 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -611,9 +611,6 @@ struct xe_device { /* To shut up runtime pm macros.. */ struct xe_runtime_pm {} runtime_pm; - /* only to allow build, not used functionally */ - u32 irq_mask; - struct intel_uncore { spinlock_t lock; } uncore; -- cgit v1.2.3 From cb4242e34ff82857ef814cbd9d77d4d73b31d0eb Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 15:25:46 +0300 Subject: drm/i915/irq: rename irq_mask to gen2_imr_mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the struct drm_i915_private irq_mask member to gen2_imr_mask to reflect its usage more accurately. 
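The cached-IMR update pattern itself is unchanged by the rename; the enable side is roughly (sketch, simplified from gen2_irq_enable() in the patch below):

	/* keep a shadow of IMR so updates avoid an MMIO read-modify-write */
	i915->gen2_imr_mask &= ~engine->irq_enable_mask;	/* clearing a mask bit enables the irq */
	intel_uncore_write(engine->uncore, GEN2_IMR, i915->gen2_imr_mask);
	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);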
Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/2c193663cd3ae524d8159b4216e45462017042fa.1758198300.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/gen2_engine_cs.c | 8 ++++---- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/i915_irq.c | 16 ++++++++-------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c index 8116fd5987e2..8c01fb6d4e7b 100644 --- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c @@ -292,15 +292,15 @@ int gen4_emit_bb_start(struct i915_request *rq, void gen2_irq_enable(struct intel_engine_cs *engine) { - engine->i915->irq_mask &= ~engine->irq_enable_mask; - intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); + engine->i915->gen2_imr_mask &= ~engine->irq_enable_mask; + intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->gen2_imr_mask); intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR); } void gen2_irq_disable(struct intel_engine_cs *engine) { - engine->i915->irq_mask |= engine->irq_enable_mask; - intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); + engine->i915->gen2_imr_mask |= engine->irq_enable_mask; + intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->gen2_imr_mask); } void gen5_irq_enable(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 37970d8db255..03e497d2081e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -234,8 +234,8 @@ struct drm_i915_private { /* Sideband mailbox protection */ struct mutex sb_lock; - /** Cached value of IMR to avoid reads in updating the bitfield */ - u32 irq_mask; + /* Cached value of gen 2-4 IMR to avoid reads in updating the bitfield */ + u32 gen2_imr_mask; bool preserve_bios_swizzle; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e5fdfd51b549..ab65402bc6bf 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -897,7 +897,7 @@ static void i915_irq_reset(struct drm_i915_private *dev_priv) gen2_error_reset(uncore, GEN2_ERROR_REGS); gen2_irq_reset(uncore, GEN2_IRQ_REGS); - dev_priv->irq_mask = ~0u; + dev_priv->gen2_imr_mask = ~0u; } static void i915_irq_postinstall(struct drm_i915_private *dev_priv) @@ -908,7 +908,7 @@ static void i915_irq_postinstall(struct drm_i915_private *dev_priv) gen2_error_init(uncore, GEN2_ERROR_REGS, ~i9xx_error_mask(dev_priv)); - dev_priv->irq_mask = + dev_priv->gen2_imr_mask = ~(I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | I915_MASTER_ERROR_INTERRUPT); @@ -920,16 +920,16 @@ static void i915_irq_postinstall(struct drm_i915_private *dev_priv) I915_USER_INTERRUPT; if (DISPLAY_VER(display) >= 3) { - dev_priv->irq_mask &= ~I915_ASLE_INTERRUPT; + dev_priv->gen2_imr_mask &= ~I915_ASLE_INTERRUPT; enable_mask |= I915_ASLE_INTERRUPT; } if (HAS_HOTPLUG(display)) { - dev_priv->irq_mask &= ~I915_DISPLAY_PORT_INTERRUPT; + dev_priv->gen2_imr_mask &= ~I915_DISPLAY_PORT_INTERRUPT; enable_mask |= I915_DISPLAY_PORT_INTERRUPT; } - gen2_irq_init(uncore, GEN2_IRQ_REGS, dev_priv->irq_mask, enable_mask); + gen2_irq_init(uncore, GEN2_IRQ_REGS, dev_priv->gen2_imr_mask, enable_mask); i915_display_irq_postinstall(display); } @@ -999,7 +999,7 @@ static void i965_irq_reset(struct drm_i915_private *dev_priv) gen2_error_reset(uncore, GEN2_ERROR_REGS); gen2_irq_reset(uncore, GEN2_IRQ_REGS); - 
dev_priv->irq_mask = ~0u; + dev_priv->gen2_imr_mask = ~0u; } static u32 i965_error_mask(struct drm_i915_private *i915) @@ -1029,7 +1029,7 @@ static void i965_irq_postinstall(struct drm_i915_private *dev_priv) gen2_error_init(uncore, GEN2_ERROR_REGS, ~i965_error_mask(dev_priv)); - dev_priv->irq_mask = + dev_priv->gen2_imr_mask = ~(I915_ASLE_INTERRUPT | I915_DISPLAY_PORT_INTERRUPT | I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | @@ -1047,7 +1047,7 @@ static void i965_irq_postinstall(struct drm_i915_private *dev_priv) if (IS_G4X(dev_priv)) enable_mask |= I915_BSD_USER_INTERRUPT; - gen2_irq_init(uncore, GEN2_IRQ_REGS, dev_priv->irq_mask, enable_mask); + gen2_irq_init(uncore, GEN2_IRQ_REGS, dev_priv->gen2_imr_mask, enable_mask); i965_display_irq_postinstall(display); } -- cgit v1.2.3 From a5ef491e903e4bbb681bdaddfc2bcd7cdf43cea6 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 15:25:47 +0300 Subject: drm/i915/irq: rename de_irq_mask[] to de_pipe_imr_mask[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the struct intel_display de_irq_mask[] member to de_pipe_imr_mask[] to reflect its usage more accurately. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/55bbf17df871331c2c34af748cf9cf812d6a65d7.1758198300.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_core.h | 6 +++++- drivers/gpu/drm/i915/display/intel_display_irq.c | 16 ++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index 4a52bbe327b7..df4da52cbdb3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -485,7 +485,11 @@ struct intel_display { * bitfield. */ u32 ilk_de_imr_mask; - u32 de_irq_mask[I915_MAX_PIPES]; + /* + * Cached value of BDW+ DE pipe IMR to avoid reads in updating + * the bitfield. 
+ */ + u32 de_pipe_imr_mask[I915_MAX_PIPES]; u32 pipestat_irq_mask[I915_MAX_PIPES]; } irq; diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index f4ba9b08e044..93c2e42f98c9 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -215,13 +215,13 @@ static void bdw_update_pipe_irq(struct intel_display *display, if (drm_WARN_ON(display->drm, !intel_irqs_enabled(dev_priv))) return; - new_val = display->irq.de_irq_mask[pipe]; + new_val = display->irq.de_pipe_imr_mask[pipe]; new_val &= ~interrupt_mask; new_val |= (~enabled_irq_mask & interrupt_mask); - if (new_val != display->irq.de_irq_mask[pipe]) { - display->irq.de_irq_mask[pipe] = new_val; - intel_de_write(display, GEN8_DE_PIPE_IMR(pipe), display->irq.de_irq_mask[pipe]); + if (new_val != display->irq.de_pipe_imr_mask[pipe]) { + display->irq.de_pipe_imr_mask[pipe] = new_val; + intel_de_write(display, GEN8_DE_PIPE_IMR(pipe), display->irq.de_pipe_imr_mask[pipe]); intel_de_posting_read(display, GEN8_DE_PIPE_IMR(pipe)); } } @@ -2085,8 +2085,8 @@ void gen8_irq_power_well_post_enable(struct intel_display *display, for_each_pipe_masked(display, pipe, pipe_mask) intel_display_irq_regs_init(display, GEN8_DE_PIPE_IRQ_REGS(pipe), - display->irq.de_irq_mask[pipe], - ~display->irq.de_irq_mask[pipe] | extra_ier); + display->irq.de_pipe_imr_mask[pipe], + ~display->irq.de_pipe_imr_mask[pipe] | extra_ier); spin_unlock_irq(&display->irq.lock); } @@ -2300,12 +2300,12 @@ void gen8_de_irq_postinstall(struct intel_display *display) } for_each_pipe(display, pipe) { - display->irq.de_irq_mask[pipe] = ~de_pipe_masked; + display->irq.de_pipe_imr_mask[pipe] = ~de_pipe_masked; if (intel_display_power_is_enabled(display, POWER_DOMAIN_PIPE(pipe))) intel_display_irq_regs_init(display, GEN8_DE_PIPE_IRQ_REGS(pipe), - display->irq.de_irq_mask[pipe], + display->irq.de_pipe_imr_mask[pipe], de_pipe_enables); } -- cgit v1.2.3 From 4c26361cc68f72299a8d0eea48734e39eb3b78b3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 Sep 2025 16:38:35 +0300 Subject: drm/i915/irq: add ilk_display_irq_reset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Abstract ilk_display_irq_reset(), moving display related reset there. This results in a slightly different order between GT and PCH reset, hopefully with no impact. 
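The resulting order is roughly (sketch of the new ilk_irq_reset(), mirroring the patch below):

	static void ilk_irq_reset(struct drm_i915_private *dev_priv)
	{
		struct intel_display *display = dev_priv->display;

		/* the master interrupt enable is in DEIER, so reset display irqs first */
		ilk_display_irq_reset(display);
		gen5_gt_irq_reset(to_gt(dev_priv));
	}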
v3: Reset display first (Ville) v2: Also move GEN7_ERR_INT (Ville) Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250918133835.2412980-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_irq.c | 20 +++++++++++++++++++- drivers/gpu/drm/i915/display/intel_display_irq.h | 2 +- drivers/gpu/drm/i915/i915_irq.c | 16 ++-------------- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index 93c2e42f98c9..c6f367e6159e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -1985,7 +1985,7 @@ void vlv_display_irq_postinstall(struct intel_display *display) spin_unlock_irq(&display->irq.lock); } -void ibx_display_irq_reset(struct intel_display *display) +static void ibx_display_irq_reset(struct intel_display *display) { if (HAS_PCH_NOP(display)) return; @@ -1996,6 +1996,24 @@ void ibx_display_irq_reset(struct intel_display *display) intel_de_write(display, SERR_INT, 0xffffffff); } +void ilk_display_irq_reset(struct intel_display *display) +{ + struct intel_uncore *uncore = to_intel_uncore(display->drm); + + gen2_irq_reset(uncore, DE_IRQ_REGS); + display->irq.ilk_de_imr_mask = ~0u; + + if (DISPLAY_VER(display) == 7) + intel_de_write(display, GEN7_ERR_INT, 0xffffffff); + + if (display->platform.haswell) { + intel_de_write(display, EDP_PSR_IMR, 0xffffffff); + intel_de_write(display, EDP_PSR_IIR, 0xffffffff); + } + + ibx_display_irq_reset(display); +} + void gen8_display_irq_reset(struct intel_display *display) { enum pipe pipe; diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.h b/drivers/gpu/drm/i915/display/intel_display_irq.h index c66db3851da4..cee120347064 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.h +++ b/drivers/gpu/drm/i915/display/intel_display_irq.h @@ -56,7 +56,7 @@ u32 gen11_gu_misc_irq_ack(struct intel_display *display, const u32 master_ctl); void gen11_gu_misc_irq_handler(struct intel_display *display, const u32 iir); void i9xx_display_irq_reset(struct intel_display *display); -void ibx_display_irq_reset(struct intel_display *display); +void ilk_display_irq_reset(struct intel_display *display); void vlv_display_irq_reset(struct intel_display *display); void gen8_display_irq_reset(struct intel_display *display); void gen11_display_irq_reset(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ab65402bc6bf..7c7c6dcbce88 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -656,22 +656,10 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) static void ilk_irq_reset(struct drm_i915_private *dev_priv) { struct intel_display *display = dev_priv->display; - struct intel_uncore *uncore = &dev_priv->uncore; - - gen2_irq_reset(uncore, DE_IRQ_REGS); - display->irq.ilk_de_imr_mask = ~0u; - - if (GRAPHICS_VER(dev_priv) == 7) - intel_uncore_write(uncore, GEN7_ERR_INT, 0xffffffff); - - if (IS_HASWELL(dev_priv)) { - intel_uncore_write(uncore, EDP_PSR_IMR, 0xffffffff); - intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff); - } + /* The master interrupt enable is in DEIER, reset display irq first */ + ilk_display_irq_reset(display); gen5_gt_irq_reset(to_gt(dev_priv)); - - ibx_display_irq_reset(display); } static void valleyview_irq_reset(struct drm_i915_private *dev_priv) -- cgit v1.2.3 From cef58ce505a04a896d59cb207f0a0ccea1eec5ca Mon Sep 17 
00:00:00 2001 From: Nemesa Garg Date: Wed, 13 Aug 2025 10:50:17 +0530 Subject: drm/i915: Soft defeature of cursor size reduction From display 14 onward do not enable the cursor size reduction bit as it has been defeatured. Bspec: 50372 Signed-off-by: Nemesa Garg Reviewed-by: Chaitanya Kumar Borah Signed-off-by: Animesh Manna Link: https://lore.kernel.org/r/20250813052017.3591331-1-nemesa.garg@intel.com --- drivers/gpu/drm/i915/display/intel_cursor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index d4d181f9dca5..c47c84935871 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -662,7 +662,7 @@ static void i9xx_cursor_update_arm(struct intel_dsb *dsb, cntl = plane_state->ctl | i9xx_cursor_ctl_crtc(crtc_state); - if (width != height) + if (DISPLAY_VER(display) < 14 && width != height) fbc_ctl = CUR_FBC_EN | CUR_FBC_HEIGHT(height - 1); base = plane_state->surf; -- cgit v1.2.3 From db7944458f4e5cdc11402e18ccbbc7aac4286f4b Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 9 Sep 2025 11:30:11 +0300 Subject: drm/i915/dmc: explicitly sanitize num_entries from package_header num_entries comes from package_header, which is read from an external firmware blob and thus untrusted. In parse_dmc_fw_package() we assign package_header->num_entries to a local variable, but the range check still uses the struct field directly. Switch the check to use the local copy instead. This makes the sanitization explicit and avoids a redundant dereference. Reviewed-by: Mitul Golani Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20250909083042.1292672-1-luciano.coelho@intel.com --- drivers/gpu/drm/i915/display/intel_dmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 77a0199f9ea5..517bebb0b4aa 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -1141,7 +1141,7 @@ parse_dmc_fw_package(struct intel_dmc *dmc, } num_entries = package_header->num_entries; - if (WARN_ON(package_header->num_entries > max_entries)) + if (WARN_ON(num_entries > max_entries)) num_entries = max_entries; fw_info = (const struct intel_fw_info *) -- cgit v1.2.3 From fab82f47246b768c0ea51564e4a3f2ad9e6fa7d3 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 02:22:25 +0300 Subject: drm/i915/vrr: Hide the ICL/TGL intel_vrr_flipline_offset() mangling better MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ICL/TGL VRR hardware won't allow us to program flipline==vmin. If we do that the actual effect will be the same as if we had programmed flipline=vmin+1, which would make the minimum vtotal one scanline taller than expected. To compensate for this we reduce vmin by one, and then program flipline=vmin+1. So we end up with a flipline value that matches the expected minimum vtotal. Currently this adjustment happens in intel_vrr_compute_config() which means that crtc_state->vrr.vmin will no longer be directly usable for the remainder of the high level VRR code. That is annoying at best, fragile at worst. Hide the adjustment in low level code instead. This will allow most of the higher level VRR code to remain blissfully ignorant about this fact. 
Afterwards crtc_state->vrr.{vmin,flipline} will be equal and match the minimum vtotal, exactly how things already work on ADL+. The only slight downside is that the actual register value will no longer match crtc_state->vrr.vmin on ICL/TGL, but that may already be the case on TGL because the register value will also have been adjusted by the SCL. Note that we must change the guardband calculation to account for intel_vrr_extra_vblank_delay() explicitly. Previously that was accidentally handled by the earlier vmin reduction by intel_vrr_flipline_offset(). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250918232226.25295-2-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 71a985d00604..e725b4581e81 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -108,15 +108,20 @@ int intel_vrr_vblank_delay(const struct intel_crtc_state *crtc_state) static int intel_vrr_flipline_offset(struct intel_display *display) { - /* ICL/TGL hardware imposes flipline>=vmin+1 */ + /* + * ICL/TGL hardware imposes flipline>=vmin+1 + * + * We reduce the vmin value to compensate when programming the + * hardware. This approach allows flipline to remain set at the + * original value, and thus the frame will have the desired + * minimum vtotal. + */ return DISPLAY_VER(display) < 13 ? 1 : 0; } static int intel_vrr_vmin_flipline(const struct intel_crtc_state *crtc_state) { - struct intel_display *display = to_intel_display(crtc_state); - - return crtc_state->vrr.vmin + intel_vrr_flipline_offset(display); + return crtc_state->vrr.vmin; } static int intel_vrr_guardband_to_pipeline_full(const struct intel_crtc_state *crtc_state, @@ -400,13 +405,6 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, else intel_vrr_compute_fixed_rr_timings(crtc_state); - /* - * flipline determines the min vblank length the hardware will - * generate, and on ICL/TGL flipline>=vmin+1, hence we reduce - * vmin by one to make sure we can get the actual min vblank length. 
- */ - crtc_state->vrr.vmin -= intel_vrr_flipline_offset(display); - if (HAS_AS_SDP(display)) { crtc_state->vrr.vsync_start = (crtc_state->hw.adjusted_mode.crtc_vtotal - @@ -426,7 +424,8 @@ void intel_vrr_compute_config_late(struct intel_crtc_state *crtc_state) return; crtc_state->vrr.guardband = - crtc_state->vrr.vmin - adjusted_mode->crtc_vblank_start; + crtc_state->vrr.vmin - adjusted_mode->crtc_vblank_start - + intel_vrr_extra_vblank_delay(display); if (DISPLAY_VER(display) < 13) { /* FIXME handle the limit in a proper way */ @@ -597,7 +596,10 @@ void intel_vrr_set_db_point_and_transmission_line(const struct intel_crtc_state static int intel_vrr_hw_vmin(const struct intel_crtc_state *crtc_state) { - return intel_vrr_hw_value(crtc_state, crtc_state->vrr.vmin); + struct intel_display *display = to_intel_display(crtc_state); + + return intel_vrr_hw_value(crtc_state, crtc_state->vrr.vmin) - + intel_vrr_flipline_offset(display); } static int intel_vrr_hw_vmax(const struct intel_crtc_state *crtc_state) @@ -762,6 +764,8 @@ void intel_vrr_get_config(struct intel_crtc_state *crtc_state) crtc_state->vrr.flipline += intel_vrr_real_vblank_delay(crtc_state); crtc_state->vrr.vmax += intel_vrr_real_vblank_delay(crtc_state); crtc_state->vrr.vmin += intel_vrr_real_vblank_delay(crtc_state); + + crtc_state->vrr.vmin += intel_vrr_flipline_offset(display); } /* -- cgit v1.2.3 From 50720b6708010333a857ff572ff9e4387ba0fd1a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 02:22:26 +0300 Subject: drm/i915/vrr: s/intel_vrr_flipline_offset/intel_vrr_vmin_flipline_offset/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename intel_vrr_flipline_offset() to intel_vrr_vmin_flipline_offset() to better reflect the fact that it gives us the minimum offset allowed between vmin and flipline. 
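Concretely, for a desired minimum vtotal N on ICL/TGL (offset 1) the hardware ends up programmed with vmin = N - 1 and flipline = N, while on ADL+ (offset 0) both are N. Ignoring the separate vblank delay adjustments, the arithmetic is roughly:

	/* compensate for the ICL/TGL flipline >= vmin + 1 restriction */
	hw_vmin = crtc_state->vrr.vmin - intel_vrr_vmin_flipline_offset(display);
	hw_flipline = crtc_state->vrr.flipline;	/* stays at the desired minimum vtotal */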
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250918232226.25295-3-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index e725b4581e81..9e007aab1452 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -106,7 +106,7 @@ int intel_vrr_vblank_delay(const struct intel_crtc_state *crtc_state) intel_vrr_extra_vblank_delay(display); } -static int intel_vrr_flipline_offset(struct intel_display *display) +static int intel_vrr_vmin_flipline_offset(struct intel_display *display) { /* * ICL/TGL hardware imposes flipline>=vmin+1 @@ -288,7 +288,7 @@ int intel_vrr_fixed_rr_hw_vmin(const struct intel_crtc_state *crtc_state) struct intel_display *display = to_intel_display(crtc_state); return intel_vrr_fixed_rr_hw_vtotal(crtc_state) - - intel_vrr_flipline_offset(display); + intel_vrr_vmin_flipline_offset(display); } static @@ -599,7 +599,7 @@ static int intel_vrr_hw_vmin(const struct intel_crtc_state *crtc_state) struct intel_display *display = to_intel_display(crtc_state); return intel_vrr_hw_value(crtc_state, crtc_state->vrr.vmin) - - intel_vrr_flipline_offset(display); + intel_vrr_vmin_flipline_offset(display); } static int intel_vrr_hw_vmax(const struct intel_crtc_state *crtc_state) @@ -765,7 +765,7 @@ void intel_vrr_get_config(struct intel_crtc_state *crtc_state) crtc_state->vrr.vmax += intel_vrr_real_vblank_delay(crtc_state); crtc_state->vrr.vmin += intel_vrr_real_vblank_delay(crtc_state); - crtc_state->vrr.vmin += intel_vrr_flipline_offset(display); + crtc_state->vrr.vmin += intel_vrr_vmin_flipline_offset(display); } /* -- cgit v1.2.3 From c8e8e9ab14a6ea926641d161768e1e3ef286a853 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Mon, 8 Sep 2025 09:52:08 +0530 Subject: drm/i915/ddi: Guard reg_val against an INVALID_TRANSCODER Currently we check if the master transcoder is INVALID_TRANSCODER (-1) and throw a WARN_ON, but we still end up writing the value into temp, where it overflows the field and corrupts the whole programmed value.
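To make the failure mode concrete: INVALID_TRANSCODER is -1, so shifting it into the transport select field smears set bits outside the field. A minimal sketch, using an illustrative plain-shift field definition (not the actual macro):

	#define EXAMPLE_MST_TRANSPORT_SELECT(x)	((u32)(x) << 24)	/* illustrative only */

	temp |= EXAMPLE_MST_TRANSPORT_SELECT(-1);	/* -1 sets bits 24..31, corrupting temp */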
--v2 -Assign a bogus transcoder to master in case we get an INVALID_TRANSCODER [Jani] Fixes: 6671c367a9bea ("drm/i915/tgl: Select master transcoder for MST stream") Signed-off-by: Suraj Kandpal Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20250908042208.1011144-1-suraj.kandpal@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 08083ac83a74..c09aa759f4d4 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -597,8 +597,9 @@ intel_ddi_transcoder_func_reg_val_get(struct intel_encoder *encoder, enum transcoder master; master = crtc_state->mst_master_transcoder; - drm_WARN_ON(display->drm, - master == INVALID_TRANSCODER); + if (drm_WARN_ON(display->drm, + master == INVALID_TRANSCODER)) + master = TRANSCODER_A; temp |= TRANS_DDI_MST_TRANSPORT_SELECT(master); } } else { -- cgit v1.2.3 From 90930b637dce2e600728829250645734929d7388 Mon Sep 17 00:00:00 2001 From: Michał Grzelak Date: Fri, 19 Sep 2025 23:54:13 +0200 Subject: drm/i915: rename vlv_get_cck_clock() to vlv_clock_get_cck() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the convention of naming vlv_clock* instead of vlv_*_clock. Signed-off-by: Michał Grzelak Link: https://lore.kernel.org/r/20250919215413.1006296-1-michal.grzelak@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/vlv_clock.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/vlv_clock.c b/drivers/gpu/drm/i915/display/vlv_clock.c index 42c2837b32c1..1abdae453514 100644 --- a/drivers/gpu/drm/i915/display/vlv_clock.c +++ b/drivers/gpu/drm/i915/display/vlv_clock.c @@ -36,7 +36,7 @@ int vlv_clock_get_hpll_vco(struct drm_device *drm) return display->vlv_clock.hpll_freq; } -static int vlv_get_cck_clock(struct drm_device *drm, +static int vlv_clock_get_cck(struct drm_device *drm, const char *name, u32 reg, int ref_freq) { u32 val; @@ -58,7 +58,7 @@ static int vlv_get_cck_clock(struct drm_device *drm, int vlv_clock_get_hrawclk(struct drm_device *drm) { /* RAWCLK_FREQ_VLV register updated from power well code */ - return vlv_get_cck_clock(drm, "hrawclk", CCK_DISPLAY_REF_CLOCK_CONTROL, + return vlv_clock_get_cck(drm, "hrawclk", CCK_DISPLAY_REF_CLOCK_CONTROL, vlv_clock_get_hpll_vco(drm)); } @@ -67,7 +67,7 @@ int vlv_clock_get_czclk(struct drm_device *drm) { struct intel_display *display = to_intel_display(drm); if (!display->vlv_clock.czclk_freq) { - display->vlv_clock.czclk_freq = vlv_get_cck_clock(drm, "czclk", CCK_CZ_CLOCK_CONTROL, + display->vlv_clock.czclk_freq = vlv_clock_get_cck(drm, "czclk", CCK_CZ_CLOCK_CONTROL, vlv_clock_get_hpll_vco(drm)); drm_dbg_kms(drm, "CZ clock rate: %d kHz\n", display->vlv_clock.czclk_freq); } @@ -77,12 +77,12 @@ int vlv_clock_get_czclk(struct drm_device *drm) int vlv_clock_get_cdclk(struct drm_device *drm) { - return vlv_get_cck_clock(drm, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, + return vlv_clock_get_cck(drm, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, vlv_clock_get_hpll_vco(drm)); } int vlv_clock_get_gpll(struct drm_device *drm) { - return vlv_get_cck_clock(drm, "GPLL ref", CCK_GPLL_CLOCK_CONTROL, + return vlv_clock_get_cck(drm, "GPLL ref", CCK_GPLL_CLOCK_CONTROL, vlv_clock_get_czclk(drm)); } -- cgit v1.2.3 From bc412f9a993b49b1af22cc4c2f834f4350b06957 Mon Sep 17
21:50:10 +0300 Subject: drm/i915/pm: Simplify pm hook documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop spelling out each variant of the hook ("" vs. "_late" vs. "_early") and just say e.g. "@thaw*" to indicate all of them. Avoids having to update the docs whenever we start/stop using one of the variants. Reviewed-by: Rodrigo Vivi Reviewed-by: Jouni Högander Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919185015.14561-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_driver.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index a28c3710c4d5..76bda4376fe0 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -1670,18 +1670,18 @@ const struct dev_pm_ops i915_pm_ops = { /* * S4 event handlers - * @freeze, @freeze_late : called (1) before creating the - * hibernation image [PMSG_FREEZE] and - * (2) after rebooting, before restoring - * the image [PMSG_QUIESCE] - * @thaw, @thaw_early : called (1) after creating the hibernation - * image, before writing it [PMSG_THAW] - * and (2) after failing to create or - * restore the image [PMSG_RECOVER] - * @poweroff, @poweroff_late: called after writing the hibernation - * image, before rebooting [PMSG_HIBERNATE] - * @restore, @restore_early : called after rebooting and restoring the - * hibernation image [PMSG_RESTORE] + * @freeze* : called (1) before creating the + * hibernation image [PMSG_FREEZE] and + * (2) after rebooting, before restoring + * the image [PMSG_QUIESCE] + * @thaw* : called (1) after creating the hibernation + * image, before writing it [PMSG_THAW] + * and (2) after failing to create or + * restore the image [PMSG_RECOVER] + * @poweroff* : called after writing the hibernation + * image, before rebooting [PMSG_HIBERNATE] + * @restore* : called after rebooting and restoring the + * hibernation image [PMSG_RESTORE] */ .freeze = i915_pm_freeze, .freeze_late = i915_pm_freeze_late, -- cgit v1.2.3 From cead397a976dde554f5bfba48a06f49c29bc3982 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 21:50:11 +0300 Subject: drm/i915/pm: Hoist pci_save_state()+pci_set_power_state() to the end of pm _late() hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/pci does the pci_save_state()+pci_set_power_state() from the _noirq() pm hooks. Move our manual calls (needed for the hibernate vs. D3 workaround with buggy BIOSes) towards that same point. We currently have no _noirq() hooks, so end of _late() hooks is the best we can do right now.
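For reference, the suspend-side phase ordering being moved along (assuming the standard dev_pm_ops sequencing):

	/* ->suspend()        driver quiesces the device                           */
	/* ->suspend_late()   now ends with our pci_save_state() + D3 transition   */
	/* ->suspend_noirq()  the phase where drivers/pci normally saves state and */
	/*                    puts the device into D3                              */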
Reviewed-by: Rodrigo Vivi Reviewed-by: Jouni Högander Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919185015.14561-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_driver.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 76bda4376fe0..66f2acacd516 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -1053,7 +1053,6 @@ static int i915_drm_suspend(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_display *display = dev_priv->display; - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); pci_power_t opregion_target_state; disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); @@ -1067,8 +1066,6 @@ static int i915_drm_suspend(struct drm_device *dev) intel_display_driver_disable_user_access(display); } - pci_save_state(pdev); - intel_display_driver_suspend(display); intel_irq_suspend(dev_priv); @@ -1125,10 +1122,16 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) drm_err(&dev_priv->drm, "Suspend complete failed: %d\n", ret); intel_display_power_resume_early(display); - goto out; + goto fail; } + enable_rpm_wakeref_asserts(rpm); + + if (!dev_priv->uncore.user_forcewake_count) + intel_runtime_pm_driver_release(rpm); + pci_disable_device(pdev); + /* * During hibernation on some platforms the BIOS may try to access * the device even though it's already in D3 and hang the machine. So @@ -1140,11 +1143,17 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) * Lenovo Thinkpad X301, X61s, X60, T60, X41 * Fujitsu FSC S7110 * Acer Aspire 1830T + * + * pci_save_state() prevents drivers/pci from + * automagically putting the device into D3. */ + pci_save_state(pdev); if (!(hibernation && GRAPHICS_VER(dev_priv) < 6)) pci_set_power_state(pdev, PCI_D3hot); -out: + return 0; + +fail: enable_rpm_wakeref_asserts(rpm); if (!dev_priv->uncore.user_forcewake_count) intel_runtime_pm_driver_release(rpm); -- cgit v1.2.3 From f3d8e898ce5e97b2fbc3321b8fbfd31826357f6e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 21:50:12 +0300 Subject: drm/i915/pm: Move the hibernate+D3 quirk stuff into noirq() pm hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the driver doesn't call pci_save_state() drivers/pci will normally save+power manage the device from the _noirq() pm hooks. We can't let that happen as some old BIOSes fail to hibernate when the device is in D3. However, we can get very close to the standard behaviour by doing our explicit pci_save_state() and pci_set_power_state() stuff from driver provided _noirq() hooks. This results in a change of behaviour where we no longer go into D3 at the end of freeze_late, so when it comes time to thaw() we'll already be in D0, and thus we can drop the explicit pci_set_power_state(D0) call. Presumably switcheroo suspend will want to go into D3 so call the _noirq() stuff from the switcheroo suspend hook, and since we dropped the pci_set_power_state(D0) from resume_early() we'll need to add one back into the switcheroo resume hook. 
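The gist of the new _noirq() step, simplified from the patch below (sketch only):

	/* old BIOSes may poke the device during hibernation; stay out of D3hot then */
	pci_save_state(pdev);
	if (!(hibernation && GRAPHICS_VER(dev_priv) < 6))
		pci_set_power_state(pdev, PCI_D3hot);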
Cc: Rodrigo Vivi Reviewed-by: Jouni Högander Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919185015.14561-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_driver.c | 76 +++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 66f2acacd516..327e5fe7dfff 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -1132,6 +1132,21 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) pci_disable_device(pdev); + return 0; + +fail: + enable_rpm_wakeref_asserts(rpm); + if (!dev_priv->uncore.user_forcewake_count) + intel_runtime_pm_driver_release(rpm); + + return ret; +} + +static int i915_drm_suspend_noirq(struct drm_device *dev, bool hibernation) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + /* * During hibernation on some platforms the BIOS may try to access * the device even though it's already in D3 and hang the machine. So @@ -1152,13 +1167,6 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) pci_set_power_state(pdev, PCI_D3hot); return 0; - -fail: - enable_rpm_wakeref_asserts(rpm); - if (!dev_priv->uncore.user_forcewake_count) - intel_runtime_pm_driver_release(rpm); - - return ret; } int i915_driver_suspend_switcheroo(struct drm_i915_private *i915, @@ -1177,7 +1185,15 @@ int i915_driver_suspend_switcheroo(struct drm_i915_private *i915, if (error) return error; - return i915_drm_suspend_late(&i915->drm, false); + error = i915_drm_suspend_late(&i915->drm, false); + if (error) + return error; + + error = i915_drm_suspend_noirq(&i915->drm, false); + if (error) + return error; + + return 0; } static int i915_drm_resume(struct drm_device *dev) @@ -1283,23 +1299,6 @@ static int i915_drm_resume_early(struct drm_device *dev) * similar so that power domains can be employed. */ - /* - * Note that we need to set the power state explicitly, since we - * powered off the device during freeze and the PCI core won't power - * it back up for us during thaw. Powering off the device during - * freeze is not a hard requirement though, and during the - * suspend/resume phases the PCI core makes sure we get here with the - * device powered on. So in case we change our freeze logic and keep - * the device powered we can also remove the following set power state - * call. - */ - ret = pci_set_power_state(pdev, PCI_D0); - if (ret) { - drm_err(&dev_priv->drm, - "failed to set PCI D0 power state (%d)\n", ret); - return ret; - } - /* * Note that pci_enable_device() first enables any parent bridge * device and only then sets the power state for this device. 
The @@ -1337,11 +1336,16 @@ static int i915_drm_resume_early(struct drm_device *dev) int i915_driver_resume_switcheroo(struct drm_i915_private *i915) { + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); int ret; if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; + ret = pci_set_power_state(pdev, PCI_D0); + if (ret) + return ret; + ret = i915_drm_resume_early(&i915->drm); if (ret) return ret; @@ -1398,6 +1402,16 @@ static int i915_pm_suspend_late(struct device *kdev) return i915_drm_suspend_late(&i915->drm, false); } +static int i915_pm_suspend_noirq(struct device *kdev) +{ + struct drm_i915_private *i915 = kdev_to_i915(kdev); + + if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + + return i915_drm_suspend_noirq(&i915->drm, false); +} + static int i915_pm_poweroff_late(struct device *kdev) { struct drm_i915_private *i915 = kdev_to_i915(kdev); @@ -1408,6 +1422,16 @@ static int i915_pm_poweroff_late(struct device *kdev) return i915_drm_suspend_late(&i915->drm, true); } +static int i915_pm_poweroff_noirq(struct device *kdev) +{ + struct drm_i915_private *i915 = kdev_to_i915(kdev); + + if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + + return i915_drm_suspend_noirq(&i915->drm, true); +} + static int i915_pm_resume_early(struct device *kdev) { struct drm_i915_private *i915 = kdev_to_i915(kdev); @@ -1673,6 +1697,7 @@ const struct dev_pm_ops i915_pm_ops = { .prepare = i915_pm_prepare, .suspend = i915_pm_suspend, .suspend_late = i915_pm_suspend_late, + .suspend_noirq = i915_pm_suspend_noirq, .resume_early = i915_pm_resume_early, .resume = i915_pm_resume, .complete = i915_pm_complete, @@ -1698,6 +1723,7 @@ const struct dev_pm_ops i915_pm_ops = { .thaw = i915_pm_thaw, .poweroff = i915_pm_suspend, .poweroff_late = i915_pm_poweroff_late, + .poweroff_noirq = i915_pm_poweroff_noirq, .restore_early = i915_pm_restore_early, .restore = i915_pm_restore, -- cgit v1.2.3 From 03a37f5c3a169bea3c25b1e07e632cd0ddbcf302 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 21:50:13 +0300 Subject: drm/i915/pm: Do pci_restore_state() in switcheroo resume hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since this switcheroo garbage bypasses all the core pm we have to manually manage the pci state. To that end add the missing pci_restore_state() to the switcheroo resume hook. We already have the pci_save_state() counterpart on the suspend side. Arguably none of this code should exist in the driver in the first place, and instead the entire switcheroo mechanism should be rewritten and properly integrated into core pm code... 
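With this the switcheroo paths pair up the manual PCI state handling, roughly:

	/* switcheroo suspend side */
	pci_save_state(pdev);
	pci_set_power_state(pdev, PCI_D3hot);

	/* switcheroo resume side */
	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);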
Reviewed-by: Rodrigo Vivi Reviewed-by: Jouni Högander Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919185015.14561-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_driver.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 327e5fe7dfff..009f4e27cf49 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -1346,6 +1346,8 @@ int i915_driver_resume_switcheroo(struct drm_i915_private *i915) if (ret) return ret; + pci_restore_state(pdev); + ret = i915_drm_resume_early(&i915->drm); if (ret) return ret; -- cgit v1.2.3 From 3a3d9cb0b18df22d0d6466e3eca45cd0ad2ddbc8 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 21:50:14 +0300 Subject: drm/i915/pm: Allow drivers/pci to manage our pci state normally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop doing the pci_save_state(), except when we need to prevent D3 due to BIOS bugs, so that the code in drivers/pci is allowed to manage the state of the PCI device. Less chance of something getting left by the wayside by i915 if/when things change in drivers/pci. Cc: Badal Nilawar Reviewed-by: Jouni Högander Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919185015.14561-6-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_driver.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 009f4e27cf49..0cb874e64971 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -1162,9 +1162,8 @@ static int i915_drm_suspend_noirq(struct drm_device *dev, bool hibernation) * pci_save_state() prevents drivers/pci from * automagically putting the device into D3. */ - pci_save_state(pdev); - if (!(hibernation && GRAPHICS_VER(dev_priv) < 6)) - pci_set_power_state(pdev, PCI_D3hot); + if (hibernation && GRAPHICS_VER(dev_priv) < 6) + pci_save_state(pdev); return 0; } @@ -1172,6 +1171,7 @@ static int i915_drm_suspend_noirq(struct drm_device *dev, bool hibernation) int i915_driver_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state) { + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); int error; if (drm_WARN_ON_ONCE(&i915->drm, state.event != PM_EVENT_SUSPEND && @@ -1189,9 +1189,8 @@ int i915_driver_suspend_switcheroo(struct drm_i915_private *i915, if (error) return error; - error = i915_drm_suspend_noirq(&i915->drm, false); - if (error) - return error; + pci_save_state(pdev); + pci_set_power_state(pdev, PCI_D3hot); return 0; } -- cgit v1.2.3 From 97fd25f8b6380525dafe4f3b8f7f215ac8560caf Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 21:50:15 +0300 Subject: drm/i915/pm: Drop redundant pci stuff from suspend/resume paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I don't think there should be any need for us to call any of pci_enable_device(), pci_disable_device() or pci_set_master() from the suspend/resume paths. The config space save/restore should take care of all of this.
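The reasoning here, as a hedged illustration: the enables that pci_enable_device()/pci_set_master() manage live in the PCI_COMMAND config register, which is part of the config space that pci_save_state()/pci_restore_state() round-trips:

	u16 cmd;

	/* PCI_COMMAND_IO/MEMORY/MASTER are saved and restored with config space */
	pci_read_config_word(pdev, PCI_COMMAND, &cmd);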
Cc: Karthik Poosa Reviewed-by: Jouni Högander Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919185015.14561-7-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_driver.c | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 0cb874e64971..95165e45de74 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -1100,7 +1100,6 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_display *display = dev_priv->display; - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; struct intel_gt *gt; int ret, i; @@ -1121,21 +1120,10 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) if (ret) { drm_err(&dev_priv->drm, "Suspend complete failed: %d\n", ret); intel_display_power_resume_early(display); - - goto fail; } enable_rpm_wakeref_asserts(rpm); - if (!dev_priv->uncore.user_forcewake_count) - intel_runtime_pm_driver_release(rpm); - - pci_disable_device(pdev); - - return 0; - -fail: - enable_rpm_wakeref_asserts(rpm); if (!dev_priv->uncore.user_forcewake_count) intel_runtime_pm_driver_release(rpm); @@ -1284,7 +1272,6 @@ static int i915_drm_resume_early(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_display *display = dev_priv->display; - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); struct intel_gt *gt; int ret, i; @@ -1298,24 +1285,6 @@ static int i915_drm_resume_early(struct drm_device *dev) * similar so that power domains can be employed. */ - /* - * Note that pci_enable_device() first enables any parent bridge - * device and only then sets the power state for this device. The - * bridge enabling is a nop though, since bridge devices are resumed - * first. The order of enabling power and enabling the device is - * imposed by the PCI core as described above, so here we preserve the - * same order for the freeze/thaw phases. - * - * TODO: eventually we should remove pci_disable_device() / - * pci_enable_enable_device() from suspend/resume. Due to how they - * depend on the device enable refcount we can't anyway depend on them - * disabling/enabling the device. - */ - if (pci_enable_device(pdev)) - return -EIO; - - pci_set_master(pdev); - disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); ret = vlv_resume_prepare(dev_priv, false); -- cgit v1.2.3 From a1d0a0549d425cb21e2be4b7fd4f76e0379e6f2d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:29:48 +0300 Subject: drm/i915/dram: Also apply the 16Gb DIMM w/a for larger DRAM chips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While the spec only asks us to do the WM0 latency bump for 16Gb DRAM devices I believe we should apply it for larger DRAM chips. At the time the w/a was added there were no larger chips on the market, but I think I've seen at least 32Gb DDR4 chips being available these days. Whether it's possible to actually find suitable DIMMs for the affected systems with larger chips I don't know. Also it's not known whether the 1 usec latency bump would be sufficient for larger chips. Someone would need to find such DIMMs and test this. Fortunately we do have a bit of extra latency already with the 1 usec bump, as the actual requirement was .4 usec for 16Gb chips.
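The per-device capacity math the check relies on, with illustrative numbers:

	/*
	 * dimm->size is the total Gb on the DIMM; dividing by the device
	 * count gives Gb per DRAM chip, e.g. a 32 GB (256 Gb) DIMM built
	 * from 16 devices -> 16 Gb per chip -> the w/a applies.
	 */
	return dimm->size / (intel_dimm_num_devices(dimm) ?: 1) >= 16;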
Reviewed-by: Luca Coelho Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 4 ++-- drivers/gpu/drm/i915/soc/intel_dram.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index d74cbb43ae6f..83ec42646e82 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -3209,9 +3209,9 @@ adjust_wm_latency(struct intel_display *display, } /* - * WA Level-0 adjustment for 16Gb DIMMs: SKL+ + * WA Level-0 adjustment for 16Gb+ DIMMs: SKL+ * If we could not get dimm info enable this WA to prevent from - * any underrun. If not able to get DIMM info assume 16Gb DIMM + * any underrun. If not able to get DIMM info assume 16Gb+ DIMM * to avoid any underrun. */ if (!display->platform.dg2 && dram_info->has_16gb_dimms) diff --git a/drivers/gpu/drm/i915/soc/intel_dram.c b/drivers/gpu/drm/i915/soc/intel_dram.c index edffaed8f9a7..8841cfe1cac8 100644 --- a/drivers/gpu/drm/i915/soc/intel_dram.c +++ b/drivers/gpu/drm/i915/soc/intel_dram.c @@ -335,7 +335,7 @@ static bool skl_is_16gb_dimm(const struct dram_dimm_info *dimm) { /* Convert total Gb to Gb per DRAM device */ - return dimm->size / (intel_dimm_num_devices(dimm) ?: 1) == 16; + return dimm->size / (intel_dimm_num_devices(dimm) ?: 1) >= 16; } static void @@ -354,7 +354,7 @@ skl_dram_get_dimm_info(struct drm_i915_private *i915, } drm_dbg_kms(&i915->drm, - "CH%u DIMM %c size: %u Gb, width: X%u, ranks: %u, 16Gb DIMMs: %s\n", + "CH%u DIMM %c size: %u Gb, width: X%u, ranks: %u, 16Gb+ DIMMs: %s\n", channel, dimm_name, dimm->size, dimm->width, dimm->ranks, str_yes_no(skl_is_16gb_dimm(dimm))); } @@ -384,7 +384,7 @@ skl_dram_get_channel_info(struct drm_i915_private *i915, ch->is_16gb_dimm = skl_is_16gb_dimm(&ch->dimm_l) || skl_is_16gb_dimm(&ch->dimm_s); - drm_dbg_kms(&i915->drm, "CH%u ranks: %u, 16Gb DIMMs: %s\n", + drm_dbg_kms(&i915->drm, "CH%u ranks: %u, 16Gb+ DIMMs: %s\n", channel, ch->ranks, str_yes_no(ch->is_16gb_dimm)); return 0; @@ -406,7 +406,7 @@ skl_dram_get_channels_info(struct drm_i915_private *i915, struct dram_info *dram u32 val; int ret; - /* Assume 16Gb DIMMs are present until proven otherwise */ + /* Assume 16Gb+ DIMMs are present until proven otherwise */ dram_info->has_16gb_dimms = true; val = intel_uncore_read(&i915->uncore, @@ -438,7 +438,7 @@ skl_dram_get_channels_info(struct drm_i915_private *i915, struct dram_info *dram drm_dbg_kms(&i915->drm, "Memory configuration is symmetric? %s\n", str_yes_no(dram_info->symmetric_memory)); - drm_dbg_kms(&i915->drm, "16Gb DIMMs: %s\n", + drm_dbg_kms(&i915->drm, "16Gb+ DIMMs: %s\n", str_yes_no(dram_info->has_16gb_dimms)); return 0; -- cgit v1.2.3 From 8ebb8e1a0ef84bc672105d07b8d9fd3ebbda6427 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:29:49 +0300 Subject: drm/i915: Apply the 16Gb DIMM w/a only for the platforms that need it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the code assumes that every platform except dg2 needs the 16Gb DIMM w/a, while in reality it's only needed by skl and icl (and derivatives). Switch to a more specific platform check.
Reviewed-by: Luca Coelho Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 83ec42646e82..2bf334fe63c9 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -3174,11 +3174,19 @@ void skl_watermark_ipc_init(struct intel_display *display) skl_watermark_ipc_update(display); } +static bool need_16gb_dimm_wa(struct intel_display *display) +{ + const struct dram_info *dram_info = intel_dram_info(display->drm); + + return (display->platform.skylake || display->platform.kabylake || + display->platform.coffeelake || display->platform.cometlake || + DISPLAY_VER(display) == 11) && dram_info->has_16gb_dimms; +} + static void adjust_wm_latency(struct intel_display *display, u16 wm[], int num_levels, int read_latency) { - const struct dram_info *dram_info = intel_dram_info(display->drm); int i, level; /* @@ -3214,7 +3222,7 @@ adjust_wm_latency(struct intel_display *display, * any underrun. If not able to get DIMM info assume 16Gb+ DIMM * to avoid any underrun. */ - if (!display->platform.dg2 && dram_info->has_16gb_dimms) + if (need_16gb_dimm_wa(display)) wm[0] += 1; } -- cgit v1.2.3 From 07816e8117a2ecb6a126264f3c16ca02668ca2be Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:29:50 +0300 Subject: drm/i915: Tweak the read latency fixup code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If WM0 latency is zero we need to bump it (and the WM1+ latencies) by a fixed amount. But any WM1+ level with zero latency must not be touched since that indicates that the corresponding WM level isn't supported. Currently the loop doing that adjustment does work, but only because the previous loop modified the num_levels used as the loop boundary. This all seems a bit too fragile. Remove the num_levels adjustment and instead adjust the read latency loop to abort when it encounters a zero latency value. Reviewed-by: Luca Coelho Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 2bf334fe63c9..2969cc58dabe 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -3198,8 +3198,6 @@ adjust_wm_latency(struct intel_display *display, if (wm[level] == 0) { for (i = level + 1; i < num_levels; i++) wm[i] = 0; - - num_levels = level; break; } } @@ -3212,8 +3210,14 @@ adjust_wm_latency(struct intel_display *display, * from the punit when level 0 response data is 0us. 
*/ if (wm[0] == 0) { - for (level = 0; level < num_levels; level++) + wm[0] += read_latency; + + for (level = 1; level < num_levels; level++) { + if (wm[level] == 0) + break; + wm[level] += read_latency; + } } /* -- cgit v1.2.3 From 76742daf75419eacdb4777980746f65f7c18a551 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:29:51 +0300 Subject: drm/i915: Don't pass the latency array to {skl,mtl}_read_wm_latency() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We always operate on i915->display.wm.skl_latency in {skl,mtl}_read_wm_latency(). No real need for the caller to have to pass that in explicitly. Reviewed-by: Luca Coelho Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 2969cc58dabe..43e481759e6e 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -3184,9 +3184,10 @@ static bool need_16gb_dimm_wa(struct intel_display *display) } static void -adjust_wm_latency(struct intel_display *display, - u16 wm[], int num_levels, int read_latency) +adjust_wm_latency(struct intel_display *display, int read_latency) { + u16 *wm = display->wm.skl_latency; + int num_levels = display->wm.num_levels; int i, level; /* @@ -3230,9 +3231,9 @@ adjust_wm_latency(struct intel_display *display, wm[0] += 1; } -static void mtl_read_wm_latency(struct intel_display *display, u16 wm[]) +static void mtl_read_wm_latency(struct intel_display *display) { - int num_levels = display->wm.num_levels; + u16 *wm = display->wm.skl_latency; u32 val; val = intel_de_read(display, MTL_LATENCY_LP0_LP1); @@ -3247,12 +3248,12 @@ static void mtl_read_wm_latency(struct intel_display *display, u16 wm[]) wm[4] = REG_FIELD_GET(MTL_LATENCY_LEVEL_EVEN_MASK, val); wm[5] = REG_FIELD_GET(MTL_LATENCY_LEVEL_ODD_MASK, val); - adjust_wm_latency(display, wm, num_levels, 6); + adjust_wm_latency(display, 6); } -static void skl_read_wm_latency(struct intel_display *display, u16 wm[]) +static void skl_read_wm_latency(struct intel_display *display) { - int num_levels = display->wm.num_levels; + u16 *wm = display->wm.skl_latency; int read_latency = DISPLAY_VER(display) >= 12 ? 3 : 2; int mult = display->platform.dg2 ? 
2 : 1; u32 val; @@ -3284,7 +3285,7 @@ static void skl_read_wm_latency(struct intel_display *display, u16 wm[]) wm[6] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_2_6_MASK, val) * mult; wm[7] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_3_7_MASK, val) * mult; - adjust_wm_latency(display, wm, num_levels, read_latency); + adjust_wm_latency(display, read_latency); } static void skl_setup_wm_latency(struct intel_display *display) @@ -3295,9 +3296,9 @@ static void skl_setup_wm_latency(struct intel_display *display) display->wm.num_levels = 8; if (DISPLAY_VER(display) >= 14) - mtl_read_wm_latency(display, display->wm.skl_latency); + mtl_read_wm_latency(display); else - skl_read_wm_latency(display, display->wm.skl_latency); + skl_read_wm_latency(display); intel_print_wm_latency(display, "Gen9 Plane", display->wm.skl_latency); } -- cgit v1.2.3 From e6619d22c8417c2630c37ed4e047c60e97b82110 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:29:52 +0300 Subject: drm/i915: Move adjust_wm_latency() out from {mtl,skl}_read_wm_latency() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit {mtl,skl}_read_wm_latency() are doing way too many things for my liking. Move the adjustment stuff out into the caller. This also gives us one place where we specify the 'read_latency' for all the platforms, instead of two places. Reviewed-by: Luca Coelho Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-6-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 43e481759e6e..2bc797aba2ec 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -3183,12 +3183,22 @@ static bool need_16gb_dimm_wa(struct intel_display *display) DISPLAY_VER(display) == 11) && dram_info->has_16gb_dimms; } +static int wm_read_latency(struct intel_display *display) +{ + if (DISPLAY_VER(display) >= 14) + return 6; + else if (DISPLAY_VER(display) >= 12) + return 3; + else + return 2; +} + static void -adjust_wm_latency(struct intel_display *display, int read_latency) +adjust_wm_latency(struct intel_display *display) { u16 *wm = display->wm.skl_latency; - int num_levels = display->wm.num_levels; - int i, level; + int i, level, num_levels = display->wm.num_levels; + int read_latency = wm_read_latency(display); /* * If a level n (n > 1) has a 0us latency, all levels m (m >= n) @@ -3247,14 +3257,11 @@ static void mtl_read_wm_latency(struct intel_display *display) val = intel_de_read(display, MTL_LATENCY_LP4_LP5); wm[4] = REG_FIELD_GET(MTL_LATENCY_LEVEL_EVEN_MASK, val); wm[5] = REG_FIELD_GET(MTL_LATENCY_LEVEL_ODD_MASK, val); - - adjust_wm_latency(display, 6); } static void skl_read_wm_latency(struct intel_display *display) { u16 *wm = display->wm.skl_latency; - int read_latency = DISPLAY_VER(display) >= 12 ? 3 : 2; int mult = display->platform.dg2 ? 
2 : 1; u32 val; int ret; @@ -3284,8 +3291,6 @@ static void skl_read_wm_latency(struct intel_display *display) wm[5] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_1_5_MASK, val) * mult; wm[6] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_2_6_MASK, val) * mult; wm[7] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_3_7_MASK, val) * mult; - - adjust_wm_latency(display, read_latency); } static void skl_setup_wm_latency(struct intel_display *display) @@ -3300,6 +3305,8 @@ static void skl_setup_wm_latency(struct intel_display *display) else skl_read_wm_latency(display); + adjust_wm_latency(display); + intel_print_wm_latency(display, "Gen9 Plane", display->wm.skl_latency); } -- cgit v1.2.3 From 91acc6317814d86258dc6ad9e3ef77164d9af148 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:29:53 +0300 Subject: drm/i915: Extract multiply_wm_latency() from skl_read_wm_latency() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I want skl_read_wm_latency() to just do what it says on the tin, ie. read the latency values from the pcode mailbox. Move the DG2 "multiply by two" trick elsewhere. Reviewed-by: Luca Coelho Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-7-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 29 +++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 2bc797aba2ec..1ac94cb4f27d 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -3174,6 +3174,15 @@ void skl_watermark_ipc_init(struct intel_display *display) skl_watermark_ipc_update(display); } +static void multiply_wm_latency(struct intel_display *display, int mult) +{ + u16 *wm = display->wm.skl_latency; + int level, num_levels = display->wm.num_levels; + + for (level = 0; level < num_levels; level++) + wm[level] *= mult; +} + static bool need_16gb_dimm_wa(struct intel_display *display) { const struct dram_info *dram_info = intel_dram_info(display->drm); @@ -3200,6 +3209,9 @@ adjust_wm_latency(struct intel_display *display) int i, level, num_levels = display->wm.num_levels; int read_latency = wm_read_latency(display); + if (display->platform.dg2) + multiply_wm_latency(display, 2); + /* * If a level n (n > 1) has a 0us latency, all levels m (m >= n) * need to be disabled. We make sure to sanitize the values out @@ -3262,7 +3274,6 @@ static void mtl_read_wm_latency(struct intel_display *display) static void skl_read_wm_latency(struct intel_display *display) { u16 *wm = display->wm.skl_latency; - int mult = display->platform.dg2 ? 
2 : 1; u32 val; int ret; @@ -3274,10 +3285,10 @@ static void skl_read_wm_latency(struct intel_display *display) return; } - wm[0] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_0_4_MASK, val) * mult; - wm[1] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_1_5_MASK, val) * mult; - wm[2] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_2_6_MASK, val) * mult; - wm[3] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_3_7_MASK, val) * mult; + wm[0] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_0_4_MASK, val); + wm[1] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_1_5_MASK, val); + wm[2] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_2_6_MASK, val); + wm[3] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_3_7_MASK, val); /* read the second set of memory latencies[4:7] */ val = 1; /* data0 to be programmed to 1 for second set */ @@ -3287,10 +3298,10 @@ static void skl_read_wm_latency(struct intel_display *display) return; } - wm[4] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_0_4_MASK, val) * mult; - wm[5] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_1_5_MASK, val) * mult; - wm[6] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_2_6_MASK, val) * mult; - wm[7] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_3_7_MASK, val) * mult; + wm[4] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_0_4_MASK, val); + wm[5] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_1_5_MASK, val); + wm[6] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_2_6_MASK, val); + wm[7] = REG_FIELD_GET(GEN9_MEM_LATENCY_LEVEL_3_7_MASK, val); } static void skl_setup_wm_latency(struct intel_display *display) -- cgit v1.2.3 From 030778ab8d22cb52c53560d04ad3649cae4e18ff Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:29:54 +0300 Subject: drm/i915: Extract increase_wm_latency() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the "increase wm latencies by some amount" code into a helper that can be reused. Reviewed-by: Luca Coelho Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-8-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 1ac94cb4f27d..98ca592f6042 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -3183,6 +3183,21 @@ static void multiply_wm_latency(struct intel_display *display, int mult) wm[level] *= mult; } +static void increase_wm_latency(struct intel_display *display, int inc) +{ + u16 *wm = display->wm.skl_latency; + int level, num_levels = display->wm.num_levels; + + wm[0] += inc; + + for (level = 1; level < num_levels; level++) { + if (wm[level] == 0) + break; + + wm[level] += inc; + } +} + static bool need_16gb_dimm_wa(struct intel_display *display) { const struct dram_info *dram_info = intel_dram_info(display->drm); @@ -3207,7 +3222,6 @@ adjust_wm_latency(struct intel_display *display) { u16 *wm = display->wm.skl_latency; int i, level, num_levels = display->wm.num_levels; - int read_latency = wm_read_latency(display); if (display->platform.dg2) multiply_wm_latency(display, 2); @@ -3232,16 +3246,8 @@ adjust_wm_latency(struct intel_display *display) * to add proper adjustment to each valid level we retrieve * from the punit when level 0 response data is 0us. 
*/ - if (wm[0] == 0) { - wm[0] += read_latency; - - for (level = 1; level < num_levels; level++) { - if (wm[level] == 0) - break; - - wm[level] += read_latency; - } - } + if (wm[0] == 0) + increase_wm_latency(display, wm_read_latency(display)); /* * WA Level-0 adjustment for 16Gb+ DIMMs: SKL+ -- cgit v1.2.3 From 84953731f9170b3b51013f1f4deb59cc916dfa7b Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:29:55 +0300 Subject: drm/i915: Use increase_wm_latency() for the 16Gb DIMM w/a MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bump the latency for all watermark levels in the 16Gb+ DIMM w/a. The spec does ask us to do it only for level 0, but it seems more sane to bump all the levels. If the actual memory access is slower, then the wakeup (WM1+) should also potentially happen earlier. This also avoids the theoretical case that WM0 would get bumped higher than WM1+. Not that it is likely to happen because the WM0 latency is always significantly lower than the WM1 latency. Reviewed-by: Luca Coelho Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-9-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 98ca592f6042..21dd15be74f9 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -3256,7 +3256,7 @@ adjust_wm_latency(struct intel_display *display, * to avoid any underrun. */ if (need_16gb_dimm_wa(display)) - wm[0] += 1; + increase_wm_latency(display, 1); } static void mtl_read_wm_latency(struct intel_display *display) -- cgit v1.2.3 From e407ea78abdf20bb64998e178b7944dd297ffcb3 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:29:56 +0300 Subject: drm/i915: Extract sanitize_wm_latency() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the "zero out invalid WM latencies" stuff into a helper. Mainly to avoid mixing higher level and lower level stuff in the same adjust_wm_latency() function. 
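The rule being extracted can be sketched standalone (the latency values are made up for illustration; this is not the driver code itself):

#include <stdio.h>

/* any WM1+ level with a 0us latency disables that level and all
 * the levels above it */
static void sanitize_wm_latency(unsigned short wm[], int num_levels)
{
	int level, i;

	for (level = 1; level < num_levels; level++) {
		if (wm[level] == 0) {
			for (i = level + 1; i < num_levels; i++)
				wm[i] = 0;
			return;
		}
	}
}

int main(void)
{
	unsigned short wm[8] = { 2, 6, 10, 0, 24, 36, 48, 60 };

	sanitize_wm_latency(wm, 8);

	for (int i = 0; i < 8; i++)
		printf("%u ", wm[i]); /* prints: 2 6 10 0 0 0 0 0 */
	printf("\n");
	return 0;
}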
Reviewed-by: Luca Coelho Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-10-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 23 ++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 21dd15be74f9..1acb9285bd05 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -3217,14 +3217,10 @@ static int wm_read_latency(struct intel_display *display) return 2; } -static void -adjust_wm_latency(struct intel_display *display) +static void sanitize_wm_latency(struct intel_display *display) { u16 *wm = display->wm.skl_latency; - int i, level, num_levels = display->wm.num_levels; - - if (display->platform.dg2) - multiply_wm_latency(display, 2); + int level, num_levels = display->wm.num_levels; /* * If a level n (n > 1) has a 0us latency, all levels m (m >= n) @@ -3233,11 +3229,24 @@ adjust_wm_latency(struct intel_display *display) */ for (level = 1; level < num_levels; level++) { if (wm[level] == 0) { + int i; + for (i = level + 1; i < num_levels; i++) wm[i] = 0; - break; + return; } } +} + +static void +adjust_wm_latency(struct intel_display *display) +{ + u16 *wm = display->wm.skl_latency; + + if (display->platform.dg2) + multiply_wm_latency(display, 2); + + sanitize_wm_latency(display); /* * WaWmMemoryReadLatency -- cgit v1.2.3 From 15bdae1072061db72027192badb276f95094da5f Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:29:57 +0300 Subject: drm/i915: Flatten sanitize_wm_latency() a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the inner loop out from the outer loop in sanitize_wm_latency() to flatten things a bit. Easier to read flat code. v2: Move the inner loop out completely (Luca) Cc: Luca Coelho Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-11-ville.syrjala@linux.intel.com Reviewed-by: Luca Coelho --- drivers/gpu/drm/i915/display/skl_watermark.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 1acb9285bd05..d83772c6ea9a 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -3228,14 +3228,12 @@ static void sanitize_wm_latency(struct intel_display *display) * of the punit to satisfy this requirement. */ for (level = 1; level < num_levels; level++) { - if (wm[level] == 0) { - int i; - - for (i = level + 1; i < num_levels; i++) - wm[i] = 0; - return; - } + if (wm[level] == 0) + break; } + + for (level = level + 1; level < num_levels; level++) + wm[level] = 0; } static void -- cgit v1.2.3 From d49564a5f7e23f08486697080e0006c606c5ebd2 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:29:58 +0300 Subject: drm/i915: Make wm latencies monotonic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some systems (e.g. LNL Lenovo ThinkPad X1 Carbon) declare semi-bogus non-monotonic WM latency values: WM0 latency not provided WM1 latency 100 usec WM2 latency 100 usec WM3 latency 100 usec WM4 latency 93 usec WM5 latency 100 usec Apparently Windows just papers over the issue by bumping the latencies for the higher watermark levels to make them monotonic again. Do the same. 
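With the latencies quoted above, the fixup amounts to the following standalone sketch (the WM0 value of 6 assumes level 0 was already bumped by the MTL+ read latency in the earlier fixup; that starting value is an assumption for illustration, not taken from this patch):

#include <stdio.h>

/* bump each enabled level to at least the latency of the level below */
static void make_wm_latency_monotonic(unsigned short wm[], int num_levels)
{
	int level;

	for (level = 1; level < num_levels; level++) {
		if (wm[level] == 0)
			break;
		if (wm[level] < wm[level - 1])
			wm[level] = wm[level - 1];
	}
}

int main(void)
{
	/* WM0..WM5; WM4 is the non-monotonic 93 usec outlier */
	unsigned short wm[6] = { 6, 100, 100, 100, 93, 100 };

	make_wm_latency_monotonic(wm, 6);

	for (int i = 0; i < 6; i++)
		printf("WM%d %u usec\n", i, wm[i]); /* WM4 becomes 100 */
	return 0;
}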
Cc: Luca Coelho Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-12-ville.syrjala@linux.intel.com Reviewed-by: Luca Coelho --- drivers/gpu/drm/i915/display/skl_watermark.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index d83772c6ea9a..2a40c135cb96 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -3236,6 +3236,19 @@ static void sanitize_wm_latency(struct intel_display *display) wm[level] = 0; } +static void make_wm_latency_monotonic(struct intel_display *display) +{ + u16 *wm = display->wm.skl_latency; + int level, num_levels = display->wm.num_levels; + + for (level = 1; level < num_levels; level++) { + if (wm[level] == 0) + break; + + wm[level] = max(wm[level], wm[level-1]); + } +} + static void adjust_wm_latency(struct intel_display *display) { @@ -3246,6 +3259,8 @@ adjust_wm_latency(struct intel_display *display) sanitize_wm_latency(display); + make_wm_latency_monotonic(display); + /* * WaWmMemoryReadLatency * -- cgit v1.2.3 From b86cb7beed45e30fd5701ac58866f57d99a55fa9 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:29:59 +0300 Subject: drm/i915: Print both the original and adjusted wm latencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to help with debugging print both the original wm latencies read from the mailbox/etc., and the final fixed/adjusted values. Reviewed-by: Luca Coelho Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-13-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 2a40c135cb96..9d58c28d3bdf 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -3344,9 +3344,11 @@ static void skl_setup_wm_latency(struct intel_display *display) else skl_read_wm_latency(display); + intel_print_wm_latency(display, "original", display->wm.skl_latency); + adjust_wm_latency(display); - intel_print_wm_latency(display, "Gen9 Plane", display->wm.skl_latency); + intel_print_wm_latency(display, "adjusted", display->wm.skl_latency); } static struct intel_global_state *intel_dbuf_duplicate_state(struct intel_global_obj *obj) -- cgit v1.2.3 From 840f6b9e480c514d8147abfb729c95a19ea1b2d2 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 22:30:00 +0300 Subject: drm/i915: Make sure wm block/lines are non-decreasing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The watermark algorithm sometimes produces results where higher watermark levels have smaller blocks/lines watermarks than the lower levels. That doesn't really make sense as the corresponding latencies are supposed to be non-decreasing. It's unclear how the hardware responds to such watermark values, so it seems better to avoid that case and just make sure the values are always non-decreasing. 
Here's an example of how things change for such a case on a GLK NUC: [PLANE:70:cursor A] level wm0, wm1, wm2, wm3, wm4, wm5, wm6, wm7, twm, swm, stwm -> *wm0,*wm1,*wm2,*wm3,*wm4,*wm5,*wm6,*wm7,*twm, swm, stwm [PLANE:70:cursor A] lines 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -> 4, 4, 4, 2, 2, 2, 2, 2, 0, 0, 0 [PLANE:70:cursor A] blocks 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -> 11, 11, 12, 7, 7, 7, 7, 7, 25, 0, 0 [PLANE:70:cursor A] min_ddb 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -> 12, 12, 13, 8, 8, 8, 8, 8, 26, 0, 0 -> [PLANE:70:cursor A] level wm0, wm1, wm2, wm3, wm4, wm5, wm6, wm7, twm, swm, stwm -> *wm0,*wm1,*wm2,*wm3,*wm4,*wm5,*wm6,*wm7,*twm, swm, stwm [PLANE:70:cursor A] lines 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -> 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0 [PLANE:70:cursor A] blocks 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -> 11, 11, 12, 12, 12, 12, 12, 12, 25, 0, 0 [PLANE:70:cursor A] min_ddb 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -> 12, 12, 13, 13, 13, 13, 13, 13, 26, 0, 0 Whether this actually helps on any display blinking issues is unclear. Reviewed-by: Luca Coelho References: https://gitlab.freedesktop.org/drm/intel/-/issues/8683 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919193000.17665-14-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 9d58c28d3bdf..9eb28d935757 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -1878,18 +1878,21 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, } else { blocks++; } - - /* - * Make sure result blocks for higher latency levels are - * at least as high as level below the current level. - * Assumption in DDB algorithm optimization for special - * cases. Also covers Display WA #1125 for RC. - */ - if (result_prev->blocks > blocks) - blocks = result_prev->blocks; } } + /* + * Make sure result blocks for higher latency levels are + * at least as high as level below the current level. + * Assumption in DDB algorithm optimization for special + * cases. Also covers Display WA #1125 for RC. + * + * Let's always do this as the algorithm can give non + * monotonic results on any platform. + */ + blocks = max_t(u32, blocks, result_prev->blocks); + lines = max_t(u32, lines, result_prev->lines); + if (DISPLAY_VER(display) >= 11) { if (wp->y_tiled) { int extra_lines; -- cgit v1.2.3 From 32620e176443bf23ec81bfe8f177c6721a904864 Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane Date: Mon, 22 Sep 2025 20:33:15 +0530 Subject: drm/pcids: Split PTL pciids group to make wcl subplatform To define WCL as a subplatform of PTL, the WCL pci ids are split into a separate group from PTL. So update the pciidlist tables to cover all the pci ids. 
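The pciids.h tables are X-macro lists, so splitting off a group just means callers instantiate one more list. A standalone sketch of how the expansion works (the struct and the MAKE_ENTRY macro are made up as stand-ins for INTEL_VGA_DEVICE()/INTEL_DISPLAY_DEVICE(); the IDs are the WCL ones from the patch):

#include <stdio.h>

#define INTEL_WCL_IDS(MACRO__, ...) \
	MACRO__(0xFD80, ## __VA_ARGS__), \
	MACRO__(0xFD81, ## __VA_ARGS__)

struct id_entry {
	unsigned int id;
	const char *name;
};

/* stand-in for the real device macros */
#define MAKE_ENTRY(id, name) { id, name }

static const struct id_entry pciidlist[] = {
	INTEL_WCL_IDS(MAKE_ENTRY, "wcl"),
};

int main(void)
{
	for (size_t i = 0; i < sizeof(pciidlist) / sizeof(pciidlist[0]); i++)
		printf("0x%04X -> %s\n", pciidlist[i].id, pciidlist[i].name);
	return 0;
}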
v2: - Squash wcl description in single patch for display and xe.(jani,gustavo) Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Gustavo Sousa Signed-off-by: Suraj Kandpal Link: https://lore.kernel.org/r/20250922150317.2334680-2-dnyaneshwar.bhadane@intel.com --- drivers/gpu/drm/i915/display/intel_display_device.c | 1 + drivers/gpu/drm/xe/xe_pci.c | 1 + include/drm/intel/pciids.h | 5 ++++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index a002bc6ce7b0..a9a36176096f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1482,6 +1482,7 @@ static const struct { INTEL_LNL_IDS(INTEL_DISPLAY_DEVICE, &lnl_desc), INTEL_BMG_IDS(INTEL_DISPLAY_DEVICE, &bmg_desc), INTEL_PTL_IDS(INTEL_DISPLAY_DEVICE, &ptl_desc), + INTEL_WCL_IDS(INTEL_DISPLAY_DEVICE, &ptl_desc), }; static const struct { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 046d330bad34..7ae316534eb6 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -374,6 +374,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), INTEL_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc), INTEL_PTL_IDS(INTEL_VGA_DEVICE, &ptl_desc), + INTEL_WCL_IDS(INTEL_VGA_DEVICE, &ptl_desc), { } }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index da6301a6fcea..69d4ae92d822 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -877,7 +877,10 @@ MACRO__(0xB08F, ## __VA_ARGS__), \ MACRO__(0xB090, ## __VA_ARGS__), \ MACRO__(0xB0A0, ## __VA_ARGS__), \ - MACRO__(0xB0B0, ## __VA_ARGS__), \ + MACRO__(0xB0B0, ## __VA_ARGS__) + +/* WCL */ +#define INTEL_WCL_IDS(MACRO__, ...) \ MACRO__(0xFD80, ## __VA_ARGS__), \ MACRO__(0xFD81, ## __VA_ARGS__) -- cgit v1.2.3 From 4dfaae643e59cf3ab71b88689dce1b874f036f00 Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane Date: Mon, 22 Sep 2025 20:33:16 +0530 Subject: drm/i915/display: Add definition for wcl as subplatform We will need to differentiate between WCL and PTL in intel_encoder_is_c10phy(). Since WCL and PTL use the same display architecture, let's define WCL as a subplatform of PTL to allow the differentiation. 
v2: Update commit message and reorder wcl define (Gustavo) Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Gustavo Sousa Signed-off-by: Suraj Kandpal Link: https://lore.kernel.org/r/20250922150317.2334680-3-dnyaneshwar.bhadane@intel.com --- drivers/gpu/drm/i915/display/intel_display_device.c | 12 ++++++++++++ drivers/gpu/drm/i915/display/intel_display_device.h | 4 +++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index a9a36176096f..f3f1f25b0f38 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1404,8 +1404,20 @@ static const struct platform_desc bmg_desc = { PLATFORM_GROUP(dgfx), }; +static const u16 wcl_ids[] = { + INTEL_WCL_IDS(ID), + 0 +}; + static const struct platform_desc ptl_desc = { PLATFORM(pantherlake), + .subplatforms = (const struct subplatform_desc[]) { + { + SUBPLATFORM(pantherlake, wildcatlake), + .pciidlist = wcl_ids, + }, + {}, + } }; __diag_pop(); diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 1f091fbcd0ec..0e062753cf9b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -101,7 +101,9 @@ struct pci_dev; /* Display ver 14.1 (based on GMD ID) */ \ func(battlemage) \ /* Display ver 30 (based on GMD ID) */ \ - func(pantherlake) + func(pantherlake) \ + func(pantherlake_wildcatlake) + #define __MEMBER(name) unsigned long name:1; #define __COUNT(x) 1 + -- cgit v1.2.3 From 8147f7a1c083fd565fb958824f7c552de3b2dc46 Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane Date: Mon, 22 Sep 2025 20:33:17 +0530 Subject: drm/i915/xe3: Restrict PTL intel_encoder_is_c10phy() to only PHY A On PTL, no combo PHY is connected to PORT B. However, PORT B can still be used for Type-C and will utilize the C20 PHY for eDP over Type-C. In such configurations, VBTs also enumerate PORT B. This leads to issues where PORT B is incorrectly identified as using the C10 PHY, due to the assumption that returning true for PORT B in intel_encoder_is_c10phy() would not cause problems. From PTL's perspective, only PORT A/PHY A uses the C10 PHY. Update the helper intel_encoder_is_c10phy() to return true only for PORT A/PHY on PTL. v2: Change the condition code style for ptl/wcl Bspec: 72571,73944 Fixes: 9d10de78a37f ("drm/i915/wcl: C10 phy connected to port A and B") Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Gustavo Sousa Signed-off-by: Suraj Kandpal Link: https://lore.kernel.org/r/20250922150317.2334680-4-dnyaneshwar.bhadane@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 801235a5bc0a..a2d2cecf7121 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -39,14 +39,12 @@ bool intel_encoder_is_c10phy(struct intel_encoder *encoder) struct intel_display *display = to_intel_display(encoder); enum phy phy = intel_encoder_to_phy(encoder); - /* PTL doesn't have a PHY connected to PORT B; as such, - * there will never be a case where PTL uses PHY B. - * WCL uses PORT A and B with the C10 PHY. - * Reusing the condition for WCL and extending it for PORT B - * should not cause any issues for PTL. 
- */ - if (display->platform.pantherlake && phy < PHY_C) - return true; + if (display->platform.pantherlake) { + if (display->platform.pantherlake_wildcatlake) + return phy <= PHY_B; + else + return phy == PHY_A; + } if ((display->platform.lunarlake || display->platform.meteorlake) && phy < PHY_C) return true; -- cgit v1.2.3 From 3dc42238788bc70b94d946dfd4683ebf30a3252b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 23 Sep 2025 17:31:04 +0300 Subject: drm/i915/irq: drop intel_psr_regs.h include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915_irq.c no longer needs display/intel_psr_regs.h. Drop it. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/29752bb1942fc2ceceb5140bb49f67e44e1b0676.1758637773.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_irq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7c7c6dcbce88..56f231591a3e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -38,7 +38,6 @@ #include "display/intel_hotplug.h" #include "display/intel_hotplug_irq.h" #include "display/intel_lpe_audio.h" -#include "display/intel_psr_regs.h" #include "gt/intel_breadcrumbs.h" #include "gt/intel_gt.h" -- cgit v1.2.3 From 381f04d8c0276c703956c749b3c333eb32acf668 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 23 Sep 2025 17:31:05 +0300 Subject: drm/i915/irq: initialize gen2_imr_mask in terms of enable_mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of initializing gen2_imr_mask and enable_mask independently, use the latter for initializing the former. This also highlights the differences in the masks, i.e. what's set to enable_mask after it's been used to initialize gen2_imr_mask. 
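The resulting pattern, in isolation (a standalone sketch with simplified mask bits standing in for the real I915_* interrupt bits; not the driver code):

#include <stdint.h>
#include <stdio.h>

#define PIPE_A_EVENT	(1u << 0)
#define PIPE_B_EVENT	(1u << 1)
#define MASTER_ERROR	(1u << 2)
#define USER_INTERRUPT	(1u << 3)

int main(void)
{
	uint32_t enable_mask, imr_mask;

	/* bits that should be both enabled and unmasked */
	enable_mask = PIPE_A_EVENT | PIPE_B_EVENT | MASTER_ERROR;

	/* the IMR mask is derived directly from the enable mask... */
	imr_mask = ~enable_mask;

	/* ...and only then is the IER-only difference added */
	enable_mask |= USER_INTERRUPT;

	printf("imr_mask=0x%08x enable_mask=0x%08x\n", imr_mask, enable_mask);
	return 0;
}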
Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/e3b612ce4decea699bde2c52aeaef48bf95f7abc.1758637773.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_irq.c | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 56f231591a3e..04de02fc08d9 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -895,26 +895,20 @@ static void i915_irq_postinstall(struct drm_i915_private *dev_priv) gen2_error_init(uncore, GEN2_ERROR_REGS, ~i9xx_error_mask(dev_priv)); - dev_priv->gen2_imr_mask = - ~(I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | - I915_MASTER_ERROR_INTERRUPT); - enable_mask = I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | - I915_MASTER_ERROR_INTERRUPT | - I915_USER_INTERRUPT; + I915_MASTER_ERROR_INTERRUPT; - if (DISPLAY_VER(display) >= 3) { - dev_priv->gen2_imr_mask &= ~I915_ASLE_INTERRUPT; + if (DISPLAY_VER(display) >= 3) enable_mask |= I915_ASLE_INTERRUPT; - } - if (HAS_HOTPLUG(display)) { - dev_priv->gen2_imr_mask &= ~I915_DISPLAY_PORT_INTERRUPT; + if (HAS_HOTPLUG(display)) enable_mask |= I915_DISPLAY_PORT_INTERRUPT; - } + + dev_priv->gen2_imr_mask = ~enable_mask; + + enable_mask |= I915_USER_INTERRUPT; gen2_irq_init(uncore, GEN2_IRQ_REGS, dev_priv->gen2_imr_mask, enable_mask); @@ -1016,20 +1010,16 @@ static void i965_irq_postinstall(struct drm_i915_private *dev_priv) gen2_error_init(uncore, GEN2_ERROR_REGS, ~i965_error_mask(dev_priv)); - dev_priv->gen2_imr_mask = - ~(I915_ASLE_INTERRUPT | - I915_DISPLAY_PORT_INTERRUPT | - I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | - I915_MASTER_ERROR_INTERRUPT); - enable_mask = I915_ASLE_INTERRUPT | I915_DISPLAY_PORT_INTERRUPT | I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | - I915_MASTER_ERROR_INTERRUPT | - I915_USER_INTERRUPT; + I915_MASTER_ERROR_INTERRUPT; + + dev_priv->gen2_imr_mask = ~enable_mask; + + enable_mask |= I915_USER_INTERRUPT; if (IS_G4X(dev_priv)) enable_mask |= I915_BSD_USER_INTERRUPT; -- cgit v1.2.3 From d54c636db529d2c73e49a7be8a55afd530977f5c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 23 Sep 2025 17:31:06 +0300 Subject: drm/i915/irq: abstract i9xx_display_irq_enable_mask() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Figure out the enable mask for display things in display code. Reuse the same function for both i915 and i965 code, the end result remains the same. This removes a pair of DISPLAY_VER() and HAS_HOTPLUG() checks from core irq code. 
Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/dd7cd63a4019ff24098d565b67ea827df6b9ed45.1758637773.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_irq.c | 16 ++++++++++++++++ drivers/gpu/drm/i915/display/intel_display_irq.h | 1 + drivers/gpu/drm/i915/i915_irq.c | 16 ++-------------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index c6f367e6159e..4d51900123ea 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -1900,6 +1900,22 @@ void i9xx_display_irq_reset(struct intel_display *display) i9xx_pipestat_irq_reset(display); } +u32 i9xx_display_irq_enable_mask(struct intel_display *display) +{ + u32 enable_mask; + + enable_mask = I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT; + + if (DISPLAY_VER(display) >= 3) + enable_mask |= I915_ASLE_INTERRUPT; + + if (HAS_HOTPLUG(display)) + enable_mask |= I915_DISPLAY_PORT_INTERRUPT; + + return enable_mask; +} + void i915_display_irq_postinstall(struct intel_display *display) { /* diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.h b/drivers/gpu/drm/i915/display/intel_display_irq.h index cee120347064..e44d88e0d7e7 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.h +++ b/drivers/gpu/drm/i915/display/intel_display_irq.h @@ -61,6 +61,7 @@ void vlv_display_irq_reset(struct intel_display *display); void gen8_display_irq_reset(struct intel_display *display); void gen11_display_irq_reset(struct intel_display *display); +u32 i9xx_display_irq_enable_mask(struct intel_display *display); void i915_display_irq_postinstall(struct intel_display *display); void i965_display_irq_postinstall(struct intel_display *display); void vlv_display_irq_postinstall(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 04de02fc08d9..f9fbb88b9e26 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -895,17 +895,9 @@ static void i915_irq_postinstall(struct drm_i915_private *dev_priv) gen2_error_init(uncore, GEN2_ERROR_REGS, ~i9xx_error_mask(dev_priv)); - enable_mask = - I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + enable_mask = i9xx_display_irq_enable_mask(display) | I915_MASTER_ERROR_INTERRUPT; - if (DISPLAY_VER(display) >= 3) - enable_mask |= I915_ASLE_INTERRUPT; - - if (HAS_HOTPLUG(display)) - enable_mask |= I915_DISPLAY_PORT_INTERRUPT; - dev_priv->gen2_imr_mask = ~enable_mask; enable_mask |= I915_USER_INTERRUPT; @@ -1010,11 +1002,7 @@ static void i965_irq_postinstall(struct drm_i915_private *dev_priv) gen2_error_init(uncore, GEN2_ERROR_REGS, ~i965_error_mask(dev_priv)); - enable_mask = - I915_ASLE_INTERRUPT | - I915_DISPLAY_PORT_INTERRUPT | - I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + enable_mask = i9xx_display_irq_enable_mask(display) | I915_MASTER_ERROR_INTERRUPT; dev_priv->gen2_imr_mask = ~enable_mask; -- cgit v1.2.3 From c39d3e2dd9dc97f15546e634934a47a81d95c44c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 23 Sep 2025 17:31:07 +0300 Subject: drm/i915/irq: move check for HAS_HOTPLUG() inside i9xx_hpd_irq_ack() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to avoid using the display dependent HAS_HOTPLUG() in generic irq code. 
Since the enabling of I915_DISPLAY_PORT_INTERRUPT depends on HAS_HOTPLUG() to begin with, we don't really expect to get the irqs for !HAS_HOTPLUG(). At least in theory, checking for HAS_HOTPLUG() inside i9xx_hpd_irq_ack() should not have any impact. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/2f97c077e67667bf420196c7381553d5286da958.1758637773.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_hotplug_irq.c | 3 +++ drivers/gpu/drm/i915/i915_irq.c | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c index 4f72f3fb9af5..9a4da818ad61 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c @@ -420,6 +420,9 @@ u32 i9xx_hpd_irq_ack(struct intel_display *display) u32 hotplug_status = 0, hotplug_status_mask; int i; + if (!HAS_HOTPLUG(display)) + return 0; + if (display->platform.g4x || display->platform.valleyview || display->platform.cherryview) hotplug_status_mask = HOTPLUG_INT_STATUS_G4X | diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f9fbb88b9e26..90174ce9195c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -931,8 +931,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) ret = IRQ_HANDLED; - if (HAS_HOTPLUG(display) && - iir & I915_DISPLAY_PORT_INTERRUPT) + if (iir & I915_DISPLAY_PORT_INTERRUPT) hotplug_status = i9xx_hpd_irq_ack(display); /* Call regardless, as some status bits might not be -- cgit v1.2.3 From c989cb4c64edca2516304c9d29011f1066ca471a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 23 Sep 2025 17:31:08 +0300 Subject: drm/i915/irq: split ILK display irq handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split out display irq handling on ilk. Since the master IRQ enable is in DEIIR, we'll need to do this in two parts. First, add ilk_display_irq_master_disable() to disable master and south interrupts, and second, add (repurposed) ilk_display_irq_handler() to finish display irq handling. It's not the prettiest thing you ever saw, but improves separation of display irq handling. And removes HAS_PCH_NOP() and DISPLAY_VER() checks from core irq code. 
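The shape of the two-part handling can be modeled standalone (plain variables stand in for the DEIER/SDEIER registers; a sketch of the save/gate/restore protocol, not the driver code):

#include <stdint.h>
#include <stdio.h>

#define DE_MASTER_IRQ_CONTROL (1u << 31)

/* stand-ins for the DEIER and SDEIER registers */
static uint32_t deier = DE_MASTER_IRQ_CONTROL | 0xff;
static uint32_t sdeier = 0x3;

static void master_disable(uint32_t *de_ier, uint32_t *sde_ier)
{
	*de_ier = deier;
	deier &= ~DE_MASTER_IRQ_CONTROL;	/* gate further interrupts */

	*sde_ier = sdeier;
	sdeier = 0;	/* park south interrupts on their back queue */
}

static void master_enable(uint32_t de_ier, uint32_t sde_ier)
{
	deier = de_ier;
	if (sde_ier)
		sdeier = sde_ier;	/* re-triggers any queued south irq */
}

int main(void)
{
	uint32_t de_ier, sde_ier;

	master_disable(&de_ier, &sde_ier);
	printf("handling: deier=0x%08x sdeier=0x%08x\n", deier, sdeier);
	master_enable(de_ier, sde_ier);
	printf("restored: deier=0x%08x sdeier=0x%08x\n", deier, sdeier);
	return 0;
}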
v2: - Separate ilk_display_irq_master_enable() (Ville) - Use _fw mmio accessors (Ville) Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/e8ea7c985c3f3a80870f3333bde2e1bf30d653b0.1758637773.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_irq.c | 51 +++++++++++++++++++++++- drivers/gpu/drm/i915/display/intel_display_irq.h | 5 ++- drivers/gpu/drm/i915/i915_irq.c | 31 +++----------- 3 files changed, 58 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index 4d51900123ea..e1a812f6159b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -872,7 +872,7 @@ static void ilk_gtt_fault_irq_handler(struct intel_display *display) } } -void ilk_display_irq_handler(struct intel_display *display, u32 de_iir) +static void _ilk_display_irq_handler(struct intel_display *display, u32 de_iir) { enum pipe pipe; u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG; @@ -923,7 +923,7 @@ void ilk_display_irq_handler(struct intel_display *display, u32 de_iir) ilk_display_rps_irq_handler(display); } -void ivb_display_irq_handler(struct intel_display *display, u32 de_iir) +static void _ivb_display_irq_handler(struct intel_display *display, u32 de_iir) { enum pipe pipe; u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG_IVB; @@ -972,6 +972,53 @@ void ivb_display_irq_handler(struct intel_display *display, u32 de_iir) } } +void ilk_display_irq_master_disable(struct intel_display *display, u32 *de_ier, u32 *sde_ier) +{ + /* disable master interrupt before clearing iir */ + *de_ier = intel_de_read_fw(display, DEIER); + intel_de_write_fw(display, DEIER, *de_ier & ~DE_MASTER_IRQ_CONTROL); + + /* + * Disable south interrupts. We'll only write to SDEIIR once, so further + * interrupts will be stored on its back queue, and then we'll be able + * to process them after we restore SDEIER (as soon as we restore it, + * we'll get an interrupt if SDEIIR still has something to process due + * to its back queue). 
+ */ + if (!HAS_PCH_NOP(display)) { + *sde_ier = intel_de_read_fw(display, SDEIER); + intel_de_write_fw(display, SDEIER, 0); + } else { + *sde_ier = 0; + } +} + +void ilk_display_irq_master_enable(struct intel_display *display, u32 de_ier, u32 sde_ier) +{ + intel_de_write_fw(display, DEIER, de_ier); + + if (sde_ier) + intel_de_write_fw(display, SDEIER, sde_ier); +} + +bool ilk_display_irq_handler(struct intel_display *display) +{ + u32 de_iir; + bool handled = false; + + de_iir = intel_de_read_fw(display, DEIIR); + if (de_iir) { + intel_de_write_fw(display, DEIIR, de_iir); + if (DISPLAY_VER(display) >= 7) + _ivb_display_irq_handler(display, de_iir); + else + _ilk_display_irq_handler(display, de_iir); + handled = true; + } + + return handled; +} + static u32 gen8_de_port_aux_mask(struct intel_display *display) { u32 mask; diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.h b/drivers/gpu/drm/i915/display/intel_display_irq.h index e44d88e0d7e7..84acd31948cf 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.h +++ b/drivers/gpu/drm/i915/display/intel_display_irq.h @@ -47,8 +47,9 @@ void i965_disable_vblank(struct drm_crtc *crtc); void ilk_disable_vblank(struct drm_crtc *crtc); void bdw_disable_vblank(struct drm_crtc *crtc); -void ivb_display_irq_handler(struct intel_display *display, u32 de_iir); -void ilk_display_irq_handler(struct intel_display *display, u32 de_iir); +void ilk_display_irq_master_disable(struct intel_display *display, u32 *de_ier, u32 *sde_ier); +void ilk_display_irq_master_enable(struct intel_display *display, u32 de_ier, u32 sde_ier); +bool ilk_display_irq_handler(struct intel_display *display); void gen8_de_irq_handler(struct intel_display *display, u32 master_ctl); void gen11_display_irq_handler(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 90174ce9195c..11a727b74849 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -414,7 +414,7 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) struct drm_i915_private *i915 = arg; struct intel_display *display = i915->display; void __iomem * const regs = intel_uncore_regs(&i915->uncore); - u32 de_iir, gt_iir, de_ier, sde_ier = 0; + u32 gt_iir, de_ier = 0, sde_ier = 0; irqreturn_t ret = IRQ_NONE; if (unlikely(!intel_irqs_enabled(i915))) @@ -423,19 +423,8 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) /* IRQs are synced during runtime_suspend, we don't require a wakeref */ disable_rpm_wakeref_asserts(&i915->runtime_pm); - /* disable master interrupt before clearing iir */ - de_ier = raw_reg_read(regs, DEIER); - raw_reg_write(regs, DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL); - - /* Disable south interrupts. We'll only write to SDEIIR once, so further - * interrupts will will be stored on its back queue, and then we'll be - * able to process them after we restore SDEIER (as soon as we restore - * it, we'll get an interrupt if SDEIIR still has something to process - * due to its back queue). 
*/ - if (!HAS_PCH_NOP(display)) { - sde_ier = raw_reg_read(regs, SDEIER); - raw_reg_write(regs, SDEIER, 0); - } + /* Disable master and south interrupts */ + ilk_display_irq_master_disable(display, &de_ier, &sde_ier); /* Find, clear, then process each source of interrupt */ @@ -449,15 +438,8 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) ret = IRQ_HANDLED; } - de_iir = raw_reg_read(regs, DEIIR); - if (de_iir) { - raw_reg_write(regs, DEIIR, de_iir); - if (DISPLAY_VER(display) >= 7) - ivb_display_irq_handler(display, de_iir); - else - ilk_display_irq_handler(display, de_iir); + if (ilk_display_irq_handler(display)) ret = IRQ_HANDLED; - } if (GRAPHICS_VER(i915) >= 6) { u32 pm_iir = raw_reg_read(regs, GEN6_PMIIR); @@ -468,9 +450,8 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) } } - raw_reg_write(regs, DEIER, de_ier); - if (sde_ier) - raw_reg_write(regs, SDEIER, sde_ier); + /* Re-enable master and south interrupts */ + ilk_display_irq_master_enable(display, de_ier, sde_ier); pmu_irq_stats(i915, ret); -- cgit v1.2.3 From d90c0a5ccdb48780a839b6ee4e3a569a445f4f2a Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Tue, 2 Sep 2025 17:58:50 +0530 Subject: drm/i915/vrr: Refactor VRR live status wait into common helper Add a helper to consolidate timeout handling and error logging when waiting for VRR live status to clear. Log an error message if the VRR live status bit fails to clear within the timeout. Signed-off-by: Ankit Nautiyal Reviewed-by: Mitul Golani Link: https://lore.kernel.org/r/20250902122850.3649828-1-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_vrr.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 9e007aab1452..98d28de2e451 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -644,6 +644,15 @@ void intel_vrr_enable(const struct intel_crtc_state *crtc_state) } } +static void intel_vrr_wait_for_live_status_clear(struct intel_display *display, + enum transcoder cpu_transcoder) +{ + if (intel_de_wait_for_clear(display, + TRANS_VRR_STATUS(display, cpu_transcoder), + VRR_STATUS_VRR_EN_LIVE, 1000)) + drm_err(display->drm, "Timed out waiting for VRR live status to clear\n"); +} + void intel_vrr_disable(const struct intel_crtc_state *old_crtc_state) { struct intel_display *display = to_intel_display(old_crtc_state); @@ -655,9 +664,7 @@ void intel_vrr_disable(const struct intel_crtc_state *old_crtc_state) if (!intel_vrr_always_use_vrr_tg(display)) { intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), trans_vrr_ctl(old_crtc_state)); - intel_de_wait_for_clear(display, - TRANS_VRR_STATUS(display, cpu_transcoder), - VRR_STATUS_VRR_EN_LIVE, 1000); + intel_vrr_wait_for_live_status_clear(display, cpu_transcoder); intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), 0); } @@ -703,8 +710,8 @@ void intel_vrr_transcoder_disable(const struct intel_crtc_state *crtc_state) intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), 0); - intel_de_wait_for_clear(display, TRANS_VRR_STATUS(display, cpu_transcoder), - VRR_STATUS_VRR_EN_LIVE, 1000); + intel_vrr_wait_for_live_status_clear(display, cpu_transcoder); + intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), 0); } -- cgit v1.2.3 From c2e04017fb0b40f7b5ac1db4e9ab49e84b74f567 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 24 Sep 2025 11:51:29 +0300 Subject: drm/i915/gem: add 
i915_gem_fence_wait_priority_display() helper Add i915_gem_fence_wait_priority_display() helper to wait with I915_PRIORITY_DISPLAY. This drops the intel_plane.c dependency on i915_scheduler_types.h, and allows us to remove the compat header from xe. Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20250924085129.146173-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_plane.c | 5 +---- drivers/gpu/drm/i915/gem/i915_gem_object.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_wait.c | 7 +++++++ .../gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h | 4 +--- .../gpu/drm/xe/compat-i915-headers/i915_scheduler_types.h | 13 ------------- 5 files changed, 10 insertions(+), 20 deletions(-) delete mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_scheduler_types.h diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index 2329f09d413d..19d4640c0e53 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -45,7 +45,6 @@ #include #include "gem/i915_gem_object.h" -#include "i915_scheduler_types.h" #include "i9xx_plane_regs.h" #include "intel_cdclk.h" #include "intel_cursor.h" @@ -1172,7 +1171,6 @@ static int intel_prepare_plane_fb(struct drm_plane *_plane, struct drm_plane_state *_new_plane_state) { - struct i915_sched_attr attr = { .priority = I915_PRIORITY_DISPLAY }; struct intel_plane *plane = to_intel_plane(_plane); struct intel_display *display = to_intel_display(plane); struct intel_plane_state *new_plane_state = @@ -1221,8 +1219,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, goto unpin_fb; if (new_plane_state->uapi.fence) { - i915_gem_fence_wait_priority(new_plane_state->uapi.fence, - &attr); + i915_gem_fence_wait_priority_display(new_plane_state->uapi.fence); intel_display_rps_boost_after_vblank(new_plane_state->hw.crtc, new_plane_state->uapi.fence); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 148034ef504d..8878539c10ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -802,6 +802,7 @@ static inline void __start_cpu_write(struct drm_i915_gem_object *obj) void i915_gem_fence_wait_priority(struct dma_fence *fence, const struct i915_sched_attr *attr); +void i915_gem_fence_wait_priority_display(struct dma_fence *fence); int i915_gem_object_wait(struct drm_i915_gem_object *obj, unsigned int flags, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 54829801d3f7..2893df65c359 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -138,6 +138,13 @@ void i915_gem_fence_wait_priority(struct dma_fence *fence, local_bh_enable(); /* kick the tasklets if queues were reprioritised */ } +void i915_gem_fence_wait_priority_display(struct dma_fence *fence) +{ + struct i915_sched_attr attr = { .priority = I915_PRIORITY_DISPLAY }; + + i915_gem_fence_wait_priority(fence, &attr); +} + int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h index 8a048980ea38..0548b2e0316f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h @@ -5,10 +5,8 @@ #define __I915_GEM_OBJECT_H__ struct dma_fence; -struct 
i915_sched_attr; -static inline void i915_gem_fence_wait_priority(struct dma_fence *fence, - const struct i915_sched_attr *attr) +static inline void i915_gem_fence_wait_priority_display(struct dma_fence *fence) { } diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_scheduler_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_scheduler_types.h deleted file mode 100644 index c11130440d31..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_scheduler_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* Copyright © 2025 Intel Corporation */ - -#ifndef __I915_SCHEDULER_TYPES_H__ -#define __I915_SCHEDULER_TYPES_H__ - -#define I915_PRIORITY_DISPLAY 0 - -struct i915_sched_attr { - int priority; -}; - -#endif -- cgit v1.2.3 From 6131428a47537efd10d87ec77ee621deedb93763 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 24 Sep 2025 19:45:33 +0530 Subject: drm/i915/psr: s/intel_psr_min_vblank_delay/intel_psr_min_set_context_latency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename intel_psr_min_vblank_delay to intel_psr_min_set_context_latency to reflect that it provides the minimum value for 'Set context latency' (SCL or Window W2) for PSR/Panel Replay to work correctly across different platforms. Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250924141542.3122126-2-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/display/intel_psr.c | 6 +++--- drivers/gpu/drm/i915/display/intel_psr.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 18b9baa96241..679c2a9baaee 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2369,7 +2369,7 @@ static int intel_crtc_vblank_delay(const struct intel_crtc_state *crtc_state) if (!HAS_DSB(display)) return 0; - vblank_delay = max(vblank_delay, intel_psr_min_vblank_delay(crtc_state)); + vblank_delay = max(vblank_delay, intel_psr_min_set_context_latency(crtc_state)); return vblank_delay; } diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 01bf304c705f..49ccd0864c55 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -2360,12 +2360,12 @@ void intel_psr_trigger_frame_change_event(struct intel_dsb *dsb, } /** - * intel_psr_min_vblank_delay - Minimum vblank delay needed by PSR + * intel_psr_min_set_context_latency - Minimum 'set context latency' lines needed by PSR * @crtc_state: the crtc state * - * Return minimum vblank delay needed by PSR. + * Return minimum SCL lines/delay needed by PSR. 
*/ -int intel_psr_min_vblank_delay(const struct intel_crtc_state *crtc_state) +int intel_psr_min_set_context_latency(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index 077751aa599f..9147996d6c9e 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -77,7 +77,7 @@ void intel_psr_unlock(const struct intel_crtc_state *crtc_state); void intel_psr_trigger_frame_change_event(struct intel_dsb *dsb, struct intel_atomic_state *state, struct intel_crtc *crtc); -int intel_psr_min_vblank_delay(const struct intel_crtc_state *crtc_state); +int intel_psr_min_set_context_latency(const struct intel_crtc_state *crtc_state); void intel_psr_connector_debugfs_add(struct intel_connector *connector); void intel_psr_debugfs_register(struct intel_display *display); bool intel_psr_needs_alpm(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); -- cgit v1.2.3 From 419daf7d83b04c04c261cd729683cdbbd153bda3 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 24 Sep 2025 19:45:34 +0530 Subject: drm/i915/display: Add set_context_latency to crtc_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'Set context latency' (SCL, Window W2) is defined as the number of lines before the double buffering point that are required to complete programming of the registers, typically when DSB is used to program the display registers. Since we are not using this window for programming the registers, it is mostly set to 0, except for a few cases related to PSR/PR where the 'set context latency' must be at least 1. Currently we use the 'set context latency' (if required) implicitly, by moving the vblank start by the required amount and then measuring the delay, i.e. the difference between the undelayed vblank start and the delayed vblank start. Since our guardband matches the vblank length, this was not a problem, as the difference between the undelayed vblank and the delayed vblank was at most equal to the 'set context latency' lines. However, if we want to optimize the guardband, the difference between the undelayed and the delayed vblank will be large and we cannot use this difference as the 'set context latency' lines. To make way for this guardband optimization, formally introduce the 'set context latency' (SCL) and track it as a new member `set_context_latency` of struct intel_crtc_state. Eventually, all references to vblank delay where set context latency is actually meant will be replaced by this new `set_context_latency` member. Note: on TGL the TRANS_SET_CONTEXT_LATENCY register doesn't exist to account for the SCL. However, the VBLANK_START-VACTIVE difference plays an identical role here, i.e. it can be used to create the SCL window ahead of the undelayed vblank. During readout, since there is no specific register to read the SCL from, use the difference between vblank start and vactive to populate the new member for TGL. v2: - Use u16 for set_context_latency. (Ville) - s/vblank_delay/set_context_latency. (Ville) - Meld the changes for TGL with this change. (Ville) v3: - Update comment to clarify the TGL case. (Ville) - Fix typo in commit message.
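A minimal sketch of the timing relationships described above (illustrative only; the sketch_* names are not driver code, while crtc_vdisplay, crtc_vblank_start and set_context_latency mirror the fields named in the patch):

/* Compute side: the delayed vblank start is the end of the active area
 * (vdisplay) pushed out by the SCL (W2) window. */
static int sketch_delayed_vblank_start(int crtc_vdisplay, int set_context_latency)
{
	return crtc_vdisplay + set_context_latency;
}

/* Readout side on TGL: no dedicated SCL register exists, so SCL is
 * recovered as VBLANK_START - VACTIVE. */
static int sketch_tgl_readout_scl(int crtc_vblank_start, int crtc_vdisplay)
{
	return crtc_vblank_start - crtc_vdisplay;
}
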
Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250924141542.3122126-3-ankit.k.nautiyal@intel.com --- .../gpu/drm/i915/display/intel_crtc_state_dump.c | 5 +- drivers/gpu/drm/i915/display/intel_display.c | 56 +++++++++++++++------- drivers/gpu/drm/i915/display/intel_display_types.h | 3 ++ 3 files changed, 44 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index 0c7f91046996..a14bcda4446c 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -289,10 +289,11 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config, drm_printf(&p, "scanline offset: %d\n", intel_crtc_scanline_offset(pipe_config)); - drm_printf(&p, "vblank delay: %d, framestart delay: %d, MSA timing delay: %d\n", + drm_printf(&p, "vblank delay: %d, framestart delay: %d, MSA timing delay: %d set context latency: %d\n", pipe_config->hw.adjusted_mode.crtc_vblank_start - pipe_config->hw.adjusted_mode.crtc_vdisplay, - pipe_config->framestart_delay, pipe_config->msa_timing_delay); + pipe_config->framestart_delay, pipe_config->msa_timing_delay, + pipe_config->set_context_latency); drm_printf(&p, "vrr: %s, fixed rr: %s, vmin: %d, vmax: %d, flipline: %d, pipeline full: %d, guardband: %d vsync start: %d, vsync end: %d\n", str_yes_no(pipe_config->vrr.enable), diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 679c2a9baaee..050b6849dedc 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2361,39 +2361,44 @@ static int intel_crtc_compute_pipe_mode(struct intel_crtc_state *crtc_state) return 0; } -static int intel_crtc_vblank_delay(const struct intel_crtc_state *crtc_state) +static int intel_crtc_set_context_latency(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - int vblank_delay = 0; + int set_context_latency = 0; if (!HAS_DSB(display)) return 0; - vblank_delay = max(vblank_delay, intel_psr_min_set_context_latency(crtc_state)); + set_context_latency = max(set_context_latency, + intel_psr_min_set_context_latency(crtc_state)); - return vblank_delay; + return set_context_latency; } -static int intel_crtc_compute_vblank_delay(struct intel_atomic_state *state, - struct intel_crtc *crtc) +static int intel_crtc_compute_set_context_latency(struct intel_atomic_state *state, + struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(state); struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - int vblank_delay, max_vblank_delay; + int set_context_latency, max_vblank_delay; + + set_context_latency = intel_crtc_set_context_latency(crtc_state); - vblank_delay = intel_crtc_vblank_delay(crtc_state); max_vblank_delay = adjusted_mode->crtc_vblank_end - adjusted_mode->crtc_vblank_start - 1; - if (vblank_delay > max_vblank_delay) { - drm_dbg_kms(display->drm, "[CRTC:%d:%s] vblank delay (%d) exceeds max (%d)\n", - crtc->base.base.id, crtc->base.name, vblank_delay, max_vblank_delay); + if (set_context_latency > max_vblank_delay) { + drm_dbg_kms(display->drm, "[CRTC:%d:%s] set context latency (%d) exceeds max (%d)\n", + crtc->base.base.id, crtc->base.name, + set_context_latency, + max_vblank_delay); return -EINVAL; } - 
adjusted_mode->crtc_vblank_start += vblank_delay; + crtc_state->set_context_latency = set_context_latency; + adjusted_mode->crtc_vblank_start += set_context_latency; return 0; } @@ -2405,7 +2410,7 @@ static int intel_crtc_compute_config(struct intel_atomic_state *state, intel_atomic_get_new_crtc_state(state, crtc); int ret; - ret = intel_crtc_compute_vblank_delay(state, crtc); + ret = intel_crtc_compute_set_context_latency(state, crtc); if (ret) return ret; @@ -2617,7 +2622,7 @@ static void intel_set_transcoder_timings(const struct intel_crtc_state *crtc_sta if (DISPLAY_VER(display) >= 13) { intel_de_write(display, TRANS_SET_CONTEXT_LATENCY(display, cpu_transcoder), - crtc_vblank_start - crtc_vdisplay); + crtc_state->set_context_latency); /* * VBLANK_START not used by hw, just clear it @@ -2707,7 +2712,7 @@ static void intel_set_transcoder_timings_lrr(const struct intel_crtc_state *crtc if (DISPLAY_VER(display) >= 13) { intel_de_write(display, TRANS_SET_CONTEXT_LATENCY(display, cpu_transcoder), - crtc_vblank_start - crtc_vdisplay); + crtc_state->set_context_latency); /* * VBLANK_START not used by hw, just clear it @@ -2820,11 +2825,24 @@ static void intel_get_transcoder_timings(struct intel_crtc *crtc, adjusted_mode->crtc_vblank_end += 1; } - if (DISPLAY_VER(display) >= 13 && !transcoder_is_dsi(cpu_transcoder)) - adjusted_mode->crtc_vblank_start = - adjusted_mode->crtc_vdisplay + + if (DISPLAY_VER(display) >= 13 && !transcoder_is_dsi(cpu_transcoder)) { + pipe_config->set_context_latency = intel_de_read(display, TRANS_SET_CONTEXT_LATENCY(display, cpu_transcoder)); + adjusted_mode->crtc_vblank_start = + adjusted_mode->crtc_vdisplay + + pipe_config->set_context_latency; + } else if (DISPLAY_VER(display) == 12) { + /* + * TGL doesn't have a dedicated register for SCL. + * Instead, the hardware derives SCL from the difference between + * TRANS_VBLANK.vblank_start and TRANS_VTOTAL.vactive. + * To reflect the HW behaviour, readout the value for SCL as + * Vblank start - Vactive. + */ + pipe_config->set_context_latency = + adjusted_mode->crtc_vblank_start - adjusted_mode->crtc_vdisplay; + } if (DISPLAY_VER(display) >= 30) pipe_config->min_hblank = intel_de_read(display, @@ -5387,6 +5405,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_I(vrr.guardband); } + PIPE_CONF_CHECK_I(set_context_latency); + #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I #undef PIPE_CONF_CHECK_LLI diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 358ab922d7a7..029c47743f8b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1341,6 +1341,9 @@ struct intel_crtc_state { /* LOBF flag */ bool has_lobf; + + /* W2 window or 'set context latency' lines */ + u16 set_context_latency; }; enum intel_pipe_crc_source { -- cgit v1.2.3 From 6441d9038fa9dbc2d737368ee31a3388d7e90859 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 24 Sep 2025 19:45:35 +0530 Subject: drm/i915/vrr: Use set_context_latency instead of intel_vrr_real_vblank_delay() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The helper intel_vrr_real_vblank_delay() was added to account for the SCL lines for TGL where we do not have the TRANS_SET_CONTEXT_LATENCY. Now, since we already are tracking the SCL with new member `set_context_latency` use it directly instead of the helper. 
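The substitution is safe because of the identity established by the previous patch; a standalone restatement (the struct and values are illustrative, the field names mirror the driver's):

#include <assert.h>

struct sketch_timings {
	int crtc_vdisplay;
	int crtc_vblank_start;
	int set_context_latency;
};

int main(void)
{
	/* compute_config moved the vblank start by exactly SCL lines... */
	struct sketch_timings t = {
		.crtc_vdisplay = 2160,
		.set_context_latency = 1,
		.crtc_vblank_start = 2160 + 1,
	};

	/* ...so the dropped helper's vblank_start - vdisplay is the SCL field. */
	assert(t.crtc_vblank_start - t.crtc_vdisplay == t.set_context_latency);
	return 0;
}
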
Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250924141542.3122126-4-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_vrr.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 98d28de2e451..e188e5f07987 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -79,12 +79,6 @@ intel_vrr_check_modeset(struct intel_atomic_state *state) } } -static int intel_vrr_real_vblank_delay(const struct intel_crtc_state *crtc_state) -{ - return crtc_state->hw.adjusted_mode.crtc_vblank_start - - crtc_state->hw.adjusted_mode.crtc_vdisplay; -} - static int intel_vrr_extra_vblank_delay(struct intel_display *display) { /* @@ -102,7 +96,7 @@ int intel_vrr_vblank_delay(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - return intel_vrr_real_vblank_delay(crtc_state) + + return crtc_state->set_context_latency + intel_vrr_extra_vblank_delay(display); } @@ -263,7 +257,7 @@ static int intel_vrr_hw_value(const struct intel_crtc_state *crtc_state, if (DISPLAY_VER(display) >= 13) return value; else - return value - intel_vrr_real_vblank_delay(crtc_state); + return value - crtc_state->set_context_latency; } /* @@ -768,9 +762,9 @@ void intel_vrr_get_config(struct intel_crtc_state *crtc_state) if (DISPLAY_VER(display) < 13) { /* undo what intel_vrr_hw_value() does when writing the values */ - crtc_state->vrr.flipline += intel_vrr_real_vblank_delay(crtc_state); - crtc_state->vrr.vmax += intel_vrr_real_vblank_delay(crtc_state); - crtc_state->vrr.vmin += intel_vrr_real_vblank_delay(crtc_state); + crtc_state->vrr.flipline += crtc_state->set_context_latency; + crtc_state->vrr.vmax += crtc_state->set_context_latency; + crtc_state->vrr.vmin += crtc_state->set_context_latency; crtc_state->vrr.vmin += intel_vrr_vmin_flipline_offset(display); } -- cgit v1.2.3 From b811a7635ac2a0a8ca44bc91986237a71cbbbb9a Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 24 Sep 2025 19:45:36 +0530 Subject: drm/i915/vrr: Use SCL for computing guardband MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now the guardband is equal to the vblank length, so ideally it should be computed as the difference between the vmin vtotal and vactive. However, since a few lines are set aside as SCL, we need to account for them while computing the guardband. Since the vblank start is moved SCL lines away from vactive, the delta between the vmin vtotal and the new vblank start was used to account for this. Now that SCL is explicitly tracked using the `set_context_latency` member, use it directly in the guardband calculation. In the future, when the guardband is shortened or optimized, we may need to factor in both the change in the vblank start and the SCL lines. For now, explicitly accounting for SCL is sufficient. v2: Fix typo: replace adjusted_mode->vdisplay with adjusted_mode->crtc_vdisplay.
(Ville) Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250924141542.3122126-5-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_vrr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index e188e5f07987..e414fb53552f 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -418,7 +418,9 @@ void intel_vrr_compute_config_late(struct intel_crtc_state *crtc_state) return; crtc_state->vrr.guardband = - crtc_state->vrr.vmin - adjusted_mode->crtc_vblank_start - + crtc_state->vrr.vmin - + adjusted_mode->crtc_vdisplay - + crtc_state->set_context_latency - intel_vrr_extra_vblank_delay(display); if (DISPLAY_VER(display) < 13) { -- cgit v1.2.3 From 2a3831cd8081c9007661a0a0bf4b235dbf6ec01b Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 24 Sep 2025 19:45:37 +0530 Subject: drm/i915/dsb: s/intel_dsb_wait_vblank_delay/intel_dsb_wait_for_delayed_vblank MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The helper intel_dsb_wait_vblank_delay() is used in DSB to wait for the delayed vblank after the send push operation. Rename it to intel_dsb_wait_for_delayed_vblank() to align with the semantics. v2: Rename to intel_dsb_wait_for_delayed_vblank instead of the proposed SCL semantics, as this wait will not only be about SCL lines, given the different timing generators and refresh rate modes. (Ville) Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250924141542.3122126-6-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 2 +- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/display/intel_dsb.c | 4 ++-- drivers/gpu/drm/i915/display/intel_dsb.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 671db6926e4c..51db70d07fae 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -2013,7 +2013,7 @@ void intel_color_prepare_commit(struct intel_atomic_state *state, if (crtc_state->use_dsb && intel_color_uses_chained_dsb(crtc_state)) { intel_vrr_send_push(crtc_state->dsb_color, crtc_state); - intel_dsb_wait_vblank_delay(state, crtc_state->dsb_color); + intel_dsb_wait_for_delayed_vblank(state, crtc_state->dsb_color); intel_vrr_check_push_sent(crtc_state->dsb_color, crtc_state); intel_dsb_interrupt(crtc_state->dsb_color); } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 050b6849dedc..b57efd870774 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7271,7 +7271,7 @@ static void intel_atomic_dsb_finish(struct intel_atomic_state *state, intel_dsb_wait_vblanks(new_crtc_state->dsb_commit, 1); intel_vrr_send_push(new_crtc_state->dsb_commit, new_crtc_state); - intel_dsb_wait_vblank_delay(state, new_crtc_state->dsb_commit); + intel_dsb_wait_for_delayed_vblank(state, new_crtc_state->dsb_commit); intel_vrr_check_push_sent(new_crtc_state->dsb_commit, new_crtc_state); intel_dsb_interrupt(new_crtc_state->dsb_commit); diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index dee44d45b668..135d40852e4c 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ 
b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -815,8 +815,8 @@ void intel_dsb_chain(struct intel_atomic_state *state, wait_for_vblank ? DSB_WAIT_FOR_VBLANK : 0); } -void intel_dsb_wait_vblank_delay(struct intel_atomic_state *state, - struct intel_dsb *dsb) +void intel_dsb_wait_for_delayed_vblank(struct intel_atomic_state *state, + struct intel_dsb *dsb) { struct intel_crtc *crtc = dsb->crtc; const struct intel_crtc_state *crtc_state = diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h index c8f4499916eb..2f31f2c1d0c5 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.h +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -48,8 +48,8 @@ void intel_dsb_nonpost_end(struct intel_dsb *dsb); void intel_dsb_interrupt(struct intel_dsb *dsb); void intel_dsb_wait_usec(struct intel_dsb *dsb, int count); void intel_dsb_wait_vblanks(struct intel_dsb *dsb, int count); -void intel_dsb_wait_vblank_delay(struct intel_atomic_state *state, - struct intel_dsb *dsb); +void intel_dsb_wait_for_delayed_vblank(struct intel_atomic_state *state, + struct intel_dsb *dsb); void intel_dsb_wait_scanline_in(struct intel_atomic_state *state, struct intel_dsb *dsb, int lower, int upper); -- cgit v1.2.3 From 4e5c033cfe851625908b2121bf2c01eadabc4906 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 24 Sep 2025 19:45:38 +0530 Subject: drm/i915/display: Wait for scl start instead of dsb_wait_vblanks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until LNL, intel_dsb_wait_vblanks() used to wait for the undelayed vblank start. However, from PTL onwards, it waits for the start of the safe-window defined by the number of lines programmed in the register TRANS_SET_CONTEXT_LATENCY. This change was introduced to move the SCL window out of the vblank region, supporting modes with higher refresh rates and smaller vblanks. This change introduces a "safe window" a scanline range from (undelayed vblank - SCL) to (delayed vblank - SCL). As a result, on PTL+ platforms, the DSB wait for vblank completes exactly SCL lines earlier than the undelayed vblank start (safe window start). If the flip occurs in the active region and the push happens before the vmin decision boundary, the DSB wait fires early, and the push is sent inside this safe window. In such cases, the push bit is cleared at the delayed vblank, but our wait logic does not account for the early trigger, leading to DSB poll errors. To fix this, we add an explicit wait for the end of the safe window i.e., the scanline range from (undelayed vblank - SCL) to (delayed vblank - SCL). Once past this window, we are exactly SCL lines away from the delayed vblank, and our existing wait logic works as intended. This additional wait is only effective if the push occurs before the vmin decision boundary. If the push happens after the boundary, the hardware already guarantees we're SCL lines away from the delayed vblank, and the extra wait becomes a no-op. v2: - Use helpers for safe window start/end. (Ville) - Move the extra wait inside the helper to wait for delayed vblank. (Ville) - Update the commit message. v3: - Add more documentation for explanation for the wait. (Ville) - Rename intel_vrr_vmin_safe_window_start/end as this is vmin safe window. (Ville) - Minor refactoring to align with the code. (Ville) - Update the commit message for more clarity. v4: - Retain name for intel_vrr_safe_window_start as it doesn't change with vmin/vmax etc. 
(Ville) Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250924141542.3122126-7-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_dsb.c | 16 ++++++++++++++++ drivers/gpu/drm/i915/display/intel_vrr.c | 17 +++++++++++++++++ drivers/gpu/drm/i915/display/intel_vrr.h | 2 ++ 3 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 135d40852e4c..1d9379a3240b 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -824,6 +824,22 @@ void intel_dsb_wait_for_delayed_vblank(struct intel_atomic_state *state, int usecs = intel_scanlines_to_usecs(&crtc_state->hw.adjusted_mode, dsb_vblank_delay(state, crtc)); + /* + * If the push happened before the vmin decision boundary + * we don't know how far we are from the undelayed vblank. + * Wait until we're past the vmin safe window, at which + * point we're SCL lines away from the delayed vblank. + * + * If the push happened after the vmin decision boundary + * the hardware itself guarantees that we're SCL lines + * away from the delayed vblank, and we won't be inside + * the vmin safe window so this extra wait does nothing. + */ + if (pre_commit_is_vrr_active(state, crtc)) + intel_dsb_wait_scanline_out(state, dsb, + intel_vrr_safe_window_start(crtc_state), + intel_vrr_vmin_safe_window_end(crtc_state)); + intel_dsb_wait_usec(dsb, usecs); } diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index e414fb53552f..5f78b1af5fd5 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -807,3 +807,20 @@ void intel_vrr_get_config(struct intel_crtc_state *crtc_state) if (crtc_state->vrr.enable) crtc_state->mode_flags |= I915_MODE_FLAG_VRR; } + +int intel_vrr_safe_window_start(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + if (DISPLAY_VER(display) >= 30) + return crtc_state->hw.adjusted_mode.crtc_vdisplay - + crtc_state->set_context_latency; + else + return crtc_state->hw.adjusted_mode.crtc_vdisplay; +} + +int intel_vrr_vmin_safe_window_end(const struct intel_crtc_state *crtc_state) +{ + return intel_vrr_vmin_vblank_start(crtc_state) - + crtc_state->set_context_latency; +} diff --git a/drivers/gpu/drm/i915/display/intel_vrr.h b/drivers/gpu/drm/i915/display/intel_vrr.h index 38bf9996b883..32f8644fc369 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.h +++ b/drivers/gpu/drm/i915/display/intel_vrr.h @@ -41,5 +41,7 @@ void intel_vrr_transcoder_enable(const struct intel_crtc_state *crtc_state); void intel_vrr_transcoder_disable(const struct intel_crtc_state *crtc_state); void intel_vrr_set_fixed_rr_timings(const struct intel_crtc_state *crtc_state); bool intel_vrr_always_use_vrr_tg(struct intel_display *display); +int intel_vrr_safe_window_start(const struct intel_crtc_state *crtc_state); +int intel_vrr_vmin_safe_window_end(const struct intel_crtc_state *crtc_state); #endif /* __INTEL_VRR_H__ */ -- cgit v1.2.3 From 94da8e5eee9c2e33cc1d2d61029c9db0c6c5a55a Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 24 Sep 2025 19:45:39 +0530 Subject: drm/i915/reg_defs: Add REG_FIELD_MAX wrapper for FIELD_MAX() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce REG_FIELD_MAX macro as local wrapper around FIELD_MAX() to return the maximum value representable by a bit mask. 
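As a concrete illustration of what FIELD_MAX()-style macros compute, here is a self-contained sketch with hand-rolled stand-ins (the SKETCH_* names are hypothetical, not the kernel macros):

#include <assert.h>
#include <stdint.h>

/* Stand-in for GENMASK(h, l): bits h..l set, within a 32-bit word. */
#define SKETCH_GENMASK(h, l) ((0xffffffffu >> (31 - (h))) & (0xffffffffu << (l)))

/* Stand-in for FIELD_MAX(): shift the mask down so only its width remains. */
#define SKETCH_FIELD_MAX(mask) ((uint32_t)((mask) >> __builtin_ctz(mask)))

int main(void)
{
	/* An 8-bit field maxes out at 255 and a 16-bit one at 65535 -- the
	 * same limits the next patch derives for the pipeline-full and
	 * guardband register fields. Only the field's width matters, not
	 * its position in the register. */
	assert(SKETCH_FIELD_MAX(SKETCH_GENMASK(7, 0)) == 255);
	assert(SKETCH_FIELD_MAX(SKETCH_GENMASK(25, 10)) == 65535);
	return 0;
}
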
The value is cast to u32 for consistency with other REG_* macros and assumes the bitfield fits within 32 bits. v2: Use __mask as macro argument aligning with other macros. (Ville) Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20250924141542.3122126-8-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/i915_reg_defs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg_defs.h b/drivers/gpu/drm/i915/i915_reg_defs.h index bfe98cb9a038..e81fac8ab51b 100644 --- a/drivers/gpu/drm/i915/i915_reg_defs.h +++ b/drivers/gpu/drm/i915/i915_reg_defs.h @@ -174,6 +174,16 @@ */ #define REG_FIELD_GET8(__mask, __val) ((u8)FIELD_GET(__mask, __val)) +/** + * REG_FIELD_MAX() - produce the maximum value representable by a field + * @__mask: shifted mask defining the field's length and position + * + * Local wrapper for FIELD_MAX() to return the maximum bit value that can + * be held in the field specified by @_mask, cast to u32 for consistency + * with other macros. + */ +#define REG_FIELD_MAX(__mask) ((u32)FIELD_MAX(__mask)) + typedef struct { u32 reg; } i915_reg_t; -- cgit v1.2.3 From 5f9172bf6f18da9c43f738b02de904d454459071 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 24 Sep 2025 19:45:40 +0530 Subject: drm/i915/vrr: Clamp guardband as per hardware and timing constraints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The maximum guardband value is constrained by two factors: - The actual vblank length minus set context latency (SCL) - The hardware register field width: - 8 bits for ICL/TGL (VRR_CTL_PIPELINE_FULL_MASK -> max 255) - 16 bits for ADL+ (XELPD_VRR_CTL_VRR_GUARDBAND_MASK -> max 65535) Remove the #FIXME and clamp the guardband to the maximum allowed value. v2: - Use REG_FIELD_MAX(). (Ville) - Separate out functions for intel_vrr_max_guardband(), intel_vrr_max_vblank_guardband(). (Ville) v3: - Fix Typo: Add the missing adjusted_mode->crtc_vdisplay in guardband computation. (Ville) - Refactor intel_vrr_max_hw_guardband() and use else for consistency. (Ville) v4: - Drop max_guardband from intel_vrr_max_hw_guardband(). 
(Ville) Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä (#v2) Link: https://lore.kernel.org/r/20250924141542.3122126-9-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_vrr.c | 47 ++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 5f78b1af5fd5..da073bbabc46 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -409,6 +409,38 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, } } +static int +intel_vrr_max_hw_guardband(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + int max_pipeline_full = REG_FIELD_MAX(VRR_CTL_PIPELINE_FULL_MASK); + + if (DISPLAY_VER(display) >= 13) + return REG_FIELD_MAX(XELPD_VRR_CTL_VRR_GUARDBAND_MASK); + else + return intel_vrr_pipeline_full_to_guardband(crtc_state, + max_pipeline_full); +} + +static int +intel_vrr_max_vblank_guardband(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + + return crtc_state->vrr.vmin - + adjusted_mode->crtc_vdisplay - + crtc_state->set_context_latency - + intel_vrr_extra_vblank_delay(display); +} + +static int +intel_vrr_max_guardband(struct intel_crtc_state *crtc_state) +{ + return min(intel_vrr_max_hw_guardband(crtc_state), + intel_vrr_max_vblank_guardband(crtc_state)); +} + void intel_vrr_compute_config_late(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); @@ -417,22 +449,13 @@ void intel_vrr_compute_config_late(struct intel_crtc_state *crtc_state) if (!intel_vrr_possible(crtc_state)) return; - crtc_state->vrr.guardband = - crtc_state->vrr.vmin - - adjusted_mode->crtc_vdisplay - - crtc_state->set_context_latency - - intel_vrr_extra_vblank_delay(display); - - if (DISPLAY_VER(display) < 13) { - /* FIXME handle the limit in a proper way */ - crtc_state->vrr.guardband = - min(crtc_state->vrr.guardband, - intel_vrr_pipeline_full_to_guardband(crtc_state, 255)); + crtc_state->vrr.guardband = min(crtc_state->vrr.vmin - adjusted_mode->crtc_vdisplay, + intel_vrr_max_guardband(crtc_state)); + if (DISPLAY_VER(display) < 13) crtc_state->vrr.pipeline_full = intel_vrr_guardband_to_pipeline_full(crtc_state, crtc_state->vrr.guardband); - } } static u32 trans_vrr_ctl(const struct intel_crtc_state *crtc_state) -- cgit v1.2.3 From 68f1d1764d50cf65f466cac0e6286f40c8ff93e9 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 24 Sep 2025 19:45:41 +0530 Subject: drm/i915/display: Drop intel_vrr_vblank_delay and use set_context_latency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The helper intel_vrr_vblank_delay() was used to keep track of the SCL lines plus the extra vblank delay required for ICL/TGL. It was used to wait for sufficient lines for: the push send bit to clear in the VRR case, and vblank evasion to delay the commit. For the first case we now use the safe window scanline wait, so we just need to wait for the SCL lines and not the extra vblank delay required for ICL/TGL. For the second case, we do not actually need to wait for extra lines before the undelayed vblank if we are already in the safe window. To sum up, waiting for the SCL lines is sufficient for both cases (see the sketch below).
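A condensed restatement of that reasoning (illustrative sketch, not driver code; the display-version cutoff mirrors intel_vrr_extra_vblank_delay() in the diff below):

/* Below display version 13 (ICL/TGL) the old helper folded in one extra
 * line of vblank delay; both waits now use the SCL field alone. */
int sketch_extra_vblank_delay(int display_ver)
{
	return display_ver < 13 ? 1 : 0;
}

int sketch_old_wait_lines(int set_context_latency, int display_ver)
{
	return set_context_latency + sketch_extra_vblank_delay(display_ver);
}

int sketch_new_wait_lines(int set_context_latency)
{
	return set_context_latency; /* crtc_state->set_context_latency directly */
}
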
So drop the helper intel_vrr_vblank_delay and just use crtc_state->set_context_latency instead. Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250924141542.3122126-10-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_dsb.c | 4 ++-- drivers/gpu/drm/i915/display/intel_vblank.c | 2 +- drivers/gpu/drm/i915/display/intel_vrr.c | 8 -------- drivers/gpu/drm/i915/display/intel_vrr.h | 1 - 4 files changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 1d9379a3240b..ae8574880ef2 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -128,7 +128,7 @@ static int dsb_vblank_delay(struct intel_atomic_state *state, * scanline until the delayed vblank occurs after * TRANS_PUSH has been written. */ - return intel_vrr_vblank_delay(crtc_state) + 1; + return crtc_state->set_context_latency + 1; else return intel_mode_vblank_delay(&crtc_state->hw.adjusted_mode); } @@ -723,7 +723,7 @@ void intel_dsb_vblank_evade(struct intel_atomic_state *state, intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_OUT, 0, 0); if (pre_commit_is_vrr_active(state, crtc)) { - int vblank_delay = intel_vrr_vblank_delay(crtc_state); + int vblank_delay = crtc_state->set_context_latency; end = intel_vrr_vmin_vblank_start(crtc_state); start = end - vblank_delay - latency; diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index c15234c1d96e..0b7fcc05e64c 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -681,7 +681,7 @@ void intel_vblank_evade_init(const struct intel_crtc_state *old_crtc_state, else evade->vblank_start = intel_vrr_vmax_vblank_start(crtc_state); - vblank_delay = intel_vrr_vblank_delay(crtc_state); + vblank_delay = crtc_state->set_context_latency; } else { evade->vblank_start = intel_mode_vblank_start(adjusted_mode); diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index da073bbabc46..190c51be5cbc 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -92,14 +92,6 @@ static int intel_vrr_extra_vblank_delay(struct intel_display *display) return DISPLAY_VER(display) < 13 ? 
1 : 0; } -int intel_vrr_vblank_delay(const struct intel_crtc_state *crtc_state) -{ - struct intel_display *display = to_intel_display(crtc_state); - - return crtc_state->set_context_latency + - intel_vrr_extra_vblank_delay(display); -} - static int intel_vrr_vmin_flipline_offset(struct intel_display *display) { /* diff --git a/drivers/gpu/drm/i915/display/intel_vrr.h b/drivers/gpu/drm/i915/display/intel_vrr.h index 32f8644fc369..7317f8730089 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.h +++ b/drivers/gpu/drm/i915/display/intel_vrr.h @@ -35,7 +35,6 @@ int intel_vrr_vmax_vtotal(const struct intel_crtc_state *crtc_state); int intel_vrr_vmin_vtotal(const struct intel_crtc_state *crtc_state); int intel_vrr_vmax_vblank_start(const struct intel_crtc_state *crtc_state); int intel_vrr_vmin_vblank_start(const struct intel_crtc_state *crtc_state); -int intel_vrr_vblank_delay(const struct intel_crtc_state *crtc_state); bool intel_vrr_is_fixed_rr(const struct intel_crtc_state *crtc_state); void intel_vrr_transcoder_enable(const struct intel_crtc_state *crtc_state); void intel_vrr_transcoder_disable(const struct intel_crtc_state *crtc_state); -- cgit v1.2.3 From fe3c035330f545712919f9fe0a6613f4abc8ad56 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 25 Sep 2025 07:53:52 +0530 Subject: drm/i915/dsb: Inline dsb_vblank_delay() into intel_dsb_wait_for_delayed_vblank() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the now single-use dsb_vblank_delay() helper and inline its logic directly into intel_dsb_wait_for_delayed_vblank(). This will help to keep all VRR related wait stuff in one place. v2: Use intel_scanlines_to_usecs() in intel_dsb_wait_usec(). (Ville) Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250925022352.3129859-1-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_dsb.c | 59 ++++++++++++++------------------ 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index ae8574880ef2..4ad4efbf9253 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -115,24 +115,6 @@ static bool pre_commit_is_vrr_active(struct intel_atomic_state *state, return old_crtc_state->vrr.enable && !intel_crtc_vrr_disabling(state, crtc); } -static int dsb_vblank_delay(struct intel_atomic_state *state, - struct intel_crtc *crtc) -{ - const struct intel_crtc_state *crtc_state = - intel_pre_commit_crtc_state(state, crtc); - - if (pre_commit_is_vrr_active(state, crtc)) - /* - * When the push is sent during vblank it will trigger - * on the next scanline, hence we have up to one extra - * scanline until the delayed vblank occurs after - * TRANS_PUSH has been written. 
- */ - return crtc_state->set_context_latency + 1; - else - return intel_mode_vblank_delay(&crtc_state->hw.adjusted_mode); -} - static int dsb_vtotal(struct intel_atomic_state *state, struct intel_crtc *crtc) { @@ -821,26 +803,37 @@ void intel_dsb_wait_for_delayed_vblank(struct intel_atomic_state *state, struct intel_crtc *crtc = dsb->crtc; const struct intel_crtc_state *crtc_state = intel_pre_commit_crtc_state(state, crtc); - int usecs = intel_scanlines_to_usecs(&crtc_state->hw.adjusted_mode, - dsb_vblank_delay(state, crtc)); + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + int wait_scanlines; - /* - * If the push happened before the vmin decision boundary - * we don't know how far we are from the undelayed vblank. - * Wait until we're past the vmin safe window, at which - * point we're SCL lines away from the delayed vblank. - * - * If the push happened after the vmin decision boundary - * the hardware itself guarantees that we're SCL lines - * away from the delayed vblank, and we won't be inside - * the vmin safe window so this extra wait does nothing. - */ - if (pre_commit_is_vrr_active(state, crtc)) + if (pre_commit_is_vrr_active(state, crtc)) { + /* + * If the push happened before the vmin decision boundary + * we don't know how far we are from the undelayed vblank. + * Wait until we're past the vmin safe window, at which + * point we're SCL lines away from the delayed vblank. + * + * If the push happened after the vmin decision boundary + * the hardware itself guarantees that we're SCL lines + * away from the delayed vblank, and we won't be inside + * the vmin safe window so this extra wait does nothing. + */ intel_dsb_wait_scanline_out(state, dsb, intel_vrr_safe_window_start(crtc_state), intel_vrr_vmin_safe_window_end(crtc_state)); + /* + * When the push is sent during vblank it will trigger + * on the next scanline, hence we have up to one extra + * scanline until the delayed vblank occurs after + * TRANS_PUSH has been written. + */ + wait_scanlines = crtc_state->set_context_latency + 1; + } else { + wait_scanlines = intel_mode_vblank_delay(adjusted_mode); + } - intel_dsb_wait_usec(dsb, usecs); + intel_dsb_wait_usec(dsb, intel_scanlines_to_usecs(adjusted_mode, wait_scanlines)); } /** -- cgit v1.2.3 From 69b4d367fff6d311da6e3ce1b8b34b3e37b59b5a Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Thu, 18 Sep 2025 21:45:16 +0000 Subject: drm/i915/gvt: Simplify case switch in intel_vgpu_ioctl We do not need a case switch to check cap_type_id in intel_vgpu_ioctl for various reasons (it's impossible to hit the default case in the current code, there's only one valid case to check, the error handling code overlaps in both cases, etc.). Simplify the case switch into a single if statement. This has the additional effect of simplifying the error handling code. Note that it is still currently impossible for 'if (cap_type_id == VFIO_REGION_INFO_CAP_SPARSE_MMAP)' to fail, but we should still guard against the possibility of this changing in the future. 
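The control-flow pattern in isolation (a hypothetical, self-contained sketch; KNOWN_CAP and the stub functions are made up, the real code is in the diff below): preload the error code, overwrite it only on the valid path, and let both failure modes share one cleanup block.

#include <errno.h>
#include <stdio.h>

#define KNOWN_CAP 1 /* hypothetical capability id */

static int add_capability(void) { return 0; } /* stub: may fail in general */
static void cleanup(void) { }                 /* stub: e.g. free the buffer */

static int handle_cap(int cap_type_id)
{
	int ret = -EINVAL; /* default: unknown capability */

	if (cap_type_id == KNOWN_CAP)
		ret = add_capability();

	if (ret)
		cleanup(); /* one shared error path */

	return ret;
}

int main(void)
{
	printf("%d %d\n", handle_cap(KNOWN_CAP), handle_cap(42));
	return 0;
}
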
Signed-off-by: Jonathan Cavitt Cc: Andi Shyti Reviewed-by: Zhenyu Wang Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250918214515.66926-2-jonathan.cavitt@intel.com --- drivers/gpu/drm/i915/gvt/kvmgt.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 69830a5c49d3..70af86d46fe8 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1279,20 +1279,15 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd, } if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) { - switch (cap_type_id) { - case VFIO_REGION_INFO_CAP_SPARSE_MMAP: + ret = -EINVAL; + if (cap_type_id == VFIO_REGION_INFO_CAP_SPARSE_MMAP) ret = vfio_info_add_capability(&caps, &sparse->header, struct_size(sparse, areas, sparse->nr_areas)); - if (ret) { - kfree(sparse); - return ret; - } - break; - default: + if (ret) { kfree(sparse); - return -EINVAL; + return ret; } } -- cgit v1.2.3 From 22a2f2e35f9c08c8572003d1e6d3f0e9ab968837 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Tue, 23 Sep 2025 21:23:33 +0000 Subject: drm/i915/gvt: Improve intel_vgpu_ioctl hdr error handling Add error handling for the following VFIO_DEVICE_SET_IRQS cases with respect to the hdr struct: - More than one VFIO_IRQ_DATA_TYPE_MASK flag is set in hdr.flags - More than one VFIO_IRQ_ACTION_TYPE_MASK flag is set in hdr.flags - hdr.count is not specified Note that since hdr.count != 0, data_size != 0 is guaranteed unless vfio_set_irqs_validate_and_prepare fails and returns an error. So, we no longer need to check data_size before running memdup_user because checking the return value of the function is sufficient. v2: Use correct name for mask v3: Use is_power_of_2 over hweight32 as it's more efficient (Andi) Signed-off-by: Jonathan Cavitt Cc: Andi Shyti Reviewed-by: Zhenyu Wang Reviewed-by: Krzysztof Karas Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250923212332.112137-2-jonathan.cavitt@intel.com --- drivers/gpu/drm/i915/gvt/kvmgt.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 70af86d46fe8..183128b84630 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1356,21 +1356,27 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd, if (copy_from_user(&hdr, (void __user *)arg, minsz)) return -EFAULT; + if (!is_power_of_2(hdr.flags & VFIO_IRQ_SET_DATA_TYPE_MASK) || + !is_power_of_2(hdr.flags & VFIO_IRQ_SET_ACTION_TYPE_MASK)) + return -EINVAL; + if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) { int max = intel_vgpu_get_irq_count(vgpu, hdr.index); + if (!hdr.count) + return -EINVAL; + ret = vfio_set_irqs_validate_and_prepare(&hdr, max, VFIO_PCI_NUM_IRQS, &data_size); if (ret) { gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n"); return -EINVAL; } - if (data_size) { - data = memdup_user((void __user *)(arg + minsz), - data_size); - if (IS_ERR(data)) - return PTR_ERR(data); - } + + data = memdup_user((void __user *)(arg + minsz), + data_size); + if (IS_ERR(data)) + return PTR_ERR(data); } ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index, -- cgit v1.2.3 From 40ac9b3eb09085dd729032ac5e1ea213068f34cc Mon Sep 17 00:00:00 2001 From: Madhur Kumar Date: Wed, 24 Sep 2025 01:20:51 +0530 Subject: drm/i915: i915_pmu: Use sysfs_emit() instead of sprintf() 
Follow the advice in Documentation/filesystems/sysfs.rst: show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. Signed-off-by: Madhur Kumar Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250923195051.1277855-1-madhurkumar004@gmail.com --- drivers/gpu/drm/i915/i915_pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 5bc696bfbb0f..d8f69bba79a9 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -895,7 +895,7 @@ static ssize_t i915_pmu_format_show(struct device *dev, struct i915_str_attribute *eattr; eattr = container_of(attr, struct i915_str_attribute, attr); - return sprintf(buf, "%s\n", eattr->str); + return sysfs_emit(buf, "%s\n", eattr->str); } #define I915_PMU_FORMAT_ATTR(_name, _config) \ @@ -925,7 +925,7 @@ static ssize_t i915_pmu_event_show(struct device *dev, struct i915_ext_attribute *eattr; eattr = container_of(attr, struct i915_ext_attribute, attr); - return sprintf(buf, "config=0x%lx\n", eattr->val); + return sysfs_emit(buf, "config=0x%lx\n", eattr->val); } #define __event(__counter, __name, __unit) \ -- cgit v1.2.3 From 924adb0bbdd8fef25fd229c76e3f602c3e8752ee Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Mon, 22 Sep 2025 13:27:25 +0300 Subject: drm/i915/psr: Deactivate PSR only on LNL and when selective fetch enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using intel_psr_exit in the frontbuffer flush on older platforms seems to be causing problems. Sending a single full frame update using intel_psr_force_update is in any case more optimal than a PSR deactivate/activate cycle, so move back to that approach for PSR1, PSR HW tracking and Panel Replay full frame update, and use deactivate/activate only on LunarLake and only when selective fetch is enabled. Tested-by: Lemen Tested-by: Koos Vriezen Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14946 Signed-off-by: Jouni Högander Reviewed-by: Mika Kahola Link: https://lore.kernel.org/r/20250922102725.2752742-1-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 49ccd0864c55..f7115969b4c5 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -3402,6 +3402,7 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) struct intel_display *display = to_intel_display(intel_dp); if (DISPLAY_VER(display) < 20 && intel_dp->psr.psr2_sel_fetch_enabled) { + /* Selective fetch prior LNL */ if (intel_dp->psr.psr2_sel_fetch_cff_enabled) { /* can we turn CFF off? 
*/ if (intel_dp->psr.busy_frontbuffer_bits == 0) @@ -3420,12 +3421,19 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) intel_psr_configure_full_frame_update(intel_dp); intel_psr_force_update(intel_dp); + } else if (!intel_dp->psr.psr2_sel_fetch_enabled) { + /* + * PSR1 on all platforms + * PSR2 HW tracking + * Panel Replay Full frame update + */ + intel_psr_force_update(intel_dp); } else { + /* Selective update LNL onwards */ intel_psr_exit(intel_dp); } - if ((!intel_dp->psr.psr2_sel_fetch_enabled || DISPLAY_VER(display) >= 20) && - !intel_dp->psr.busy_frontbuffer_bits) + if (!intel_dp->psr.active && !intel_dp->psr.busy_frontbuffer_bits) queue_work(display->wq.unordered, &intel_dp->psr.work); } -- cgit v1.2.3 From 3198609ecc951b31b7e824e77c9c6460d711a55e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 24 Sep 2025 19:43:30 +0300 Subject: drm/{i915, xe}/stolen: rename i915_stolen_fb to intel_stolen_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a more generic name than one that refers to "i915" and "fb". Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/925fd07d3f2a6115c71984f5a40a06c9eb46a539.1758732183.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbc.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 2 +- drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 0d380c825791..6a7357fa89db 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -102,7 +102,7 @@ struct intel_fbc { struct mutex lock; unsigned int busy_bits; - struct i915_stolen_fb compressed_fb, compressed_llb; + struct intel_stolen_node compressed_fb, compressed_llb; enum intel_fbc_id id; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index dfe0db8bb1b9..c2f9c994e0ae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -12,7 +12,7 @@ struct drm_i915_private; struct drm_mm_node; struct drm_i915_gem_object; -#define i915_stolen_fb drm_mm_node +#define intel_stolen_node drm_mm_node int i915_gem_stolen_insert_node(struct drm_i915_private *i915, struct drm_mm_node *node, u64 size, diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index f097fc6d5127..62389b290907 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -12,12 +12,12 @@ struct xe_bo; -struct i915_stolen_fb { +struct intel_stolen_node { struct xe_bo *bo; }; static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, - struct i915_stolen_fb *fb, + struct intel_stolen_node *fb, u32 size, u32 align, u32 start, u32 end) { @@ -47,7 +47,7 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, } static inline int i915_gem_stolen_insert_node(struct xe_device *xe, - struct i915_stolen_fb *fb, + struct intel_stolen_node *fb, u32 size, u32 align) { /* Not used on xe */ @@ -56,7 +56,7 @@ static inline int i915_gem_stolen_insert_node(struct xe_device *xe, } static inline void i915_gem_stolen_remove_node(struct xe_device *xe, - struct i915_stolen_fb *fb) + struct intel_stolen_node *fb) { xe_bo_unpin_map_no_vm(fb->bo); 
fb->bo = NULL; @@ -65,7 +65,7 @@ static inline void i915_gem_stolen_remove_node(struct xe_device *xe, #define i915_gem_stolen_initialized(xe) (!!ttm_manager_type(&(xe)->ttm, XE_PL_STOLEN)) #define i915_gem_stolen_node_allocated(fb) (!!((fb)->bo)) -static inline u32 i915_gem_stolen_node_offset(struct i915_stolen_fb *fb) +static inline u32 i915_gem_stolen_node_offset(struct intel_stolen_node *fb) { struct xe_res_cursor res; -- cgit v1.2.3 From 511d2d70dfc96ccc37eef7203b345a7dbe55e254 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 24 Sep 2025 19:43:31 +0300 Subject: drm/xe/stolen: rename fb to node in stolen compat header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's more about node than fb, and this makes more sense now that the struct is also named intel_stolen_node. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/71a7872e47da5f3fbe61cc21723bfcf8ff6518b8.1758732183.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../xe/compat-i915-headers/gem/i915_gem_stolen.h | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index 62389b290907..b45575b15322 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -17,7 +17,7 @@ struct intel_stolen_node { }; static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, - struct intel_stolen_node *fb, + struct intel_stolen_node *node, u32 size, u32 align, u32 start, u32 end) { @@ -41,13 +41,13 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, return err; } - fb->bo = bo; + node->bo = bo; return err; } static inline int i915_gem_stolen_insert_node(struct xe_device *xe, - struct intel_stolen_node *fb, + struct intel_stolen_node *node, u32 size, u32 align) { /* Not used on xe */ @@ -56,20 +56,20 @@ static inline int i915_gem_stolen_insert_node(struct xe_device *xe, } static inline void i915_gem_stolen_remove_node(struct xe_device *xe, - struct intel_stolen_node *fb) + struct intel_stolen_node *node) { - xe_bo_unpin_map_no_vm(fb->bo); - fb->bo = NULL; + xe_bo_unpin_map_no_vm(node->bo); + node->bo = NULL; } #define i915_gem_stolen_initialized(xe) (!!ttm_manager_type(&(xe)->ttm, XE_PL_STOLEN)) -#define i915_gem_stolen_node_allocated(fb) (!!((fb)->bo)) +#define i915_gem_stolen_node_allocated(node) (!!((node)->bo)) -static inline u32 i915_gem_stolen_node_offset(struct intel_stolen_node *fb) +static inline u32 i915_gem_stolen_node_offset(struct intel_stolen_node *node) { struct xe_res_cursor res; - xe_res_first(fb->bo->ttm.resource, 0, 4096, &res); + xe_res_first(node->bo->ttm.resource, 0, 4096, &res); return res.start; } @@ -78,8 +78,8 @@ static inline u32 i915_gem_stolen_node_offset(struct intel_stolen_node *fb) /* Used for gen9 specific WA. 
Gen9 is not supported by Xe */ #define i915_gem_stolen_area_size(xe) (!WARN_ON(1)) -#define i915_gem_stolen_node_address(xe, fb) (xe_ttm_stolen_gpu_offset(xe) + \ - i915_gem_stolen_node_offset(fb)) -#define i915_gem_stolen_node_size(fb) ((u64)((fb)->bo->ttm.base.size)) +#define i915_gem_stolen_node_address(xe, node) (xe_ttm_stolen_gpu_offset(xe) + \ + i915_gem_stolen_node_offset(node)) +#define i915_gem_stolen_node_size(node) ((u64)((node)->bo->ttm.base.size)) #endif -- cgit v1.2.3 From b6500640acbcc93a371be3a253a558db81132e6b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 24 Sep 2025 19:43:32 +0300 Subject: drm/xe/stolen: convert compat stolen macros to inline functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve type safety. Allows getting rid of a __maybe_unused annotation too. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/1ec1fa59e0e54da49a1ec4fd1d535288066db502.1758732183.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbc.c | 2 +- .../xe/compat-i915-headers/gem/i915_gem_stolen.h | 39 ++++++++++++++++++---- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 6a7357fa89db..cc67de6c06cf 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -797,7 +797,7 @@ static u64 intel_fbc_cfb_base_max(struct intel_display *display) static u64 intel_fbc_stolen_end(struct intel_display *display) { - struct drm_i915_private __maybe_unused *i915 = to_i915(display->drm); + struct drm_i915_private *i915 = to_i915(display->drm); u64 end; /* The FBC hardware for BDW/SKL doesn't have access to the stolen diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index b45575b15322..2c77457837e4 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -62,8 +62,15 @@ static inline void i915_gem_stolen_remove_node(struct xe_device *xe, node->bo = NULL; } -#define i915_gem_stolen_initialized(xe) (!!ttm_manager_type(&(xe)->ttm, XE_PL_STOLEN)) -#define i915_gem_stolen_node_allocated(node) (!!((node)->bo)) +static inline bool i915_gem_stolen_initialized(struct xe_device *xe) +{ + return ttm_manager_type(&xe->ttm, XE_PL_STOLEN); +} + +static inline bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node) +{ + return node->bo; +} static inline u32 i915_gem_stolen_node_offset(struct intel_stolen_node *node) { @@ -74,12 +81,30 @@ static inline u32 i915_gem_stolen_node_offset(struct intel_stolen_node *node) } /* Used for < gen4. These are not supported by Xe */ -#define i915_gem_stolen_area_address(xe) (!WARN_ON(1)) +static inline u64 i915_gem_stolen_area_address(const struct xe_device *xe) +{ + WARN_ON(1); + + return 0; +} + /* Used for gen9 specific WA. 
Gen9 is not supported by Xe */ -#define i915_gem_stolen_area_size(xe) (!WARN_ON(1)) +static inline u64 i915_gem_stolen_area_size(const struct xe_device *xe) +{ + WARN_ON(1); + + return 0; +} -#define i915_gem_stolen_node_address(xe, node) (xe_ttm_stolen_gpu_offset(xe) + \ - i915_gem_stolen_node_offset(node)) -#define i915_gem_stolen_node_size(node) ((u64)((node)->bo->ttm.base.size)) +static inline u64 i915_gem_stolen_node_address(struct xe_device *xe, + struct intel_stolen_node *node) +{ + return xe_ttm_stolen_gpu_offset(xe) + i915_gem_stolen_node_offset(node); +} + +static inline u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node) +{ + return node->bo->ttm.base.size; +} #endif -- cgit v1.2.3 From 9f697958bb6e8baec08569210dc0545b8447dd4d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 24 Sep 2025 19:43:33 +0300 Subject: drm/xe/stolen: switch from BUG_ON() to WARN_ON() in compat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're pretty much never supposed to be using BUG_ON(). Switch to WARN_ON(). Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/d14c693a3387a5d89bb88e81349639b5ec5663fb.1758732183.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index 2c77457837e4..be249f51231d 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -51,7 +51,8 @@ static inline int i915_gem_stolen_insert_node(struct xe_device *xe, u32 size, u32 align) { /* Not used on xe */ - BUG_ON(1); + WARN_ON(1); + + return -ENODEV; } -- cgit v1.2.3 From 59998644b79ace2abb73379cb1940c3f5ac09376 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 24 Sep 2025 19:43:34 +0300 Subject: drm/i915/stolen: convert intel_stolen_node into a real struct of its own MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915_gem_stolen.h simply defines intel_stolen_node as drm_mm_node. Make struct intel_stolen_node an actual struct in its own right, and embed struct drm_mm_node inside it. This allows better unification between i915 and xe. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/36762f611566d81427e702369f4e8207ead5f26c.1758732183.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 66 ++++++++++++++++++++---------- drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 22 +++++----- 2 files changed, 56 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 3380151edfc1..70ee34303e36 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -36,9 +36,9 @@ * for is a boon. 
*/ -int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915, - struct drm_mm_node *node, u64 size, - unsigned alignment, u64 start, u64 end) +static int __i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915, + struct drm_mm_node *node, u64 size, + unsigned int alignment, u64 start, u64 end) { int ret; @@ -58,24 +58,46 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915, return ret; } +int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915, + struct intel_stolen_node *node, u64 size, + unsigned int alignment, u64 start, u64 end) +{ + return __i915_gem_stolen_insert_node_in_range(i915, &node->node, + size, alignment, + start, end); +} + +static int __i915_gem_stolen_insert_node(struct drm_i915_private *i915, + struct drm_mm_node *node, u64 size, + unsigned int alignment) +{ + return __i915_gem_stolen_insert_node_in_range(i915, node, + size, alignment, + I915_GEM_STOLEN_BIAS, + U64_MAX); +} + int i915_gem_stolen_insert_node(struct drm_i915_private *i915, - struct drm_mm_node *node, u64 size, - unsigned alignment) + struct intel_stolen_node *node, u64 size, + unsigned int alignment) { - return i915_gem_stolen_insert_node_in_range(i915, node, - size, alignment, - I915_GEM_STOLEN_BIAS, - U64_MAX); + return __i915_gem_stolen_insert_node(i915, &node->node, size, alignment); } -void i915_gem_stolen_remove_node(struct drm_i915_private *i915, - struct drm_mm_node *node) +static void __i915_gem_stolen_remove_node(struct drm_i915_private *i915, + struct drm_mm_node *node) { mutex_lock(&i915->mm.stolen_lock); drm_mm_remove_node(node); mutex_unlock(&i915->mm.stolen_lock); } +void i915_gem_stolen_remove_node(struct drm_i915_private *i915, + struct intel_stolen_node *node) +{ + __i915_gem_stolen_remove_node(i915, &node->node); +} + static bool valid_stolen_size(struct drm_i915_private *i915, struct resource *dsm) { return (dsm->start != 0 || HAS_LMEMBAR_SMEM_STOLEN(i915)) && dsm->end > dsm->start; @@ -683,7 +705,7 @@ i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen); GEM_BUG_ON(!stolen); - i915_gem_stolen_remove_node(i915, stolen); + __i915_gem_stolen_remove_node(i915, stolen); kfree(stolen); i915_gem_object_release_memory_region(obj); @@ -772,8 +794,8 @@ static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, ret = drm_mm_reserve_node(&i915->mm.stolen, stolen); mutex_unlock(&i915->mm.stolen_lock); } else { - ret = i915_gem_stolen_insert_node(i915, stolen, size, - mem->min_page_size); + ret = __i915_gem_stolen_insert_node(i915, stolen, size, + mem->min_page_size); } if (ret) goto err_free; @@ -785,7 +807,7 @@ static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, return 0; err_remove: - i915_gem_stolen_remove_node(i915, stolen); + __i915_gem_stolen_remove_node(i915, stolen); err_free: kfree(stolen); return ret; @@ -1016,22 +1038,22 @@ u64 i915_gem_stolen_area_size(const struct drm_i915_private *i915) } u64 i915_gem_stolen_node_address(const struct drm_i915_private *i915, - const struct drm_mm_node *node) + const struct intel_stolen_node *node) { return i915->dsm.stolen.start + i915_gem_stolen_node_offset(node); } -bool i915_gem_stolen_node_allocated(const struct drm_mm_node *node) +bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node) { - return drm_mm_node_allocated(node); + return drm_mm_node_allocated(&node->node); } -u64 i915_gem_stolen_node_offset(const struct drm_mm_node *node) +u64 
i915_gem_stolen_node_offset(const struct intel_stolen_node *node) { - return node->start; + return node->node.start; } -u64 i915_gem_stolen_node_size(const struct drm_mm_node *node) +u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node) { - return node->size; + return node->node.size; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index c2f9c994e0ae..9e42d5a4cf13 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -7,22 +7,24 @@ #define __I915_GEM_STOLEN_H__ #include +#include -struct drm_i915_private; -struct drm_mm_node; struct drm_i915_gem_object; +struct drm_i915_private; -#define intel_stolen_node drm_mm_node +struct intel_stolen_node { + struct drm_mm_node node; +}; int i915_gem_stolen_insert_node(struct drm_i915_private *i915, - struct drm_mm_node *node, u64 size, + struct intel_stolen_node *node, u64 size, unsigned alignment); int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915, - struct drm_mm_node *node, u64 size, + struct intel_stolen_node *node, u64 size, unsigned alignment, u64 start, u64 end); void i915_gem_stolen_remove_node(struct drm_i915_private *i915, - struct drm_mm_node *node); + struct intel_stolen_node *node); struct intel_memory_region * i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type, u16 instance); @@ -43,10 +45,10 @@ u64 i915_gem_stolen_area_address(const struct drm_i915_private *i915); u64 i915_gem_stolen_area_size(const struct drm_i915_private *i915); u64 i915_gem_stolen_node_address(const struct drm_i915_private *i915, - const struct drm_mm_node *node); + const struct intel_stolen_node *node); -bool i915_gem_stolen_node_allocated(const struct drm_mm_node *node); -u64 i915_gem_stolen_node_offset(const struct drm_mm_node *node); -u64 i915_gem_stolen_node_size(const struct drm_mm_node *node); +bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node); +u64 i915_gem_stolen_node_offset(const struct intel_stolen_node *node); +u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node); #endif /* __I915_GEM_STOLEN_H__ */ -- cgit v1.2.3 From 33c8d948bc87658e8d9ed1dfd05dfbbb417d3e75 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 24 Sep 2025 19:43:35 +0300 Subject: drm/xe/stolen: convert compat static inlines to proper functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add display/xe_stolen.c as the implementation for the stolen interface exposed to display. This allows hiding the implementation details that shouldn't be exposed to display. 
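To illustrate the shape of the change, a minimal sketch of the header/implementation split follows, assuming a hypothetical example_stolen_gpu_offset() accessor (only xe_ttm_stolen_gpu_offset() is taken from the patch; the rest is illustrative):

  /* compat header: bare declaration, no xe-internal includes required */
  struct xe_device;
  u64 example_stolen_gpu_offset(struct xe_device *xe);

  /* display/xe_stolen.c: the definition is free to use xe internals */
  #include "xe_ttm_stolen_mgr.h"

  u64 example_stolen_gpu_offset(struct xe_device *xe)
  {
          return xe_ttm_stolen_gpu_offset(xe);
  }

The out-of-line functions trade a negligible call overhead for a compat header that no longer drags xe implementation headers into display code.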
Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/8e807c6aafc6151b18df08dda20053516813e001.1758732183.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/Makefile | 1 + .../xe/compat-i915-headers/gem/i915_gem_stolen.h | 104 ++++----------------- drivers/gpu/drm/xe/display/xe_stolen.c | 99 ++++++++++++++++++++ 3 files changed, 119 insertions(+), 85 deletions(-) create mode 100644 drivers/gpu/drm/xe/display/xe_stolen.c diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index d9c6cf0f189e..ac65722e5d38 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -214,6 +214,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ display/xe_hdcp_gsc.o \ display/xe_panic.o \ display/xe_plane_initial.o \ + display/xe_stolen.o \ display/xe_tdf.o # SOC code shared with i915 diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index be249f51231d..10f110b9bf77 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -6,106 +6,40 @@ #ifndef _I915_GEM_STOLEN_H_ #define _I915_GEM_STOLEN_H_ -#include "xe_ttm_stolen_mgr.h" -#include "xe_res_cursor.h" -#include "xe_validation.h" +#include struct xe_bo; +struct xe_device; struct intel_stolen_node { struct xe_bo *bo; }; -static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, - struct intel_stolen_node *node, - u32 size, u32 align, - u32 start, u32 end) -{ - struct xe_bo *bo; - int err = 0; - u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN; - - if (start < SZ_4K) - start = SZ_4K; - - if (align) { - size = ALIGN(size, align); - start = ALIGN(start, align); - } - - bo = xe_bo_create_pin_range_novm(xe, xe_device_get_root_tile(xe), - size, start, end, ttm_bo_type_kernel, flags); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - bo = NULL; - return err; - } - - node->bo = bo; - - return err; -} - -static inline int i915_gem_stolen_insert_node(struct xe_device *xe, - struct intel_stolen_node *node, - u32 size, u32 align) -{ - /* Not used on xe */ - WARN_ON(1); - - return -ENODEV; -} - -static inline void i915_gem_stolen_remove_node(struct xe_device *xe, - struct intel_stolen_node *node) -{ - xe_bo_unpin_map_no_vm(node->bo); - node->bo = NULL; -} - -static inline bool i915_gem_stolen_initialized(struct xe_device *xe) -{ - return ttm_manager_type(&xe->ttm, XE_PL_STOLEN); -} +int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, + struct intel_stolen_node *node, + u32 size, u32 align, + u32 start, u32 end); -static inline bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node) -{ - return node->bo; -} +int i915_gem_stolen_insert_node(struct xe_device *xe, + struct intel_stolen_node *node, + u32 size, u32 align); -static inline u32 i915_gem_stolen_node_offset(struct intel_stolen_node *node) -{ - struct xe_res_cursor res; +void i915_gem_stolen_remove_node(struct xe_device *xe, + struct intel_stolen_node *node); - xe_res_first(node->bo->ttm.resource, 0, 4096, &res); - return res.start; -} +bool i915_gem_stolen_initialized(struct xe_device *xe); -/* Used for < gen4. These are not supported by Xe */ -static inline u64 i915_gem_stolen_area_address(const struct xe_device *xe) -{ - WARN_ON(1); +bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node); - return 0; -} +u32 i915_gem_stolen_node_offset(struct intel_stolen_node *node); -/* Used for gen9 specific WA. 
Gen9 is not supported by Xe */ -static inline u64 i915_gem_stolen_area_size(const struct xe_device *xe) -{ - WARN_ON(1); +u64 i915_gem_stolen_area_address(const struct xe_device *xe); - return 0; -} +u64 i915_gem_stolen_area_size(const struct xe_device *xe); -static inline u64 i915_gem_stolen_node_address(struct xe_device *xe, - struct intel_stolen_node *node) -{ - return xe_ttm_stolen_gpu_offset(xe) + i915_gem_stolen_node_offset(node); -} +u64 i915_gem_stolen_node_address(struct xe_device *xe, + struct intel_stolen_node *node); -static inline u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node) -{ - return node->bo->ttm.base.size; -} +u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node); #endif diff --git a/drivers/gpu/drm/xe/display/xe_stolen.c b/drivers/gpu/drm/xe/display/xe_stolen.c new file mode 100644 index 000000000000..ab156a9f2c26 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_stolen.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include "gem/i915_gem_stolen.h" +#include "xe_res_cursor.h" +#include "xe_ttm_stolen_mgr.h" +#include "xe_validation.h" + +int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, + struct intel_stolen_node *node, + u32 size, u32 align, + u32 start, u32 end) +{ + struct xe_bo *bo; + int err = 0; + u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN; + + if (start < SZ_4K) + start = SZ_4K; + + if (align) { + size = ALIGN(size, align); + start = ALIGN(start, align); + } + + bo = xe_bo_create_pin_range_novm(xe, xe_device_get_root_tile(xe), + size, start, end, ttm_bo_type_kernel, flags); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + bo = NULL; + return err; + } + + node->bo = bo; + + return err; +} + +int i915_gem_stolen_insert_node(struct xe_device *xe, + struct intel_stolen_node *node, + u32 size, u32 align) +{ + /* Not used on xe */ + WARN_ON(1); + + return -ENODEV; +} + +void i915_gem_stolen_remove_node(struct xe_device *xe, + struct intel_stolen_node *node) +{ + xe_bo_unpin_map_no_vm(node->bo); + node->bo = NULL; +} + +bool i915_gem_stolen_initialized(struct xe_device *xe) +{ + return ttm_manager_type(&xe->ttm, XE_PL_STOLEN); +} + +bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node) +{ + return node->bo; +} + +u32 i915_gem_stolen_node_offset(struct intel_stolen_node *node) +{ + struct xe_res_cursor res; + + xe_res_first(node->bo->ttm.resource, 0, 4096, &res); + return res.start; +} + +/* Used for < gen4. These are not supported by Xe */ +u64 i915_gem_stolen_area_address(const struct xe_device *xe) +{ + WARN_ON(1); + + return 0; +} + +/* Used for gen9 specific WA. Gen9 is not supported by Xe */ +u64 i915_gem_stolen_area_size(const struct xe_device *xe) +{ + WARN_ON(1); + + return 0; +} + +u64 i915_gem_stolen_node_address(struct xe_device *xe, + struct intel_stolen_node *node) +{ + return xe_ttm_stolen_gpu_offset(xe) + i915_gem_stolen_node_offset(node); +} + +u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node) +{ + return node->bo->ttm.base.size; +} -- cgit v1.2.3 From f74bab2d903e442a7bf68f08007edfe8859ef9a7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 24 Sep 2025 19:43:36 +0300 Subject: drm/{i915, xe}/stolen: make struct intel_stolen_node opaque MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add i915_gem_stolen_node_alloc() and i915_gem_stolen_node_free(), returning struct intel_stolen_node pointer. 
Make struct intel_stolen_node an opaque pointer, with different implementations in i915 and xe. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/3fe71bbb4c75ee86b4d129fafa3d4cd6526363f4.1758732183.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbc.c | 58 ++++++++++++++-------- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 20 ++++++++ drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 10 ++-- .../xe/compat-i915-headers/gem/i915_gem_stolen.h | 11 ++-- drivers/gpu/drm/xe/display/xe_stolen.c | 20 ++++++++ 5 files changed, 89 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index cc67de6c06cf..05199434cb7c 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -102,7 +102,8 @@ struct intel_fbc { struct mutex lock; unsigned int busy_bits; - struct intel_stolen_node compressed_fb, compressed_llb; + struct intel_stolen_node *compressed_fb; + struct intel_stolen_node *compressed_llb; enum intel_fbc_id id; @@ -380,16 +381,16 @@ static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) drm_WARN_ON(display->drm, range_end_overflows_t(u64, i915_gem_stolen_area_address(i915), - i915_gem_stolen_node_offset(&fbc->compressed_fb), + i915_gem_stolen_node_offset(fbc->compressed_fb), U32_MAX)); drm_WARN_ON(display->drm, range_end_overflows_t(u64, i915_gem_stolen_area_address(i915), - i915_gem_stolen_node_offset(&fbc->compressed_llb), + i915_gem_stolen_node_offset(fbc->compressed_llb), U32_MAX)); intel_de_write(display, FBC_CFB_BASE, - i915_gem_stolen_node_address(i915, &fbc->compressed_fb)); + i915_gem_stolen_node_address(i915, fbc->compressed_fb)); intel_de_write(display, FBC_LL_BASE, - i915_gem_stolen_node_address(i915, &fbc->compressed_llb)); + i915_gem_stolen_node_address(i915, fbc->compressed_llb)); } static const struct intel_fbc_funcs i8xx_fbc_funcs = { @@ -497,7 +498,7 @@ static void g4x_fbc_program_cfb(struct intel_fbc *fbc) struct intel_display *display = fbc->display; intel_de_write(display, DPFC_CB_BASE, - i915_gem_stolen_node_offset(&fbc->compressed_fb)); + i915_gem_stolen_node_offset(fbc->compressed_fb)); } static const struct intel_fbc_funcs g4x_fbc_funcs = { @@ -566,7 +567,7 @@ static void ilk_fbc_program_cfb(struct intel_fbc *fbc) struct intel_display *display = fbc->display; intel_de_write(display, ILK_DPFC_CB_BASE(fbc->id), - i915_gem_stolen_node_offset(&fbc->compressed_fb)); + i915_gem_stolen_node_offset(fbc->compressed_fb)); } static const struct intel_fbc_funcs ilk_fbc_funcs = { @@ -842,13 +843,13 @@ static int find_compression_limit(struct intel_fbc *fbc, size /= limit; /* Try to over-allocate to reduce reallocations and fragmentation. 
*/ - ret = i915_gem_stolen_insert_node_in_range(i915, &fbc->compressed_fb, + ret = i915_gem_stolen_insert_node_in_range(i915, fbc->compressed_fb, size <<= 1, 4096, 0, end); if (ret == 0) return limit; for (; limit <= intel_fbc_max_limit(display); limit <<= 1) { - ret = i915_gem_stolen_insert_node_in_range(i915, &fbc->compressed_fb, + ret = i915_gem_stolen_insert_node_in_range(i915, fbc->compressed_fb, size >>= 1, 4096, 0, end); if (ret == 0) return limit; @@ -865,12 +866,12 @@ static int intel_fbc_alloc_cfb(struct intel_fbc *fbc, int ret; drm_WARN_ON(display->drm, - i915_gem_stolen_node_allocated(&fbc->compressed_fb)); + i915_gem_stolen_node_allocated(fbc->compressed_fb)); drm_WARN_ON(display->drm, - i915_gem_stolen_node_allocated(&fbc->compressed_llb)); + i915_gem_stolen_node_allocated(fbc->compressed_llb)); if (DISPLAY_VER(display) < 5 && !display->platform.g4x) { - ret = i915_gem_stolen_insert_node(i915, &fbc->compressed_llb, + ret = i915_gem_stolen_insert_node(i915, fbc->compressed_llb, 4096, 4096); if (ret) goto err; @@ -887,12 +888,12 @@ static int intel_fbc_alloc_cfb(struct intel_fbc *fbc, drm_dbg_kms(display->drm, "reserved %llu bytes of contiguous stolen space for FBC, limit: %d\n", - i915_gem_stolen_node_size(&fbc->compressed_fb), fbc->limit); + i915_gem_stolen_node_size(fbc->compressed_fb), fbc->limit); return 0; err_llb: - if (i915_gem_stolen_node_allocated(&fbc->compressed_llb)) - i915_gem_stolen_remove_node(i915, &fbc->compressed_llb); + if (i915_gem_stolen_node_allocated(fbc->compressed_llb)) + i915_gem_stolen_remove_node(i915, fbc->compressed_llb); err: if (i915_gem_stolen_initialized(i915)) drm_info_once(display->drm, @@ -951,10 +952,10 @@ static void __intel_fbc_cleanup_cfb(struct intel_fbc *fbc) if (WARN_ON(intel_fbc_hw_is_active(fbc))) return; - if (i915_gem_stolen_node_allocated(&fbc->compressed_llb)) - i915_gem_stolen_remove_node(i915, &fbc->compressed_llb); - if (i915_gem_stolen_node_allocated(&fbc->compressed_fb)) - i915_gem_stolen_remove_node(i915, &fbc->compressed_fb); + if (i915_gem_stolen_node_allocated(fbc->compressed_llb)) + i915_gem_stolen_remove_node(i915, fbc->compressed_llb); + if (i915_gem_stolen_node_allocated(fbc->compressed_fb)) + i915_gem_stolen_remove_node(i915, fbc->compressed_fb); } void intel_fbc_cleanup(struct intel_display *display) @@ -967,6 +968,9 @@ void intel_fbc_cleanup(struct intel_display *display) __intel_fbc_cleanup_cfb(fbc); mutex_unlock(&fbc->lock); + i915_gem_stolen_node_free(fbc->compressed_fb); + i915_gem_stolen_node_free(fbc->compressed_llb); + kfree(fbc); } } @@ -1355,7 +1359,7 @@ static bool intel_fbc_is_cfb_ok(const struct intel_plane_state *plane_state) return intel_fbc_min_limit(plane_state) <= fbc->limit && intel_fbc_cfb_size(plane_state) <= fbc->limit * - i915_gem_stolen_node_size(&fbc->compressed_fb); + i915_gem_stolen_node_size(fbc->compressed_fb); } static bool intel_fbc_is_ok(const struct intel_plane_state *plane_state) @@ -2083,6 +2087,13 @@ static struct intel_fbc *intel_fbc_create(struct intel_display *display, if (!fbc) return NULL; + fbc->compressed_fb = i915_gem_stolen_node_alloc(display->drm); + if (!fbc->compressed_fb) + goto err; + fbc->compressed_llb = i915_gem_stolen_node_alloc(display->drm); + if (!fbc->compressed_llb) + goto err; + fbc->id = fbc_id; fbc->display = display; INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn); @@ -2102,6 +2113,13 @@ static struct intel_fbc *intel_fbc_create(struct intel_display *display, fbc->funcs = &i8xx_fbc_funcs; return fbc; + +err: + 
i915_gem_stolen_node_free(fbc->compressed_llb); + i915_gem_stolen_node_free(fbc->compressed_fb); + kfree(fbc); + + return NULL; } /** diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 70ee34303e36..5991ccd3f328 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -24,6 +24,10 @@ #include "intel_mchbar_regs.h" #include "intel_pci_config.h" +struct intel_stolen_node { + struct drm_mm_node node; +}; + /* * The BIOS typically reserves some of the system's memory for the exclusive * use of the integrated graphics. This memory is no longer available for @@ -1057,3 +1061,19 @@ u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node) { return node->node.size; } + +struct intel_stolen_node *i915_gem_stolen_node_alloc(struct drm_device *drm) +{ + struct intel_stolen_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return NULL; + + return node; +} + +void i915_gem_stolen_node_free(const struct intel_stolen_node *node) +{ + kfree(node); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index 9e42d5a4cf13..25ec8325fb29 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -7,14 +7,11 @@ #define __I915_GEM_STOLEN_H__ #include -#include +struct drm_device; struct drm_i915_gem_object; struct drm_i915_private; - -struct intel_stolen_node { - struct drm_mm_node node; -}; +struct intel_stolen_node; int i915_gem_stolen_insert_node(struct drm_i915_private *i915, struct intel_stolen_node *node, u64 size, @@ -51,4 +48,7 @@ bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node); u64 i915_gem_stolen_node_offset(const struct intel_stolen_node *node); u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node); +struct intel_stolen_node *i915_gem_stolen_node_alloc(struct drm_device *drm); +void i915_gem_stolen_node_free(const struct intel_stolen_node *node); + #endif /* __I915_GEM_STOLEN_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index 10f110b9bf77..b0bc3efcaf6c 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -8,13 +8,10 @@ #include -struct xe_bo; +struct drm_device; +struct intel_stolen_node; struct xe_device; -struct intel_stolen_node { - struct xe_bo *bo; -}; - int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, struct intel_stolen_node *node, u32 size, u32 align, @@ -42,4 +39,8 @@ u64 i915_gem_stolen_node_address(struct xe_device *xe, u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node); +struct intel_stolen_node *i915_gem_stolen_node_alloc(struct drm_device *drm); + +void i915_gem_stolen_node_free(const struct intel_stolen_node *node); + #endif diff --git a/drivers/gpu/drm/xe/display/xe_stolen.c b/drivers/gpu/drm/xe/display/xe_stolen.c index ab156a9f2c26..b218df40324a 100644 --- a/drivers/gpu/drm/xe/display/xe_stolen.c +++ b/drivers/gpu/drm/xe/display/xe_stolen.c @@ -6,6 +6,10 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_validation.h" +struct intel_stolen_node { + struct xe_bo *bo; +}; + int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, struct intel_stolen_node *node, u32 size, u32 align, @@ -97,3 +101,19 @@ u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node) { return 
node->bo->ttm.base.size; } + +struct intel_stolen_node *i915_gem_stolen_node_alloc(struct drm_device *drm) +{ + struct intel_stolen_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return NULL; + + return node; +} + +void i915_gem_stolen_node_free(const struct intel_stolen_node *node) +{ + kfree(node); +} -- cgit v1.2.3 From e8848d3d37490157e31ef583cb7098e368644254 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 24 Sep 2025 19:43:37 +0300 Subject: drm/{i915, xe}/stolen: add device pointer to struct intel_stolen_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add backpointers to i915/xe to allow simplifying some interfaces in follow-up. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/321354d47f9e530159caefef510d5394f4177470.1758732183.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 4 ++++ drivers/gpu/drm/xe/display/xe_stolen.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 5991ccd3f328..8bc71fb2a765 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -25,6 +25,7 @@ #include "intel_pci_config.h" struct intel_stolen_node { + struct drm_i915_private *i915; struct drm_mm_node node; }; @@ -1064,12 +1065,15 @@ u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node) struct intel_stolen_node *i915_gem_stolen_node_alloc(struct drm_device *drm) { + struct drm_i915_private *i915 = to_i915(drm); struct intel_stolen_node *node; node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; + node->i915 = i915; + return node; } diff --git a/drivers/gpu/drm/xe/display/xe_stolen.c b/drivers/gpu/drm/xe/display/xe_stolen.c index b218df40324a..eea182b569a1 100644 --- a/drivers/gpu/drm/xe/display/xe_stolen.c +++ b/drivers/gpu/drm/xe/display/xe_stolen.c @@ -7,6 +7,7 @@ #include "xe_validation.h" struct intel_stolen_node { + struct xe_device *xe; struct xe_bo *bo; }; @@ -104,12 +105,15 @@ u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node) struct intel_stolen_node *i915_gem_stolen_node_alloc(struct drm_device *drm) { + struct xe_device *xe = to_xe_device(drm); struct intel_stolen_node *node; node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; + node->xe = xe; + return node; } -- cgit v1.2.3 From 994c74ea5e07ec93a8616602caaca95a70efdb21 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 24 Sep 2025 19:43:38 +0300 Subject: drm/{i915, xe}/stolen: use the stored i915/xe device pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we store the i915/xe device pointer in struct intel_stolen_node, we can reduce parameter passing in a number of functions. 
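In short, the device is recorded once at allocation time and each accessor derives it from the node, as the patch does for node removal:

  struct intel_stolen_node {
          struct drm_i915_private *i915;  /* set in i915_gem_stolen_node_alloc() */
          struct drm_mm_node node;
  };

  /* callers pass only the node; the device comes from the backpointer */
  void i915_gem_stolen_remove_node(struct intel_stolen_node *node)
  {
          __i915_gem_stolen_remove_node(node->i915, &node->node);
  }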
Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/6f31114c8113ce2254d422ca53992088b673fb2f.1758732183.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbc.c | 21 ++++++++------------- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 20 +++++++++----------- drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 12 ++++-------- .../xe/compat-i915-headers/gem/i915_gem_stolen.h | 13 ++++--------- drivers/gpu/drm/xe/display/xe_stolen.c | 17 ++++++++--------- 5 files changed, 33 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 05199434cb7c..0d837527aaab 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -388,9 +388,9 @@ static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) i915_gem_stolen_node_offset(fbc->compressed_llb), U32_MAX)); intel_de_write(display, FBC_CFB_BASE, - i915_gem_stolen_node_address(i915, fbc->compressed_fb)); + i915_gem_stolen_node_address(fbc->compressed_fb)); intel_de_write(display, FBC_LL_BASE, - i915_gem_stolen_node_address(i915, fbc->compressed_llb)); + i915_gem_stolen_node_address(fbc->compressed_llb)); } static const struct intel_fbc_funcs i8xx_fbc_funcs = { @@ -836,20 +836,19 @@ static int find_compression_limit(struct intel_fbc *fbc, unsigned int size, int min_limit) { struct intel_display *display = fbc->display; - struct drm_i915_private *i915 = to_i915(display->drm); u64 end = intel_fbc_stolen_end(display); int ret, limit = min_limit; size /= limit; /* Try to over-allocate to reduce reallocations and fragmentation. */ - ret = i915_gem_stolen_insert_node_in_range(i915, fbc->compressed_fb, + ret = i915_gem_stolen_insert_node_in_range(fbc->compressed_fb, size <<= 1, 4096, 0, end); if (ret == 0) return limit; for (; limit <= intel_fbc_max_limit(display); limit <<= 1) { - ret = i915_gem_stolen_insert_node_in_range(i915, fbc->compressed_fb, + ret = i915_gem_stolen_insert_node_in_range(fbc->compressed_fb, size >>= 1, 4096, 0, end); if (ret == 0) return limit; @@ -871,8 +870,7 @@ static int intel_fbc_alloc_cfb(struct intel_fbc *fbc, i915_gem_stolen_node_allocated(fbc->compressed_llb)); if (DISPLAY_VER(display) < 5 && !display->platform.g4x) { - ret = i915_gem_stolen_insert_node(i915, fbc->compressed_llb, - 4096, 4096); + ret = i915_gem_stolen_insert_node(fbc->compressed_llb, 4096, 4096); if (ret) goto err; } @@ -893,7 +891,7 @@ static int intel_fbc_alloc_cfb(struct intel_fbc *fbc, err_llb: if (i915_gem_stolen_node_allocated(fbc->compressed_llb)) - i915_gem_stolen_remove_node(i915, fbc->compressed_llb); + i915_gem_stolen_remove_node(fbc->compressed_llb); err: if (i915_gem_stolen_initialized(i915)) drm_info_once(display->drm, @@ -946,16 +944,13 @@ static void intel_fbc_program_workarounds(struct intel_fbc *fbc) static void __intel_fbc_cleanup_cfb(struct intel_fbc *fbc) { - struct intel_display *display = fbc->display; - struct drm_i915_private *i915 = to_i915(display->drm); - if (WARN_ON(intel_fbc_hw_is_active(fbc))) return; if (i915_gem_stolen_node_allocated(fbc->compressed_llb)) - i915_gem_stolen_remove_node(i915, fbc->compressed_llb); + i915_gem_stolen_remove_node(fbc->compressed_llb); if (i915_gem_stolen_node_allocated(fbc->compressed_fb)) - i915_gem_stolen_remove_node(i915, fbc->compressed_fb); + i915_gem_stolen_remove_node(fbc->compressed_fb); } void intel_fbc_cleanup(struct intel_display *display) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 8bc71fb2a765..b2812ec79d19 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -63,11 +63,10 @@ static int __i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915, return ret; } -int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915, - struct intel_stolen_node *node, u64 size, +int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, u64 size, unsigned int alignment, u64 start, u64 end) { - return __i915_gem_stolen_insert_node_in_range(i915, &node->node, + return __i915_gem_stolen_insert_node_in_range(node->i915, &node->node, size, alignment, start, end); } @@ -82,11 +81,10 @@ static int __i915_gem_stolen_insert_node(struct drm_i915_private *i915, U64_MAX); } -int i915_gem_stolen_insert_node(struct drm_i915_private *i915, - struct intel_stolen_node *node, u64 size, +int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u64 size, unsigned int alignment) { - return __i915_gem_stolen_insert_node(i915, &node->node, size, alignment); + return __i915_gem_stolen_insert_node(node->i915, &node->node, size, alignment); } static void __i915_gem_stolen_remove_node(struct drm_i915_private *i915, @@ -97,10 +95,9 @@ static void __i915_gem_stolen_remove_node(struct drm_i915_private *i915, mutex_unlock(&i915->mm.stolen_lock); } -void i915_gem_stolen_remove_node(struct drm_i915_private *i915, - struct intel_stolen_node *node) +void i915_gem_stolen_remove_node(struct intel_stolen_node *node) { - __i915_gem_stolen_remove_node(i915, &node->node); + __i915_gem_stolen_remove_node(node->i915, &node->node); } static bool valid_stolen_size(struct drm_i915_private *i915, struct resource *dsm) @@ -1042,9 +1039,10 @@ u64 i915_gem_stolen_area_size(const struct drm_i915_private *i915) return resource_size(&i915->dsm.stolen); } -u64 i915_gem_stolen_node_address(const struct drm_i915_private *i915, - const struct intel_stolen_node *node) +u64 i915_gem_stolen_node_address(const struct intel_stolen_node *node) { + struct drm_i915_private *i915 = node->i915; + return i915->dsm.stolen.start + i915_gem_stolen_node_offset(node); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index 25ec8325fb29..c670736f09e1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -13,15 +13,12 @@ struct drm_i915_gem_object; struct drm_i915_private; struct intel_stolen_node; -int i915_gem_stolen_insert_node(struct drm_i915_private *i915, - struct intel_stolen_node *node, u64 size, +int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u64 size, unsigned alignment); -int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915, - struct intel_stolen_node *node, u64 size, +int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, u64 size, unsigned alignment, u64 start, u64 end); -void i915_gem_stolen_remove_node(struct drm_i915_private *i915, - struct intel_stolen_node *node); +void i915_gem_stolen_remove_node(struct intel_stolen_node *node); struct intel_memory_region * i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type, u16 instance); @@ -41,8 +38,7 @@ bool i915_gem_stolen_initialized(const struct drm_i915_private *i915); u64 i915_gem_stolen_area_address(const struct drm_i915_private *i915); u64 i915_gem_stolen_area_size(const struct drm_i915_private *i915); -u64 i915_gem_stolen_node_address(const struct drm_i915_private *i915, - 
const struct intel_stolen_node *node); +u64 i915_gem_stolen_node_address(const struct intel_stolen_node *node); bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node); u64 i915_gem_stolen_node_offset(const struct intel_stolen_node *node); diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index b0bc3efcaf6c..7bdf73fad8bf 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -12,17 +12,13 @@ struct drm_device; struct intel_stolen_node; struct xe_device; -int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, - struct intel_stolen_node *node, +int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, u32 size, u32 align, u32 start, u32 end); -int i915_gem_stolen_insert_node(struct xe_device *xe, - struct intel_stolen_node *node, - u32 size, u32 align); +int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u32 size, u32 align); -void i915_gem_stolen_remove_node(struct xe_device *xe, - struct intel_stolen_node *node); +void i915_gem_stolen_remove_node(struct intel_stolen_node *node); bool i915_gem_stolen_initialized(struct xe_device *xe); @@ -34,8 +30,7 @@ u64 i915_gem_stolen_area_address(const struct xe_device *xe); u64 i915_gem_stolen_area_size(const struct xe_device *xe); -u64 i915_gem_stolen_node_address(struct xe_device *xe, - struct intel_stolen_node *node); +u64 i915_gem_stolen_node_address(struct intel_stolen_node *node); u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node); diff --git a/drivers/gpu/drm/xe/display/xe_stolen.c b/drivers/gpu/drm/xe/display/xe_stolen.c index eea182b569a1..c1afe70454d1 100644 --- a/drivers/gpu/drm/xe/display/xe_stolen.c +++ b/drivers/gpu/drm/xe/display/xe_stolen.c @@ -11,11 +11,12 @@ struct intel_stolen_node { struct xe_bo *bo; }; -int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, - struct intel_stolen_node *node, +int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, u32 size, u32 align, u32 start, u32 end) { + struct xe_device *xe = node->xe; + struct xe_bo *bo; int err = 0; u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN; @@ -41,9 +42,7 @@ int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, return err; } -int i915_gem_stolen_insert_node(struct xe_device *xe, - struct intel_stolen_node *node, - u32 size, u32 align) +int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u32 size, u32 align) { /* Not used on xe */ WARN_ON(1); @@ -51,8 +50,7 @@ int i915_gem_stolen_insert_node(struct xe_device *xe, return -ENODEV; } -void i915_gem_stolen_remove_node(struct xe_device *xe, - struct intel_stolen_node *node) +void i915_gem_stolen_remove_node(struct intel_stolen_node *node) { xe_bo_unpin_map_no_vm(node->bo); node->bo = NULL; @@ -92,9 +90,10 @@ u64 i915_gem_stolen_area_size(const struct xe_device *xe) return 0; } -u64 i915_gem_stolen_node_address(struct xe_device *xe, - struct intel_stolen_node *node) +u64 i915_gem_stolen_node_address(struct intel_stolen_node *node) { + struct xe_device *xe = node->xe; + return xe_ttm_stolen_gpu_offset(xe) + i915_gem_stolen_node_offset(node); } -- cgit v1.2.3 From d3741f2c861c754ad39f8f35d03bd26ee04c74ca Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 24 Sep 2025 19:43:39 +0300 Subject: drm/{i915, xe}/stolen: convert stolen interface to struct drm_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Make the stolen interface agnostic to i915/xe, and pass struct drm_device instead. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/bbfc2aeaeee3156e92d49c73983be05b6feeede2.1758732183.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbc.c | 13 +++++-------- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 12 +++++++++--- drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 6 +++--- .../gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h | 7 +++---- drivers/gpu/drm/xe/display/xe_stolen.c | 8 +++++--- 5 files changed, 25 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 0d837527aaab..4edb4342833e 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -377,14 +377,13 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc) static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) { struct intel_display *display = fbc->display; - struct drm_i915_private *i915 = to_i915(display->drm); drm_WARN_ON(display->drm, - range_end_overflows_t(u64, i915_gem_stolen_area_address(i915), + range_end_overflows_t(u64, i915_gem_stolen_area_address(display->drm), i915_gem_stolen_node_offset(fbc->compressed_fb), U32_MAX)); drm_WARN_ON(display->drm, - range_end_overflows_t(u64, i915_gem_stolen_area_address(i915), + range_end_overflows_t(u64, i915_gem_stolen_area_address(display->drm), i915_gem_stolen_node_offset(fbc->compressed_llb), U32_MAX)); intel_de_write(display, FBC_CFB_BASE, @@ -798,7 +797,6 @@ static u64 intel_fbc_cfb_base_max(struct intel_display *display) static u64 intel_fbc_stolen_end(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); u64 end; /* The FBC hardware for BDW/SKL doesn't have access to the stolen @@ -807,7 +805,7 @@ static u64 intel_fbc_stolen_end(struct intel_display *display) * underruns, even if that range is not reserved by the BIOS. */ if (display->platform.broadwell || (DISPLAY_VER(display) == 9 && !display->platform.broxton)) - end = i915_gem_stolen_area_size(i915) - 8 * 1024 * 1024; + end = i915_gem_stolen_area_size(display->drm) - 8 * 1024 * 1024; else end = U64_MAX; @@ -861,7 +859,6 @@ static int intel_fbc_alloc_cfb(struct intel_fbc *fbc, unsigned int size, int min_limit) { struct intel_display *display = fbc->display; - struct drm_i915_private *i915 = to_i915(display->drm); int ret; drm_WARN_ON(display->drm, @@ -893,7 +890,7 @@ err_llb: if (i915_gem_stolen_node_allocated(fbc->compressed_llb)) i915_gem_stolen_remove_node(fbc->compressed_llb); err: - if (i915_gem_stolen_initialized(i915)) + if (i915_gem_stolen_initialized(display->drm)) drm_info_once(display->drm, "not enough stolen space for compressed buffer (need %d more bytes), disabling. 
Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); return -ENOSPC; @@ -1435,7 +1432,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, if (!fbc) return 0; - if (!i915_gem_stolen_initialized(i915)) { + if (!i915_gem_stolen_initialized(display->drm)) { plane_state->no_fbc_reason = "stolen memory not initialised"; return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index b2812ec79d19..e73b369c3347 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -1024,18 +1024,24 @@ bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj) return obj->ops == &i915_gem_object_stolen_ops; } -bool i915_gem_stolen_initialized(const struct drm_i915_private *i915) +bool i915_gem_stolen_initialized(struct drm_device *drm) { + struct drm_i915_private *i915 = to_i915(drm); + return drm_mm_initialized(&i915->mm.stolen); } -u64 i915_gem_stolen_area_address(const struct drm_i915_private *i915) +u64 i915_gem_stolen_area_address(struct drm_device *drm) { + struct drm_i915_private *i915 = to_i915(drm); + return i915->dsm.stolen.start; } -u64 i915_gem_stolen_area_size(const struct drm_i915_private *i915) +u64 i915_gem_stolen_area_size(struct drm_device *drm) { + struct drm_i915_private *i915 = to_i915(drm); + return resource_size(&i915->dsm.stolen); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index c670736f09e1..7b0386002ed4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -34,9 +34,9 @@ bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj); #define I915_GEM_STOLEN_BIAS SZ_128K -bool i915_gem_stolen_initialized(const struct drm_i915_private *i915); -u64 i915_gem_stolen_area_address(const struct drm_i915_private *i915); -u64 i915_gem_stolen_area_size(const struct drm_i915_private *i915); +bool i915_gem_stolen_initialized(struct drm_device *drm); +u64 i915_gem_stolen_area_address(struct drm_device *drm); +u64 i915_gem_stolen_area_size(struct drm_device *drm); u64 i915_gem_stolen_node_address(const struct intel_stolen_node *node); diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index 7bdf73fad8bf..42927326e567 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -10,7 +10,6 @@ struct drm_device; struct intel_stolen_node; -struct xe_device; int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, u32 size, u32 align, @@ -20,15 +19,15 @@ int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u32 size, u32 al void i915_gem_stolen_remove_node(struct intel_stolen_node *node); -bool i915_gem_stolen_initialized(struct xe_device *xe); +bool i915_gem_stolen_initialized(struct drm_device *drm); bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node); u32 i915_gem_stolen_node_offset(struct intel_stolen_node *node); -u64 i915_gem_stolen_area_address(const struct xe_device *xe); +u64 i915_gem_stolen_area_address(struct drm_device *drm); -u64 i915_gem_stolen_area_size(const struct xe_device *xe); +u64 i915_gem_stolen_area_size(struct drm_device *drm); u64 i915_gem_stolen_node_address(struct intel_stolen_node *node); diff --git a/drivers/gpu/drm/xe/display/xe_stolen.c 
b/drivers/gpu/drm/xe/display/xe_stolen.c index c1afe70454d1..d7af731d9c90 100644 --- a/drivers/gpu/drm/xe/display/xe_stolen.c +++ b/drivers/gpu/drm/xe/display/xe_stolen.c @@ -56,8 +56,10 @@ void i915_gem_stolen_remove_node(struct intel_stolen_node *node) node->bo = NULL; } -bool i915_gem_stolen_initialized(struct xe_device *xe) +bool i915_gem_stolen_initialized(struct drm_device *drm) { + struct xe_device *xe = to_xe_device(drm); + return ttm_manager_type(&xe->ttm, XE_PL_STOLEN); } @@ -75,7 +77,7 @@ u32 i915_gem_stolen_node_offset(struct intel_stolen_node *node) } /* Used for < gen4. These are not supported by Xe */ -u64 i915_gem_stolen_area_address(const struct xe_device *xe) +u64 i915_gem_stolen_area_address(struct drm_device *drm) { WARN_ON(1); @@ -83,7 +85,7 @@ u64 i915_gem_stolen_area_address(const struct xe_device *xe) } /* Used for gen9 specific WA. Gen9 is not supported by Xe */ -u64 i915_gem_stolen_area_size(const struct xe_device *xe) +u64 i915_gem_stolen_area_size(struct drm_device *drm) { WARN_ON(1); -- cgit v1.2.3 From d630a1bdd66090d49b51521019b905f37ae7a6b2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 24 Sep 2025 19:43:40 +0300 Subject: drm/xe/stolen: use the same types as i915 interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify the i915 and xe interfaces by switching to the same types as i915. Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/f15d41bc232dfa957841f16d9a069c777af40194.1758732183.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h | 8 ++++---- drivers/gpu/drm/xe/display/xe_stolen.c | 7 +++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index 42927326e567..48e3256ba37e 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -11,11 +11,11 @@ struct drm_device; struct intel_stolen_node; -int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, - u32 size, u32 align, - u32 start, u32 end); +int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, u64 size, + unsigned int align, u64 start, u64 end); -int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u32 size, u32 align); +int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u64 size, + unsigned int align); void i915_gem_stolen_remove_node(struct intel_stolen_node *node); diff --git a/drivers/gpu/drm/xe/display/xe_stolen.c b/drivers/gpu/drm/xe/display/xe_stolen.c index d7af731d9c90..9f04ba36e930 100644 --- a/drivers/gpu/drm/xe/display/xe_stolen.c +++ b/drivers/gpu/drm/xe/display/xe_stolen.c @@ -11,9 +11,8 @@ struct intel_stolen_node { struct xe_bo *bo; }; -int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, - u32 size, u32 align, - u32 start, u32 end) +int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, u64 size, + unsigned int align, u64 start, u64 end) { struct xe_device *xe = node->xe; @@ -42,7 +41,7 @@ int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, return err; } -int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u32 size, u32 align) +int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u64 size, unsigned int align) { /* Not used on xe */ WARN_ON(1); -- cgit v1.2.3 From 
97825e1c6de7315cba9acb6c1371f1a87dedd904 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 26 Sep 2025 14:10:32 +0300 Subject: drm/{i915,xe}: driver agnostic drm to display pointer chase The display driver needs to get from the struct drm_device pointer to the struct intel_display pointer. Currently, this depends on knowledge of the struct drm_i915_private and struct xe_device definitions, but we'd like to hide those definitions from display. Require the struct drm_device and struct intel_display * members within struct drm_i915_private and struct xe_device to be placed next to each other, to be able to figure out the display pointer without knowledge of the structures. Use a generic dummy device structure to define the relative offsets of the drm and display members, and add static assertions to ensure this holds for both i915 and xe. Use the dummy structure to do the pointer chase from struct drm_device * to struct intel_display *. This requires moving the display member in struct xe_device after the drm member. Cc: Lucas De Marchi Cc: Rodrigo Vivi Cc: Ville Syrjala Suggested-by: Simona Vetter Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250926111032.1188876-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../drm/i915/display/intel_display_conversion.c | 20 +++++++---- drivers/gpu/drm/i915/i915_driver.c | 4 +++ drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/xe/display/xe_display.c | 4 +++ drivers/gpu/drm/xe/xe_device_types.h | 7 ++-- include/drm/intel/display_member.h | 42 ++++++++++++++++++++++ 6 files changed, 69 insertions(+), 9 deletions(-) create mode 100644 include/drm/intel/display_member.h diff --git a/drivers/gpu/drm/i915/display/intel_display_conversion.c b/drivers/gpu/drm/i915/display/intel_display_conversion.c index d56065f22655..9a47aa38cf82 100644 --- a/drivers/gpu/drm/i915/display/intel_display_conversion.c +++ b/drivers/gpu/drm/i915/display/intel_display_conversion.c @@ -1,15 +1,21 @@ // SPDX-License-Identifier: MIT /* Copyright © 2024 Intel Corporation */ -#include "i915_drv.h" -#include "intel_display_conversion.h" +#include -static struct intel_display *__i915_to_display(struct drm_i915_private *i915) -{ - return i915->display; -} +#include "intel_display_conversion.h" struct intel_display *__drm_to_display(struct drm_device *drm) { - return __i915_to_display(to_i915(drm)); + /* + * Note: This relies on both struct drm_i915_private and struct + * xe_device having the struct drm_device and struct intel_display * + * members at the same relative offsets, as defined by struct + * __intel_generic_device. + * + * See also INTEL_DISPLAY_MEMBER_STATIC_ASSERT(). + */ + struct __intel_generic_device *d = container_of(drm, struct __intel_generic_device, drm); + + return d->display; } diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 95165e45de74..b46cb54ef5dc 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "display/i9xx_display_sr.h" #include "display/intel_bw.h" @@ -737,6 +738,9 @@ static void i915_welcome_messages(struct drm_i915_private *dev_priv) "DRM_I915_DEBUG_RUNTIME_PM enabled\n"); } +/* Ensure drm and display members are placed properly. 
*/ +INTEL_DISPLAY_MEMBER_STATIC_ASSERT(struct drm_i915_private, drm, display); + static struct drm_i915_private * i915_driver_create(struct pci_dev *pdev, const struct pci_device_id *ent) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 03e497d2081e..6e159bb8ad2f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -174,6 +174,7 @@ struct i915_selftest_stash { struct drm_i915_private { struct drm_device drm; + /* display device data, must be placed after drm device member */ struct intel_display *display; /* FIXME: Device release actions should all be moved to drmm_ */ diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 19e691fccf8c..5f4044e63185 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -13,6 +13,7 @@ #include #include #include +#include <drm/intel/display_member.h> #include #include "soc/intel_dram.h" @@ -35,6 +36,9 @@ #include "skl_watermark.h" #include "xe_module.h" +/* Ensure drm and display members are placed properly. */ +INTEL_DISPLAY_MEMBER_STATIC_ASSERT(struct xe_device, drm, display); + /* Xe device functions */ /** diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index a6c361db11d9..53264b2bb832 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -217,6 +217,11 @@ struct xe_device { /** @drm: drm device */ struct drm_device drm; +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) + /** @display: display device data, must be placed after drm device member */ + struct intel_display *display; +#endif + /** @devcoredump: device coredump */ struct xe_devcoredump devcoredump; @@ -617,8 +622,6 @@ struct xe_device { * drm_i915_private during build. After cleanup these should go away, * migrating to the right sub-structs */ - struct intel_display *display; - const struct dram_info *dram_info; /* diff --git a/include/drm/intel/display_member.h b/include/drm/intel/display_member.h new file mode 100644 index 000000000000..0319ea560b60 --- /dev/null +++ b/include/drm/intel/display_member.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __DRM_INTEL_DISPLAY_H__ +#define __DRM_INTEL_DISPLAY_H__ + +#include <linux/build_bug.h> +#include <linux/stddef.h> +#include <linux/stringify.h> + +#include <drm/drm_device.h> + +struct intel_display; + +/* + * A dummy device struct to define the relative offsets of drm and display + * members. With the members identically placed in struct drm_i915_private and + * struct xe_device, this allows figuring out the struct intel_display pointer + * without the definition of either driver specific structure. + */ +struct __intel_generic_device { + struct drm_device drm; + struct intel_display *display; +}; + +/** + * INTEL_DISPLAY_MEMBER_STATIC_ASSERT() - ensure correct placing of drm and display members + * @type: The struct to check + * @drm_member: Name of the struct drm_device member + * @display_member: Name of the struct intel_display * member. + * + * Use this static assert macro to ensure that the struct drm_device and struct + * intel_display * members of struct drm_i915_private and struct xe_device are + * at the same relative offsets. 
+ */ +#define INTEL_DISPLAY_MEMBER_STATIC_ASSERT(type, drm_member, display_member) \ + static_assert( \ + offsetof(struct __intel_generic_device, display) - offsetof(struct __intel_generic_device, drm) == \ + offsetof(type, display_member) - offsetof(type, drm_member), \ + __stringify(type) " " __stringify(drm_member) " and " __stringify(display_member) " members at invalid offsets") + +#endif -- cgit v1.2.3 From cc7e1a9b596c9d9dc3324c056cf8162e9fca2765 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 29 Sep 2025 16:34:18 +0300 Subject: drm/i915/irq: duplicate HAS_FBC() for irq error mask usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error irq handling needs to mask page table errors on gen 2/3 with FBC. See commit e7e12f6ec8bf ("drm/i915: Mask page table errors on gen2/3 with FBC") for details. We want to avoid using display feature checks in i915 core code. Since FBC can't be fused off on gen 2/3, just list the platforms that support FBC. Add a macro purely for making the code self-documenting. With this, we can drop the intel_display_core.h include, and make struct intel_display opaque inside i915_irq.c. Suggested-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250929133418.2033006-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_irq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 11a727b74849..e0a0bd687f1b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -33,7 +33,6 @@ #include -#include "display/intel_display_core.h" #include "display/intel_display_irq.h" #include "display/intel_hotplug.h" #include "display/intel_hotplug_irq.h" @@ -794,9 +793,10 @@ static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv) intel_uncore_posting_read(&dev_priv->uncore, GEN8_MASTER_IRQ); } +#define I9XX_HAS_FBC(i915) (IS_I85X(i915) || IS_I865G(i915) || IS_I915GM(i915) || IS_I945GM(i915)) + static u32 i9xx_error_mask(struct drm_i915_private *i915) { - struct intel_display *display = i915->display; /* * On gen2/3 FBC generates (seemingly spurious) * display INVALID_GTT/INVALID_GTT_PTE table errors. @@ -809,7 +809,7 @@ static u32 i9xx_error_mask(struct drm_i915_private *i915) * Unfortunately we can't mask off individual PGTBL_ER bits, * so we just have to mask off all page table errors via EMR. */ - if (HAS_FBC(display)) + if (I9XX_HAS_FBC(i915)) return I915_ERROR_MEMORY_REFRESH; else return I915_ERROR_PAGE_TABLE | -- cgit v1.2.3 From a5e07980e62deeb849e59c9f14299ea3f291969e Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Wed, 1 Oct 2025 16:23:36 +0200 Subject: drm/i915/gvt: Propagate vfio_set_irqs_validate_and_prepare() error Return the actual error code from vfio_set_irqs_validate_and_prepare() instead of always collapsing to -EINVAL. While the helper currently returns -EINVAL in most cases, passing through the real error code is more future-proof. While at it, drop the stray 'intel:' prefix from the error message. 
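The underlying pattern is general: when a callee already returns a meaningful errno, forward it rather than flattening it to a single value. Schematically, with hypothetical names:

  ret = validate_helper(args);
  if (ret) {
          log_err("validate_helper failed\n");
          return ret;     /* not a hard-coded -EINVAL */
  }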
Signed-off-by: Andi Shyti Reviewed-by: Zhenyu Wang Link: https://lore.kernel.org/r/20251001142336.82089-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gvt/kvmgt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 183128b84630..c43b47687838 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1369,8 +1369,8 @@ static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd, ret = vfio_set_irqs_validate_and_prepare(&hdr, max, VFIO_PCI_NUM_IRQS, &data_size); if (ret) { - gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n"); - return -EINVAL; + gvt_vgpu_err("vfio_set_irqs_validate_and_prepare failed\n"); + return ret; } data = memdup_user((void __user *)(arg + minsz), -- cgit v1.2.3 From fcf6bddebfe03574041d7bb5f7e3f18e6b46a426 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 30 Sep 2025 21:24:45 +0300 Subject: drm/dp: Add quirk for Synaptics DSC throughput link-bpp limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Synaptics MST branch devices have a problem decompressing a stream with a compressed link-bpp higher than 12, if the pixel clock is higher than ~50% of the maximum throughput capability reported by the branch device. The screen either remains blank or, for some mostly black content, gets enabled but may still show jitter artifacts. At least the following docking stations are affected, based on testing with both Intel devices and the UCD-500 reference device as a source: - DELL WD19DCS, DELL WD19TB3, DELL WD22TB4 - ThinkPad 40AN - HP G2 At least the following docking stations are free from this problem, based on tests with a source/sink/mode etc. configuration matching the test cases used above: - DELL Dual Charge HD22Q, DELL WD25TB5 - ThinkPad 40B0 - Anker 565 All the affected devices have an older version of the Synaptics MST branch device (Panamera), whereas all the non-affected docking stations have a newer branch device (at least Synaptics Panamera with a higher HW revision number and Synaptics Cayenne models). Add the required quirk entries accordingly. The quirk will be handled by the i915/xe drivers in a follow-up change. The latest firmware version of the Synaptics branch device for all the affected devices tested above is 5.7 (as reported at DPCD address 0x50a/0x50b). For the DELL devices this corresponds to the latest 01.00.14.01.A03 firmware package version of the docking station. v2: - Document the DP_DPCD_QUIRK_DSC_THROUGHPUT_BPP_LIMIT enum. - Describe the quirk in more detail in the dpcd_quirk_list. v3: - s/Panarema/Panamera in the commit log. 
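For reference, a source driver would typically gate on the quirk via the existing drm_dp_has_quirk() helper; the clamping policy below is an illustrative sketch, not the actual i915/xe follow-up (desc, pixel_rate_khz, max_throughput_khz and max_compressed_bpp are assumed locals):

  /* limit the compressed bpp to 12 above ~50% of the branch throughput */
  if (drm_dp_has_quirk(&desc, DP_DPCD_QUIRK_DSC_THROUGHPUT_BPP_LIMIT) &&
      pixel_rate_khz > max_throughput_khz / 2)
          max_compressed_bpp = min(max_compressed_bpp, 12);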
Cc: dri-devel@lists.freedesktop.org Reported-by: Vidya Srinivas Reported-and-tested-by: Swati Sharma Reviewed-by: Ville Syrjälä Acked-by: Maarten Lankhorst Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250930182450.563016-2-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_helper.c | 4 ++++ include/drm/display/drm_dp_helper.h | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index 4aaeae4fa03c..4cbcb685f562 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -2543,6 +2543,10 @@ static const struct dpcd_quirk dpcd_quirk_list[] = { { OUI(0x00, 0x0C, 0xE7), DEVICE_ID_ANY, false, BIT(DP_DPCD_QUIRK_HBLANK_EXPANSION_REQUIRES_DSC) }, /* Apple MacBookPro 2017 15 inch eDP Retina panel reports too low DP_MAX_LINK_RATE */ { OUI(0x00, 0x10, 0xfa), DEVICE_ID(101, 68, 21, 101, 98, 97), false, BIT(DP_DPCD_QUIRK_CAN_DO_MAX_LINK_RATE_3_24_GBPS) }, + /* Synaptics Panamera supports only a compressed bpp of 12 above 50% of its max DSC pixel throughput */ + { OUI(0x90, 0xCC, 0x24), DEVICE_ID('S', 'Y', 'N', 'A', 0x53, 0x22), true, BIT(DP_DPCD_QUIRK_DSC_THROUGHPUT_BPP_LIMIT) }, + { OUI(0x90, 0xCC, 0x24), DEVICE_ID('S', 'Y', 'N', 'A', 0x53, 0x31), true, BIT(DP_DPCD_QUIRK_DSC_THROUGHPUT_BPP_LIMIT) }, + { OUI(0x90, 0xCC, 0x24), DEVICE_ID('S', 'Y', 'N', 'A', 0x53, 0x33), true, BIT(DP_DPCD_QUIRK_DSC_THROUGHPUT_BPP_LIMIT) }, }; #undef OUI diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 87caa4f1fdb8..558f5ce924ac 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -820,6 +820,15 @@ enum drm_dp_quirk { * requires enabling DSC. */ DP_DPCD_QUIRK_HBLANK_EXPANSION_REQUIRES_DSC, + /** + * @DP_DPCD_QUIRK_DSC_THROUGHPUT_BPP_LIMIT: + * + * The device doesn't support DSC decompression at the maximum DSC + * pixel throughput and compressed bpp it indicates via its DPCD DSC + * capabilities. The compressed bpp must be limited above a device + * specific DSC pixel throughput. + */ + DP_DPCD_QUIRK_DSC_THROUGHPUT_BPP_LIMIT, }; /** -- cgit v1.2.3 From 1f95871207db4439a3116e9a86f5b5658a5157c4 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 30 Sep 2025 21:24:46 +0300 Subject: drm/dp: Add helpers to query the branch DSC max throughput/line-width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add helpers to query the DP DSC sink device's per-slice throughput as well as a DSC branch device's overall throughput and line-width capabilities. v2 (Ville): - Rename pixel_clock to peak_pixel_rate, document what the value means in case of MST tiled displays. - Fix name of drm_dp_dsc_branch_max_slice_throughput() to drm_dp_dsc_sink_max_slice_throughput(). v3: - Fix the DSC branch device minimum valid line width value from 2560 to 5120 pixels. - Fix drm_dp_dsc_sink_max_slice_throughput()'s pixel_clock parameter name to peak_pixel_rate in header file. - Add handling for throughput mode 0 granular delta, defined by DP Standard v2.1a. v4: - Remove the default switch case in drm_dp_dsc_sink_max_slice_throughput(), which is unreachable in the current code. 
(Ville) Cc: dri-devel@lists.freedesktop.org Suggested-by: Ville Syrjälä Reported-and-tested-by: Swati Sharma Reviewed-by: Ville Syrjälä Reviewed-by: Dmitry Baryshkov Acked-by: Maarten Lankhorst Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250930182450.563016-3-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_helper.c | 152 ++++++++++++++++++++++++++++++++ include/drm/display/drm_dp.h | 3 + include/drm/display/drm_dp_helper.h | 5 ++ 3 files changed, 160 insertions(+) diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index 4cbcb685f562..1ebed7c41b0d 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -2836,6 +2836,158 @@ int drm_dp_dsc_sink_supported_input_bpcs(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_S } EXPORT_SYMBOL(drm_dp_dsc_sink_supported_input_bpcs); +/* + * See DP Standard v2.1a 2.8.4 Minimum Slices/Display, Table 2-159 and + * Appendix L.1 Derivation of Slice Count Requirements. + */ +static int dsc_sink_min_slice_throughput(int peak_pixel_rate) +{ + if (peak_pixel_rate >= 4800000) + return 600000; + else if (peak_pixel_rate >= 2700000) + return 400000; + else + return 340000; +} + +/** + * drm_dp_dsc_sink_max_slice_throughput() - Get a DSC sink's maximum pixel throughput per slice + * @dsc_dpcd: DSC sink's capabilities from DPCD + * @peak_pixel_rate: Cumulative peak pixel rate in kHz + * @is_rgb_yuv444: The mode is either RGB or YUV444 + * + * Return the DSC sink device's maximum pixel throughput per slice, based on + * the device's @dsc_dpcd capabilities, the @peak_pixel_rate of the transferred + * stream(s) and whether the output format @is_rgb_yuv444 or yuv422/yuv420. + * + * Note that @peak_pixel_rate is the total pixel rate transferred to the same + * DSC/display sink. For instance to calculate a tile's slice count of an MST + * multi-tiled display sink (not considering here the required + * rounding/alignment of slice count):: + * + * @peak_pixel_rate = tile_pixel_rate * tile_count + * total_slice_count = @peak_pixel_rate / drm_dp_dsc_sink_max_slice_throughput(@peak_pixel_rate) + * tile_slice_count = total_slice_count / tile_count + * + * Returns: + * The maximum pixel throughput per slice supported by the DSC sink device + * in kPixels/sec. + */ +int drm_dp_dsc_sink_max_slice_throughput(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE], + int peak_pixel_rate, bool is_rgb_yuv444) +{ + int throughput; + int delta = 0; + int base; + + throughput = dsc_dpcd[DP_DSC_PEAK_THROUGHPUT - DP_DSC_SUPPORT]; + + if (is_rgb_yuv444) { + throughput = (throughput & DP_DSC_THROUGHPUT_MODE_0_MASK) >> + DP_DSC_THROUGHPUT_MODE_0_SHIFT; + + delta = ((dsc_dpcd[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT]) & + DP_DSC_THROUGHPUT_MODE_0_DELTA_MASK) >> + DP_DSC_THROUGHPUT_MODE_0_DELTA_SHIFT; /* in units of 2 MPixels/sec */ + delta *= 2000; + } else { + throughput = (throughput & DP_DSC_THROUGHPUT_MODE_1_MASK) >> + DP_DSC_THROUGHPUT_MODE_1_SHIFT; + } + + switch (throughput) { + case 0: + return dsc_sink_min_slice_throughput(peak_pixel_rate); + case 1: + base = 340000; + break; + case 2 ... 
14: + base = 400000 + 50000 * (throughput - 2); + break; + case 15: + base = 170000; + break; + } + + return base + delta; +} +EXPORT_SYMBOL(drm_dp_dsc_sink_max_slice_throughput); + +static u8 dsc_branch_dpcd_cap(const u8 dpcd[DP_DSC_BRANCH_CAP_SIZE], int reg) +{ + return dpcd[reg - DP_DSC_BRANCH_OVERALL_THROUGHPUT_0]; +} + +/** + * drm_dp_dsc_branch_max_overall_throughput() - Branch device's max overall DSC pixel throughput + * @dsc_branch_dpcd: DSC branch capabilities from DPCD + * @is_rgb_yuv444: The mode is either RGB or YUV444 + * + * Return the branch device's maximum overall DSC pixel throughput, based on + * the device's DPCD DSC branch capabilities, and whether the output + * format @is_rgb_yuv444 or yuv422/yuv420. + * + * Returns: + * - 0: The maximum overall throughput capability is not indicated by + * the device separately and it must be determined from the per-slice + * max throughput (see @drm_dp_dsc_branch_slice_max_throughput()) + * and the maximum slice count supported by the device. + * - > 0: The maximum overall DSC pixel throughput supported by the branch + * device in kPixels/sec. + */ +int drm_dp_dsc_branch_max_overall_throughput(const u8 dsc_branch_dpcd[DP_DSC_BRANCH_CAP_SIZE], + bool is_rgb_yuv444) +{ + int throughput; + + if (is_rgb_yuv444) + throughput = dsc_branch_dpcd_cap(dsc_branch_dpcd, + DP_DSC_BRANCH_OVERALL_THROUGHPUT_0); + else + throughput = dsc_branch_dpcd_cap(dsc_branch_dpcd, + DP_DSC_BRANCH_OVERALL_THROUGHPUT_1); + + switch (throughput) { + case 0: + return 0; + case 1: + return 680000; + default: + return 600000 + 50000 * throughput; + } +} +EXPORT_SYMBOL(drm_dp_dsc_branch_max_overall_throughput); + +/** + * drm_dp_dsc_branch_max_line_width() - Branch device's max DSC line width + * @dsc_branch_dpcd: DSC branch capabilities from DPCD + * + * Return the branch device's maximum overall DSC line width, based on + * the device's @dsc_branch_dpcd capabilities. + * + * Returns: + * - 0: The maximum line width is not indicated by the device + * separately and it must be determined from the maximum + * slice count and slice-width supported by the device. + * - %-EINVAL: The device indicates an invalid maximum line width + * (< 5120 pixels). + * - >= 5120: The maximum line width in pixels. + */ +int drm_dp_dsc_branch_max_line_width(const u8 dsc_branch_dpcd[DP_DSC_BRANCH_CAP_SIZE]) +{ + int line_width = dsc_branch_dpcd_cap(dsc_branch_dpcd, DP_DSC_BRANCH_MAX_LINE_WIDTH); + + switch (line_width) { + case 0: + return 0; + case 1 ... 
15: + return -EINVAL; + default: + return line_width * 320; + } +} +EXPORT_SYMBOL(drm_dp_dsc_branch_max_line_width); + static int drm_dp_read_lttpr_regs(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_CAP_SIZE], int address, u8 *buf, int buf_size) diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index 811e9238a77c..600bca40249b 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -257,6 +257,8 @@ # define DP_DSC_RC_BUF_BLK_SIZE_4 0x1 # define DP_DSC_RC_BUF_BLK_SIZE_16 0x2 # define DP_DSC_RC_BUF_BLK_SIZE_64 0x3 +# define DP_DSC_THROUGHPUT_MODE_0_DELTA_SHIFT 3 /* DP 2.1a, in units of 2 MPixels/sec */ +# define DP_DSC_THROUGHPUT_MODE_0_DELTA_MASK (0x1f << DP_DSC_THROUGHPUT_MODE_0_DELTA_SHIFT) #define DP_DSC_RC_BUF_SIZE 0x063 @@ -1683,6 +1685,7 @@ enum drm_dp_phy { #define DP_BRANCH_OUI_HEADER_SIZE 0xc #define DP_RECEIVER_CAP_SIZE 0xf #define DP_DSC_RECEIVER_CAP_SIZE 0x10 /* DSC Capabilities 0x60 through 0x6F */ +#define DP_DSC_BRANCH_CAP_SIZE 3 #define EDP_PSR_RECEIVER_CAP_SIZE 2 #define EDP_DISPLAY_CTL_CAP_SIZE 5 #define DP_LTTPR_COMMON_CAP_SIZE 8 diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 558f5ce924ac..fc02eb888be8 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -203,6 +203,11 @@ u8 drm_dp_dsc_sink_max_slice_count(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE], u8 drm_dp_dsc_sink_line_buf_depth(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]); int drm_dp_dsc_sink_supported_input_bpcs(const u8 dsc_dpc[DP_DSC_RECEIVER_CAP_SIZE], u8 dsc_bpc[3]); +int drm_dp_dsc_sink_max_slice_throughput(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE], + int peak_pixel_rate, bool is_rgb_yuv444); +int drm_dp_dsc_branch_max_overall_throughput(const u8 dsc_branch_dpcd[DP_DSC_BRANCH_CAP_SIZE], + bool is_rgb_yuv444); +int drm_dp_dsc_branch_max_line_width(const u8 dsc_branch_dpcd[DP_DSC_BRANCH_CAP_SIZE]); static inline bool drm_dp_sink_supports_dsc(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]) -- cgit v1.2.3 From b3797460fd3024f4c0458027f453f52926853e3f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 30 Sep 2025 21:24:47 +0300 Subject: drm/i915/dp: Calculate DSC slice count based on per-slice peak throughput MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the DSC sink device's actual per-slice peak throughput to calculate the minimum number of required DSC slices, falling back to the hard-coded throughput values (as suggested by the DP Standard) if the device's reported throughput value is 0. For now use the minimum of the two throughput values, which is ok, potentially resulting in a higher than required minimum slice count. This doesn't change the current way of using the same minimum throughput value regardless of the RGB/YUV output format used. While at it add a TODO comment for MST tiled displays to calculate the slice count for these based on the total pixel rate of all the tiles. v2: Use drm helpers to query the throughput caps. (Ville) v3: Add TODO comment to account for MST tiled displays. 
(Ville) Cc: Ville Syrjälä Reported-and-tested-by: Swati Sharma Reviewed-by: Ville Syrjälä Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250930182450.563016-4-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2eab591a8ef5..491a804c1f6a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -96,11 +96,6 @@ #include "intel_vdsc.h" #include "intel_vrr.h" -/* DP DSC throughput values used for slice count calculations KPixels/s */ -#define DP_DSC_PEAK_PIXEL_RATE 2720000 -#define DP_DSC_MAX_ENC_THROUGHPUT_0 340000 -#define DP_DSC_MAX_ENC_THROUGHPUT_1 400000 - /* Max DSC line buffer depth supported by HW. */ #define INTEL_DP_DSC_MAX_LINE_BUF_DEPTH 13 @@ -1018,13 +1013,29 @@ u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector, struct intel_display *display = to_intel_display(connector); u8 min_slice_count, i; int max_slice_width; + int tp_rgb_yuv444; + int tp_yuv422_420; - if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE) - min_slice_count = DIV_ROUND_UP(mode_clock, - DP_DSC_MAX_ENC_THROUGHPUT_0); - else - min_slice_count = DIV_ROUND_UP(mode_clock, - DP_DSC_MAX_ENC_THROUGHPUT_1); + /* + * TODO: Pass the total pixel rate of all the streams transferred to + * an MST tiled display, calculate the total slice count for all tiles + * from this and the per-tile slice count from the total slice count. + */ + tp_rgb_yuv444 = drm_dp_dsc_sink_max_slice_throughput(connector->dp.dsc_dpcd, + mode_clock, true); + tp_yuv422_420 = drm_dp_dsc_sink_max_slice_throughput(connector->dp.dsc_dpcd, + mode_clock, false); + + /* + * TODO: Use the throughput value specific to the actual RGB/YUV + * format of the output. + * For now use the smaller of these, which is ok, potentially + * resulting in a higher than required minimum slice count. + * The RGB/YUV444 throughput value should be always either equal + * or smaller than the YUV422/420 value, but let's not depend on + * this assumption. + */ + min_slice_count = DIV_ROUND_UP(mode_clock, min(tp_rgb_yuv444, tp_yuv422_420)); /* * Due to some DSC engine BW limitations, we need to enable second -- cgit v1.2.3 From 8e696c8d2c9d626278c124009492481d7787480f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 30 Sep 2025 21:24:48 +0300 Subject: drm/i915/dp: Pass DPCD device descriptor to intel_dp_get_dsc_sink_cap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the DPCD sink/branch device descriptor along with the is_branch/sink flag to intel_dp_get_dsc_sink_cap(). These will be used by a follow up change to read out the branch device's DSC overall throughput/line width capabilities and to detect a throughput/link-bpp quirk. 
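For reference, the resulting call shape on the MST path (names as introduced in the diff below): the descriptor is read with drm_dp_read_desc() on the decompression AUX and then passed through:

	intel_dp_get_dsc_sink_cap(dpcd_caps[DP_DPCD_REV],
				  &desc, drm_dp_is_branch(dpcd_caps),
				  connector);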
Reported-and-tested-by: Swati Sharma Reviewed-by: Ville Syrjälä Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250930182450.563016-5-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 5 ++++- drivers/gpu/drm/i915/display/intel_dp.h | 5 ++++- drivers/gpu/drm/i915/display/intel_dp_mst.c | 9 ++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 491a804c1f6a..2dcc20bcfa0b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4180,7 +4180,9 @@ static void intel_dp_read_dsc_dpcd(struct drm_dp_aux *aux, dsc_dpcd); } -void intel_dp_get_dsc_sink_cap(u8 dpcd_rev, struct intel_connector *connector) +void intel_dp_get_dsc_sink_cap(u8 dpcd_rev, + const struct drm_dp_desc *desc, bool is_branch, + struct intel_connector *connector) { struct intel_display *display = to_intel_display(connector); @@ -4231,6 +4233,7 @@ intel_dp_detect_dsc_caps(struct intel_dp *intel_dp, struct intel_connector *conn connector); else intel_dp_get_dsc_sink_cap(intel_dp->dpcd[DP_DPCD_REV], + &intel_dp->desc, drm_dp_is_branch(intel_dp->dpcd), connector); } diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index f90cfd1dbbd0..b379443e0211 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -12,6 +12,7 @@ enum intel_output_format; enum pipe; enum port; struct drm_connector_state; +struct drm_dp_desc; struct drm_dp_vsc_sdp; struct drm_encoder; struct drm_modeset_acquire_ctx; @@ -199,7 +200,9 @@ bool intel_dp_compute_config_limits(struct intel_dp *intel_dp, bool dsc, struct link_config_limits *limits); -void intel_dp_get_dsc_sink_cap(u8 dpcd_rev, struct intel_connector *connector); +void intel_dp_get_dsc_sink_cap(u8 dpcd_rev, + const struct drm_dp_desc *desc, bool is_branch, + struct intel_connector *connector); bool intel_dp_has_gamut_metadata_dip(struct intel_encoder *encoder); bool intel_dp_link_params_valid(struct intel_dp *intel_dp, int link_rate, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 352f7ef29c28..f2266b265304 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1658,6 +1658,7 @@ intel_dp_mst_read_decompression_port_dsc_caps(struct intel_dp *intel_dp, struct intel_connector *connector) { u8 dpcd_caps[DP_RECEIVER_CAP_SIZE]; + struct drm_dp_desc desc; if (!connector->dp.dsc_decompression_aux) return; @@ -1665,7 +1666,13 @@ intel_dp_mst_read_decompression_port_dsc_caps(struct intel_dp *intel_dp, if (drm_dp_read_dpcd_caps(connector->dp.dsc_decompression_aux, dpcd_caps) < 0) return; - intel_dp_get_dsc_sink_cap(dpcd_caps[DP_DPCD_REV], connector); + if (drm_dp_read_desc(connector->dp.dsc_decompression_aux, &desc, + drm_dp_is_branch(dpcd_caps)) < 0) + return; + + intel_dp_get_dsc_sink_cap(dpcd_caps[DP_DPCD_REV], + &desc, drm_dp_is_branch(dpcd_caps), + connector); } static bool detect_dsc_hblank_expansion_quirk(const struct intel_connector *connector) -- cgit v1.2.3 From a086950670af7336e73c658567bc7f745c552c91 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 30 Sep 2025 21:24:49 +0300 Subject: drm/i915/dp: Verify branch devices' overall pixel throughput/line width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Read out the branch devices' maximum overall DSC pixel throughput and line width 
and verify the mode's corresponding pixel clock and hactive period against these. v2: Use drm helpers to query the throughput/line-width caps. (Ville) Cc: Ville Syrjälä Reported-and-tested-by: Swati Sharma Reviewed-by: Ville Syrjälä Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250930182450.563016-6-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display_types.h | 8 ++++ drivers/gpu/drm/i915/display/intel_dp.c | 51 ++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 029c47743f8b..99d2c3123692 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -552,6 +552,14 @@ struct intel_connector { u8 dsc_hblank_expansion_quirk:1; u8 dsc_decompression_enabled:1; + + struct { + struct { + int rgb_yuv444; + int yuv422_420; + } overall_throughput; + int max_line_width; + } dsc_branch_caps; } dp; struct { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2dcc20bcfa0b..9de9356302f9 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1016,6 +1016,20 @@ u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector, int tp_rgb_yuv444; int tp_yuv422_420; + /* + * TODO: Use the throughput value specific to the actual RGB/YUV + * format of the output. + * The RGB/YUV444 throughput value should be always either equal + * or smaller than the YUV422/420 value, but let's not depend on + * this assumption. + */ + if (mode_clock > max(connector->dp.dsc_branch_caps.overall_throughput.rgb_yuv444, + connector->dp.dsc_branch_caps.overall_throughput.yuv422_420)) + return 0; + + if (mode_hdisplay > connector->dp.dsc_branch_caps.max_line_width) + return 0; + /* * TODO: Pass the total pixel rate of all the streams transferred to * an MST tiled display, calculate the total slice count for all tiles @@ -4180,6 +4194,33 @@ static void intel_dp_read_dsc_dpcd(struct drm_dp_aux *aux, dsc_dpcd); } +static void init_dsc_overall_throughput_limits(struct intel_connector *connector, bool is_branch) +{ + u8 branch_caps[DP_DSC_BRANCH_CAP_SIZE]; + int line_width; + + connector->dp.dsc_branch_caps.overall_throughput.rgb_yuv444 = INT_MAX; + connector->dp.dsc_branch_caps.overall_throughput.yuv422_420 = INT_MAX; + connector->dp.dsc_branch_caps.max_line_width = INT_MAX; + + if (!is_branch) + return; + + if (drm_dp_dpcd_read_data(connector->dp.dsc_decompression_aux, + DP_DSC_BRANCH_OVERALL_THROUGHPUT_0, branch_caps, + sizeof(branch_caps)) != 0) + return; + + connector->dp.dsc_branch_caps.overall_throughput.rgb_yuv444 = + drm_dp_dsc_branch_max_overall_throughput(branch_caps, true) ? : INT_MAX; + + connector->dp.dsc_branch_caps.overall_throughput.yuv422_420 = + drm_dp_dsc_branch_max_overall_throughput(branch_caps, false) ? : INT_MAX; + + line_width = drm_dp_dsc_branch_max_line_width(branch_caps); + connector->dp.dsc_branch_caps.max_line_width = line_width > 0 ? 
line_width : INT_MAX; +} + void intel_dp_get_dsc_sink_cap(u8 dpcd_rev, const struct drm_dp_desc *desc, bool is_branch, struct intel_connector *connector) @@ -4195,6 +4236,8 @@ void intel_dp_get_dsc_sink_cap(u8 dpcd_rev, /* Clear fec_capable to avoid using stale values */ connector->dp.fec_capability = 0; + memset(&connector->dp.dsc_branch_caps, 0, sizeof(connector->dp.dsc_branch_caps)); + if (dpcd_rev < DP_DPCD_REV_14) return; @@ -4209,6 +4252,11 @@ void intel_dp_get_dsc_sink_cap(u8 dpcd_rev, drm_dbg_kms(display->drm, "FEC CAPABILITY: %x\n", connector->dp.fec_capability); + + if (!(connector->dp.dsc_dpcd[0] & DP_DSC_DECOMPRESSION_IS_SUPPORTED)) + return; + + init_dsc_overall_throughput_limits(connector, is_branch); } static void intel_edp_get_dsc_sink_cap(u8 edp_dpcd_rev, struct intel_connector *connector) @@ -4217,6 +4265,9 @@ static void intel_edp_get_dsc_sink_cap(u8 edp_dpcd_rev, struct intel_connector * return; intel_dp_read_dsc_dpcd(connector->dp.dsc_decompression_aux, connector->dp.dsc_dpcd); + + if (connector->dp.dsc_dpcd[0] & DP_DSC_DECOMPRESSION_IS_SUPPORTED) + init_dsc_overall_throughput_limits(connector, false); } static void -- cgit v1.2.3 From 8d677285a363556db4cfb8b7ed54cff0bbc11d6a Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 30 Sep 2025 21:24:50 +0300 Subject: drm/i915/dp: Handle Synaptics DSC throughput link-bpp quirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle the DSC pixel throughput quirk, limiting the compressed link-bpp value for Synaptics Panamera branch devices, working around a blank/unstable output issue observed on docking stations containing these branch devices, when using a mode with a high pixel clock and a high compressed link-bpp value. For now use the same mode clock limit for RGB/YUV444 and YUV422/420 output modes. This may result in limiting the link-bpp value for a YUV422/420 output mode already at a lower than required mode clock. v2: Apply the quirk only when DSC is enabled. v3 (Ville): - Move adjustment of link-bpp within the already existing is_dsc if branch. - Add TODO comment to move the HW revision check as well to the DRM core quirk table. v4: - Fix incorrect fxp_q4_from_int(INT_MAX) vs. INT_MAX return value from dsc_throughput_quirk_max_bpp_x16(). 
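A worked example of the resulting cap, in the .4 fixed-point format used for link bpp (fxp_q4_from_int(12) == 12 << 4 == 192): when the quirk applies and the mode clock is at or above half of the branch device's overall DSC throughput, a computed max_link_bpp_x16 of, say, 256 (16.0 bpp) is clamped down to 192 (12.0 bpp) before link bpp selection; otherwise dsc_throughput_quirk_max_bpp_x16() returns INT_MAX and the limit has no effect.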
Cc: Ville Syrjälä Reported-by: Vidya Srinivas Reported-and-tested-by: Swati Sharma Reviewed-by: Ville Syrjälä Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250930182450.563016-7-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display_types.h | 1 + drivers/gpu/drm/i915/display/intel_dp.c | 57 ++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 99d2c3123692..ca5a87772e93 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -551,6 +551,7 @@ struct intel_connector { u8 fec_capability; u8 dsc_hblank_expansion_quirk:1; + u8 dsc_throughput_quirk:1; u8 dsc_decompression_enabled:1; struct { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 9de9356302f9..8a2fc1fcb5d0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2485,6 +2485,40 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, return 0; } +static int +dsc_throughput_quirk_max_bpp_x16(const struct intel_connector *connector, + const struct intel_crtc_state *crtc_state) +{ + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + + if (!connector->dp.dsc_throughput_quirk) + return INT_MAX; + + /* + * Synaptics Panamera branch devices have a problem decompressing a + * stream with a compressed link-bpp higher than 12, if the pixel + * clock is higher than ~50 % of the maximum overall throughput + * reported by the branch device. Work around this by limiting the + * maximum link bpp for such pixel clocks. + * + * TODO: Use the throughput value specific to the actual RGB/YUV + * format of the output, after determining the pixel clock limit for + * YUV modes. For now use the smaller of the throughput values, which + * may result in limiting the link-bpp value already at a lower than + * required mode clock in case of native YUV422/420 output formats. + * The RGB/YUV444 throughput value should be always either equal or + * smaller than the YUV422/420 value, but let's not depend on this + * assumption. + */ + if (adjusted_mode->crtc_clock < + min(connector->dp.dsc_branch_caps.overall_throughput.rgb_yuv444, + connector->dp.dsc_branch_caps.overall_throughput.yuv422_420) / 2) + return INT_MAX; + + return fxp_q4_from_int(12); +} + /* * Calculate the output link min, max bpp values in limits based on the pipe bpp * range, crtc_state and dsc mode. Return true on success. 
@@ -2516,6 +2550,7 @@ intel_dp_compute_config_link_bpp_limits(struct intel_dp *intel_dp, } else { int dsc_src_min_bpp, dsc_sink_min_bpp, dsc_min_bpp; int dsc_src_max_bpp, dsc_sink_max_bpp, dsc_max_bpp; + int throughput_max_bpp_x16; dsc_src_min_bpp = intel_dp_dsc_min_src_compressed_bpp(); dsc_sink_min_bpp = intel_dp_dsc_sink_min_compressed_bpp(crtc_state); @@ -2530,6 +2565,19 @@ intel_dp_compute_config_link_bpp_limits(struct intel_dp *intel_dp, min(dsc_sink_max_bpp, dsc_src_max_bpp) : dsc_src_max_bpp; max_link_bpp_x16 = min(max_link_bpp_x16, fxp_q4_from_int(dsc_max_bpp)); + + throughput_max_bpp_x16 = dsc_throughput_quirk_max_bpp_x16(connector, crtc_state); + throughput_max_bpp_x16 = clamp(throughput_max_bpp_x16, + limits->link.min_bpp_x16, max_link_bpp_x16); + if (throughput_max_bpp_x16 < max_link_bpp_x16) { + max_link_bpp_x16 = throughput_max_bpp_x16; + + drm_dbg_kms(display->drm, + "[CRTC:%d:%s][CONNECTOR:%d:%s] Decreasing link max bpp to " FXP_Q4_FMT " due to DSC throughput quirk\n", + crtc->base.base.id, crtc->base.name, + connector->base.base.id, connector->base.name, + FXP_Q4_ARGS(max_link_bpp_x16)); + } } limits->link.max_bpp_x16 = max_link_bpp_x16; @@ -4237,6 +4285,7 @@ void intel_dp_get_dsc_sink_cap(u8 dpcd_rev, connector->dp.fec_capability = 0; memset(&connector->dp.dsc_branch_caps, 0, sizeof(connector->dp.dsc_branch_caps)); + connector->dp.dsc_throughput_quirk = false; if (dpcd_rev < DP_DPCD_REV_14) return; @@ -4257,6 +4306,14 @@ void intel_dp_get_dsc_sink_cap(u8 dpcd_rev, return; init_dsc_overall_throughput_limits(connector, is_branch); + + /* + * TODO: Move the HW rev check as well to the DRM core quirk table if + * that's required after clarifying the list of affected devices. + */ + if (drm_dp_has_quirk(desc, DP_DPCD_QUIRK_DSC_THROUGHPUT_BPP_LIMIT) && + desc->ident.hw_rev == 0x10) + connector->dp.dsc_throughput_quirk = true; } static void intel_edp_get_dsc_sink_cap(u8 edp_dpcd_rev, struct intel_connector *connector) -- cgit v1.2.3 From defa5d7548e8569c4c06e7037a2d4110d0a71ac1 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Wed, 1 Oct 2025 13:04:48 -0300 Subject: drm/i915/display: Extract separate AUX PW descriptors In an upcoming change, we will fix an ordering issue between PICA and AUX power wells for Xe2_LPD and later, making sure that the driver acquires PICA power before AUX. As a preparation for that, let's extract separate descriptors for AUX power wells. 
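For illustration, the resulting Xe_LPD+ descriptor list after the extraction (taken from the diff below); as the follow-up change describes, power wells are brought up in descriptor-list order, which is exactly what makes this split useful:

	static const struct i915_power_well_desc_list xelpdp_power_wells[] = {
		I915_PW_DESCRIPTORS(i9xx_power_wells_always_on),
		I915_PW_DESCRIPTORS(icl_power_wells_pw_1),
		I915_PW_DESCRIPTORS(xelpd_power_wells_dc_off),
		I915_PW_DESCRIPTORS(xelpdp_power_wells_main),
		I915_PW_DESCRIPTORS(xelpdp_power_wells_aux),	/* now separate */
	};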
Suggested-by: Imre Deak Reviewed-by: Imre Deak Link: https://lore.kernel.org/r/20251001-pica-power-before-aux-v2-1-6308df4de5a8@intel.com Signed-off-by: Gustavo Sousa --- .../gpu/drm/i915/display/intel_display_power_map.c | 26 ++++++++++++---------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power_map.c b/drivers/gpu/drm/i915/display/intel_display_power_map.c index 39b71fffa2cd..e89f18b7037f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_map.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_map.c @@ -1516,7 +1516,11 @@ static const struct i915_power_well_desc xelpdp_power_wells_main[] = { .ops = &hsw_power_well_ops, .irq_pipe_mask = BIT(PIPE_D), .has_fuses = true, - }, { + }, +}; + +static const struct i915_power_well_desc xelpdp_power_wells_aux[] = { + { .instances = &I915_PW_INSTANCES( I915_PW("AUX_A", &icl_pwdoms_aux_a, .xelpdp.aux_ch = AUX_CH_A), I915_PW("AUX_B", &icl_pwdoms_aux_b, .xelpdp.aux_ch = AUX_CH_B), @@ -1534,6 +1538,7 @@ static const struct i915_power_well_desc_list xelpdp_power_wells[] = { I915_PW_DESCRIPTORS(icl_power_wells_pw_1), I915_PW_DESCRIPTORS(xelpd_power_wells_dc_off), I915_PW_DESCRIPTORS(xelpdp_power_wells_main), + I915_PW_DESCRIPTORS(xelpdp_power_wells_aux), }; I915_DECL_PW_DOMAINS(xe2lpd_pwdoms_pica_tc, @@ -1583,6 +1588,7 @@ static const struct i915_power_well_desc_list xe2lpd_power_wells[] = { I915_PW_DESCRIPTORS(icl_power_wells_pw_1), I915_PW_DESCRIPTORS(xe2lpd_power_wells_dcoff), I915_PW_DESCRIPTORS(xelpdp_power_wells_main), + I915_PW_DESCRIPTORS(xelpdp_power_wells_aux), I915_PW_DESCRIPTORS(xe2lpd_power_wells_pica), }; @@ -1677,16 +1683,6 @@ static const struct i915_power_well_desc xe3lpd_power_wells_main[] = { .ops = &hsw_power_well_ops, .irq_pipe_mask = BIT(PIPE_D), .has_fuses = true, - }, { - .instances = &I915_PW_INSTANCES( - I915_PW("AUX_A", &icl_pwdoms_aux_a, .xelpdp.aux_ch = AUX_CH_A), - I915_PW("AUX_B", &icl_pwdoms_aux_b, .xelpdp.aux_ch = AUX_CH_B), - I915_PW("AUX_TC1", &xelpdp_pwdoms_aux_tc1, .xelpdp.aux_ch = AUX_CH_USBC1), - I915_PW("AUX_TC2", &xelpdp_pwdoms_aux_tc2, .xelpdp.aux_ch = AUX_CH_USBC2), - I915_PW("AUX_TC3", &xelpdp_pwdoms_aux_tc3, .xelpdp.aux_ch = AUX_CH_USBC3), - I915_PW("AUX_TC4", &xelpdp_pwdoms_aux_tc4, .xelpdp.aux_ch = AUX_CH_USBC4), - ), - .ops = &xelpdp_aux_power_well_ops, }, }; @@ -1714,6 +1710,7 @@ static const struct i915_power_well_desc_list xe3lpd_power_wells[] = { I915_PW_DESCRIPTORS(icl_power_wells_pw_1), I915_PW_DESCRIPTORS(xe3lpd_power_wells_dcoff), I915_PW_DESCRIPTORS(xe3lpd_power_wells_main), + I915_PW_DESCRIPTORS(xelpdp_power_wells_aux), I915_PW_DESCRIPTORS(xe2lpd_power_wells_pica), }; @@ -1751,7 +1748,11 @@ static const struct i915_power_well_desc wcl_power_wells_main[] = { .ops = &hsw_power_well_ops, .irq_pipe_mask = BIT(PIPE_C), .has_fuses = true, - }, { + }, +}; + +static const struct i915_power_well_desc wcl_power_wells_aux[] = { + { .instances = &I915_PW_INSTANCES( I915_PW("AUX_A", &icl_pwdoms_aux_a, .xelpdp.aux_ch = AUX_CH_A), I915_PW("AUX_B", &icl_pwdoms_aux_b, .xelpdp.aux_ch = AUX_CH_B), @@ -1767,6 +1768,7 @@ static const struct i915_power_well_desc_list wcl_power_wells[] = { I915_PW_DESCRIPTORS(icl_power_wells_pw_1), I915_PW_DESCRIPTORS(xe3lpd_power_wells_dcoff), I915_PW_DESCRIPTORS(wcl_power_wells_main), + I915_PW_DESCRIPTORS(wcl_power_wells_aux), I915_PW_DESCRIPTORS(xe2lpd_power_wells_pica), }; -- cgit v1.2.3 From df66786e6999e59e822498ec8cdf0bbf691019b7 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Wed, 1 Oct 2025 
13:04:49 -0300 Subject: drm/i915/display: Enable PICA power before AUX According to Bspec, before enabling AUX power, we need to have the "power well containing Aux logic powered up". Starting with Xe2_LPD, such power well is the "PICA" power well, which is managed by the driver on demand. While we did add the mapping of AUX power domains to the PICA power well, we ended up placing its power well descriptor after the descriptor for AUX power. As a result, when enabling power wells for one of the aux power domains, the driver will enable AUX power before PICA power, going against the order specified in Bspec. It appears that issue did not become apparent to us mainly because, luckily, AUX power is brought up after we assert PICA power, even if done in the wrong order; and in enough time for the first AUX transaction to succeed. Furthermore, I have also realized that, in some cases, like driver initialization, PICA power is already up when we need to acquire AUX power. One case where we can observe the incorrect ordering is when the driver is resuming from runtime PM suspend. Here is an excerpt of a dmesg with some extra debug logs extracted from a LNL machine to illustrate the issue: [ +0.000156] xe 0000:00:02.0: [drm:intel_power_well_enable [xe]] enabling AUX_TC1 [ +0.001312] xe 0000:00:02.0: [drm:xelpdp_aux_power_well_enable [xe]] DBG: AUX_CH_USBC1 power status: 0 [ +0.000127] xe 0000:00:02.0: [drm:intel_power_well_enable [xe]] enabling PICA_TC [ +0.001072] xe 0000:00:02.0: [drm:xe2lpd_pica_power_well_enable [xe]] DBG: AUX_CH_USBC1 power status: 1 [ +0.000102] xe 0000:00:02.0: [drm:xe2lpd_pica_power_well_enable [xe]] DBG: AUX_CH_USBC2 power status: 0 [ +0.000090] xe 0000:00:02.0: [drm:xe2lpd_pica_power_well_enable [xe]] DBG: AUX_CH_USBC3 power status: 0 [ +0.000092] xe 0000:00:02.0: [drm:xe2lpd_pica_power_well_enable [xe]] DBG: AUX_CH_USBC4 power status: 0 The first "DBG: ..." line shows that AUX power for TC1 is off after we assert and wait. The remaining lines show that AUX power for TC1 was on after we enabled PICA power and waited for AUX power. It is important that we stay compliant with the spec, so let's fix this by listing the power wells in an order that matches the requirements from Bspec. (As a side note, it would be nice if we could define those dependencies explicitly.) 
After this change, we have:

  [ +0.000146] xe 0000:00:02.0: [drm:intel_power_well_enable [xe]] enabling PICA_TC
  [ +0.001417] xe 0000:00:02.0: [drm:xe2lpd_pica_power_well_enable [xe]] DBG: AUX_CH_USBC1 power status: 0
  [ +0.000116] xe 0000:00:02.0: [drm:xe2lpd_pica_power_well_enable [xe]] DBG: AUX_CH_USBC2 power status: 0
  [ +0.000096] xe 0000:00:02.0: [drm:xe2lpd_pica_power_well_enable [xe]] DBG: AUX_CH_USBC3 power status: 0
  [ +0.000094] xe 0000:00:02.0: [drm:xe2lpd_pica_power_well_enable [xe]] DBG: AUX_CH_USBC4 power status: 0
  [ +0.000095] xe 0000:00:02.0: [drm:intel_power_well_enable [xe]] enabling AUX_TC1
  [ +0.000915] xe 0000:00:02.0: [drm:xelpdp_aux_power_well_enable [xe]] DBG: AUX_CH_USBC1 power status: 1

Bspec: 68967, 68886, 72519
Reviewed-by: Imre Deak
Link: https://lore.kernel.org/r/20251001-pica-power-before-aux-v2-2-6308df4de5a8@intel.com
Signed-off-by: Gustavo Sousa
---
 drivers/gpu/drm/i915/display/intel_display_power_map.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_power_map.c b/drivers/gpu/drm/i915/display/intel_display_power_map.c
index e89f18b7037f..9b49952994ce 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power_map.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power_map.c
@@ -1588,8 +1588,8 @@ static const struct i915_power_well_desc_list xe2lpd_power_wells[] = {
 	I915_PW_DESCRIPTORS(icl_power_wells_pw_1),
 	I915_PW_DESCRIPTORS(xe2lpd_power_wells_dcoff),
 	I915_PW_DESCRIPTORS(xelpdp_power_wells_main),
-	I915_PW_DESCRIPTORS(xelpdp_power_wells_aux),
 	I915_PW_DESCRIPTORS(xe2lpd_power_wells_pica),
+	I915_PW_DESCRIPTORS(xelpdp_power_wells_aux),
 };
 
 /*
@@ -1710,8 +1710,8 @@ static const struct i915_power_well_desc_list xe3lpd_power_wells[] = {
 	I915_PW_DESCRIPTORS(icl_power_wells_pw_1),
 	I915_PW_DESCRIPTORS(xe3lpd_power_wells_dcoff),
 	I915_PW_DESCRIPTORS(xe3lpd_power_wells_main),
-	I915_PW_DESCRIPTORS(xelpdp_power_wells_aux),
 	I915_PW_DESCRIPTORS(xe2lpd_power_wells_pica),
+	I915_PW_DESCRIPTORS(xelpdp_power_wells_aux),
 };
 
 static const struct i915_power_well_desc wcl_power_wells_main[] = {
@@ -1768,8 +1768,8 @@ static const struct i915_power_well_desc_list wcl_power_wells[] = {
 	I915_PW_DESCRIPTORS(icl_power_wells_pw_1),
 	I915_PW_DESCRIPTORS(xe3lpd_power_wells_dcoff),
 	I915_PW_DESCRIPTORS(wcl_power_wells_main),
-	I915_PW_DESCRIPTORS(wcl_power_wells_aux),
 	I915_PW_DESCRIPTORS(xe2lpd_power_wells_pica),
+	I915_PW_DESCRIPTORS(wcl_power_wells_aux),
 };
 
 static void init_power_well_domains(const struct i915_power_well_instance *inst,
-- 
cgit v1.2.3

From ea1a866a30153c743e4b933d2788277bec0016e0 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä
Date: Fri, 19 Sep 2025 21:08:36 +0300
Subject: drm/i915: Use the correct pixel rate to compute wm line time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The line time used for the watermark calculations is supposed to be
based on the plane's adjusted pixel rate, not the pipe's adjusted pixel
rate. The current code will give incorrect answers if plane downscaling
is used.
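A quick worked example with illustrative numbers: a mode with crtc_htotal = 4400 and a pipe pixel rate of 594000 kHz has a line time of 4400 * 1000 / 594000 ~= 7.4 us; if the plane is downscaled so that its adjusted pixel rate doubles to 1188000 kHz, the line time relevant to that plane's watermarks drops to ~3.7 us - a factor the old code, always using the pipe's pixel rate, ignored.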
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919180838.10498-2-ville.syrjala@linux.intel.com Reviewed-by: Vinod Govindapillai --- drivers/gpu/drm/i915/display/skl_watermark.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 9eb28d935757..f73d1d24a488 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -1637,18 +1637,16 @@ skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency, } static uint_fixed_16_16_t -intel_get_linetime_us(const struct intel_crtc_state *crtc_state) +intel_get_linetime_us(const struct intel_crtc_state *crtc_state, + int pixel_rate) { struct intel_display *display = to_intel_display(crtc_state); - u32 pixel_rate; u32 crtc_htotal; uint_fixed_16_16_t linetime_us; if (!crtc_state->hw.active) return u32_to_fixed16(0); - pixel_rate = crtc_state->pixel_rate; - if (drm_WARN_ON(display->drm, pixel_rate == 0)) return u32_to_fixed16(0); @@ -1743,7 +1741,8 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines, wp->plane_blocks_per_line); - wp->linetime_us = fixed16_to_u32_round_up(intel_get_linetime_us(crtc_state)); + wp->linetime_us = fixed16_to_u32_round_up(intel_get_linetime_us(crtc_state, + plane_pixel_rate)); return 0; } -- cgit v1.2.3 From 09f21bee91b2d47c57e8f3ee31632c51b1799806 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 21:08:37 +0300 Subject: drm/i915: Deobfuscate wm linetime calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_get_linetime_us() is a mess. Rewrite it in a straightforward manner. Also the checks for the !active and pixel_rate==0 are completely pointless here since we know that the plane is visible. 
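One note on the rewrite below: DIV_ROUND_UP(a, b) is an exact ceil(a / b), whereas the old fixed16_to_u32_round_up(div_fixed16(a, b)) form goes through a truncating 16.16 intermediate and can land one below the true ceiling once pixel_rate exceeds 2^16 kHz; so the new code matches the old to within at most 1 us, erring, if anything, on the conservative side.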
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250919180838.10498-3-ville.syrjala@linux.intel.com Reviewed-by: Vinod Govindapillai --- drivers/gpu/drm/i915/display/skl_watermark.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index f73d1d24a488..93cad11e0b31 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -1636,24 +1636,12 @@ skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency, return ret; } -static uint_fixed_16_16_t +static int intel_get_linetime_us(const struct intel_crtc_state *crtc_state, int pixel_rate) { - struct intel_display *display = to_intel_display(crtc_state); - u32 crtc_htotal; - uint_fixed_16_16_t linetime_us; - - if (!crtc_state->hw.active) - return u32_to_fixed16(0); - - if (drm_WARN_ON(display->drm, pixel_rate == 0)) - return u32_to_fixed16(0); - - crtc_htotal = crtc_state->hw.pipe_mode.crtc_htotal; - linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate); - - return linetime_us; + return DIV_ROUND_UP(crtc_state->hw.pipe_mode.crtc_htotal * 1000, + pixel_rate); } static int @@ -1741,8 +1729,7 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines, wp->plane_blocks_per_line); - wp->linetime_us = fixed16_to_u32_round_up(intel_get_linetime_us(crtc_state, - plane_pixel_rate)); + wp->linetime_us = intel_get_linetime_us(crtc_state, plane_pixel_rate); return 0; } -- cgit v1.2.3 From 32fdf8f418d74da3e22a2c1319285010192edc73 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 19 Sep 2025 21:08:38 +0300 Subject: drm/i915: s/intel_get_linetime_us()/skl_wm_linetime_us()/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename intel_get_linetime_us() to skl_wm_linetime_us() to better reflect that it's not meant to be used for anything apart from the watermark calculations. 
Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20250919180838.10498-4-ville.syrjala@linux.intel.com
Reviewed-by: Vinod Govindapillai
---
 drivers/gpu/drm/i915/display/skl_watermark.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c
index 93cad11e0b31..9df9ee137bf9 100644
--- a/drivers/gpu/drm/i915/display/skl_watermark.c
+++ b/drivers/gpu/drm/i915/display/skl_watermark.c
@@ -1636,9 +1636,8 @@ skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency,
 	return ret;
 }
 
-static int
-intel_get_linetime_us(const struct intel_crtc_state *crtc_state,
-		      int pixel_rate)
+static int skl_wm_linetime_us(const struct intel_crtc_state *crtc_state,
+			      int pixel_rate)
 {
 	return DIV_ROUND_UP(crtc_state->hw.pipe_mode.crtc_htotal * 1000,
 			    pixel_rate);
@@ -1729,7 +1728,7 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
 	wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
 					     wp->plane_blocks_per_line);
 
-	wp->linetime_us = intel_get_linetime_us(crtc_state, plane_pixel_rate);
+	wp->linetime_us = skl_wm_linetime_us(crtc_state, plane_pixel_rate);
 
 	return 0;
 }
-- 
cgit v1.2.3

From 2bc98c6f97afaa55d7d1d3b66ea850166180d4fa Mon Sep 17 00:00:00 2001
From: Jouni Högander
Date: Mon, 29 Sep 2025 16:00:02 +0300
Subject: drm/i915/alpm: Compute ALPM parameters into crtc_state->alpm_state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently ALPM parameters are computed directly into
intel_dp->alpm_parameters. This is a problem when compute config ends up
not using the computed state. Fix this by adding ALPM parameters to
intel_crtc_state and computing them there. Copy the needed parameters
(io_wake_lines and fast_wake_lines used by PSR activate/exit) from
crtc_state->alpm_state into intel_dp->psr when they are configured into
the HW.
v3: - enhance commit message v2: - store io/fast wake lines into intel_dp->dp instead of intel_dp->alpm_parameters and do it in intel_psr_enable_locked - rename crtc_state->alpm_parameters -> crtc_state->alpm_state - clarify commit message Signed-off-by: Jouni Högander Reviewed-by: Animesh Manna Link: https://lore.kernel.org/r/20250929130003.28365-1-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_alpm.c | 36 +++++++++++----------- drivers/gpu/drm/i915/display/intel_alpm.h | 2 +- drivers/gpu/drm/i915/display/intel_display_types.h | 21 ++++++++----- drivers/gpu/drm/i915/display/intel_psr.c | 36 ++++++++++++---------- 4 files changed, 53 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index 749119cc0b28..9f4a9d8b4dec 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c @@ -122,7 +122,7 @@ static int _lnl_compute_aux_less_wake_time(const struct intel_crtc_state *crtc_s static int _lnl_compute_aux_less_alpm_params(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) + struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(intel_dp); int aux_less_wake_time, aux_less_wake_lines, silence_period, @@ -144,15 +144,15 @@ _lnl_compute_aux_less_alpm_params(struct intel_dp *intel_dp, if (display->params.psr_safest_params) aux_less_wake_lines = ALPM_CTL_AUX_LESS_WAKE_TIME_MASK; - intel_dp->alpm_parameters.aux_less_wake_lines = aux_less_wake_lines; - intel_dp->alpm_parameters.silence_period_sym_clocks = silence_period; - intel_dp->alpm_parameters.lfps_half_cycle_num_of_syms = lfps_half_cycle; + crtc_state->alpm_state.aux_less_wake_lines = aux_less_wake_lines; + crtc_state->alpm_state.silence_period_sym_clocks = silence_period; + crtc_state->alpm_state.lfps_half_cycle_num_of_syms = lfps_half_cycle; return true; } static bool _lnl_compute_alpm_params(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) + struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(intel_dp); int check_entry_lines; @@ -173,7 +173,7 @@ static bool _lnl_compute_alpm_params(struct intel_dp *intel_dp, if (display->params.psr_safest_params) check_entry_lines = 15; - intel_dp->alpm_parameters.check_entry_lines = check_entry_lines; + crtc_state->alpm_state.check_entry_lines = check_entry_lines; return true; } @@ -204,7 +204,7 @@ static int io_buffer_wake_time(const struct intel_crtc_state *crtc_state) } bool intel_alpm_compute_params(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) + struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(intel_dp); int io_wake_lines, io_wake_time, fast_wake_lines, fast_wake_time; @@ -242,8 +242,8 @@ bool intel_alpm_compute_params(struct intel_dp *intel_dp, io_wake_lines = fast_wake_lines = max_wake_lines; /* According to Bspec lower limit should be set as 7 lines. 
*/ - intel_dp->alpm_parameters.io_wake_lines = max(io_wake_lines, 7); - intel_dp->alpm_parameters.fast_wake_lines = max(fast_wake_lines, 7); + crtc_state->alpm_state.io_wake_lines = max(io_wake_lines, 7); + crtc_state->alpm_state.fast_wake_lines = max(fast_wake_lines, 7); return true; } @@ -293,9 +293,9 @@ void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, adjusted_mode->crtc_vdisplay - context_latency; first_sdp_position = adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vsync_start; if (intel_alpm_aux_less_wake_supported(intel_dp)) - waketime_in_lines = intel_dp->alpm_parameters.io_wake_lines; + waketime_in_lines = crtc_state->alpm_state.io_wake_lines; else - waketime_in_lines = intel_dp->alpm_parameters.aux_less_wake_lines; + waketime_in_lines = crtc_state->alpm_state.aux_less_wake_lines; crtc_state->has_lobf = (context_latency + guardband) > (first_sdp_position + waketime_in_lines); @@ -321,7 +321,7 @@ static void lnl_alpm_configure(struct intel_dp *intel_dp, alpm_ctl = ALPM_CTL_ALPM_ENABLE | ALPM_CTL_ALPM_AUX_LESS_ENABLE | ALPM_CTL_AUX_LESS_SLEEP_HOLD_TIME_50_SYMBOLS | - ALPM_CTL_AUX_LESS_WAKE_TIME(intel_dp->alpm_parameters.aux_less_wake_lines); + ALPM_CTL_AUX_LESS_WAKE_TIME(crtc_state->alpm_state.aux_less_wake_lines); if (intel_dp->as_sdp_supported) { u32 pr_alpm_ctl = PR_ALPM_CTL_ADAPTIVE_SYNC_SDP_POSITION_T1; @@ -339,7 +339,7 @@ static void lnl_alpm_configure(struct intel_dp *intel_dp, } else { alpm_ctl = ALPM_CTL_EXTENDED_FAST_WAKE_ENABLE | - ALPM_CTL_EXTENDED_FAST_WAKE_TIME(intel_dp->alpm_parameters.fast_wake_lines); + ALPM_CTL_EXTENDED_FAST_WAKE_TIME(crtc_state->alpm_state.fast_wake_lines); } if (crtc_state->has_lobf) { @@ -347,7 +347,7 @@ static void lnl_alpm_configure(struct intel_dp *intel_dp, drm_dbg_kms(display->drm, "Link off between frames (LOBF) enabled\n"); } - alpm_ctl |= ALPM_CTL_ALPM_ENTRY_CHECK(intel_dp->alpm_parameters.check_entry_lines); + alpm_ctl |= ALPM_CTL_ALPM_ENTRY_CHECK(crtc_state->alpm_state.check_entry_lines); intel_de_write(display, ALPM_CTL(display, cpu_transcoder), alpm_ctl); mutex_unlock(&intel_dp->alpm_parameters.lock); @@ -375,14 +375,14 @@ void intel_alpm_port_configure(struct intel_dp *intel_dp, PORT_ALPM_CTL_MAX_PHY_SWING_SETUP(15) | PORT_ALPM_CTL_MAX_PHY_SWING_HOLD(0) | PORT_ALPM_CTL_SILENCE_PERIOD( - intel_dp->alpm_parameters.silence_period_sym_clocks); + crtc_state->alpm_state.silence_period_sym_clocks); lfps_ctl_val = PORT_ALPM_LFPS_CTL_LFPS_CYCLE_COUNT(LFPS_CYCLE_COUNT) | PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION( - intel_dp->alpm_parameters.lfps_half_cycle_num_of_syms) | + crtc_state->alpm_state.lfps_half_cycle_num_of_syms) | PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION( - intel_dp->alpm_parameters.lfps_half_cycle_num_of_syms) | + crtc_state->alpm_state.lfps_half_cycle_num_of_syms) | PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION( - intel_dp->alpm_parameters.lfps_half_cycle_num_of_syms); + crtc_state->alpm_state.lfps_half_cycle_num_of_syms); } intel_de_write(display, PORT_ALPM_CTL(port), alpm_ctl_val); diff --git a/drivers/gpu/drm/i915/display/intel_alpm.h b/drivers/gpu/drm/i915/display/intel_alpm.h index a861c20b5d79..53599b464dea 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.h +++ b/drivers/gpu/drm/i915/display/intel_alpm.h @@ -17,7 +17,7 @@ struct intel_crtc; void intel_alpm_init(struct intel_dp *intel_dp); bool intel_alpm_compute_params(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state); + struct intel_crtc_state *crtc_state); void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, 
struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index ca5a87772e93..e316898ab675 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1353,6 +1353,17 @@ struct intel_crtc_state { /* W2 window or 'set context latency' lines */ u16 set_context_latency; + + struct { + u8 io_wake_lines; + u8 fast_wake_lines; + + /* LNL and beyond */ + u8 check_entry_lines; + u8 aux_less_wake_lines; + u8 silence_period_sym_clocks; + u8 lfps_half_cycle_num_of_syms; + } alpm_state; }; enum intel_pipe_crc_source { @@ -1697,6 +1708,9 @@ struct intel_psr { struct delayed_work dc3co_work; u8 entry_setup_frames; + u8 io_wake_lines; + u8 fast_wake_lines; + bool link_ok; bool pkg_c_latency_used; @@ -1856,16 +1870,9 @@ struct intel_dp { bool colorimetry_support; struct { - u8 io_wake_lines; - u8 fast_wake_lines; enum transcoder transcoder; struct mutex lock; - /* LNL and beyond */ - u8 check_entry_lines; - u8 aux_less_wake_lines; - u8 silence_period_sym_clocks; - u8 lfps_half_cycle_num_of_syms; bool lobf_disable_debug; bool sink_alpm_error; } alpm_parameters; diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index f7115969b4c5..2131473cead6 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -956,15 +956,16 @@ static u32 intel_psr2_get_tp_time(struct intel_dp *intel_dp) return val; } -static int psr2_block_count_lines(struct intel_dp *intel_dp) +static int +psr2_block_count_lines(u8 io_wake_lines, u8 fast_wake_lines) { - return intel_dp->alpm_parameters.io_wake_lines < 9 && - intel_dp->alpm_parameters.fast_wake_lines < 9 ? 8 : 12; + return io_wake_lines < 9 && fast_wake_lines < 9 ? 
8 : 12; } static int psr2_block_count(struct intel_dp *intel_dp) { - return psr2_block_count_lines(intel_dp) / 4; + return psr2_block_count_lines(intel_dp->psr.io_wake_lines, + intel_dp->psr.fast_wake_lines) / 4; } static u8 frames_before_su_entry(struct intel_dp *intel_dp) @@ -1059,20 +1060,20 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) */ int tmp; - tmp = map[intel_dp->alpm_parameters.io_wake_lines - + tmp = map[intel_dp->psr.io_wake_lines - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES]; val |= TGL_EDP_PSR2_IO_BUFFER_WAKE(tmp + TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES); - tmp = map[intel_dp->alpm_parameters.fast_wake_lines - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES]; + tmp = map[intel_dp->psr.fast_wake_lines - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES]; val |= TGL_EDP_PSR2_FAST_WAKE(tmp + TGL_EDP_PSR2_FAST_WAKE_MIN_LINES); } else if (DISPLAY_VER(display) >= 20) { - val |= LNL_EDP_PSR2_IO_BUFFER_WAKE(intel_dp->alpm_parameters.io_wake_lines); + val |= LNL_EDP_PSR2_IO_BUFFER_WAKE(intel_dp->psr.io_wake_lines); } else if (DISPLAY_VER(display) >= 12) { - val |= TGL_EDP_PSR2_IO_BUFFER_WAKE(intel_dp->alpm_parameters.io_wake_lines); - val |= TGL_EDP_PSR2_FAST_WAKE(intel_dp->alpm_parameters.fast_wake_lines); + val |= TGL_EDP_PSR2_IO_BUFFER_WAKE(intel_dp->psr.io_wake_lines); + val |= TGL_EDP_PSR2_FAST_WAKE(intel_dp->psr.fast_wake_lines); } else if (DISPLAY_VER(display) >= 9) { - val |= EDP_PSR2_IO_BUFFER_WAKE(intel_dp->alpm_parameters.io_wake_lines); - val |= EDP_PSR2_FAST_WAKE(intel_dp->alpm_parameters.fast_wake_lines); + val |= EDP_PSR2_IO_BUFFER_WAKE(intel_dp->psr.io_wake_lines); + val |= EDP_PSR2_FAST_WAKE(intel_dp->psr.fast_wake_lines); } if (intel_dp->psr.req_psr2_sdp_prior_scanline) @@ -1370,11 +1371,12 @@ static bool wake_lines_fit_into_vblank(struct intel_dp *intel_dp, int wake_lines; if (aux_less) - wake_lines = intel_dp->alpm_parameters.aux_less_wake_lines; + wake_lines = crtc_state->alpm_state.aux_less_wake_lines; else wake_lines = DISPLAY_VER(display) < 20 ? 
-		psr2_block_count_lines(intel_dp) :
-		intel_dp->alpm_parameters.io_wake_lines;
+		psr2_block_count_lines(crtc_state->alpm_state.io_wake_lines,
+				       crtc_state->alpm_state.fast_wake_lines) :
+		crtc_state->alpm_state.io_wake_lines;
 
 	if (crtc_state->req_psr2_sdp_prior_scanline)
 		vblank -= 1;
@@ -1387,7 +1389,7 @@
 }
 
 static bool alpm_config_valid(struct intel_dp *intel_dp,
-			      const struct intel_crtc_state *crtc_state,
+			      struct intel_crtc_state *crtc_state,
 			      bool aux_less)
 {
 	struct intel_display *display = to_intel_display(intel_dp);
@@ -1592,7 +1594,7 @@ static bool _psr_compute_config(struct intel_dp *intel_dp,
 
 static bool
 _panel_replay_compute_config(struct intel_dp *intel_dp,
-			     const struct intel_crtc_state *crtc_state,
+			     struct intel_crtc_state *crtc_state,
 			     const struct drm_connector_state *conn_state)
 {
 	struct intel_display *display = to_intel_display(intel_dp);
@@ -2022,6 +2024,8 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp,
 		crtc_state->req_psr2_sdp_prior_scanline;
 	intel_dp->psr.active_non_psr_pipes = crtc_state->active_non_psr_pipes;
 	intel_dp->psr.pkg_c_latency_used = crtc_state->pkg_c_latency_used;
+	intel_dp->psr.io_wake_lines = crtc_state->alpm_state.io_wake_lines;
+	intel_dp->psr.fast_wake_lines = crtc_state->alpm_state.fast_wake_lines;
 
 	if (!psr_interrupt_error_check(intel_dp))
 		return;
-- 
cgit v1.2.3

From 0676602f845313ff2ea0a44a39488e5f6b8720d5 Mon Sep 17 00:00:00 2001
From: Jouni Högander
Date: Mon, 29 Sep 2025 16:00:03 +0300
Subject: drm/i915/alpm: Remove parameters suffix from intel_dp->alpm_parameters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that intel_dp->alpm_parameters doesn't really contain any parameters
anymore, it doesn't make sense to keep calling it alpm_parameters ->
remove the parameters suffix.

Signed-off-by: Jouni Högander
Reviewed-by: Animesh Manna
Link: https://lore.kernel.org/r/20250929130003.28365-2-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_alpm.c          | 26 +++++++++++-----------
 drivers/gpu/drm/i915/display/intel_display_types.h |  2 +-
 drivers/gpu/drm/i915/display/intel_dp.c            |  2 +-
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c
index 9f4a9d8b4dec..6372f533f65b 100644
--- a/drivers/gpu/drm/i915/display/intel_alpm.c
+++ b/drivers/gpu/drm/i915/display/intel_alpm.c
@@ -49,7 +49,7 @@ void intel_alpm_init(struct intel_dp *intel_dp)
 		return;
 
 	intel_dp->alpm_dpcd = dpcd;
-	mutex_init(&intel_dp->alpm_parameters.lock);
+	mutex_init(&intel_dp->alpm.lock);
 }
 
 static int get_silence_period_symbols(const struct intel_crtc_state *crtc_state)
@@ -257,12 +257,12 @@ void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp,
 	int waketime_in_lines, first_sdp_position;
 	int context_latency, guardband;
 
-	if (intel_dp->alpm_parameters.lobf_disable_debug) {
+	if (intel_dp->alpm.lobf_disable_debug) {
 		drm_dbg_kms(display->drm, "LOBF is disabled by debug flag\n");
 		return;
 	}
 
-	if (intel_dp->alpm_parameters.sink_alpm_error)
+	if (intel_dp->alpm.sink_alpm_error)
 		return;
 
 	if (!intel_dp_is_edp(intel_dp))
@@ -312,7 +312,7 @@ static void lnl_alpm_configure(struct intel_dp *intel_dp,
 		    !crtc_state->has_lobf))
 		return;
 
-	mutex_lock(&intel_dp->alpm_parameters.lock);
+	mutex_lock(&intel_dp->alpm.lock);
 	/*
 	 * Panel Replay on eDP is always using ALPM aux less. I.e. no need to
	 * check panel support at this point.
@@ -350,14 +350,14 @@ static void lnl_alpm_configure(struct intel_dp *intel_dp, alpm_ctl |= ALPM_CTL_ALPM_ENTRY_CHECK(crtc_state->alpm_state.check_entry_lines); intel_de_write(display, ALPM_CTL(display, cpu_transcoder), alpm_ctl); - mutex_unlock(&intel_dp->alpm_parameters.lock); + mutex_unlock(&intel_dp->alpm.lock); } void intel_alpm_configure(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { lnl_alpm_configure(intel_dp, crtc_state); - intel_dp->alpm_parameters.transcoder = crtc_state->cpu_transcoder; + intel_dp->alpm.transcoder = crtc_state->cpu_transcoder; } void intel_alpm_port_configure(struct intel_dp *intel_dp, @@ -420,10 +420,10 @@ void intel_alpm_pre_plane_update(struct intel_atomic_state *state, continue; if (old_crtc_state->has_lobf) { - mutex_lock(&intel_dp->alpm_parameters.lock); + mutex_lock(&intel_dp->alpm.lock); intel_de_write(display, ALPM_CTL(display, cpu_transcoder), 0); drm_dbg_kms(display->drm, "Link off between frames (LOBF) disabled\n"); - mutex_unlock(&intel_dp->alpm_parameters.lock); + mutex_unlock(&intel_dp->alpm.lock); } } } @@ -517,7 +517,7 @@ i915_edp_lobf_debug_get(void *data, u64 *val) struct intel_connector *connector = data; struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - *val = intel_dp->alpm_parameters.lobf_disable_debug; + *val = intel_dp->alpm.lobf_disable_debug; return 0; } @@ -528,7 +528,7 @@ i915_edp_lobf_debug_set(void *data, u64 val) struct intel_connector *connector = data; struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - intel_dp->alpm_parameters.lobf_disable_debug = val; + intel_dp->alpm.lobf_disable_debug = val; return 0; } @@ -556,12 +556,12 @@ void intel_alpm_lobf_debugfs_add(struct intel_connector *connector) void intel_alpm_disable(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - enum transcoder cpu_transcoder = intel_dp->alpm_parameters.transcoder; + enum transcoder cpu_transcoder = intel_dp->alpm.transcoder; if (DISPLAY_VER(display) < 20 || !intel_dp->alpm_dpcd) return; - mutex_lock(&intel_dp->alpm_parameters.lock); + mutex_lock(&intel_dp->alpm.lock); intel_de_rmw(display, ALPM_CTL(display, cpu_transcoder), ALPM_CTL_ALPM_ENABLE | ALPM_CTL_LOBF_ENABLE | @@ -572,7 +572,7 @@ void intel_alpm_disable(struct intel_dp *intel_dp) PORT_ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); drm_dbg_kms(display->drm, "Disabling ALPM\n"); - mutex_unlock(&intel_dp->alpm_parameters.lock); + mutex_unlock(&intel_dp->alpm.lock); } bool intel_alpm_get_error(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index e316898ab675..87b7cec35320 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1875,7 +1875,7 @@ struct intel_dp { bool lobf_disable_debug; bool sink_alpm_error; - } alpm_parameters; + } alpm; u8 alpm_dpcd; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 8a2fc1fcb5d0..a723e846321f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5675,7 +5675,7 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) if (intel_alpm_get_error(intel_dp)) { intel_alpm_disable(intel_dp); - intel_dp->alpm_parameters.sink_alpm_error = true; + intel_dp->alpm.sink_alpm_error = true; } if (intel_dp_test_short_pulse(intel_dp)) -- cgit v1.2.3 From 604b5ee4a653a70979ce689dbd6a5d942eb016bf Mon Sep 17 00:00:00 2001 From: Zhanjun Dong 
Date: Mon, 29 Sep 2025 11:29:04 -0400 Subject: drm/i915/guc: Skip communication warning on reset in progress The GuC IRQ and tasklet handler receive just a single G2H message and let the remaining messages be received by the next tasklet. If a reset starts during this chained tasklet processing, communication will be disabled. Skip the warning in that condition. Fixes: 65dd4ed0f4e1 ("drm/i915/guc: Don't receive all G2H messages in irq handler") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15018 Signed-off-by: Zhanjun Dong Reviewed-by: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Link: https://lore.kernel.org/r/20250929152904.269776-1-zhanjun.dong@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 380a11c92d63..5441d2201d19 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -1324,9 +1324,16 @@ static int ct_receive(struct intel_guc_ct *ct) static void ct_try_receive_message(struct intel_guc_ct *ct) { + struct intel_guc *guc = ct_to_guc(ct); int ret; - if (GEM_WARN_ON(!ct->enabled)) + if (!ct->enabled) { + GEM_WARN_ON(!guc_to_gt(guc)->uc.reset_in_progress); + return; + } + + /* When interrupt disabled, message handling is not expected */ + if (!guc->interrupts.enabled) return; ret = ct_receive(ct); -- cgit v1.2.3 From ed1fbee3debbfcf6fd69a69509545ace00118f82 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 28 Aug 2025 11:09:43 +0200 Subject: drm/i915: Disable tracepoints for PREEMPT_RT Luca Abeni reported this: | BUG: scheduling while atomic: kworker/u8:2/15203/0x00000003 | CPU: 1 PID: 15203 Comm: kworker/u8:2 Not tainted 4.19.1-rt3 #10 | Call Trace: | rt_spin_lock+0x3f/0x50 | gen6_read32+0x45/0x1d0 [i915] | g4x_get_vblank_counter+0x36/0x40 [i915] | trace_event_raw_event_i915_pipe_update_start+0x7d/0xf0 [i915] The tracing events, trace_intel_pipe_update_start() among others, use functions that acquire spinlock_t locks, which are transformed into sleeping locks on PREEMPT_RT. A few trace points use intel_get_crtc_scanline(), others use ->get_vblank_counter(), which also might acquire sleeping locks on PREEMPT_RT. At the time the arguments are evaluated within a trace point, preemption is disabled, so the locks must not be acquired on PREEMPT_RT. Based on this I don't see any other way than to disable trace points on PREEMPT_RT. [mlankhorst] The original patch was insufficient, since the tracing infrastructure does not allow for partial disabling of tracepoints. Completely disable tracing for the entire i915 driver on PREEMPT_RT; a separate fix for display tracepoints on xe is added to make those work. Cc: Tvrtko Ursulin Reported-by: Luca Abeni Cc: Steven Rostedt Co-developed-by: Sebastian Andrzej Siewior Acked-by: Jani Nikula Link: https://lore.kernel.org/r/20250828090944.101069-1-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/i915/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 78a45a6681df..6d7800e25e55 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -13,6 +13,11 @@ subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # drivers. Define I915 when building i915. subdir-ccflags-y += -DI915 +# FIXME: Disable tracepoints on i915 for PREEMPT_RT, unfortunately +# it's an all or nothing flag.
You cannot selectively disable +# only some tracepoints. +subdir-ccflags-$(CONFIG_PREEMPT_RT) += -DNOTRACE + subdir-ccflags-y += -I$(src) # Please keep these build lists sorted! -- cgit v1.2.3 From 76e46dbf80918ef0dc8e489f88959bab48b6b9ce Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 29 Aug 2025 15:17:02 +0200 Subject: drm/i915/display: Make intel_crtc_get_vblank_counter safe on PREEMPT_RT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_crtc_accurate_vblank_count takes a spinlock, which we should avoid in tracepoints and debug functions. This also prevents taking the spinlock 2x during the critical section of pipe updates for DSI updates. Acked-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250829131701.15607-2-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/i915/display/intel_crtc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index a187db6df2d3..c0329e132462 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -84,8 +84,13 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) if (!crtc->active) return 0; - if (!vblank->max_vblank_count) - return (u32)drm_crtc_accurate_vblank_count(&crtc->base); + if (!vblank->max_vblank_count) { + /* On preempt-rt we cannot take the vblank spinlock since this function is called from tracepoints */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + return (u32)drm_crtc_vblank_count(&crtc->base); + else + return (u32)drm_crtc_accurate_vblank_count(&crtc->base); + } return crtc->base.funcs->get_vblank_counter(&crtc->base); } -- cgit v1.2.3 From eb4d490729a5fd8dc5a76d334f8d01fec7c14bbe Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 3 Oct 2025 17:57:30 +0300 Subject: drm/i915/frontbuffer: Move bo refcounting into intel_frontbuffer_{get,release}() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently xe's intel_frontbuffer implementation forgets to hold a reference on the bo. This makes the entire thing extremely fragile as the cleanup order now depends on bo references held by other things (namely intel_fb_bo_framebuffer_fini()). Move the bo refcounting to intel_frontbuffer_{get,release}() so that both i915 and xe do this the same way. I first tried to fix this by having xe do the refcounting from its intel_bo_set_frontbuffer() implementation (which is what i915 does currently), but it turns out xe's drm_gem_object_free() can sleep and thus drm_gem_object_put() isn't safe to call while we hold fb_tracking.lock.
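For illustration, a minimal sketch of that hazard (hypothetical lock name, not the actual driver code): dropping what may be the last reference while a spinlock is held runs a potentially sleeping free callback in atomic context.

	spin_lock(&tracking_lock);
	drm_gem_object_put(obj);	/* may invoke a free callback that sleeps -> bug */
	spin_unlock(&tracking_lock);

Taking the bo reference in intel_frontbuffer_get() and dropping it in frontbuffer_release(), both outside the lock, avoids this while still keeping the bo alive for the frontbuffer's lifetime.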
Fixes: 10690b8a49bc ("drm/i915/display: Add intel_fb_bo_framebuffer_fini") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251003145734.7634-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_frontbuffer.c | 10 +++++++++- drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h | 2 -- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 43be5377ddc1..73ed28ac9573 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -270,6 +270,8 @@ static void frontbuffer_release(struct kref *ref) spin_unlock(&display->fb_tracking.lock); i915_active_fini(&front->write); + + drm_gem_object_put(obj); kfree_rcu(front, rcu); } @@ -287,6 +289,8 @@ intel_frontbuffer_get(struct drm_gem_object *obj) if (!front) return NULL; + drm_gem_object_get(obj); + front->obj = obj; kref_init(&front->ref); atomic_set(&front->bits, 0); @@ -299,8 +303,12 @@ intel_frontbuffer_get(struct drm_gem_object *obj) spin_lock(&display->fb_tracking.lock); cur = intel_bo_set_frontbuffer(obj, front); spin_unlock(&display->fb_tracking.lock); - if (cur != front) + + if (cur != front) { + drm_gem_object_put(obj); kfree(front); + } + return cur; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h index b6dc3d1b9bb1..b682969e3a29 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h @@ -89,12 +89,10 @@ i915_gem_object_set_frontbuffer(struct drm_i915_gem_object *obj, if (!front) { RCU_INIT_POINTER(obj->frontbuffer, NULL); - drm_gem_object_put(intel_bo_to_drm_bo(obj)); } else if (rcu_access_pointer(obj->frontbuffer)) { cur = rcu_dereference_protected(obj->frontbuffer, true); kref_get(&cur->ref); } else { - drm_gem_object_get(intel_bo_to_drm_bo(obj)); rcu_assign_pointer(obj->frontbuffer, front); } -- cgit v1.2.3 From 1d1e4ded216017f8febd91332ee337f0e0e79285 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 3 Oct 2025 17:57:31 +0300 Subject: drm/i915/fb: Fix the set_tiling vs. addfb race, again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_frontbuffer_get() is what locks out subsequent set_tiling changes to the bo. Thus the fence vs. modifier check must be done after intel_frontbuffer_get(), or else a concurrent set_tiling ioctl might sneak in and change the fence after the check has been done. Close the race again. See commit dd689287b977 ("drm/i915: Prevent concurrent tiling/framebuffer modifications") for the previous instance. 
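Schematically the fix is purely an ordering change; a condensed sketch, with a hypothetical validate_fence_vs_modifier() helper standing in for the actual check:

	/* before (racy): set_tiling can change the fence between check and lock-out */
	err = validate_fence_vs_modifier(obj, mode_cmd);	/* hypothetical */
	intel_fb->frontbuffer = intel_frontbuffer_get(obj);

	/* after: lock out set_tiling first, then validate */
	intel_fb->frontbuffer = intel_frontbuffer_get(obj);
	err = validate_fence_vs_modifier(obj, mode_cmd);	/* hypothetical */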
v2: Reorder intel_user_framebuffer_destroy() to match the unwind (Jani) Cc: Jouni Högander Reviewed-by: Jani Nikula Fixes: 10690b8a49bc ("drm/i915/display: Add intel_fb_bo_framebuffer_fini") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251003145734.7634-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_fb.c | 38 +++++++++++++++++---------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 69237dabdae8..4dfb3e5fd31e 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -2113,10 +2113,10 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) if (intel_fb_uses_dpt(fb)) intel_dpt_destroy(intel_fb->dpt_vm); - intel_frontbuffer_put(intel_fb->frontbuffer); - intel_fb_bo_framebuffer_fini(intel_fb_bo(fb)); + intel_frontbuffer_put(intel_fb->frontbuffer); + kfree(intel_fb); } @@ -2218,15 +2218,17 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, int ret = -EINVAL; int i; + /* + * intel_frontbuffer_get() must be done before + * intel_fb_bo_framebuffer_init() to avoid set_tiling vs. addfb race. + */ + intel_fb->frontbuffer = intel_frontbuffer_get(obj); + if (!intel_fb->frontbuffer) + return -ENOMEM; + ret = intel_fb_bo_framebuffer_init(fb, obj, mode_cmd); if (ret) - return ret; - - intel_fb->frontbuffer = intel_frontbuffer_get(obj); - if (!intel_fb->frontbuffer) { - ret = -ENOMEM; - goto err; - } + goto err_frontbuffer_put; ret = -EINVAL; if (!drm_any_plane_has_format(display->drm, @@ -2235,7 +2237,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "unsupported pixel format %p4cc / modifier 0x%llx\n", &mode_cmd->pixel_format, mode_cmd->modifier[0]); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } max_stride = intel_fb_max_stride(display, mode_cmd->pixel_format, @@ -2246,7 +2248,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ? "tiled" : "linear", mode_cmd->pitches[0], max_stride); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } /* FIXME need to adjust LINOFF/TILEOFF accordingly. 
*/ @@ -2254,7 +2256,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "plane 0 offset (0x%08x) must be 0\n", mode_cmd->offsets[0]); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } drm_helper_mode_fill_fb_struct(display->drm, fb, info, mode_cmd); @@ -2264,7 +2266,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (mode_cmd->handles[i] != mode_cmd->handles[0]) { drm_dbg_kms(display->drm, "bad plane %d handle\n", i); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } stride_alignment = intel_fb_stride_alignment(fb, i); @@ -2272,7 +2274,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "plane %d pitch (%d) must be at least %u byte aligned\n", i, fb->pitches[i], stride_alignment); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } if (intel_fb_is_gen12_ccs_aux_plane(fb, i)) { @@ -2282,7 +2284,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "ccs aux plane %d pitch (%d) must be %d\n", i, fb->pitches[i], ccs_aux_stride); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } } @@ -2291,7 +2293,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, ret = intel_fill_fb_info(display, intel_fb); if (ret) - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; if (intel_fb_uses_dpt(fb)) { struct i915_address_space *vm; @@ -2317,10 +2319,10 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, err_free_dpt: if (intel_fb_uses_dpt(fb)) intel_dpt_destroy(intel_fb->dpt_vm); +err_bo_framebuffer_fini: + intel_fb_bo_framebuffer_fini(obj); err_frontbuffer_put: intel_frontbuffer_put(intel_fb->frontbuffer); -err: - intel_fb_bo_framebuffer_fini(obj); return ret; } -- cgit v1.2.3 From 89394e03afcda65a4cce494c5e2413f697d19663 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 3 Oct 2025 17:57:32 +0300 Subject: drm/i915/fbdev: Select linear modifier explicitly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we use the implicit modifier fb creation path for fbdev, but as we never call set_tiling on the bo it will always end up as linear anyway. The rest of the code (eg. stride alignment) also assumes that we'll use linear. Just select the linear modifier explicitly. 
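In terms of the fb creation uapi this amounts to opting into explicit modifiers; a condensed sketch of the hunk that follows:

	mode_cmd->flags = DRM_MODE_FB_MODIFIERS;	/* opt in to explicit modifiers */
	mode_cmd->modifier[0] = DRM_FORMAT_MOD_LINEAR;	/* never tiled without set_tiling */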
Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251003145734.7634-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_fbdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 3fbdf75415cc..51d3d87caf43 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -214,12 +214,14 @@ static void intel_fbdev_fill_mode_cmd(struct drm_fb_helper_surface_size *sizes, if (sizes->surface_bpp == 24) sizes->surface_bpp = 32; + mode_cmd->flags = DRM_MODE_FB_MODIFIERS; mode_cmd->width = sizes->surface_width; mode_cmd->height = sizes->surface_height; mode_cmd->pitches[0] = ALIGN(mode_cmd->width * DIV_ROUND_UP(sizes->surface_bpp, 8), 64); mode_cmd->pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); + mode_cmd->modifier[0] = DRM_FORMAT_MOD_LINEAR; } static struct intel_framebuffer * -- cgit v1.2.3 From d76eeea515700c228c251220a5727a38824393c6 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 3 Oct 2025 17:57:33 +0300 Subject: drm/i915/fb: Drop the 'fb' argument from intel_fb_bo_framebuffer_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_fb_bo_framebuffer_init() doesn't do anything with the passed framebuffer. Don't pass it therefore. Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251003145734.7634-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_fb.c | 2 +- drivers/gpu/drm/i915/display/intel_fb_bo.c | 3 +-- drivers/gpu/drm/i915/display/intel_fb_bo.h | 3 +-- drivers/gpu/drm/xe/display/intel_fb_bo.c | 3 +-- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 4dfb3e5fd31e..99823ef42ef1 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -2226,7 +2226,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (!intel_fb->frontbuffer) return -ENOMEM; - ret = intel_fb_bo_framebuffer_init(fb, obj, mode_cmd); + ret = intel_fb_bo_framebuffer_init(obj, mode_cmd); if (ret) goto err_frontbuffer_put; diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.c b/drivers/gpu/drm/i915/display/intel_fb_bo.c index b0e8b89f7ce8..7336d7294a7b 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_bo.c +++ b/drivers/gpu/drm/i915/display/intel_fb_bo.c @@ -18,8 +18,7 @@ void intel_fb_bo_framebuffer_fini(struct drm_gem_object *obj) /* Nothing to do for i915 */ } -int intel_fb_bo_framebuffer_init(struct drm_framebuffer *fb, - struct drm_gem_object *_obj, +int intel_fb_bo_framebuffer_init(struct drm_gem_object *_obj, struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_i915_gem_object *obj = to_intel_bo(_obj); diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.h b/drivers/gpu/drm/i915/display/intel_fb_bo.h index eefcb05a99f0..d775773c6c03 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_bo.h +++ b/drivers/gpu/drm/i915/display/intel_fb_bo.h @@ -14,8 +14,7 @@ struct drm_mode_fb_cmd2; void intel_fb_bo_framebuffer_fini(struct drm_gem_object *obj); -int intel_fb_bo_framebuffer_init(struct drm_framebuffer *fb, - struct drm_gem_object *obj, +int intel_fb_bo_framebuffer_init(struct drm_gem_object *obj, struct drm_mode_fb_cmd2 *mode_cmd); struct drm_gem_object * diff --git 
a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c index ebdb22c9499d..db8b1a27b4de 100644 --- a/drivers/gpu/drm/xe/display/intel_fb_bo.c +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -24,8 +24,7 @@ void intel_fb_bo_framebuffer_fini(struct drm_gem_object *obj) xe_bo_put(bo); } -int intel_fb_bo_framebuffer_init(struct drm_framebuffer *fb, - struct drm_gem_object *obj, +int intel_fb_bo_framebuffer_init(struct drm_gem_object *obj, struct drm_mode_fb_cmd2 *mode_cmd) { struct xe_bo *bo = gem_to_xe_bo(obj); -- cgit v1.2.3 From 324ccdb5ceb9f13f012a9110ef3af79b8f21d861 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 3 Oct 2025 17:57:34 +0300 Subject: drm/i915/wm: Use fb->modifier to check for tiled vs. untiled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no point in checking the bo fence tiling mode when we can just check the fb modifier instead. Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251003145734.7634-6-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/i9xx_wm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c index fd3b7b35f351..b262319bc83d 100644 --- a/drivers/gpu/drm/i915/display/i9xx_wm.c +++ b/drivers/gpu/drm/i915/display/i9xx_wm.c @@ -2295,12 +2295,11 @@ static void i9xx_update_wm(struct intel_display *display) crtc = single_enabled_crtc(display); if (display->platform.i915gm && crtc) { - struct drm_gem_object *obj; - - obj = intel_fb_bo(crtc->base.primary->state->fb); + const struct drm_framebuffer *fb = + crtc->base.primary->state->fb; /* self-refresh seems busted with untiled */ - if (!intel_bo_is_tiled(obj)) + if (fb->modifier == DRM_FORMAT_MOD_LINEAR) crtc = NULL; } -- cgit v1.2.3 From b8459c3b6e96137527dea2164b816bf267867e4a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:23 +0300 Subject: drm/i915: Introduce intel_crtc_enable_changed() and intel_any_crtc_enable_changed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce helpers that determine whether any crtc is changing its enabled state. Will be useful for cdclk stuff.
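As a rough usage sketch (hypothetical caller; the actual cdclk users arrive in later patches), the helpers enable early-out checks of the form:

	/* nothing to do unless some crtc is being enabled or disabled */
	if (!intel_any_crtc_enable_changed(state))
		return 0;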
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-2-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_crtc.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_crtc.h | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index c0329e132462..ea735f76107b 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -753,3 +753,24 @@ void intel_pipe_update_end(struct intel_atomic_state *state, out: intel_psr_unlock(new_crtc_state); } + +bool intel_crtc_enable_changed(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + return old_crtc_state->hw.enable != new_crtc_state->hw.enable; +} + +bool intel_any_crtc_enable_changed(struct intel_atomic_state *state) +{ + const struct intel_crtc_state *old_crtc_state, *new_crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + if (intel_crtc_enable_changed(old_crtc_state, new_crtc_state)) + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/i915/display/intel_crtc.h b/drivers/gpu/drm/i915/display/intel_crtc.h index 8c14ff8b391e..eae88e604e08 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.h +++ b/drivers/gpu/drm/i915/display/intel_crtc.h @@ -58,4 +58,8 @@ void intel_wait_for_vblank_if_active(struct intel_display *display, enum pipe pipe); void intel_crtc_wait_for_next_vblank(struct intel_crtc *crtc); +bool intel_any_crtc_enable_changed(struct intel_atomic_state *state); +bool intel_crtc_enable_changed(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state); + #endif -- cgit v1.2.3 From 663cb3e9b4e814ad3d41ae979e3de49c4f4ba44f Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:24 +0300 Subject: drm/i915: Introduce intel_crtc_active_changed() and intel_any_crtc_active_changed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce helpers that determine whether any crtc is changing its active state. Will be useful for cdclk stuff. 
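The difference from the previous patch's helpers is the field being compared: hw.enable tracks whether the crtc has a configuration at all, while hw.active tracks whether it is actually running. A rough sketch of the distinction, assuming the usual intel_crtc_state layout:

	/* configured but not scanning out, e.g. DPMS off */
	bool dpms_off = crtc_state->hw.enable && !crtc_state->hw.active;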
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-3-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_crtc.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_crtc.h | 3 +++ 2 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index ea735f76107b..7b39c3a5887c 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -774,3 +774,24 @@ bool intel_any_crtc_enable_changed(struct intel_atomic_state *state) return false; } + +bool intel_crtc_active_changed(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + return old_crtc_state->hw.active != new_crtc_state->hw.active; +} + +bool intel_any_crtc_active_changed(struct intel_atomic_state *state) +{ + const struct intel_crtc_state *old_crtc_state, *new_crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + if (intel_crtc_active_changed(old_crtc_state, new_crtc_state)) + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/i915/display/intel_crtc.h b/drivers/gpu/drm/i915/display/intel_crtc.h index eae88e604e08..cee09e7cd3dc 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.h +++ b/drivers/gpu/drm/i915/display/intel_crtc.h @@ -61,5 +61,8 @@ void intel_crtc_wait_for_next_vblank(struct intel_crtc *crtc); bool intel_any_crtc_enable_changed(struct intel_atomic_state *state); bool intel_crtc_enable_changed(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state); +bool intel_any_crtc_active_changed(struct intel_atomic_state *state); +bool intel_crtc_active_changed(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state); #endif -- cgit v1.2.3 From 4d0b11c2105c36f8df6663e1a729e10b8f09ca52 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:25 +0300 Subject: drm/i915/bw: Skip the bw_state->active_pipes update if no pipe is changing its active state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we may end up doing a bunch of redundant bw_state recomputation whenever any modeset happens. Skip a bunch of that by only considering whether any pipe actually changes its active state. 
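Condensed, the entry condition changes from "any modeset happened" (the any_ms flag) to an early bail-out when no pipe toggles its active state (a sketch of the check in the diff below):

	if (!intel_any_crtc_active_changed(state))
		return 0;	/* bw_state->active_pipes cannot have changed */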
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-4-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_bw.c | 25 ++++++++++++------------- drivers/gpu/drm/i915/display/intel_bw.h | 2 +- drivers/gpu/drm/i915/display/intel_display.c | 2 +- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index ac6da20d9529..f05b9a35f17a 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -13,6 +13,7 @@ #include "intel_atomic.h" #include "intel_bw.h" #include "intel_cdclk.h" +#include "intel_crtc.h" #include "intel_display_core.h" #include "intel_display_regs.h" #include "intel_display_types.h" @@ -1530,10 +1531,14 @@ static int intel_bw_modeset_checks(struct intel_atomic_state *state) struct intel_display *display = to_intel_display(state); const struct intel_bw_state *old_bw_state; struct intel_bw_state *new_bw_state; + int ret; if (DISPLAY_VER(display) < 9) return 0; + if (!intel_any_crtc_active_changed(state)) + return 0; + new_bw_state = intel_atomic_get_bw_state(state); if (IS_ERR(new_bw_state)) return PTR_ERR(new_bw_state); @@ -1543,13 +1548,9 @@ static int intel_bw_modeset_checks(struct intel_atomic_state *state) new_bw_state->active_pipes = intel_calc_active_pipes(state, old_bw_state->active_pipes); - if (new_bw_state->active_pipes != old_bw_state->active_pipes) { - int ret; - - ret = intel_atomic_lock_global_state(&new_bw_state->base); - if (ret) - return ret; - } + ret = intel_atomic_lock_global_state(&new_bw_state->base); + if (ret) + return ret; return 0; } @@ -1599,7 +1600,7 @@ static int intel_bw_check_sagv_mask(struct intel_atomic_state *state) return 0; } -int intel_bw_atomic_check(struct intel_atomic_state *state, bool any_ms) +int intel_bw_atomic_check(struct intel_atomic_state *state) { struct intel_display *display = to_intel_display(state); bool changed = false; @@ -1610,11 +1611,9 @@ int intel_bw_atomic_check(struct intel_atomic_state *state, bool any_ms) if (DISPLAY_VER(display) < 9) return 0; - if (any_ms) { - ret = intel_bw_modeset_checks(state); - if (ret) - return ret; - } + ret = intel_bw_modeset_checks(state); + if (ret) + return ret; ret = intel_bw_check_sagv_mask(state); if (ret) diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index d51f50c9d302..c064e80a7a7f 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -28,7 +28,7 @@ intel_atomic_get_bw_state(struct intel_atomic_state *state); void intel_bw_init_hw(struct intel_display *display); int intel_bw_init(struct intel_display *display); -int intel_bw_atomic_check(struct intel_atomic_state *state, bool any_ms); +int intel_bw_atomic_check(struct intel_atomic_state *state); int intel_bw_calc_min_cdclk(struct intel_atomic_state *state, bool *need_cdclk_calc); int intel_bw_min_cdclk(struct intel_display *display, diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b57efd870774..349daf985b98 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6434,7 +6434,7 @@ int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; - ret = intel_bw_atomic_check(state, any_ms); + ret = intel_bw_atomic_check(state); if (ret) goto fail; -- cgit v1.2.3 From 
70f5b655503576588ec037ecb2ab6fe5b2aab8a9 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:26 +0300 Subject: drm/i915/bw: Drop redundant display version checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_bw_modeset_checks() is now only called from intel_bw_atomic_check() which already does the display version check. Drop the redundant check from intel_bw_modeset_checks(). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-5-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_bw.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index f05b9a35f17a..8232b344a88f 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -1528,14 +1528,10 @@ static int intel_bw_check_data_rate(struct intel_atomic_state *state, bool *chan static int intel_bw_modeset_checks(struct intel_atomic_state *state) { - struct intel_display *display = to_intel_display(state); const struct intel_bw_state *old_bw_state; struct intel_bw_state *new_bw_state; int ret; - if (DISPLAY_VER(display) < 9) - return 0; - if (!intel_any_crtc_active_changed(state)) return 0; -- cgit v1.2.3 From 24c78dda6d737924cd89da02e33bdcf8992a1146 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:27 +0300 Subject: drm/i915/cdclk: Extract glk_cdclk_audio_wa_needed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the glk audio w/a check into a small helper. We'll have other uses for this later. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-6-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index b54b1006aeb0..05d9f488895e 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2831,6 +2831,14 @@ static int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_stat return min_cdclk; } +static bool glk_cdclk_audio_wa_needed(struct intel_display *display, + const struct intel_cdclk_state *cdclk_state) +{ + return display->platform.geminilake && + cdclk_state->active_pipes && + !is_power_of_2(cdclk_state->active_pipes); +} + static int intel_compute_min_cdclk(struct intel_atomic_state *state) { struct intel_display *display = to_intel_display(state); @@ -2887,8 +2895,7 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) * by changing the cd2x divider (see glk_cdclk_table[]) and * thus a full modeset won't be needed then. */ - if (display->platform.geminilake && cdclk_state->active_pipes && - !is_power_of_2(cdclk_state->active_pipes)) + if (glk_cdclk_audio_wa_needed(display, cdclk_state)) min_cdclk = max(min_cdclk, 2 * 96000); if (min_cdclk > display->cdclk.max_cdclk_freq) { -- cgit v1.2.3 From 9112ce99c1d7ef9ba51c930913e791fcac824f62 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 26 Sep 2025 11:39:50 +0300 Subject: drm/i915/cdclk: Extract dg2_power_well_count() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the code to determine the DG2 pipe power well count into a small helper.
I'll have other uses for this later. TODO: need to move this power well stuff out from the cdclk code... v2: Don't lose the early return from intel_cdclk_pcode_pre_notify() (kernel test robot) v3: Compare old vs. new, not old vs. old (Jani) Cc: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250926083950.24486-1-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 33 +++++++++++++++++------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 05d9f488895e..3635813ac0af 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2591,6 +2591,12 @@ static void intel_set_cdclk(struct intel_display *display, } } +static int dg2_power_well_count(struct intel_display *display, + const struct intel_cdclk_state *cdclk_state) +{ + return display->platform.dg2 ? hweight8(cdclk_state->active_pipes) : 0; +} + static void intel_cdclk_pcode_pre_notify(struct intel_atomic_state *state) { struct intel_display *display = to_intel_display(state); @@ -2603,16 +2609,16 @@ static void intel_cdclk_pcode_pre_notify(struct intel_atomic_state *state) if (!intel_cdclk_changed(&old_cdclk_state->actual, &new_cdclk_state->actual) && - new_cdclk_state->active_pipes == - old_cdclk_state->active_pipes) + dg2_power_well_count(display, old_cdclk_state) == + dg2_power_well_count(display, new_cdclk_state)) return; /* According to "Sequence Before Frequency Change", voltage level set to 0x3 */ voltage_level = DISPLAY_TO_PCODE_VOLTAGE_MAX; change_cdclk = new_cdclk_state->actual.cdclk != old_cdclk_state->actual.cdclk; - update_pipe_count = hweight8(new_cdclk_state->active_pipes) > - hweight8(old_cdclk_state->active_pipes); + update_pipe_count = dg2_power_well_count(display, new_cdclk_state) > + dg2_power_well_count(display, old_cdclk_state); /* * According to "Sequence Before Frequency Change", @@ -2630,7 +2636,7 @@ static void intel_cdclk_pcode_pre_notify(struct intel_atomic_state *state) * no action if it is decreasing, before the change */ if (update_pipe_count) - num_active_pipes = hweight8(new_cdclk_state->active_pipes); + num_active_pipes = dg2_power_well_count(display, new_cdclk_state); intel_pcode_notify(display, voltage_level, num_active_pipes, cdclk, change_cdclk, update_pipe_count); @@ -2650,8 +2656,8 @@ static void intel_cdclk_pcode_post_notify(struct intel_atomic_state *state) voltage_level = new_cdclk_state->actual.voltage_level; update_cdclk = new_cdclk_state->actual.cdclk != old_cdclk_state->actual.cdclk; - update_pipe_count = hweight8(new_cdclk_state->active_pipes) < - hweight8(old_cdclk_state->active_pipes); + update_pipe_count = dg2_power_well_count(display, new_cdclk_state) < + dg2_power_well_count(display, old_cdclk_state); /* * According to "Sequence After Frequency Change", @@ -2667,7 +2673,7 @@ static void intel_cdclk_pcode_post_notify(struct intel_atomic_state *state) * no action if it is increasing, after the change */ if (update_pipe_count) - num_active_pipes = hweight8(new_cdclk_state->active_pipes); + num_active_pipes = dg2_power_well_count(display, new_cdclk_state); intel_pcode_notify(display, voltage_level, num_active_pipes, cdclk, update_cdclk, update_pipe_count); @@ -3248,15 +3254,14 @@ static bool intel_cdclk_need_serialize(struct intel_display *display, const struct intel_cdclk_state *old_cdclk_state, const struct intel_cdclk_state
*new_cdclk_state) { - bool power_well_cnt_changed = hweight8(old_cdclk_state->active_pipes) != - hweight8(new_cdclk_state->active_pipes); - bool cdclk_changed = intel_cdclk_changed(&old_cdclk_state->actual, - &new_cdclk_state->actual); /* - * We need to poke hw for gen >= 12, because we notify PCode if + * We need to poke hw for DG2, because we notify PCode if * pipe power well count changes. */ - return cdclk_changed || (display->platform.dg2 && power_well_cnt_changed); + return intel_cdclk_changed(&old_cdclk_state->actual, + &new_cdclk_state->actual) || + dg2_power_well_count(display, old_cdclk_state) != + dg2_power_well_count(display, new_cdclk_state); } int intel_modeset_calc_cdclk(struct intel_atomic_state *state) -- cgit v1.2.3 From bcc492d712f53946c4b434e2fad5650637af0c68 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:29 +0300 Subject: drm/i915/cdclk: Introduce intel_cdclk_modeset_checks() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I plan to better decouple the cdclk computation from actual modesets. To that end make the cdclk code self-sufficient in being able to determine if a full cdclk calculation/update is needed or not due to some not strictly cdclk related reason. Currently we have three such things that depend on active_pipes: - cdclk_state->actual - glk audio w/a - dg2 power well stuff Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-8-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 48 ++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 3635813ac0af..ce3d47b6b7aa 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -3187,6 +3187,44 @@ intel_atomic_get_cdclk_state(struct intel_atomic_state *state) return to_intel_cdclk_state(cdclk_state); } +static int intel_cdclk_modeset_checks(struct intel_atomic_state *state, + bool *need_cdclk_calc) +{ + struct intel_display *display = to_intel_display(state); + const struct intel_cdclk_state *old_cdclk_state; + struct intel_cdclk_state *new_cdclk_state; + int ret; + + if (!intel_any_crtc_active_changed(state)) + return 0; + + new_cdclk_state = intel_atomic_get_cdclk_state(state); + if (IS_ERR(new_cdclk_state)) + return PTR_ERR(new_cdclk_state); + + old_cdclk_state = intel_atomic_get_old_cdclk_state(state); + + new_cdclk_state->active_pipes = + intel_calc_active_pipes(state, old_cdclk_state->active_pipes); + + ret = intel_atomic_lock_global_state(&new_cdclk_state->base); + if (ret) + return ret; + + if (!old_cdclk_state->active_pipes != !new_cdclk_state->active_pipes) + *need_cdclk_calc = true; + + if (glk_cdclk_audio_wa_needed(display, old_cdclk_state) != + glk_cdclk_audio_wa_needed(display, new_cdclk_state)) + *need_cdclk_calc = true; + + if (dg2_power_well_count(display, old_cdclk_state) != + dg2_power_well_count(display, new_cdclk_state)) + *need_cdclk_calc = true; + + return 0; +} + int intel_cdclk_atomic_check(struct intel_atomic_state *state, bool *need_cdclk_calc) { @@ -3197,6 +3235,10 @@ int intel_cdclk_atomic_check(struct intel_atomic_state *state, int ret; int i; + ret = intel_cdclk_modeset_checks(state, need_cdclk_calc); + if (ret) + return ret; + /* * active_planes bitmask has been updated, and potentially affected * planes are part of the state.
We can now compute the minimum cdclk @@ -3278,9 +3320,6 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) old_cdclk_state = intel_atomic_get_old_cdclk_state(state); - new_cdclk_state->active_pipes = - intel_calc_active_pipes(state, old_cdclk_state->active_pipes); - ret = intel_cdclk_modeset_calc_cdclk(state); if (ret) return ret; @@ -3293,8 +3332,7 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) ret = intel_atomic_serialize_global_state(&new_cdclk_state->base); if (ret) return ret; - } else if (old_cdclk_state->active_pipes != new_cdclk_state->active_pipes || - old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk || + } else if (old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk || intel_cdclk_changed(&old_cdclk_state->logical, &new_cdclk_state->logical)) { ret = intel_atomic_lock_global_state(&new_cdclk_state->base); -- cgit v1.2.3 From f8dfd916fa9f192aaa4a24b6b5e9c0ac4888bfa9 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:30 +0300 Subject: drm/i915/cdclk: Handle the force_min_cdclk state locking in intel_cdclk_atomic_check() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up the mess inside intel_modeset_calc_cdclk() a bit by moving the intel_atomic_lock_global_state() for force_min_cdclk changes into intel_cdclk_atomic_check(). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-9-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index ce3d47b6b7aa..0ef87e0aaeb1 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -3229,7 +3229,7 @@ int intel_cdclk_atomic_check(struct intel_atomic_state *state, bool *need_cdclk_calc) { const struct intel_cdclk_state *old_cdclk_state; - const struct intel_cdclk_state *new_cdclk_state; + struct intel_cdclk_state *new_cdclk_state; struct intel_plane_state __maybe_unused *plane_state; struct intel_plane *plane; int ret; @@ -3258,8 +3258,13 @@ int intel_cdclk_atomic_check(struct intel_atomic_state *state, new_cdclk_state = intel_atomic_get_new_cdclk_state(state); if (new_cdclk_state && - old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk) + old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk) { + ret = intel_atomic_lock_global_state(&new_cdclk_state->base); + if (ret) + return ret; + *need_cdclk_calc = true; + } return 0; } @@ -3332,8 +3337,7 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) ret = intel_atomic_serialize_global_state(&new_cdclk_state->base); if (ret) return ret; - } else if (old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk || - intel_cdclk_changed(&old_cdclk_state->logical, + } else if (intel_cdclk_changed(&old_cdclk_state->logical, &new_cdclk_state->logical)) { ret = intel_atomic_lock_global_state(&new_cdclk_state->base); if (ret) -- cgit v1.2.3 From dd45d5a615d1b5697bdc21b44d8668bf25d16e48 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:31 +0300 Subject: drm/i915/cdclk: Extract intel_cdclk_update_bw_min_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hide the cdclk state details better by providing a helper 
(intel_cdclk_update_bw_min_cdclk()) by which the bw code can inform the cdclk code about a new bw_min_cdclk value. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-10-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_bw.c | 43 +++++------------------------- drivers/gpu/drm/i915/display/intel_cdclk.c | 28 +++++++++++++++++++ drivers/gpu/drm/i915/display/intel_cdclk.h | 3 +++ 3 files changed, 38 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 8232b344a88f..7499ddec2b14 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -1405,12 +1405,10 @@ int intel_bw_calc_min_cdclk(struct intel_atomic_state *state, struct intel_display *display = to_intel_display(state); struct intel_bw_state *new_bw_state = NULL; const struct intel_bw_state *old_bw_state = NULL; - const struct intel_cdclk_state *cdclk_state; const struct intel_crtc_state *old_crtc_state; const struct intel_crtc_state *new_crtc_state; - int old_min_cdclk, new_min_cdclk; struct intel_crtc *crtc; - int i; + int ret, i; if (DISPLAY_VER(display) < 9) return 0; @@ -1443,39 +1441,12 @@ int intel_bw_calc_min_cdclk(struct intel_atomic_state *state, return ret; } - old_min_cdclk = intel_bw_min_cdclk(display, old_bw_state); - new_min_cdclk = intel_bw_min_cdclk(display, new_bw_state); - - /* - * No need to check against the cdclk state if - * the min cdclk doesn't increase. - * - * Ie. we only ever increase the cdclk due to bandwidth - * requirements. This can reduce back and forth - * display blinking due to constant cdclk changes. - */ - if (new_min_cdclk <= old_min_cdclk) - return 0; - - cdclk_state = intel_atomic_get_cdclk_state(state); - if (IS_ERR(cdclk_state)) - return PTR_ERR(cdclk_state); - - /* - * No need to recalculate the cdclk state if - * the min cdclk doesn't increase. - * - * Ie. we only ever increase the cdclk due to bandwidth - * requirements. This can reduce back and forth - * display blinking due to constant cdclk changes. 
- */ - if (new_min_cdclk <= intel_cdclk_bw_min_cdclk(cdclk_state)) - return 0; - - drm_dbg_kms(display->drm, - "new bandwidth min cdclk (%d kHz) > old min cdclk (%d kHz)\n", - new_min_cdclk, intel_cdclk_bw_min_cdclk(cdclk_state)); - *need_cdclk_calc = true; + ret = intel_cdclk_update_bw_min_cdclk(state, + intel_bw_min_cdclk(display, old_bw_state), + intel_bw_min_cdclk(display, new_bw_state), + need_cdclk_calc); + if (ret) + return ret; return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 0ef87e0aaeb1..c1f00bb7b8d3 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2837,6 +2837,34 @@ static int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_stat return min_cdclk; } +int intel_cdclk_update_bw_min_cdclk(struct intel_atomic_state *state, + int old_min_cdclk, int new_min_cdclk, + bool *need_cdclk_calc) +{ + struct intel_display *display = to_intel_display(state); + struct intel_cdclk_state *cdclk_state; + + if (new_min_cdclk <= old_min_cdclk) + return 0; + + cdclk_state = intel_atomic_get_cdclk_state(state); + if (IS_ERR(cdclk_state)) + return PTR_ERR(cdclk_state); + + old_min_cdclk = cdclk_state->bw_min_cdclk; + + if (new_min_cdclk <= old_min_cdclk) + return 0; + + *need_cdclk_calc = true; + + drm_dbg_kms(display->drm, + "bandwidth min cdclk: %d kHz -> %d kHz\n", + old_min_cdclk, new_min_cdclk); + + return 0; +} + static bool glk_cdclk_audio_wa_needed(struct intel_display *display, const struct intel_cdclk_state *cdclk_state) { diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index cacee598af0e..0e67c75ca569 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -48,6 +48,9 @@ struct intel_cdclk_state * intel_atomic_get_cdclk_state(struct intel_atomic_state *state); void intel_cdclk_update_hw_state(struct intel_display *display); void intel_cdclk_crtc_disable_noatomic(struct intel_crtc *crtc); +int intel_cdclk_update_bw_min_cdclk(struct intel_atomic_state *state, + int old_min_cdclk, int new_min_cdclk, + bool *need_cdclk_calc); #define to_intel_cdclk_state(global_state) \ container_of_const((global_state), struct intel_cdclk_state, base) -- cgit v1.2.3 From 25b0657e7fe849897bb28831860a50c7ab6ad707 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:32 +0300 Subject: drm/i915/cdclk: Extract intel_cdclk_update_crtc_min_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hide the cdclk state details better by providing a helper (intel_cdclk_update_crtc_min_cdclk()) by which the crtc code can inform the cdclk code about a new per-pipe min_cdclk value. Note that this is currently being called once per-plane, but it'll be changed to be just a single call for the whole pipe later. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-11-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 30 ++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_cdclk.h | 4 +++ drivers/gpu/drm/i915/display/intel_plane.c | 44 +++++------------------------- 3 files changed, 41 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index c1f00bb7b8d3..5eae148e114b 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2837,6 +2837,36 @@ static int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_stat return min_cdclk; } +int intel_cdclk_update_crtc_min_cdclk(struct intel_atomic_state *state, + struct intel_crtc *crtc, + int old_min_cdclk, int new_min_cdclk, + bool *need_cdclk_calc) +{ + struct intel_display *display = to_intel_display(state); + struct intel_cdclk_state *cdclk_state; + + if (new_min_cdclk <= old_min_cdclk) + return 0; + + cdclk_state = intel_atomic_get_cdclk_state(state); + if (IS_ERR(cdclk_state)) + return PTR_ERR(cdclk_state); + + old_min_cdclk = cdclk_state->min_cdclk[crtc->pipe]; + + if (new_min_cdclk <= old_min_cdclk) + return 0; + + *need_cdclk_calc = true; + + drm_dbg_kms(display->drm, + "[CRTC:%d:%s] min cdclk: %d kHz -> %d kHz\n", + crtc->base.base.id, crtc->base.name, + old_min_cdclk, new_min_cdclk); + + return 0; +} + int intel_cdclk_update_bw_min_cdclk(struct intel_atomic_state *state, int old_min_cdclk, int new_min_cdclk, bool *need_cdclk_calc) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 0e67c75ca569..25d45c8f059d 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -51,6 +51,10 @@ void intel_cdclk_crtc_disable_noatomic(struct intel_crtc *crtc); int intel_cdclk_update_bw_min_cdclk(struct intel_atomic_state *state, int old_min_cdclk, int new_min_cdclk, bool *need_cdclk_calc); +int intel_cdclk_update_crtc_min_cdclk(struct intel_atomic_state *state, + struct intel_crtc *crtc, + int old_min_cdclk, int new_min_cdclk, + bool *need_cdclk_calc); #define to_intel_cdclk_state(global_state) \ container_of_const((global_state), struct intel_cdclk_state, base) diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index 19d4640c0e53..aceac2ef28a8 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -295,13 +295,12 @@ int intel_plane_calc_min_cdclk(struct intel_atomic_state *state, struct intel_plane *plane, bool *need_cdclk_calc) { - struct intel_display *display = to_intel_display(plane); const struct intel_plane_state *plane_state = intel_atomic_get_new_plane_state(state, plane); struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc); - const struct intel_cdclk_state *cdclk_state; const struct intel_crtc_state *old_crtc_state; struct intel_crtc_state *new_crtc_state; + int ret; if (!plane_state->uapi.visible || !plane->min_cdclk) return 0; @@ -312,41 +311,12 @@ int intel_plane_calc_min_cdclk(struct intel_atomic_state *state, new_crtc_state->min_cdclk[plane->id] = plane->min_cdclk(new_crtc_state, plane_state); - /* - * No need to check against the cdclk state if - * the min cdclk for the plane doesn't increase. - * - * Ie. we only ever increase the cdclk due to plane - * requirements. 
This can reduce back and forth - * display blinking due to constant cdclk changes. - */ - if (new_crtc_state->min_cdclk[plane->id] <= - old_crtc_state->min_cdclk[plane->id]) - return 0; - - cdclk_state = intel_atomic_get_cdclk_state(state); - if (IS_ERR(cdclk_state)) - return PTR_ERR(cdclk_state); - - /* - * No need to recalculate the cdclk state if - * the min cdclk for the pipe doesn't increase. - * - * Ie. we only ever increase the cdclk due to plane - * requirements. This can reduce back and forth - * display blinking due to constant cdclk changes. - */ - if (new_crtc_state->min_cdclk[plane->id] <= - intel_cdclk_min_cdclk(cdclk_state, crtc->pipe)) - return 0; - - drm_dbg_kms(display->drm, - "[PLANE:%d:%s] min cdclk (%d kHz) > [CRTC:%d:%s] min cdclk (%d kHz)\n", - plane->base.base.id, plane->base.name, - new_crtc_state->min_cdclk[plane->id], - crtc->base.base.id, crtc->base.name, - intel_cdclk_min_cdclk(cdclk_state, crtc->pipe)); - *need_cdclk_calc = true; + ret = intel_cdclk_update_crtc_min_cdclk(state, crtc, + old_crtc_state->min_cdclk[plane->id], + new_crtc_state->min_cdclk[plane->id], + need_cdclk_calc); + if (ret) + return ret; return 0; } -- cgit v1.2.3 From 84105a358f6dacc88202c6017e15985394ad033f Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:33 +0300 Subject: drm/i915/cdclk: Rework bw_min_cdclk handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update bw_min_cdclk directly from intel_bw_calc_min_cdclk() rather than doing it later from intel_compute_min_cdclk(). This will allow better control over when to update the cdclk. For now we preserve the current behaviour by allowing the cdclk to decrease when any pipe needs to do a full modeset. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-12-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 38 ++++++++++++++---------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 5eae148e114b..e621dbef9c49 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2873,8 +2873,13 @@ int intel_cdclk_update_bw_min_cdclk(struct intel_atomic_state *state, { struct intel_display *display = to_intel_display(state); struct intel_cdclk_state *cdclk_state; + bool allow_cdclk_decrease = intel_any_crtc_needs_modeset(state); + int ret; - if (new_min_cdclk <= old_min_cdclk) + if (new_min_cdclk == old_min_cdclk) + return 0; + + if (!allow_cdclk_decrease && new_min_cdclk < old_min_cdclk) return 0; cdclk_state = intel_atomic_get_cdclk_state(state); @@ -2883,9 +2888,18 @@ int intel_cdclk_update_bw_min_cdclk(struct intel_atomic_state *state, old_min_cdclk = cdclk_state->bw_min_cdclk; - if (new_min_cdclk <= old_min_cdclk) + if (new_min_cdclk == old_min_cdclk) return 0; + if (!allow_cdclk_decrease && new_min_cdclk < old_min_cdclk) + return 0; + + cdclk_state->bw_min_cdclk = new_min_cdclk; + + ret = intel_atomic_lock_global_state(&cdclk_state->base); + if (ret) + return ret; + *need_cdclk_calc = true; drm_dbg_kms(display->drm, @@ -2908,7 +2922,6 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) struct intel_display *display = to_intel_display(state); struct intel_cdclk_state *cdclk_state = intel_atomic_get_new_cdclk_state(state); - const struct intel_bw_state *bw_state; struct intel_crtc *crtc; struct 
intel_crtc_state *crtc_state; int min_cdclk, i; @@ -2931,23 +2944,8 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) return ret; } - bw_state = intel_atomic_get_new_bw_state(state); - if (bw_state) { - min_cdclk = intel_bw_min_cdclk(display, bw_state); - - if (cdclk_state->bw_min_cdclk != min_cdclk) { - int ret; - - cdclk_state->bw_min_cdclk = min_cdclk; - - ret = intel_atomic_lock_global_state(&cdclk_state->base); - if (ret) - return ret; - } - } - - min_cdclk = max(cdclk_state->force_min_cdclk, - cdclk_state->bw_min_cdclk); + min_cdclk = cdclk_state->force_min_cdclk; + min_cdclk = max(min_cdclk, cdclk_state->bw_min_cdclk); for_each_pipe(display, pipe) min_cdclk = max(min_cdclk, cdclk_state->min_cdclk[pipe]); -- cgit v1.2.3 From 8f7443ae52c7885f64e7182d61afa046d27c2649 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:34 +0300 Subject: drm/i915/cdclk: Do intel_cdclk_update_crtc_min_cdclk() per-pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we call intel_cdclk_update_crtc_min_cdclk() per-plane. That is rather wasteful, and also won't account for any of the other per-pipe min_cdclk restrictions from intel_crtc_compute_min_cdclk(). Change the behaviour to do the comparison per-crtc instead, and use the final min cdclk as computed by intel_crtc_compute_min_cdclk(). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-13-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 40 +++++++++++++++++++++++------- drivers/gpu/drm/i915/display/intel_cdclk.h | 4 --- drivers/gpu/drm/i915/display/intel_plane.c | 19 +++----------- drivers/gpu/drm/i915/display/intel_plane.h | 5 ++-- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index e621dbef9c49..7017429506ee 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2837,10 +2837,10 @@ static int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_stat return min_cdclk; } -int intel_cdclk_update_crtc_min_cdclk(struct intel_atomic_state *state, - struct intel_crtc *crtc, - int old_min_cdclk, int new_min_cdclk, - bool *need_cdclk_calc) +static int intel_cdclk_update_crtc_min_cdclk(struct intel_atomic_state *state, + struct intel_crtc *crtc, + int old_min_cdclk, int new_min_cdclk, + bool *need_cdclk_calc) { struct intel_display *display = to_intel_display(state); struct intel_cdclk_state *cdclk_state; @@ -3281,6 +3281,27 @@ static int intel_cdclk_modeset_checks(struct intel_atomic_state *state, return 0; } +static int intel_crtcs_calc_min_cdclk(struct intel_atomic_state *state, + bool *need_cdclk_calc) +{ + const struct intel_crtc_state *old_crtc_state; + const struct intel_crtc_state *new_crtc_state; + struct intel_crtc *crtc; + int i, ret; + + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + ret = intel_cdclk_update_crtc_min_cdclk(state, crtc, + intel_crtc_compute_min_cdclk(old_crtc_state), + intel_crtc_compute_min_cdclk(new_crtc_state), + need_cdclk_calc); + if (ret) + return ret; + } + + return 0; +} + int intel_cdclk_atomic_check(struct intel_atomic_state *state, bool *need_cdclk_calc) { @@ -3300,11 +3321,12 @@ int intel_cdclk_atomic_check(struct intel_atomic_state *state, * planes are part of the state.
We can now compute the minimum cdclk * for each plane. */ - for_each_new_intel_plane_in_state(state, plane, plane_state, i) { - ret = intel_plane_calc_min_cdclk(state, plane, need_cdclk_calc); - if (ret) - return ret; - } + for_each_new_intel_plane_in_state(state, plane, plane_state, i) + intel_plane_calc_min_cdclk(state, plane); + + ret = intel_crtcs_calc_min_cdclk(state, need_cdclk_calc); + if (ret) + return ret; ret = intel_bw_calc_min_cdclk(state, need_cdclk_calc); if (ret) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 25d45c8f059d..0e67c75ca569 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -51,10 +51,6 @@ void intel_cdclk_crtc_disable_noatomic(struct intel_crtc *crtc); int intel_cdclk_update_bw_min_cdclk(struct intel_atomic_state *state, int old_min_cdclk, int new_min_cdclk, bool *need_cdclk_calc); -int intel_cdclk_update_crtc_min_cdclk(struct intel_atomic_state *state, - struct intel_crtc *crtc, - int old_min_cdclk, int new_min_cdclk, - bool *need_cdclk_calc); #define to_intel_cdclk_state(global_state) \ container_of_const((global_state), struct intel_cdclk_state, base) diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index aceac2ef28a8..a1ca8547a3da 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -291,34 +291,21 @@ intel_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, rel_data_rate); } -int intel_plane_calc_min_cdclk(struct intel_atomic_state *state, - struct intel_plane *plane, - bool *need_cdclk_calc) +void intel_plane_calc_min_cdclk(struct intel_atomic_state *state, + struct intel_plane *plane) { const struct intel_plane_state *plane_state = intel_atomic_get_new_plane_state(state, plane); struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc); - const struct intel_crtc_state *old_crtc_state; struct intel_crtc_state *new_crtc_state; - int ret; if (!plane_state->uapi.visible || !plane->min_cdclk) - return 0; + return; - old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); new_crtc_state->min_cdclk[plane->id] = plane->min_cdclk(new_crtc_state, plane_state); - - ret = intel_cdclk_update_crtc_min_cdclk(state, crtc, - old_crtc_state->min_cdclk[plane->id], - new_crtc_state->min_cdclk[plane->id], - need_cdclk_calc); - if (ret) - return ret; - - return 0; } static void intel_plane_clear_hw_state(struct intel_plane_state *plane_state) diff --git a/drivers/gpu/drm/i915/display/intel_plane.h b/drivers/gpu/drm/i915/display/intel_plane.h index 8af41ccc0a69..c6bef1b3471d 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.h +++ b/drivers/gpu/drm/i915/display/intel_plane.h @@ -69,9 +69,8 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ struct intel_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, struct intel_plane_state *intel_state); -int intel_plane_calc_min_cdclk(struct intel_atomic_state *state, - struct intel_plane *plane, - bool *need_cdclk_calc); +void intel_plane_calc_min_cdclk(struct intel_atomic_state *state, + struct intel_plane *plane); int intel_plane_check_clipping(struct intel_plane_state *plane_state, struct intel_crtc_state *crtc_state, int min_scale, int max_scale, -- cgit v1.2.3 From a6d20cb1d808c4ffed5daa292da83d0e9c3d929f Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 
2025 20:19:35 +0300 Subject: drm/i915/cdclk: Relocate intel_plane_calc_min_cdclk() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no reason to defer intel_plane_calc_min_cdclk() until intel_cdclk_atomic_check(). Just do this as part of intel_atomic_check_planes() (after we've added all the planes to the state that affect the per-plane min_cdclk calculation). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-14-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 11 ----------- drivers/gpu/drm/i915/display/intel_plane.c | 7 +++++-- drivers/gpu/drm/i915/display/intel_plane.h | 2 -- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 7017429506ee..7f62fa25ce0c 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -3307,23 +3307,12 @@ int intel_cdclk_atomic_check(struct intel_atomic_state *state, { const struct intel_cdclk_state *old_cdclk_state; struct intel_cdclk_state *new_cdclk_state; - struct intel_plane_state __maybe_unused *plane_state; - struct intel_plane *plane; int ret; - int i; ret = intel_cdclk_modeset_checks(state, need_cdclk_calc); if (ret) return ret; - /* - * active_planes bitmask has been updated, and potentially affected - * planes are part of the state. We can now compute the minimum cdclk - * for each plane. - */ - for_each_new_intel_plane_in_state(state, plane, plane_state, i) - intel_plane_calc_min_cdclk(state, plane); - ret = intel_crtcs_calc_min_cdclk(state, need_cdclk_calc); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index a1ca8547a3da..074de9275951 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -291,8 +291,8 @@ intel_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, rel_data_rate); } -void intel_plane_calc_min_cdclk(struct intel_atomic_state *state, - struct intel_plane *plane) +static void intel_plane_calc_min_cdclk(struct intel_atomic_state *state, + struct intel_plane *plane) { const struct intel_plane_state *plane_state = intel_atomic_get_new_plane_state(state, plane); @@ -1700,5 +1700,8 @@ int intel_plane_atomic_check(struct intel_atomic_state *state) return ret; } + for_each_new_intel_plane_in_state(state, plane, plane_state, i) + intel_plane_calc_min_cdclk(state, plane); + return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_plane.h b/drivers/gpu/drm/i915/display/intel_plane.h index c6bef1b3471d..4e99df9de3e8 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.h +++ b/drivers/gpu/drm/i915/display/intel_plane.h @@ -69,8 +69,6 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ struct intel_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, struct intel_plane_state *intel_state); -void intel_plane_calc_min_cdclk(struct intel_atomic_state *state, - struct intel_plane *plane); int intel_plane_check_clipping(struct intel_plane_state *plane_state, struct intel_crtc_state *crtc_state, int min_scale, int max_scale, -- cgit v1.2.3 From fa7fd8ebb6ff21f5ee4d9cba765ade96a2442ecf Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:36 +0300 Subject: drm/i915/cdclk: Rework crtc min_cdclk handling MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update crtc min_cdclk directly when calling intel_cdclk_update_crtc_min_cdclk() rather than doing it later from intel_compute_min_cdclk(). This will e.g. allow better control over when to update the cdclk. For now we preserve the current behaviour by allowing the cdclk to decrease when any pipe needs to do a full modeset. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-15-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 39 +++++++++++++----------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 7f62fa25ce0c..1241dd0fb343 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2844,8 +2844,13 @@ static int intel_cdclk_update_crtc_min_cdclk(struct intel_atomic_state *state, { struct intel_display *display = to_intel_display(state); struct intel_cdclk_state *cdclk_state; + bool allow_cdclk_decrease = intel_any_crtc_needs_modeset(state); + int ret; + + if (new_min_cdclk == old_min_cdclk) + return 0; - if (new_min_cdclk <= old_min_cdclk) + if (!allow_cdclk_decrease && new_min_cdclk < old_min_cdclk) return 0; cdclk_state = intel_atomic_get_cdclk_state(state); @@ -2854,9 +2859,18 @@ static int intel_cdclk_update_crtc_min_cdclk(struct intel_atomic_state *state, old_min_cdclk = cdclk_state->min_cdclk[crtc->pipe]; - if (new_min_cdclk <= old_min_cdclk) + if (new_min_cdclk == old_min_cdclk) return 0; + if (!allow_cdclk_decrease && new_min_cdclk < old_min_cdclk) + return 0; + + cdclk_state->min_cdclk[crtc->pipe] = new_min_cdclk; + + ret = intel_atomic_lock_global_state(&cdclk_state->base); + if (ret) + return ret; + *need_cdclk_calc = true; drm_dbg_kms(display->drm, @@ -2922,27 +2936,8 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) struct intel_display *display = to_intel_display(state); struct intel_cdclk_state *cdclk_state = intel_atomic_get_new_cdclk_state(state); - struct intel_crtc *crtc; - struct intel_crtc_state *crtc_state; - int min_cdclk, i; enum pipe pipe; - - for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { - int ret; - - min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); - if (min_cdclk < 0) - return min_cdclk; - - if (cdclk_state->min_cdclk[crtc->pipe] == min_cdclk) - continue; - - cdclk_state->min_cdclk[crtc->pipe] = min_cdclk; - - ret = intel_atomic_lock_global_state(&cdclk_state->base); - if (ret) - return ret; - } + int min_cdclk; min_cdclk = cdclk_state->force_min_cdclk; min_cdclk = max(min_cdclk, cdclk_state->bw_min_cdclk); -- cgit v1.2.3 From 3d23ce8c55265c05966307fb8a3615d05be33cdd Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:37 +0300 Subject: drm/i915/cdclk: Move intel_bw_crtc_min_cdclk() handling into intel_crtc_compute_min_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_bw_crtc_min_cdclk() depends only on per-crtc state, so there is no real point in having it complicate the global bw_min_cdclk. Instead let's just account for it in intel_crtc_compute_min_cdclk().
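For reference, the relocated helper boils down to a single ceiling division; here is a minimal stand-alone sketch of that arithmetic, using plain C stand-ins for the kernel's mul_u32_u32()/DIV_ROUND_UP_ULL() helpers and a made-up data rate purely to exercise the formula:

    #include <stdint.h>
    #include <stdio.h>

    /* "Maximum Pipe Read Bandwidth" rule from the patch:
     * min cdclk = ceil(data_rate * 10 / 512), display version 12+ only. */
    static uint64_t bw_crtc_min_cdclk(uint32_t data_rate, int display_ver)
    {
        if (display_ver < 12)
            return 0;
        return ((uint64_t)data_rate * 10 + 511) / 512;
    }

    int main(void)
    {
        /* hypothetical data rate value, in whatever units the caller uses */
        printf("%llu\n", (unsigned long long)bw_crtc_min_cdclk(1000000, 14));
        return 0;
    }
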
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-16-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_bw.c | 17 ++++------------- drivers/gpu/drm/i915/display/intel_bw.h | 1 + drivers/gpu/drm/i915/display/intel_cdclk.c | 1 + 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 7499ddec2b14..b53bcb693e79 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -871,13 +871,14 @@ static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_ } /* "Maximum Pipe Read Bandwidth" */ -static int intel_bw_crtc_min_cdclk(struct intel_display *display, - unsigned int data_rate) +int intel_bw_crtc_min_cdclk(const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(crtc_state); + if (DISPLAY_VER(display) < 12) return 0; - return DIV_ROUND_UP_ULL(mul_u32_u32(data_rate, 10), 512); + return DIV_ROUND_UP_ULL(mul_u32_u32(intel_bw_crtc_data_rate(crtc_state), 10), 512); } static unsigned int intel_bw_num_active_planes(struct intel_display *display, @@ -1292,10 +1293,6 @@ static bool intel_bw_state_changed(struct intel_display *display, if (intel_dbuf_bw_changed(display, old_dbuf_bw, new_dbuf_bw)) return true; - - if (intel_bw_crtc_min_cdclk(display, old_bw_state->data_rate[pipe]) != - intel_bw_crtc_min_cdclk(display, new_bw_state->data_rate[pipe])) - return true; } return false; @@ -1386,16 +1383,10 @@ intel_bw_dbuf_min_cdclk(struct intel_display *display, int intel_bw_min_cdclk(struct intel_display *display, const struct intel_bw_state *bw_state) { - enum pipe pipe; int min_cdclk; min_cdclk = intel_bw_dbuf_min_cdclk(display, bw_state); - for_each_pipe(display, pipe) - min_cdclk = max(min_cdclk, - intel_bw_crtc_min_cdclk(display, - bw_state->data_rate[pipe])); - return min_cdclk; } diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index c064e80a7a7f..4bb3a637b295 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -29,6 +29,7 @@ intel_atomic_get_bw_state(struct intel_atomic_state *state); void intel_bw_init_hw(struct intel_display *display); int intel_bw_init(struct intel_display *display); int intel_bw_atomic_check(struct intel_atomic_state *state); +int intel_bw_crtc_min_cdclk(const struct intel_crtc_state *crtc_state); int intel_bw_calc_min_cdclk(struct intel_atomic_state *state, bool *need_cdclk_calc); int intel_bw_min_cdclk(struct intel_display *display, diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 1241dd0fb343..b45b84293cba 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2828,6 +2828,7 @@ static int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_stat return 0; min_cdclk = intel_pixel_rate_to_cdclk(crtc_state); + min_cdclk = max(min_cdclk, intel_bw_crtc_min_cdclk(crtc_state)); min_cdclk = max(min_cdclk, hsw_ips_min_cdclk(crtc_state)); min_cdclk = max(min_cdclk, intel_audio_min_cdclk(crtc_state)); min_cdclk = max(min_cdclk, vlv_dsi_min_cdclk(crtc_state)); -- cgit v1.2.3 From ba91b9eecb4779da8221e895a5c892c30629e04e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:38 +0300 Subject: drm/i915/cdclk: Decouple cdclk from state->modeset MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no real reason anymore to tie cdclk updates to state->modeset/any_ms. Always call the cdclk functions and allow them to decide whether cdclk update is necessary/desired or not. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-17-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 6 ++++++ drivers/gpu/drm/i915/display/intel_display.c | 14 ++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index b45b84293cba..a08933e769ae 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2708,6 +2708,9 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state) struct intel_cdclk_config cdclk_config; enum pipe pipe; + if (!new_cdclk_state) + return; + if (!intel_cdclk_changed(&old_cdclk_state->actual, &new_cdclk_state->actual)) return; @@ -2760,6 +2763,9 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state) intel_atomic_get_new_cdclk_state(state); enum pipe pipe; + if (!new_cdclk_state) + return; + if (!intel_cdclk_changed(&old_cdclk_state->actual, &new_cdclk_state->actual)) return; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 349daf985b98..c9bd160c7b32 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6305,6 +6305,7 @@ int intel_atomic_check(struct drm_device *dev, struct intel_atomic_state *state = to_intel_atomic_state(_state); struct intel_crtc_state *old_crtc_state, *new_crtc_state; struct intel_crtc *crtc; + bool need_cdclk_calc = false; int ret, i; bool any_ms = false; @@ -6438,7 +6439,7 @@ int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; - ret = intel_cdclk_atomic_check(state, &any_ms); + ret = intel_cdclk_atomic_check(state, &need_cdclk_calc); if (ret) goto fail; @@ -6449,7 +6450,9 @@ int intel_atomic_check(struct drm_device *dev, ret = intel_modeset_checks(state); if (ret) goto fail; + } + if (need_cdclk_calc) { ret = intel_modeset_calc_cdclk(state); if (ret) return ret; @@ -7361,13 +7364,13 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) */ intel_pmdemand_pre_plane_update(state); - if (state->modeset) { + if (state->modeset) drm_atomic_helper_update_legacy_modeset_state(display->drm, &state->base); - intel_set_cdclk_pre_plane_update(state); + intel_set_cdclk_pre_plane_update(state); + if (state->modeset) intel_modeset_verify_disabled(state); - } intel_sagv_pre_plane_update(state); @@ -7480,8 +7483,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_verify_planes(state); intel_sagv_post_plane_update(state); - if (state->modeset) - intel_set_cdclk_post_plane_update(state); + intel_set_cdclk_post_plane_update(state); intel_pmdemand_post_plane_update(state); drm_atomic_helper_commit_hw_done(&state->base); -- cgit v1.2.3 From e76f0dd3782a56839bb9e0c0a95464d31bf9790b Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:39 +0300 Subject: drm/i915: Introduce intel_calc_enabled_pipes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add intel_calc_enabled_pipes() as the counterpart to intel_calc_active_pipes(). 
We have some uses where the set of logically enabled pipes makes more sense than the set of active pipes. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-18-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_display.c | 17 +++++++++++++++++ drivers/gpu/drm/i915/display/intel_display.h | 2 ++ 2 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c9bd160c7b32..bf05ddebedda 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5653,6 +5653,23 @@ static int hsw_mode_set_planes_workaround(struct intel_atomic_state *state) return 0; } +u8 intel_calc_enabled_pipes(struct intel_atomic_state *state, + u8 enabled_pipes) +{ + const struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + if (crtc_state->hw.enable) + enabled_pipes |= BIT(crtc->pipe); + else + enabled_pipes &= ~BIT(crtc->pipe); + } + + return enabled_pipes; +} + u8 intel_calc_active_pipes(struct intel_atomic_state *state, u8 active_pipes) { diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 9a9a44b61f7f..fc2ef92ccf68 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -394,6 +394,8 @@ enum phy_fia { i) int intel_atomic_check(struct drm_device *dev, struct drm_atomic_state *state); +u8 intel_calc_enabled_pipes(struct intel_atomic_state *state, + u8 enabled_pipes); u8 intel_calc_active_pipes(struct intel_atomic_state *state, u8 active_pipes); void intel_link_compute_m_n(u16 bpp, int nlanes, -- cgit v1.2.3 From 4b044b1368fa85f27f2d2121b720a5ef5db128e3 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:40 +0300 Subject: drm/i915/cdclk: Use enabled_pipes instead of active_pipes for the glk audio w/a MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we are considering the set of active pipes when determining if we need to boost the cdclk due to glk audio issues. Replace that with the set of logically enabled pipes instead. That is generally how everything else cdclk related is computed (cdclk_state->logical is based on logically enabled pipes). 
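The workaround condition itself is just a power-of-two test on the pipe bitmask; a stand-alone sketch (the is_power_of_2() stand-in mirrors the kernel helper's semantics for non-zero values):

    #include <stdbool.h>
    #include <stdint.h>

    /* True when exactly one bit is set, i.e. exactly one pipe enabled. */
    static bool is_power_of_2(uint8_t n)
    {
        return n != 0 && (n & (n - 1)) == 0;
    }

    /* Shape of glk_cdclk_audio_wa_needed() after this patch: boost cdclk on
     * Geminilake whenever more than one pipe is logically enabled. */
    static bool glk_audio_wa_needed(bool is_geminilake, uint8_t enabled_pipes)
    {
        return is_geminilake && enabled_pipes && !is_power_of_2(enabled_pipes);
    }
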
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-19-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index a08933e769ae..ae7fa3e172c9 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -146,6 +146,9 @@ struct intel_cdclk_state { /* forced minimum cdclk for glk+ audio w/a */ int force_min_cdclk; + /* bitmask of enabled pipes */ + u8 enabled_pipes; + /* bitmask of active pipes */ u8 active_pipes; @@ -2934,8 +2937,8 @@ static bool glk_cdclk_audio_wa_needed(struct intel_display *display, const struct intel_cdclk_state *cdclk_state) { return display->platform.geminilake && - cdclk_state->active_pipes && - !is_power_of_2(cdclk_state->active_pipes); + cdclk_state->enabled_pipes && + !is_power_of_2(cdclk_state->enabled_pipes); } static int intel_compute_min_cdclk(struct intel_atomic_state *state) @@ -3253,7 +3256,8 @@ static int intel_cdclk_modeset_checks(struct intel_atomic_state *state, struct intel_cdclk_state *new_cdclk_state; int ret; - if (!intel_any_crtc_active_changed(state)) + if (!intel_any_crtc_enable_changed(state) && + !intel_any_crtc_active_changed(state)) return 0; new_cdclk_state = intel_atomic_get_cdclk_state(state); @@ -3262,6 +3266,9 @@ static int intel_cdclk_modeset_checks(struct intel_atomic_state *state, old_cdclk_state = intel_atomic_get_old_cdclk_state(state); + new_cdclk_state->enabled_pipes = + intel_calc_enabled_pipes(state, old_cdclk_state->enabled_pipes); + new_cdclk_state->active_pipes = intel_calc_active_pipes(state, old_cdclk_state->active_pipes); @@ -3496,6 +3503,7 @@ void intel_cdclk_update_hw_state(struct intel_display *display) to_intel_cdclk_state(display->cdclk.obj.state); struct intel_crtc *crtc; + cdclk_state->enabled_pipes = 0; cdclk_state->active_pipes = 0; for_each_intel_crtc(display->drm, crtc) { @@ -3503,6 +3511,8 @@ void intel_cdclk_update_hw_state(struct intel_display *display) to_intel_crtc_state(crtc->base.state); enum pipe pipe = crtc->pipe; + if (crtc_state->hw.enable) + cdclk_state->enabled_pipes |= BIT(pipe); if (crtc_state->hw.active) cdclk_state->active_pipes |= BIT(pipe); -- cgit v1.2.3 From 1cb17a6a273b2536441af8c4b97a79d5e6183e0e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:41 +0300 Subject: drm/i915/cdclk: Hide intel_modeset_calc_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We no longer have anything of importance between intel_cdclk_atomic_check() and intel_modeset_calc_cdclk(), so hide the latter inside the former. 
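Schematically, the resulting control flow is an accumulate-then-act pattern: each sub-check ORs its verdict into one local flag, and the expensive recalculation runs at most once at the end. A reduced sketch (the check functions are placeholders, not the driver's real signatures):

    #include <stdbool.h>

    /* Placeholder for intel_cdclk_modeset_checks(),
     * intel_crtcs_calc_min_cdclk(), intel_bw_calc_min_cdclk(), ... */
    static int sub_check(bool *need_recalc)
    {
        /* a real check may or may not request recalculation */
        *need_recalc |= true;
        return 0;
    }

    static int recalc_cdclk(void) { return 0; }

    static int cdclk_atomic_check(void)
    {
        bool need_recalc = false;
        int ret;

        ret = sub_check(&need_recalc);
        if (ret)
            return ret;

        /* intel_modeset_calc_cdclk() now has a single caller, so it can
         * become static and be invoked here instead of being exported. */
        return need_recalc ? recalc_cdclk() : 0;
    }
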
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-20-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 22 +++++++++++++++------- drivers/gpu/drm/i915/display/intel_cdclk.h | 4 +--- drivers/gpu/drm/i915/display/intel_display.c | 22 ++++------------------ 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index ae7fa3e172c9..dc10223cbc15 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -3311,22 +3311,24 @@ static int intel_crtcs_calc_min_cdclk(struct intel_atomic_state *state, return 0; } -int intel_cdclk_atomic_check(struct intel_atomic_state *state, - bool *need_cdclk_calc) +static int intel_modeset_calc_cdclk(struct intel_atomic_state *state); + +int intel_cdclk_atomic_check(struct intel_atomic_state *state) { const struct intel_cdclk_state *old_cdclk_state; struct intel_cdclk_state *new_cdclk_state; + bool need_cdclk_calc = false; int ret; - ret = intel_cdclk_modeset_checks(state, need_cdclk_calc); + ret = intel_cdclk_modeset_checks(state, &need_cdclk_calc); if (ret) return ret; - ret = intel_crtcs_calc_min_cdclk(state, need_cdclk_calc); + ret = intel_crtcs_calc_min_cdclk(state, &need_cdclk_calc); if (ret) return ret; - ret = intel_bw_calc_min_cdclk(state, need_cdclk_calc); + ret = intel_bw_calc_min_cdclk(state, &need_cdclk_calc); if (ret) return ret; @@ -3339,7 +3341,13 @@ int intel_cdclk_atomic_check(struct intel_atomic_state *state, if (ret) return ret; - *need_cdclk_calc = true; + need_cdclk_calc = true; + } + + if (need_cdclk_calc) { + ret = intel_modeset_calc_cdclk(state); + if (ret) + return ret; } return 0; @@ -3387,7 +3395,7 @@ static bool intel_cdclk_need_serialize(struct intel_display *display, dg2_power_well_count(display, new_cdclk_state); } -int intel_modeset_calc_cdclk(struct intel_atomic_state *state) +static int intel_modeset_calc_cdclk(struct intel_atomic_state *state) { struct intel_display *display = to_intel_display(state); const struct intel_cdclk_state *old_cdclk_state; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 0e67c75ca569..72963f6f399a 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -38,11 +38,9 @@ void intel_set_cdclk_post_plane_update(struct intel_atomic_state *state); void intel_cdclk_dump_config(struct intel_display *display, const struct intel_cdclk_config *cdclk_config, const char *context); -int intel_modeset_calc_cdclk(struct intel_atomic_state *state); void intel_cdclk_get_cdclk(struct intel_display *display, struct intel_cdclk_config *cdclk_config); -int intel_cdclk_atomic_check(struct intel_atomic_state *state, - bool *need_cdclk_calc); +int intel_cdclk_atomic_check(struct intel_atomic_state *state); int intel_cdclk_state_set_joined_mbus(struct intel_atomic_state *state, bool joined_mbus); struct intel_cdclk_state * intel_atomic_get_cdclk_state(struct intel_atomic_state *state); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index bf05ddebedda..d5b2612d4ec2 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6322,9 +6322,7 @@ int intel_atomic_check(struct drm_device *dev, struct intel_atomic_state *state = to_intel_atomic_state(_state); struct 
intel_crtc_state *old_crtc_state, *new_crtc_state; struct intel_crtc *crtc; - bool need_cdclk_calc = false; int ret, i; - bool any_ms = false; if (!intel_display_driver_check_access(display)) return -ENODEV; @@ -6432,14 +6430,11 @@ int intel_atomic_check(struct drm_device *dev, if (!intel_crtc_needs_modeset(new_crtc_state)) continue; - any_ms = true; - intel_dpll_release(state, crtc); } - if (any_ms && !check_digital_port_conflicts(state)) { - drm_dbg_kms(display->drm, - "rejecting conflicting digital port configuration\n"); + if (intel_any_crtc_needs_modeset(state) && !check_digital_port_conflicts(state)) { + drm_dbg_kms(display->drm, "rejecting conflicting digital port configuration\n"); ret = -EINVAL; goto fail; } @@ -6456,25 +6451,16 @@ int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; - ret = intel_cdclk_atomic_check(state, &need_cdclk_calc); + ret = intel_cdclk_atomic_check(state); if (ret) goto fail; - if (intel_any_crtc_needs_modeset(state)) - any_ms = true; - - if (any_ms) { + if (intel_any_crtc_needs_modeset(state)) { ret = intel_modeset_checks(state); if (ret) goto fail; } - if (need_cdclk_calc) { - ret = intel_modeset_calc_cdclk(state); - if (ret) - return ret; - } - ret = intel_pmdemand_atomic_check(state); if (ret) goto fail; -- cgit v1.2.3 From a051ef9f12f5b15857b54d6d68f1f525959c7ad9 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 23 Sep 2025 20:19:42 +0300 Subject: drm/i915/cdclk: Move intel_cdclk_atomic_check() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move intel_cdclk_atomic_check() a bit so that we don't need an extra intel_modeset_calc_cdclk() forward declaration. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250923171943.7319-21-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 82 +++++++++++++++--------------- 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index dc10223cbc15..f2e092f89ddd 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -3311,48 +3311,6 @@ static int intel_crtcs_calc_min_cdclk(struct intel_atomic_state *state, return 0; } -static int intel_modeset_calc_cdclk(struct intel_atomic_state *state); - -int intel_cdclk_atomic_check(struct intel_atomic_state *state) -{ - const struct intel_cdclk_state *old_cdclk_state; - struct intel_cdclk_state *new_cdclk_state; - bool need_cdclk_calc = false; - int ret; - - ret = intel_cdclk_modeset_checks(state, &need_cdclk_calc); - if (ret) - return ret; - - ret = intel_crtcs_calc_min_cdclk(state, &need_cdclk_calc); - if (ret) - return ret; - - ret = intel_bw_calc_min_cdclk(state, &need_cdclk_calc); - if (ret) - return ret; - - old_cdclk_state = intel_atomic_get_old_cdclk_state(state); - new_cdclk_state = intel_atomic_get_new_cdclk_state(state); - - if (new_cdclk_state && - old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk) { - ret = intel_atomic_lock_global_state(&new_cdclk_state->base); - if (ret) - return ret; - - need_cdclk_calc = true; - } - - if (need_cdclk_calc) { - ret = intel_modeset_calc_cdclk(state); - if (ret) - return ret; - } - - return 0; -} - int intel_cdclk_state_set_joined_mbus(struct intel_atomic_state *state, bool joined_mbus) { struct intel_cdclk_state *cdclk_state; @@ -3503,6 +3461,46 @@ static int intel_modeset_calc_cdclk(struct intel_atomic_state *state) 
return 0; } +int intel_cdclk_atomic_check(struct intel_atomic_state *state) +{ + const struct intel_cdclk_state *old_cdclk_state; + struct intel_cdclk_state *new_cdclk_state; + bool need_cdclk_calc = false; + int ret; + + ret = intel_cdclk_modeset_checks(state, &need_cdclk_calc); + if (ret) + return ret; + + ret = intel_crtcs_calc_min_cdclk(state, &need_cdclk_calc); + if (ret) + return ret; + + ret = intel_bw_calc_min_cdclk(state, &need_cdclk_calc); + if (ret) + return ret; + + old_cdclk_state = intel_atomic_get_old_cdclk_state(state); + new_cdclk_state = intel_atomic_get_new_cdclk_state(state); + + if (new_cdclk_state && + old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk) { + ret = intel_atomic_lock_global_state(&new_cdclk_state->base); + if (ret) + return ret; + + need_cdclk_calc = true; + } + + if (need_cdclk_calc) { + ret = intel_modeset_calc_cdclk(state); + if (ret) + return ret; + } + + return 0; +} + void intel_cdclk_update_hw_state(struct intel_display *display) { const struct intel_bw_state *bw_state = -- cgit v1.2.3 From e095b55155ef69a8ae0eb114a7fd2a381c012f33 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 24 Sep 2025 12:23:26 +0530 Subject: drm/amdgpu: use user provided hmm_range buffer in amdgpu_ttm_tt_get_user_pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update amdgpu_ttm_tt_get_user_pages and all dependent functions, along with their callers, to use a caller-allocated hmm_range buffer instead of having the hmm layer allocate it. This is needed to make the hmm_range pointers easily accessible without going through the bo, which is a requirement for the userqueue to lock the userptrs effectively. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 ++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 +++++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 10 +++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 11 +---------- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 +++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 7 +++++-- 8 files changed, 38 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a2ca9acf8c4e..70c83e10b9c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1089,8 +1089,15 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, return 0; } - ret = amdgpu_ttm_tt_get_user_pages(bo, &range); + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (unlikely(!range)) { + ret = -ENOMEM; + goto unregister_out; + } + + ret = amdgpu_ttm_tt_get_user_pages(bo, range); if (ret) { + kfree(range); if (ret == -EAGAIN) pr_debug("Failed to get user pages, try again\n"); else @@ -2566,9 +2573,14 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, } } + mem->range = kzalloc(sizeof(*mem->range), GFP_KERNEL); + if (unlikely(!mem->range)) + return -ENOMEM; /* Get updated user pages */ - ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range); + ret = amdgpu_ttm_tt_get_user_pages(bo, mem->range); if (ret) { + kfree(mem->range); + mem->range = NULL; pr_debug("Failed %d to get user pages\n", ret); /* Return -EFAULT bad address error as success.
It will diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2f6a96af7fb1..59951d075703 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -891,7 +891,11 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, bool userpage_invalidated = false; struct amdgpu_bo *bo = e->bo; - r = amdgpu_ttm_tt_get_user_pages(bo, &e->range); + e->range = kzalloc(sizeof(*e->range), GFP_KERNEL); + if (unlikely(!e->range)) + return -ENOMEM; + + r = amdgpu_ttm_tt_get_user_pages(bo, e->range); if (r) goto out_free_user_pages; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b7ebae289bea..b0c2a1434f03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -572,10 +572,14 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, goto release_object; if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - r = amdgpu_ttm_tt_get_user_pages(bo, &range); - if (r) + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (unlikely(!range)) + return -ENOMEM; + r = amdgpu_ttm_tt_get_user_pages(bo, range); + if (r) { + kfree(range); goto release_object; - + } r = amdgpu_bo_reserve(bo, true); if (r) goto user_pages_done; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 2c6a6b858112..53d405a92a14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -168,18 +168,13 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo) int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, void *owner, - struct hmm_range **phmm_range) + struct hmm_range *hmm_range) { - struct hmm_range *hmm_range; unsigned long end; unsigned long timeout; unsigned long *pfns; int r = 0; - hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL); - if (unlikely(!hmm_range)) - return -ENOMEM; - pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); if (unlikely(!pfns)) { r = -ENOMEM; @@ -221,15 +216,11 @@ retry: hmm_range->start = start; hmm_range->hmm_pfns = pfns; - *phmm_range = hmm_range; - return 0; out_free_pfns: kvfree(pfns); out_free_range: - kfree(hmm_range); - if (r == -EBUSY) r = -EAGAIN; return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h index 953e1d06de20..c54e3c64251a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h @@ -34,7 +34,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, void *owner, - struct hmm_range **phmm_range); + struct hmm_range *hmm_range); bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); #if defined(CONFIG_HMM_MIRROR) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index aa9ee5dffa45..890123bf8ee8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -706,10 +706,11 @@ struct amdgpu_ttm_tt { * memory and start HMM tracking CPU page table update * * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only - * once afterwards to stop HMM tracking + * once afterwards to stop HMM tracking. It is the caller's responsibility to ensure + * that range points to valid memory and that it is freed too.
*/ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct hmm_range **range) + struct hmm_range *range) { struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); @@ -719,9 +720,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, bool readonly; int r = 0; - /* Make sure get_user_pages_done() can cleanup gracefully */ - *range = NULL; - mm = bo->notifier.mm; if (unlikely(!mm)) { DRM_DEBUG_DRIVER("BO is not registered?\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 0be2728aa872..64109912ae9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -192,14 +192,14 @@ uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct hmm_range **range); + struct hmm_range *range); void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, struct hmm_range *range); bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, struct hmm_range *range); #else static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct hmm_range **range) + struct hmm_range *range) { return -EPERM; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 9d72411c3379..8c3787b00f36 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1737,12 +1737,15 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } WRITE_ONCE(p->svms.faulting_task, current); + hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL); r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages, readonly, owner, - &hmm_range); + hmm_range); WRITE_ONCE(p->svms.faulting_task, NULL); - if (r) + if (r) { + kfree(hmm_range); pr_debug("failed %d to get svm range pages\n", r); + } } else { r = -EFAULT; } -- cgit v1.2.3 From b1dd0db1c668a33112bfb26618c090163700e368 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 30 Sep 2025 13:45:11 +0530 Subject: drm/amdgpu: clean up amdgpu hmm range functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up the amdgpu hmm range functions for a clearer definition of each. a. Split amdgpu_ttm_tt_get_user_pages_done into two: 1. amdgpu_hmm_range_valid: To check if the user pages are valid and update the seq num 2. amdgpu_hmm_range_free: Clean up the hmm range and pfn memory. b. amdgpu_ttm_tt_get_user_pages_done and amdgpu_ttm_tt_discard_user_pages are similar functions, so remove the discard variant and directly use amdgpu_hmm_range_free to clean up the hmm range and pfn memory.
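The intended caller lifecycle after this split, sketched with illustrative stand-ins for the amdgpu helpers (the struct layout and stub bodies are assumptions made for the sketch, not the real API):

    #include <stdbool.h>
    #include <stdlib.h>

    struct hmm_range_sketch { unsigned long *pfns; };

    /* Stand-in for amdgpu_hmm_range_alloc(): zeroed allocation. */
    static struct hmm_range_sketch *range_alloc(void)
    {
        return calloc(1, sizeof(struct hmm_range_sketch));
    }

    /* Stand-in for amdgpu_hmm_range_valid(): the real helper re-checks the
     * mmu notifier sequence number; NULL is always invalid. */
    static bool range_valid(const struct hmm_range_sketch *range)
    {
        return range != NULL;
    }

    /* Stand-in for amdgpu_hmm_range_free(): frees the pfn array and the
     * range itself, tolerating NULL like the real helper. */
    static void range_free(struct hmm_range_sketch *range)
    {
        if (!range)
            return;
        free(range->pfns);
        free(range);
    }

    int main(void)
    {
        struct hmm_range_sketch *range = range_alloc();
        if (!range)
            return 1;

        /* ... a get_user_pages-style call fills the range here ... */

        bool ok = range_valid(range); /* check validity first ...       */
        range_free(range);            /* ... then always free the range */
        return ok ? 0 : 1;
    }
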
Suggested-by: Christian König Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 18 ++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 11 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 7 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 23 ++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h | 17 ++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 32 ------------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 13 ---------- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 9 ++++--- 8 files changed, 56 insertions(+), 74 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 70c83e10b9c4..d0b1468a2ae1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1089,7 +1089,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, return 0; } - range = kzalloc(sizeof(*range), GFP_KERNEL); + range = amdgpu_hmm_range_alloc(); if (unlikely(!range)) { ret = -ENOMEM; goto unregister_out; @@ -1097,7 +1097,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, ret = amdgpu_ttm_tt_get_user_pages(bo, range); if (ret) { - kfree(range); + amdgpu_hmm_range_free(range); if (ret == -EAGAIN) pr_debug("Failed to get user pages, try again\n"); else @@ -1120,7 +1120,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, amdgpu_bo_unreserve(bo); release_out: - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); + amdgpu_hmm_range_free(range); unregister_out: if (ret) amdgpu_hmm_unregister(bo); @@ -1923,7 +1923,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { amdgpu_hmm_unregister(mem->bo); mutex_lock(&process_info->notifier_lock); - amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range); + amdgpu_hmm_range_free(mem->range); mutex_unlock(&process_info->notifier_lock); } @@ -2549,7 +2549,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; - amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range); + amdgpu_hmm_range_free(mem->range); mem->range = NULL; /* BO reservations and getting user pages (hmm_range_fault) @@ -2573,13 +2573,13 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, } } - mem->range = kzalloc(sizeof(*mem->range), GFP_KERNEL); + mem->range = amdgpu_hmm_range_alloc(); if (unlikely(!mem->range)) return -ENOMEM; /* Get updated user pages */ ret = amdgpu_ttm_tt_get_user_pages(bo, mem->range); if (ret) { - kfree(mem->range); + amdgpu_hmm_range_free(mem->range); mem->range = NULL; pr_debug("Failed %d to get user pages\n", ret); @@ -2753,8 +2753,8 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i continue; /* Only check mem with hmm range associated */ - valid = amdgpu_ttm_tt_get_user_pages_done( - mem->bo->tbo.ttm, mem->range); + valid = amdgpu_hmm_range_valid(mem->range); + amdgpu_hmm_range_free(mem->range); mem->range = NULL; if (!valid) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 59951d075703..c319d216421e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -41,6 +41,7 @@ #include "amdgpu_gmc.h" #include "amdgpu_gem.h" #include "amdgpu_ras.h" +#include "amdgpu_hmm.h" static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, struct 
amdgpu_device *adev, @@ -891,7 +892,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, bool userpage_invalidated = false; struct amdgpu_bo *bo = e->bo; - e->range = kzalloc(sizeof(*e->range), GFP_KERNEL); + e->range = amdgpu_hmm_range_alloc(); if (unlikely(!e->range)) return -ENOMEM; @@ -994,9 +995,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, out_free_user_pages: amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = e->bo; - - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); + amdgpu_hmm_range_free(e->range); e->range = NULL; } mutex_unlock(&p->bo_list->bo_list_mutex); @@ -1327,8 +1326,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, */ r = 0; amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm, - e->range); + r |= !amdgpu_hmm_range_valid(e->range); + amdgpu_hmm_range_free(e->range); e->range = NULL; } if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b0c2a1434f03..4bc506f4924a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -572,12 +572,12 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, goto release_object; if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - range = kzalloc(sizeof(*range), GFP_KERNEL); + range = amdgpu_hmm_range_alloc(); if (unlikely(!range)) return -ENOMEM; r = amdgpu_ttm_tt_get_user_pages(bo, range); if (r) { - kfree(range); + amdgpu_hmm_range_free(range); goto release_object; } r = amdgpu_bo_reserve(bo, true); @@ -601,8 +601,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, user_pages_done: if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); - + amdgpu_hmm_range_free(range); release_object: drm_gem_object_put(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 53d405a92a14..b582fd217bd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -226,14 +226,25 @@ out_free_range: return r; } -bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range) +bool amdgpu_hmm_range_valid(struct hmm_range *hmm_range) { - bool r; + if (!hmm_range) + return false; + + return !mmu_interval_read_retry(hmm_range->notifier, + hmm_range->notifier_seq); +} + +struct hmm_range *amdgpu_hmm_range_alloc(void) +{ + return kzalloc(sizeof(struct hmm_range), GFP_KERNEL); +} + +void amdgpu_hmm_range_free(struct hmm_range *hmm_range) +{ + if (!hmm_range) + return; - r = mmu_interval_read_retry(hmm_range->notifier, - hmm_range->notifier_seq); kvfree(hmm_range->hmm_pfns); kfree(hmm_range); - - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h index c54e3c64251a..e85f912b8938 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h @@ -35,9 +35,11 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, void *owner, struct hmm_range *hmm_range); -bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); #if defined(CONFIG_HMM_MIRROR) +bool amdgpu_hmm_range_valid(struct hmm_range *hmm_range); +struct hmm_range *amdgpu_hmm_range_alloc(void); +void amdgpu_hmm_range_free(struct hmm_range *hmm_range); int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr); void 
amdgpu_hmm_unregister(struct amdgpu_bo *bo); #else @@ -47,7 +49,20 @@ static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr) "add CONFIG_ZONE_DEVICE=y in config file to fix this\n"); return -ENODEV; } + static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {} + +static inline bool amdgpu_hmm_range_valid(struct hmm_range *hmm_range) +{ + return false; +} + +static inline struct hmm_range *amdgpu_hmm_range_alloc(void) +{ + return NULL; +} + +static inline void amdgpu_hmm_range_free(struct hmm_range *hmm_range) {} #endif #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 890123bf8ee8..2fb8cd79583e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -754,38 +754,6 @@ out_unlock: return r; } -/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations - */ -void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, - struct hmm_range *range) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - - if (gtt && gtt->userptr && range) - amdgpu_hmm_range_get_pages_done(range); -} - -/* - * amdgpu_ttm_tt_get_user_pages_done - stop HMM track the CPU page table change - * Check if the pages backing this ttm range have been invalidated - * - * Returns: true if pages are still valid - */ -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, - struct hmm_range *range) -{ - struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); - - if (!gtt || !gtt->userptr || !range) - return false; - - DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n", - gtt->userptr, ttm->num_pages); - - WARN_ONCE(!range->hmm_pfns, "No user pages to check\n"); - - return !amdgpu_hmm_range_get_pages_done(range); -} #endif /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 64109912ae9e..bbd6e524de9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -193,25 +193,12 @@ uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct hmm_range *range); -void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, - struct hmm_range *range); -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, - struct hmm_range *range); #else static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct hmm_range *range) { return -EPERM; } -static inline void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, - struct hmm_range *range) -{ -} -static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, - struct hmm_range *range) -{ - return false; -} #endif void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 8c3787b00f36..91609dd5730f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1737,13 +1737,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } WRITE_ONCE(p->svms.faulting_task, current); - hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL); + hmm_range = amdgpu_hmm_range_alloc(); r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages, readonly, owner, hmm_range); WRITE_ONCE(p->svms.faulting_task, NULL); if (r) { - kfree(hmm_range); + amdgpu_hmm_range_free(hmm_range); pr_debug("failed %d to get svm range pages\n", r); } } else { @@ -1764,10 +1764,13 @@ 
static int svm_range_validate_and_map(struct mm_struct *mm, * Overrride return value to TRY AGAIN only if prior returns * were successful */ - if (hmm_range && amdgpu_hmm_range_get_pages_done(hmm_range) && !r) { + if (hmm_range && !amdgpu_hmm_range_valid(hmm_range) && !r) { pr_debug("hmm update the range, need validate again\n"); r = -EAGAIN; } + /* Free the hmm range */ + amdgpu_hmm_range_free(hmm_range); + if (!r && !list_empty(&prange->child_list)) { pr_debug("range split by unmap in parallel, validate again\n"); -- cgit v1.2.3 From fd98319f73af4e3e44b67290b5b8db6dfd1afa21 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:16:54 +0800 Subject: drm/amd/ras: Add ras aca parser v1.0 Add ras aca parser v1.0. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.c | 379 +++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.h | 71 +++++ 2 files changed, 450 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.c b/drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.c new file mode 100644 index 000000000000..29df98948703 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "ras.h" +#include "ras_aca.h" +#include "ras_core_status.h" +#include "ras_aca_v1_0.h" + +struct ras_aca_hwip { + int hwid; + int mcatype; +}; + +static struct ras_aca_hwip aca_hwid_mcatypes[ACA_ECC_HWIP_COUNT] = { + [ACA_ECC_HWIP__SMU] = {0x01, 0x01}, + [ACA_ECC_HWIP__PCS_XGMI] = {0x50, 0x00}, + [ACA_ECC_HWIP__UMC] = {0x96, 0x00}, +}; + +static int aca_decode_bank_info(struct aca_block *aca_blk, + struct aca_bank_reg *bank, struct aca_ecc_info *info) +{ + u64 ipid; + u32 instidhi, instidlo; + + ipid = bank->regs[ACA_REG_IDX__IPID]; + info->hwid = ACA_REG_IPID_HARDWAREID(ipid); + info->mcatype = ACA_REG_IPID_MCATYPE(ipid); + /* + * Unified DieID Format: SAASS. A:AID, S:Socket. 
+ * Unified DieID[4:4] = InstanceId[0:0] + * Unified DieID[0:3] = InstanceIdHi[0:3] + */ + instidhi = ACA_REG_IPID_INSTANCEIDHI(ipid); + instidlo = ACA_REG_IPID_INSTANCEIDLO(ipid); + info->die_id = ((instidhi >> 2) & 0x03); + info->socket_id = ((instidlo & 0x1) << 2) | (instidhi & 0x03); + + if ((aca_blk->blk_info->hwip == ACA_ECC_HWIP__SMU) && + (aca_blk->blk_info->ras_block_id == RAS_BLOCK_ID__GFX)) + info->xcd_id = + ((instidlo & GENMASK_ULL(31, 1)) == mmSMNAID_XCD0_MCA_SMU) ? 0 : 1; + + return 0; +} + +static bool aca_check_bank_hwip(struct aca_bank_reg *bank, enum aca_ecc_hwip type) +{ + struct ras_aca_hwip *hwip; + int hwid, mcatype; + u64 ipid; + + if (!bank || (type == ACA_ECC_HWIP__UNKNOWN)) + return false; + + hwip = &aca_hwid_mcatypes[type]; + if (!hwip->hwid) + return false; + + ipid = bank->regs[ACA_REG_IDX__IPID]; + hwid = ACA_REG_IPID_HARDWAREID(ipid); + mcatype = ACA_REG_IPID_MCATYPE(ipid); + + return hwip->hwid == hwid && hwip->mcatype == mcatype; +} + +static bool aca_match_bank_default(struct aca_block *aca_blk, void *data) +{ + return aca_check_bank_hwip((struct aca_bank_reg *)data, aca_blk->blk_info->hwip); +} + +static bool aca_match_gfx_bank(struct aca_block *aca_blk, void *data) +{ + struct aca_bank_reg *bank = (struct aca_bank_reg *)data; + u32 instlo; + + if (!aca_check_bank_hwip(bank, aca_blk->blk_info->hwip)) + return false; + + instlo = ACA_REG_IPID_INSTANCEIDLO(bank->regs[ACA_REG_IDX__IPID]); + instlo &= GENMASK_ULL(31, 1); + switch (instlo) { + case mmSMNAID_XCD0_MCA_SMU: + case mmSMNAID_XCD1_MCA_SMU: + case mmSMNXCD_XCD0_MCA_SMU: + return true; + default: + break; + } + + return false; +} + +static bool aca_match_sdma_bank(struct aca_block *aca_blk, void *data) +{ + struct aca_bank_reg *bank = (struct aca_bank_reg *)data; + /* CODE_SDMA0 - CODE_SDMA4, reference to smu driver if header file */ + static int sdma_err_codes[] = { 33, 34, 35, 36 }; + u32 instlo; + int errcode, i; + + if (!aca_check_bank_hwip(bank, aca_blk->blk_info->hwip)) + return false; + + instlo = ACA_REG_IPID_INSTANCEIDLO(bank->regs[ACA_REG_IDX__IPID]); + instlo &= GENMASK_ULL(31, 1); + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + errcode = ACA_REG_SYND_ERRORINFORMATION(bank->regs[ACA_REG_IDX__SYND]); + errcode &= 0xff; + + /* Check SDMA error codes */ + for (i = 0; i < ARRAY_SIZE(sdma_err_codes); i++) { + if (errcode == sdma_err_codes[i]) + return true; + } + + return false; +} + +static bool aca_match_mmhub_bank(struct aca_block *aca_blk, void *data) +{ + struct aca_bank_reg *bank = (struct aca_bank_reg *)data; + /* reference to smu driver if header file */ + const int mmhub_err_codes[] = { + 0, 1, 2, 3, 4, /* CODE_DAGB0 - 4 */ + 5, 6, 7, 8, 9, /* CODE_EA0 - 4 */ + 10, /* CODE_UTCL2_ROUTER */ + 11, /* CODE_VML2 */ + 12, /* CODE_VML2_WALKER */ + 13, /* CODE_MMCANE */ + }; + u32 instlo; + int errcode, i; + + if (!aca_check_bank_hwip(bank, aca_blk->blk_info->hwip)) + return false; + + instlo = ACA_REG_IPID_INSTANCEIDLO(bank->regs[ACA_REG_IDX__IPID]); + instlo &= GENMASK_ULL(31, 1); + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + errcode = ACA_REG_SYND_ERRORINFORMATION(bank->regs[ACA_REG_IDX__SYND]); + errcode &= 0xff; + + /* Check MMHUB error codes */ + for (i = 0; i < ARRAY_SIZE(mmhub_err_codes); i++) { + if (errcode == mmhub_err_codes[i]) + return true; + } + + return false; +} + +static bool aca_check_umc_de(struct ras_core_context *ras_core, uint64_t mc_umc_status) +{ + return (ras_core->poison_supported && + ACA_REG_STATUS_VAL(mc_umc_status) && + 
+		ACA_REG_STATUS_DEFERRED(mc_umc_status));
+}
+
+static bool aca_check_umc_ue(struct ras_core_context *ras_core, uint64_t mc_umc_status)
+{
+	if (aca_check_umc_de(ras_core, mc_umc_status))
+		return false;
+
+	return (ACA_REG_STATUS_VAL(mc_umc_status) &&
+		(ACA_REG_STATUS_PCC(mc_umc_status) ||
+		 ACA_REG_STATUS_UC(mc_umc_status) ||
+		 ACA_REG_STATUS_TCC(mc_umc_status)));
+}
+
+static bool aca_check_umc_ce(struct ras_core_context *ras_core, uint64_t mc_umc_status)
+{
+	if (aca_check_umc_de(ras_core, mc_umc_status))
+		return false;
+
+	return (ACA_REG_STATUS_VAL(mc_umc_status) &&
+		(ACA_REG_STATUS_CECC(mc_umc_status) ||
+		 (ACA_REG_STATUS_UECC(mc_umc_status) &&
+		  ACA_REG_STATUS_UC(mc_umc_status) == 0) ||
+		 /* Identify data parity error in replay mode */
+		 ((ACA_REG_STATUS_ERRORCODEEXT(mc_umc_status) == 0x5 ||
+		   ACA_REG_STATUS_ERRORCODEEXT(mc_umc_status) == 0xb) &&
+		  !(aca_check_umc_ue(ras_core, mc_umc_status)))));
+}
+
+static int aca_parse_umc_bank(struct ras_core_context *ras_core,
+			      struct aca_block *ras_blk, void *data, void *buf)
+{
+	struct aca_bank_reg *bank = (struct aca_bank_reg *)data;
+	struct aca_bank_ecc *ecc = (struct aca_bank_ecc *)buf;
+	struct aca_ecc_info bank_info;
+	uint32_t ext_error_code;
+	uint64_t status0;
+
+	status0 = bank->regs[ACA_REG_IDX__STATUS];
+	if (!ACA_REG_STATUS_VAL(status0))
+		return 0;
+
+	memset(&bank_info, 0, sizeof(bank_info));
+	aca_decode_bank_info(ras_blk, bank, &bank_info);
+	memcpy(&ecc->bank_info, &bank_info, sizeof(bank_info));
+	ecc->bank_info.status = bank->regs[ACA_REG_IDX__STATUS];
+	ecc->bank_info.ipid = bank->regs[ACA_REG_IDX__IPID];
+	ecc->bank_info.addr = bank->regs[ACA_REG_IDX__ADDR];
+
+	ext_error_code = ACA_REG_STATUS_ERRORCODEEXT(status0);
+
+	if (aca_check_umc_de(ras_core, status0))
+		ecc->de_count = 1;
+	else if (aca_check_umc_ue(ras_core, status0))
+		ecc->ue_count = ext_error_code ?
+			1 : ACA_REG_MISC0_ERRCNT(bank->regs[ACA_REG_IDX__MISC0]);
+	else if (aca_check_umc_ce(ras_core, status0))
+		ecc->ce_count = ext_error_code ?
+			1 : ACA_REG_MISC0_ERRCNT(bank->regs[ACA_REG_IDX__MISC0]);
+
+	return 0;
+}
+
+static bool aca_check_bank_is_de(struct ras_core_context *ras_core,
+				 uint64_t status)
+{
+	return (ACA_REG_STATUS_POISON(status) ||
+		ACA_REG_STATUS_DEFERRED(status));
+}
+
+static int aca_parse_bank_default(struct ras_core_context *ras_core,
+				  struct aca_block *ras_blk,
+				  void *data, void *buf)
+{
+	struct aca_bank_reg *bank = (struct aca_bank_reg *)data;
+	struct aca_bank_ecc *ecc = (struct aca_bank_ecc *)buf;
+	struct aca_ecc_info bank_info;
+	u64 misc0 = bank->regs[ACA_REG_IDX__MISC0];
+	u64 status = bank->regs[ACA_REG_IDX__STATUS];
+
+	memset(&bank_info, 0, sizeof(bank_info));
+	aca_decode_bank_info(ras_blk, bank, &bank_info);
+	memcpy(&ecc->bank_info, &bank_info, sizeof(bank_info));
+	ecc->bank_info.status = status;
+	ecc->bank_info.ipid = bank->regs[ACA_REG_IDX__IPID];
+	ecc->bank_info.addr = bank->regs[ACA_REG_IDX__ADDR];
+
+	if (aca_check_bank_is_de(ras_core, status)) {
+		ecc->de_count = 1;
+	} else {
+		if (bank->ecc_type == RAS_ERR_TYPE__UE)
+			ecc->ue_count = 1;
+		else if (bank->ecc_type == RAS_ERR_TYPE__CE)
+			ecc->ce_count = ACA_REG_MISC0_ERRCNT(misc0);
+	}
+
+	return 0;
+}
+
+static int aca_parse_xgmi_bank(struct ras_core_context *ras_core,
+			       struct aca_block *ras_blk,
+			       void *data, void *buf)
+{
+	struct aca_bank_reg *bank = (struct aca_bank_reg *)data;
+	struct aca_bank_ecc *ecc = (struct aca_bank_ecc *)buf;
+	struct aca_ecc_info bank_info;
+	u64 status, count;
+	int ext_error_code;
+
+	memset(&bank_info, 0, sizeof(bank_info));
+	aca_decode_bank_info(ras_blk, bank, &bank_info);
+	memcpy(&ecc->bank_info, &bank_info, sizeof(bank_info));
+	ecc->bank_info.status = bank->regs[ACA_REG_IDX__STATUS];
+	ecc->bank_info.ipid = bank->regs[ACA_REG_IDX__IPID];
+	ecc->bank_info.addr = bank->regs[ACA_REG_IDX__ADDR];
+
+	status = bank->regs[ACA_REG_IDX__STATUS];
+	ext_error_code = ACA_REG_STATUS_ERRORCODEEXT(status);
+
+	count = ACA_REG_MISC0_ERRCNT(bank->regs[ACA_REG_IDX__MISC0]);
+	if (bank->ecc_type == RAS_ERR_TYPE__UE) {
+		if (ext_error_code != 0 && ext_error_code != 9)
+			count = 0ULL;
+		ecc->ue_count = count;
+	} else if (bank->ecc_type == RAS_ERR_TYPE__CE) {
+		count = ext_error_code == 6 ?
+			count : 0ULL;
+		ecc->ce_count = count;
+	}
+
+	return 0;
+}
+
+static const struct aca_block_info aca_v1_0_umc = {
+	.name = "umc",
+	.ras_block_id = RAS_BLOCK_ID__UMC,
+	.hwip = ACA_ECC_HWIP__UMC,
+	.mask = ACA_ERROR__UE_MASK | ACA_ERROR__CE_MASK | ACA_ERROR__DE_MASK,
+	.bank_ops = {
+		.bank_match = aca_match_bank_default,
+		.bank_parse = aca_parse_umc_bank,
+	},
+};
+
+static const struct aca_block_info aca_v1_0_gfx = {
+	.name = "gfx",
+	.ras_block_id = RAS_BLOCK_ID__GFX,
+	.hwip = ACA_ECC_HWIP__SMU,
+	.mask = ACA_ERROR__UE_MASK | ACA_ERROR__CE_MASK,
+	.bank_ops = {
+		.bank_match = aca_match_gfx_bank,
+		.bank_parse = aca_parse_bank_default,
+	},
+};
+
+static const struct aca_block_info aca_v1_0_sdma = {
+	.name = "sdma",
+	.ras_block_id = RAS_BLOCK_ID__SDMA,
+	.hwip = ACA_ECC_HWIP__SMU,
+	.mask = ACA_ERROR__UE_MASK,
+	.bank_ops = {
+		.bank_match = aca_match_sdma_bank,
+		.bank_parse = aca_parse_bank_default,
+	},
+};
+
+static const struct aca_block_info aca_v1_0_mmhub = {
+	.name = "mmhub",
+	.ras_block_id = RAS_BLOCK_ID__MMHUB,
+	.hwip = ACA_ECC_HWIP__SMU,
+	.mask = ACA_ERROR__UE_MASK,
+	.bank_ops = {
+		.bank_match = aca_match_mmhub_bank,
+		.bank_parse = aca_parse_bank_default,
+	},
+};
+
+static const struct aca_block_info aca_v1_0_xgmi = {
+	.name = "xgmi",
+	.ras_block_id = RAS_BLOCK_ID__XGMI_WAFL,
+	.hwip = ACA_ECC_HWIP__PCS_XGMI,
+	.mask = ACA_ERROR__UE_MASK | ACA_ERROR__CE_MASK,
+	.bank_ops = {
+		.bank_match = aca_match_bank_default,
+		.bank_parse = aca_parse_xgmi_bank,
+	},
+};
+
+static const struct aca_block_info *aca_block_info_v1_0[] = {
+	&aca_v1_0_umc,
+	&aca_v1_0_gfx,
+	&aca_v1_0_sdma,
+	&aca_v1_0_mmhub,
+	&aca_v1_0_xgmi,
+};
+
+const struct ras_aca_ip_func ras_aca_func_v1_0 = {
+	.block_num = ARRAY_SIZE(aca_block_info_v1_0),
+	.block_info = aca_block_info_v1_0,
+};
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.h b/drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.h
new file mode 100644
index 000000000000..40e5d94b037f
--- /dev/null
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RAS_ACA_V1_0_H__
+#define __RAS_ACA_V1_0_H__
+#include "ras.h"
+
+#define ACA__REG__FIELD(x, h, l)		(((x) & GENMASK_ULL(h, l)) >> l)
+#define ACA_REG_STATUS_VAL(x)			ACA__REG__FIELD(x, 63, 63)
+#define ACA_REG_STATUS_OVERFLOW(x)		ACA__REG__FIELD(x, 62, 62)
+#define ACA_REG_STATUS_UC(x)			ACA__REG__FIELD(x, 61, 61)
+#define ACA_REG_STATUS_EN(x)			ACA__REG__FIELD(x, 60, 60)
+#define ACA_REG_STATUS_MISCV(x)			ACA__REG__FIELD(x, 59, 59)
+#define ACA_REG_STATUS_ADDRV(x)			ACA__REG__FIELD(x, 58, 58)
+#define ACA_REG_STATUS_PCC(x)			ACA__REG__FIELD(x, 57, 57)
+#define ACA_REG_STATUS_ERRCOREIDVAL(x)		ACA__REG__FIELD(x, 56, 56)
+#define ACA_REG_STATUS_TCC(x)			ACA__REG__FIELD(x, 55, 55)
+#define ACA_REG_STATUS_SYNDV(x)			ACA__REG__FIELD(x, 53, 53)
+#define ACA_REG_STATUS_CECC(x)			ACA__REG__FIELD(x, 46, 46)
+#define ACA_REG_STATUS_UECC(x)			ACA__REG__FIELD(x, 45, 45)
+#define ACA_REG_STATUS_DEFERRED(x)		ACA__REG__FIELD(x, 44, 44)
+#define ACA_REG_STATUS_POISON(x)		ACA__REG__FIELD(x, 43, 43)
+#define ACA_REG_STATUS_SCRUB(x)			ACA__REG__FIELD(x, 40, 40)
+#define ACA_REG_STATUS_ERRCOREID(x)		ACA__REG__FIELD(x, 37, 32)
+#define ACA_REG_STATUS_ADDRLSB(x)		ACA__REG__FIELD(x, 29, 24)
+#define ACA_REG_STATUS_ERRORCODEEXT(x)		ACA__REG__FIELD(x, 21, 16)
+#define ACA_REG_STATUS_ERRORCODE(x)		ACA__REG__FIELD(x, 15, 0)
+
+#define ACA_REG_IPID_MCATYPE(x)			ACA__REG__FIELD(x, 63, 48)
+#define ACA_REG_IPID_INSTANCEIDHI(x)		ACA__REG__FIELD(x, 47, 44)
+#define ACA_REG_IPID_HARDWAREID(x)		ACA__REG__FIELD(x, 43, 32)
+#define ACA_REG_IPID_INSTANCEIDLO(x)		ACA__REG__FIELD(x, 31, 0)
+
+#define ACA_REG_MISC0_VALID(x)			ACA__REG__FIELD(x, 63, 63)
+#define ACA_REG_MISC0_OVRFLW(x)			ACA__REG__FIELD(x, 48, 48)
+#define ACA_REG_MISC0_ERRCNT(x)			ACA__REG__FIELD(x, 43, 32)
+
+#define ACA_REG_SYND_ERRORINFORMATION(x)	ACA__REG__FIELD(x, 17, 0)
+
+/* NOTE: The following codes refer to the SMU header file */
+#define ACA_EXTERROR_CODE_CE			0x3a
+#define ACA_EXTERROR_CODE_FAULT			0x3b
+
+#define mmSMNAID_XCD0_MCA_SMU			0x36430400	/* SMN AID XCD0 */
+#define mmSMNAID_XCD1_MCA_SMU			0x38430400	/* SMN AID XCD1 */
+#define mmSMNXCD_XCD0_MCA_SMU			0x40430400	/* SMN XCD XCD0 */
+#define mmSMNAID_AID0_MCA_SMU			0x03b30400	/* SMN AID AID0 */
+
+extern const struct ras_aca_ip_func ras_aca_func_v1_0;
+#endif
--
cgit v1.2.3


From 88e379e5b82e09f4dfed181831fb7f2d58fd7dfe Mon Sep 17 00:00:00 2001
From: YiPeng Chai
Date: Mon, 17 Mar 2025 17:15:59 +0800
Subject: drm/amd/ras: Add aca common ras functions

Add aca common ras functions:
1. Aca hw init/fini.
2. Get ecc count of each ras block.
3. Update query ecc count from mp1.
4. Clear ras block ecc count.

V3: Update the calling function.

Signed-off-by: YiPeng Chai
Reviewed-by: Tao Zhou
Reviewed-by: Hawking Zhang
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/ras/rascore/ras_aca.c | 672 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/ras/rascore/ras_aca.h | 164 ++++++++
 2 files changed, 836 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_aca.c
 create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_aca.h

diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_aca.c b/drivers/gpu/drm/amd/ras/rascore/ras_aca.c
new file mode 100644
index 000000000000..f9b8a1fa4f1f
--- /dev/null
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_aca.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "ras.h" +#include "ras_aca.h" +#include "ras_aca_v1_0.h" +#include "ras_mp1_v13_0.h" + +#define ACA_MARK_FATAL_FLAG 0x100 +#define ACA_MARK_UE_READ_FLAG 0x1 + +#define blk_name(block_id) ras_core_get_ras_block_name(block_id) + +static struct aca_regs_dump { + const char *name; + int reg_idx; +} aca_regs[] = { + {"CONTROL", ACA_REG_IDX__CTL}, + {"STATUS", ACA_REG_IDX__STATUS}, + {"ADDR", ACA_REG_IDX__ADDR}, + {"MISC", ACA_REG_IDX__MISC0}, + {"CONFIG", ACA_REG_IDX__CONFG}, + {"IPID", ACA_REG_IDX__IPID}, + {"SYND", ACA_REG_IDX__SYND}, + {"DESTAT", ACA_REG_IDX__DESTAT}, + {"DEADDR", ACA_REG_IDX__DEADDR}, + {"CONTROL_MASK", ACA_REG_IDX__CTL_MASK}, +}; + + +static void aca_report_ecc_info(struct ras_core_context *ras_core, + u64 seq_no, u32 blk, u32 skt, u32 aid, + struct aca_aid_ecc *aid_ecc, + struct aca_bank_ecc *new_ecc) +{ + struct aca_ecc_count ecc_count = {0}; + + ecc_count.new_ue_count = new_ecc->ue_count; + ecc_count.new_de_count = new_ecc->de_count; + ecc_count.new_ce_count = new_ecc->ce_count; + if (blk == RAS_BLOCK_ID__GFX) { + struct aca_ecc_count *xcd_ecc; + int xcd_id; + + for (xcd_id = 0; xcd_id < aid_ecc->xcd.xcd_num; xcd_id++) { + xcd_ecc = &aid_ecc->xcd.xcd[xcd_id].ecc_err; + ecc_count.total_ue_count += xcd_ecc->total_ue_count; + ecc_count.total_de_count += xcd_ecc->total_de_count; + ecc_count.total_ce_count += xcd_ecc->total_ce_count; + } + } else { + ecc_count.total_ue_count = aid_ecc->ecc_err.total_ue_count; + ecc_count.total_de_count = aid_ecc->ecc_err.total_de_count; + ecc_count.total_ce_count = aid_ecc->ecc_err.total_ce_count; + } + + if (ecc_count.new_ue_count) { + RAS_DEV_INFO(ras_core->dev, + "{%llu} socket: %d, die: %d, %u new uncorrectable hardware errors detected in %s block\n", + seq_no, skt, aid, ecc_count.new_ue_count, blk_name(blk)); + RAS_DEV_INFO(ras_core->dev, + "{%llu} socket: %d, die: %d, %u uncorrectable hardware errors detected in total in %s block\n", + seq_no, skt, aid, ecc_count.total_ue_count, blk_name(blk)); + } + + if (ecc_count.new_de_count) { + RAS_DEV_INFO(ras_core->dev, + "{%llu} socket: %d, die: %d, %u new %s detected in %s block\n", + seq_no, skt, aid, ecc_count.new_de_count, + (blk == RAS_BLOCK_ID__UMC) ? 
+ "deferred hardware errors" : "poison consumption", + blk_name(blk)); + RAS_DEV_INFO(ras_core->dev, + "{%llu} socket: %d, die: %d, %u %s detected in total in %s block\n", + seq_no, skt, aid, ecc_count.total_de_count, + (blk == RAS_BLOCK_ID__UMC) ? + "deferred hardware errors" : "poison consumption", + blk_name(blk)); + } + + if (ecc_count.new_ce_count) { + RAS_DEV_INFO(ras_core->dev, + "{%llu} socket: %d, die: %d, %u new correctable hardware errors detected in %s block\n", + seq_no, skt, aid, ecc_count.new_ce_count, blk_name(blk)); + RAS_DEV_INFO(ras_core->dev, + "{%llu} socket: %d, die: %d, %u correctable hardware errors detected in total in %s block\n", + seq_no, skt, aid, ecc_count.total_ce_count, blk_name(blk)); + } +} + +static void aca_bank_log(struct ras_core_context *ras_core, + int idx, int total, struct aca_bank_reg *bank, + struct aca_bank_ecc *bank_ecc) +{ + int i; + + RAS_DEV_INFO(ras_core->dev, + "{%llu}" RAS_HW_ERR "Accelerator Check Architecture events logged\n", + bank->seq_no); + /* plus 1 for output format, e.g: ACA[08/08]: xxxx */ + for (i = 0; i < ARRAY_SIZE(aca_regs); i++) + RAS_DEV_INFO(ras_core->dev, + "{%llu}" RAS_HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n", + bank->seq_no, idx + 1, total, + aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); +} + +static void aca_log_bank_data(struct ras_core_context *ras_core, + struct aca_bank_reg *bank, struct aca_bank_ecc *bank_ecc, + struct ras_log_batch_tag *batch) +{ + if (bank_ecc->ue_count) + ras_log_ring_add_log_event(ras_core, RAS_LOG_EVENT_UE, bank->regs, batch); + else if (bank_ecc->de_count) + ras_log_ring_add_log_event(ras_core, RAS_LOG_EVENT_DE, bank->regs, batch); + else + ras_log_ring_add_log_event(ras_core, RAS_LOG_EVENT_CE, bank->regs, batch); +} + +static int aca_get_bank_count(struct ras_core_context *ras_core, + enum ras_err_type type, u32 *count) +{ + return ras_mp1_get_bank_count(ras_core, type, count); +} + +static bool aca_match_bank(struct aca_block *aca_blk, struct aca_bank_reg *bank) +{ + const struct aca_bank_hw_ops *bank_ops; + + if (!aca_blk->blk_info) + return false; + + bank_ops = &aca_blk->blk_info->bank_ops; + if (!bank_ops->bank_match) + return false; + + return bank_ops->bank_match(aca_blk, bank); +} + +static int aca_parse_bank(struct ras_core_context *ras_core, + struct aca_block *aca_blk, + struct aca_bank_reg *bank, + struct aca_bank_ecc *ecc) +{ + const struct aca_bank_hw_ops *bank_ops = &aca_blk->blk_info->bank_ops; + + if (!bank_ops || !bank_ops->bank_parse) + return -RAS_CORE_NOT_SUPPORTED; + + return bank_ops->bank_parse(ras_core, aca_blk, bank, ecc); +} + +static int aca_check_block_ecc_info(struct ras_core_context *ras_core, + struct aca_block *aca_blk, struct aca_ecc_info *info) +{ + if (info->socket_id >= aca_blk->ecc.socket_num_per_hive) { + RAS_DEV_ERR(ras_core->dev, + "Socket id (%d) is out of config! max:%u\n", + info->socket_id, aca_blk->ecc.socket_num_per_hive); + return -ENODATA; + } + + if (info->die_id >= aca_blk->ecc.socket[info->socket_id].aid_num) { + RAS_DEV_ERR(ras_core->dev, + "Die id (%d) is out of config! max:%u\n", + info->die_id, aca_blk->ecc.socket[info->socket_id].aid_num); + return -ENODATA; + } + + if ((aca_blk->blk_info->ras_block_id == RAS_BLOCK_ID__GFX) && + (info->xcd_id >= + aca_blk->ecc.socket[info->socket_id].aid[info->die_id].xcd.xcd_num)) { + RAS_DEV_ERR(ras_core->dev, + "Xcd id (%d) is out of config! 
max:%u\n", + info->xcd_id, + aca_blk->ecc.socket[info->socket_id].aid[info->die_id].xcd.xcd_num); + return -ENODATA; + } + + return 0; +} + +static int aca_log_bad_bank(struct ras_core_context *ras_core, + struct aca_block *aca_blk, struct aca_bank_reg *bank, + struct aca_bank_ecc *bank_ecc) +{ + struct aca_ecc_info *info; + struct aca_ecc_count *ecc_err; + struct aca_aid_ecc *aid_ecc; + int ret; + + info = &bank_ecc->bank_info; + + ret = aca_check_block_ecc_info(ras_core, aca_blk, info); + if (ret) + return ret; + + mutex_lock(&ras_core->ras_aca.aca_lock); + aid_ecc = &aca_blk->ecc.socket[info->socket_id].aid[info->die_id]; + if (aca_blk->blk_info->ras_block_id == RAS_BLOCK_ID__GFX) + ecc_err = &aid_ecc->xcd.xcd[info->xcd_id].ecc_err; + else + ecc_err = &aid_ecc->ecc_err; + + ecc_err->new_ce_count += bank_ecc->ce_count; + ecc_err->total_ce_count += bank_ecc->ce_count; + ecc_err->new_ue_count += bank_ecc->ue_count; + ecc_err->total_ue_count += bank_ecc->ue_count; + ecc_err->new_de_count += bank_ecc->de_count; + ecc_err->total_de_count += bank_ecc->de_count; + mutex_unlock(&ras_core->ras_aca.aca_lock); + + if ((aca_blk->blk_info->ras_block_id == RAS_BLOCK_ID__UMC) && + bank_ecc->de_count) { + struct ras_bank_ecc ras_ecc = {0}; + + ras_ecc.nps = ras_core_get_curr_nps_mode(ras_core); + ras_ecc.addr = bank_ecc->bank_info.addr; + ras_ecc.ipid = bank_ecc->bank_info.ipid; + ras_ecc.status = bank_ecc->bank_info.status; + ras_ecc.seq_no = bank->seq_no; + + if (ras_core_gpu_in_reset(ras_core)) + ras_umc_log_bad_bank_pending(ras_core, &ras_ecc); + else + ras_umc_log_bad_bank(ras_core, &ras_ecc); + } + + aca_report_ecc_info(ras_core, + bank->seq_no, aca_blk->blk_info->ras_block_id, info->socket_id, info->die_id, + &aca_blk->ecc.socket[info->socket_id].aid[info->die_id], bank_ecc); + + return 0; +} + +static struct aca_block *aca_get_bank_aca_block(struct ras_core_context *ras_core, + struct aca_bank_reg *bank) +{ + int i = 0; + + for (i = 0; i < RAS_BLOCK_ID__LAST; i++) + if (aca_match_bank(&ras_core->ras_aca.aca_blk[i], bank)) + return &ras_core->ras_aca.aca_blk[i]; + + return NULL; +} + +static int aca_dump_bank(struct ras_core_context *ras_core, u32 ecc_type, + int idx, void *data) +{ + struct aca_bank_reg *bank = (struct aca_bank_reg *)data; + int i, ret, reg_cnt; + + reg_cnt = min_t(int, 16, ARRAY_SIZE(bank->regs)); + for (i = 0; i < reg_cnt; i++) { + ret = ras_mp1_dump_bank(ras_core, ecc_type, idx, i, &bank->regs[i]); + if (ret) + return ret; + } + + return 0; +} + +static uint64_t aca_get_bank_seqno(struct ras_core_context *ras_core, + enum ras_err_type err_type, struct aca_block *aca_blk, + struct aca_bank_ecc *bank_ecc) +{ + uint64_t seq_no = 0; + + if (bank_ecc->de_count) { + if (aca_blk->blk_info->ras_block_id == RAS_BLOCK_ID__UMC) + seq_no = ras_core_get_seqno(ras_core, RAS_SEQNO_TYPE_DE, true); + else + seq_no = ras_core_get_seqno(ras_core, + RAS_SEQNO_TYPE_POISON_CONSUMPTION, true); + } else if (bank_ecc->ue_count) { + seq_no = ras_core_get_seqno(ras_core, RAS_SEQNO_TYPE_UE, true); + } else { + seq_no = ras_core_get_seqno(ras_core, RAS_SEQNO_TYPE_CE, true); + } + + return seq_no; +} + +static bool aca_dup_update_ue_in_fatal(struct ras_core_context *ras_core, + u32 ecc_type) +{ + struct ras_aca *aca = &ras_core->ras_aca; + + if (ecc_type != RAS_ERR_TYPE__UE) + return false; + + if (aca->ue_updated_mark & ACA_MARK_FATAL_FLAG) { + if (aca->ue_updated_mark & ACA_MARK_UE_READ_FLAG) + return true; + + aca->ue_updated_mark |= ACA_MARK_UE_READ_FLAG; + } + + return false; +} + +void 
ras_aca_mark_fatal_flag(struct ras_core_context *ras_core) +{ + struct ras_aca *aca = &ras_core->ras_aca; + + if (!aca) + return; + + aca->ue_updated_mark |= ACA_MARK_FATAL_FLAG; +} + +void ras_aca_clear_fatal_flag(struct ras_core_context *ras_core) +{ + struct ras_aca *aca = &ras_core->ras_aca; + + if (!aca) + return; + + if ((aca->ue_updated_mark & ACA_MARK_FATAL_FLAG) && + (aca->ue_updated_mark & ACA_MARK_UE_READ_FLAG)) + aca->ue_updated_mark = 0; +} + +static int aca_banks_update(struct ras_core_context *ras_core, + u32 ecc_type, void *data) +{ + struct aca_bank_reg bank; + struct aca_block *aca_blk; + struct aca_bank_ecc bank_ecc; + struct ras_log_batch_tag *batch_tag = NULL; + u32 count = 0; + int ret; + int i; + + mutex_lock(&ras_core->ras_aca.bank_op_lock); + + if (aca_dup_update_ue_in_fatal(ras_core, ecc_type)) + goto out; + + ret = aca_get_bank_count(ras_core, ecc_type, &count); + if (ret) + goto out; + + if (!count) + goto out; + + batch_tag = ras_log_ring_create_batch_tag(ras_core); + for (i = 0; i < count; i++) { + memset(&bank, 0, sizeof(bank)); + ret = aca_dump_bank(ras_core, ecc_type, i, &bank); + if (ret) + break; + + bank.ecc_type = ecc_type; + + memset(&bank_ecc, 0, sizeof(bank_ecc)); + aca_blk = aca_get_bank_aca_block(ras_core, &bank); + if (aca_blk) + ret = aca_parse_bank(ras_core, aca_blk, &bank, &bank_ecc); + + bank.seq_no = aca_get_bank_seqno(ras_core, ecc_type, aca_blk, &bank_ecc); + + aca_log_bank_data(ras_core, &bank, &bank_ecc, batch_tag); + aca_bank_log(ras_core, i, count, &bank, &bank_ecc); + + if (!ret && aca_blk) + ret = aca_log_bad_bank(ras_core, aca_blk, &bank, &bank_ecc); + + if (ret) + break; + } + ras_log_ring_destroy_batch_tag(ras_core, batch_tag); + +out: + mutex_unlock(&ras_core->ras_aca.bank_op_lock); + return ret; +} + +int ras_aca_update_ecc(struct ras_core_context *ras_core, u32 type, void *data) +{ + /* Update aca bank to aca source error_cache first */ + return aca_banks_update(ras_core, type, data); +} + +static struct aca_block *ras_aca_get_block_handle(struct ras_core_context *ras_core, uint32_t blk) +{ + return &ras_core->ras_aca.aca_blk[blk]; +} + +static int ras_aca_clear_block_ecc_count(struct ras_core_context *ras_core, u32 blk) +{ + struct aca_block *aca_blk; + struct aca_aid_ecc *aid_ecc; + int skt, aid, xcd; + + mutex_lock(&ras_core->ras_aca.aca_lock); + aca_blk = ras_aca_get_block_handle(ras_core, blk); + for (skt = 0; skt < aca_blk->ecc.socket_num_per_hive; skt++) { + for (aid = 0; aid < aca_blk->ecc.socket[skt].aid_num; aid++) { + aid_ecc = &aca_blk->ecc.socket[skt].aid[aid]; + if (blk == RAS_BLOCK_ID__GFX) { + for (xcd = 0; xcd < aid_ecc->xcd.xcd_num; xcd++) + memset(&aid_ecc->xcd.xcd[xcd], + 0, sizeof(struct aca_xcd_ecc)); + } else { + memset(&aid_ecc->ecc_err, 0, sizeof(aid_ecc->ecc_err)); + } + } + } + mutex_unlock(&ras_core->ras_aca.aca_lock); + + return 0; +} + +int ras_aca_clear_all_blocks_ecc_count(struct ras_core_context *ras_core) +{ + enum ras_block_id blk; + int ret; + + for (blk = RAS_BLOCK_ID__UMC; blk < RAS_BLOCK_ID__LAST; blk++) { + ret = ras_aca_clear_block_ecc_count(ras_core, blk); + if (ret) + break; + } + + return ret; +} + +int ras_aca_clear_block_new_ecc_count(struct ras_core_context *ras_core, u32 blk) +{ + struct aca_block *aca_blk; + int skt, aid, xcd; + struct aca_ecc_count *ecc_err; + struct aca_aid_ecc *aid_ecc; + + mutex_lock(&ras_core->ras_aca.aca_lock); + aca_blk = ras_aca_get_block_handle(ras_core, blk); + for (skt = 0; skt < aca_blk->ecc.socket_num_per_hive; skt++) { + for (aid = 0; aid < 
aca_blk->ecc.socket[skt].aid_num; aid++) { + aid_ecc = &aca_blk->ecc.socket[skt].aid[aid]; + if (blk == RAS_BLOCK_ID__GFX) { + for (xcd = 0; xcd < aid_ecc->xcd.xcd_num; xcd++) { + ecc_err = &aid_ecc->xcd.xcd[xcd].ecc_err; + ecc_err->new_ce_count = 0; + ecc_err->new_ue_count = 0; + ecc_err->new_de_count = 0; + } + } else { + ecc_err = &aid_ecc->ecc_err; + ecc_err->new_ce_count = 0; + ecc_err->new_ue_count = 0; + ecc_err->new_de_count = 0; + } + } + } + mutex_unlock(&ras_core->ras_aca.aca_lock); + + return 0; +} + +static int ras_aca_get_block_each_aid_ecc_count(struct ras_core_context *ras_core, + u32 blk, u32 skt, u32 aid, u32 xcd, + struct aca_ecc_count *ecc_count) +{ + struct aca_block *aca_blk; + struct aca_ecc_count *ecc_err; + + aca_blk = ras_aca_get_block_handle(ras_core, blk); + if (blk == RAS_BLOCK_ID__GFX) + ecc_err = &aca_blk->ecc.socket[skt].aid[aid].xcd.xcd[xcd].ecc_err; + else + ecc_err = &aca_blk->ecc.socket[skt].aid[aid].ecc_err; + + ecc_count->new_ce_count = ecc_err->new_ce_count; + ecc_count->total_ce_count = ecc_err->total_ce_count; + ecc_count->new_ue_count = ecc_err->new_ue_count; + ecc_count->total_ue_count = ecc_err->total_ue_count; + ecc_count->new_de_count = ecc_err->new_de_count; + ecc_count->total_de_count = ecc_err->total_de_count; + + return 0; +} + +static inline void _add_ecc_count(struct aca_ecc_count *des, struct aca_ecc_count *src) +{ + des->new_ce_count += src->new_ce_count; + des->total_ce_count += src->total_ce_count; + des->new_ue_count += src->new_ue_count; + des->total_ue_count += src->total_ue_count; + des->new_de_count += src->new_de_count; + des->total_de_count += src->total_de_count; +} + +static const struct ras_aca_ip_func *aca_get_ip_func( + struct ras_core_context *ras_core, uint32_t ip_version) +{ + switch (ip_version) { + case IP_VERSION(1, 0, 0): + return &ras_aca_func_v1_0; + default: + RAS_DEV_ERR(ras_core->dev, + "ACA ip version(0x%x) is not supported!\n", ip_version); + break; + } + + return NULL; +} + +int ras_aca_get_block_ecc_count(struct ras_core_context *ras_core, + u32 blk, void *data) +{ + struct ras_ecc_count *err_data = (struct ras_ecc_count *)data; + struct aca_block *aca_blk; + int skt, aid, xcd; + struct aca_ecc_count ecc_xcd; + struct aca_ecc_count ecc_aid; + struct aca_ecc_count ecc; + + if (blk >= RAS_BLOCK_ID__LAST) + return -EINVAL; + + if (!err_data) + return -EINVAL; + + aca_blk = ras_aca_get_block_handle(ras_core, blk); + memset(&ecc, 0, sizeof(ecc)); + + mutex_lock(&ras_core->ras_aca.aca_lock); + if (blk == RAS_BLOCK_ID__GFX) { + for (skt = 0; skt < aca_blk->ecc.socket_num_per_hive; skt++) { + for (aid = 0; aid < aca_blk->ecc.socket[skt].aid_num; aid++) { + memset(&ecc_aid, 0, sizeof(ecc_aid)); + for (xcd = 0; + xcd < aca_blk->ecc.socket[skt].aid[aid].xcd.xcd_num; + xcd++) { + memset(&ecc_xcd, 0, sizeof(ecc_xcd)); + if (ras_aca_get_block_each_aid_ecc_count(ras_core, + blk, skt, aid, xcd, &ecc_xcd)) + continue; + _add_ecc_count(&ecc_aid, &ecc_xcd); + } + _add_ecc_count(&ecc, &ecc_aid); + } + } + } else { + for (skt = 0; skt < aca_blk->ecc.socket_num_per_hive; skt++) { + for (aid = 0; aid < aca_blk->ecc.socket[skt].aid_num; aid++) { + memset(&ecc_aid, 0, sizeof(ecc_aid)); + if (ras_aca_get_block_each_aid_ecc_count(ras_core, + blk, skt, aid, 0, &ecc_aid)) + continue; + _add_ecc_count(&ecc, &ecc_aid); + } + } + } + + err_data->new_ce_count = ecc.new_ce_count; + err_data->total_ce_count = ecc.total_ce_count; + err_data->new_ue_count = ecc.new_ue_count; + err_data->total_ue_count = ecc.total_ue_count; + 
err_data->new_de_count = ecc.new_de_count; + err_data->total_de_count = ecc.total_de_count; + mutex_unlock(&ras_core->ras_aca.aca_lock); + + return 0; +} + +int ras_aca_sw_init(struct ras_core_context *ras_core) +{ + struct ras_aca *ras_aca = &ras_core->ras_aca; + struct ras_aca_config *aca_cfg = &ras_core->config->aca_cfg; + struct aca_block *aca_blk; + uint32_t socket_num_per_hive; + uint32_t aid_num_per_socket; + uint32_t xcd_num_per_aid; + int blk, skt, aid; + + socket_num_per_hive = aca_cfg->socket_num_per_hive; + aid_num_per_socket = aca_cfg->aid_num_per_socket; + xcd_num_per_aid = aca_cfg->xcd_num_per_aid; + + if (!xcd_num_per_aid || !aid_num_per_socket || + (socket_num_per_hive > MAX_SOCKET_NUM_PER_HIVE) || + (aid_num_per_socket > MAX_AID_NUM_PER_SOCKET) || + (xcd_num_per_aid > MAX_XCD_NUM_PER_AID)) { + RAS_DEV_ERR(ras_core->dev, "Invalid ACA system configuration: %d, %d, %d\n", + socket_num_per_hive, aid_num_per_socket, xcd_num_per_aid); + return -EINVAL; + } + + memset(ras_aca, 0, sizeof(*ras_aca)); + + for (blk = 0; blk < RAS_BLOCK_ID__LAST; blk++) { + aca_blk = &ras_aca->aca_blk[blk]; + aca_blk->ecc.socket_num_per_hive = socket_num_per_hive; + for (skt = 0; skt < aca_blk->ecc.socket_num_per_hive; skt++) { + aca_blk->ecc.socket[skt].aid_num = aid_num_per_socket; + if (blk == RAS_BLOCK_ID__GFX) { + for (aid = 0; aid < aca_blk->ecc.socket[skt].aid_num; aid++) + aca_blk->ecc.socket[skt].aid[aid].xcd.xcd_num = + xcd_num_per_aid; + } + } + } + + mutex_init(&ras_aca->aca_lock); + mutex_init(&ras_aca->bank_op_lock); + + return 0; +} + +int ras_aca_sw_fini(struct ras_core_context *ras_core) +{ + struct ras_aca *ras_aca = &ras_core->ras_aca; + + mutex_destroy(&ras_aca->aca_lock); + mutex_destroy(&ras_aca->bank_op_lock); + + return 0; +} + +int ras_aca_hw_init(struct ras_core_context *ras_core) +{ + struct ras_aca *ras_aca = &ras_core->ras_aca; + struct aca_block *aca_blk; + const struct ras_aca_ip_func *ip_func; + int i; + + ras_aca->aca_ip_version = ras_core->config->aca_ip_version; + ip_func = aca_get_ip_func(ras_core, ras_aca->aca_ip_version); + if (!ip_func) + return -EINVAL; + + for (i = 0; i < ip_func->block_num; i++) { + aca_blk = &ras_aca->aca_blk[ip_func->block_info[i]->ras_block_id]; + aca_blk->blk_info = ip_func->block_info[i]; + } + + ras_aca->ue_updated_mark = 0; + + return 0; +} + +int ras_aca_hw_fini(struct ras_core_context *ras_core) +{ + struct ras_aca *ras_aca = &ras_core->ras_aca; + + ras_aca->ue_updated_mark = 0; + + return 0; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_aca.h b/drivers/gpu/drm/amd/ras/rascore/ras_aca.h new file mode 100644 index 000000000000..f61b02a5f0fc --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_aca.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RAS_ACA_H__ +#define __RAS_ACA_H__ +#include "ras.h" + +#define MAX_SOCKET_NUM_PER_HIVE 8 +#define MAX_AID_NUM_PER_SOCKET 4 +#define MAX_XCD_NUM_PER_AID 2 +#define MAX_ACA_RAS_BLOCK 20 + +#define ACA_ERROR__UE_MASK (0x1 << RAS_ERR_TYPE__UE) +#define ACA_ERROR__CE_MASK (0x1 << RAS_ERR_TYPE__CE) +#define ACA_ERROR__DE_MASK (0x1 << RAS_ERR_TYPE__DE) + +enum ras_aca_reg_idx { + ACA_REG_IDX__CTL = 0, + ACA_REG_IDX__STATUS = 1, + ACA_REG_IDX__ADDR = 2, + ACA_REG_IDX__MISC0 = 3, + ACA_REG_IDX__CONFG = 4, + ACA_REG_IDX__IPID = 5, + ACA_REG_IDX__SYND = 6, + ACA_REG_IDX__DESTAT = 8, + ACA_REG_IDX__DEADDR = 9, + ACA_REG_IDX__CTL_MASK = 10, + ACA_REG_MAX_COUNT = 16, +}; + +struct ras_core_context; +struct aca_block; + +struct aca_bank_reg { + u32 ecc_type; + u64 seq_no; + u64 regs[ACA_REG_MAX_COUNT]; +}; + +enum aca_ecc_hwip { + ACA_ECC_HWIP__UNKNOWN = -1, + ACA_ECC_HWIP__PSP = 0, + ACA_ECC_HWIP__UMC, + ACA_ECC_HWIP__SMU, + ACA_ECC_HWIP__PCS_XGMI, + ACA_ECC_HWIP_COUNT, +}; + +struct aca_ecc_info { + int die_id; + int socket_id; + int xcd_id; + int hwid; + int mcatype; + uint64_t status; + uint64_t ipid; + uint64_t addr; +}; + +struct aca_bank_ecc { + struct aca_ecc_info bank_info; + u32 ce_count; + u32 ue_count; + u32 de_count; +}; + +struct aca_ecc_count { + u32 new_ce_count; + u32 total_ce_count; + u32 new_ue_count; + u32 total_ue_count; + u32 new_de_count; + u32 total_de_count; +}; + +struct aca_xcd_ecc { + struct aca_ecc_count ecc_err; +}; + +struct aca_aid_ecc { + union { + struct aca_xcd { + struct aca_xcd_ecc xcd[MAX_XCD_NUM_PER_AID]; + u32 xcd_num; + } xcd; + struct aca_ecc_count ecc_err; + }; +}; + +struct aca_socket_ecc { + struct aca_aid_ecc aid[MAX_AID_NUM_PER_SOCKET]; + u32 aid_num; +}; + +struct aca_block_ecc { + struct aca_socket_ecc socket[MAX_SOCKET_NUM_PER_HIVE]; + u32 socket_num_per_hive; +}; + +struct aca_bank_hw_ops { + bool (*bank_match)(struct aca_block *ras_blk, void *data); + int (*bank_parse)(struct ras_core_context *ras_core, + struct aca_block *aca_blk, void *data, void *buf); +}; + +struct aca_block_info { + char name[32]; + u32 ras_block_id; + enum aca_ecc_hwip hwip; + struct aca_bank_hw_ops bank_ops; + u32 mask; +}; + +struct aca_block { + const struct aca_block_info *blk_info; + struct aca_block_ecc ecc; +}; + +struct ras_aca_ip_func { + uint32_t block_num; + const struct aca_block_info **block_info; +}; + +struct ras_aca { + uint32_t aca_ip_version; + const struct ras_aca_ip_func *ip_func; + struct mutex aca_lock; + struct mutex bank_op_lock; + struct aca_block aca_blk[MAX_ACA_RAS_BLOCK]; + uint32_t ue_updated_mark; +}; + +int ras_aca_sw_init(struct ras_core_context *ras_core); +int ras_aca_sw_fini(struct ras_core_context *ras_core); +int ras_aca_hw_init(struct ras_core_context *ras_core); +int ras_aca_hw_fini(struct ras_core_context *ras_core); +int ras_aca_get_block_ecc_count(struct ras_core_context *ras_core, u32 blk, void *data); +int ras_aca_clear_block_new_ecc_count(struct ras_core_context *ras_core, u32 blk); +int 
ras_aca_clear_all_blocks_ecc_count(struct ras_core_context *ras_core); +int ras_aca_update_ecc(struct ras_core_context *ras_core, u32 ecc_type, void *data); +void ras_aca_mark_fatal_flag(struct ras_core_context *ras_core); +void ras_aca_clear_fatal_flag(struct ras_core_context *ras_core); +#endif -- cgit v1.2.3 From 71abe27a9a592632866fd78e33af7f8b211034aa Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:21:41 +0800 Subject: drm/amd/ras: Add mp1 v13_0 ras functions Add mp1 v13_0 ras functions. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.c | 105 ++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.h | 30 +++++++ 2 files changed, 135 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.c b/drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.c new file mode 100644 index 000000000000..310d39fc816b --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "ras.h" +#include "ras_mp1.h" +#include "ras_core_status.h" +#include "ras_mp1_v13_0.h" + +#define RAS_MP1_MSG_QueryValidMcaCount 0x36 +#define RAS_MP1_MSG_McaBankDumpDW 0x37 +#define RAS_MP1_MSG_ClearMcaOnRead 0x39 +#define RAS_MP1_MSG_QueryValidMcaCeCount 0x3A +#define RAS_MP1_MSG_McaBankCeDumpDW 0x3B + +#define MAX_UE_BANKS_PER_QUERY 12 +#define MAX_CE_BANKS_PER_QUERY 12 + +static int mp1_v13_0_get_bank_count(struct ras_core_context *ras_core, + enum ras_err_type type, u32 *count) +{ + struct ras_mp1 *mp1 = &ras_core->ras_mp1; + const struct ras_mp1_sys_func *sys_func = mp1->sys_func; + uint32_t bank_count = 0; + u32 msg; + int ret; + + if (!count) + return -EINVAL; + + if (!sys_func || !sys_func->mp1_get_valid_bank_count) + return -RAS_CORE_NOT_SUPPORTED; + + switch (type) { + case RAS_ERR_TYPE__UE: + msg = RAS_MP1_MSG_QueryValidMcaCount; + break; + case RAS_ERR_TYPE__CE: + case RAS_ERR_TYPE__DE: + msg = RAS_MP1_MSG_QueryValidMcaCeCount; + break; + default: + return -EINVAL; + } + + ret = sys_func->mp1_get_valid_bank_count(ras_core, msg, &bank_count); + if (!ret) { + if (((type == RAS_ERR_TYPE__UE) && (bank_count >= MAX_UE_BANKS_PER_QUERY)) || + ((type == RAS_ERR_TYPE__CE) && (bank_count >= MAX_CE_BANKS_PER_QUERY))) + return -EINVAL; + + *count = bank_count; + } + + return ret; +} + +static int mp1_v13_0_dump_bank(struct ras_core_context *ras_core, + enum ras_err_type type, u32 idx, u32 reg_idx, u64 *val) +{ + struct ras_mp1 *mp1 = &ras_core->ras_mp1; + const struct ras_mp1_sys_func *sys_func = mp1->sys_func; + u32 msg; + + if (!sys_func || !sys_func->mp1_dump_valid_bank) + return -RAS_CORE_NOT_SUPPORTED; + + switch (type) { + case RAS_ERR_TYPE__UE: + msg = RAS_MP1_MSG_McaBankDumpDW; + break; + case RAS_ERR_TYPE__CE: + case RAS_ERR_TYPE__DE: + msg = RAS_MP1_MSG_McaBankCeDumpDW; + break; + default: + return -EINVAL; + } + + return sys_func->mp1_dump_valid_bank(ras_core, msg, idx, reg_idx, val); +} + +const struct ras_mp1_ip_func mp1_ras_func_v13_0 = { + .get_valid_bank_count = mp1_v13_0_get_bank_count, + .dump_valid_bank = mp1_v13_0_dump_bank, +}; diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.h b/drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.h new file mode 100644 index 000000000000..2edfdb5f6a75 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __RAS_MP1_V13_0_H__ +#define __RAS_MP1_V13_0_H__ +#include "ras_mp1.h" + +extern const struct ras_mp1_ip_func mp1_ras_func_v13_0; + +#endif -- cgit v1.2.3 From adf0e0e0892348db63b015937b645326673f9e26 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:20:28 +0800 Subject: drm/amd/ras: Add mp1 common ras functions Add mp1 common ras functions. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_mp1.c | 81 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_mp1.h | 50 +++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_mp1.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_mp1.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_mp1.c b/drivers/gpu/drm/amd/ras/rascore/ras_mp1.c new file mode 100644 index 000000000000..92f250e2466d --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_mp1.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "ras.h" +#include "ras_mp1.h" +#include "ras_mp1_v13_0.h" + +static const struct ras_mp1_ip_func *ras_mp1_get_ip_funcs( + struct ras_core_context *ras_core, uint32_t ip_version) +{ + switch (ip_version) { + case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 14): + case IP_VERSION(13, 0, 12): + return &mp1_ras_func_v13_0; + default: + RAS_DEV_ERR(ras_core->dev, + "MP1 ip version(0x%x) is not supported!\n", ip_version); + break; + } + + return NULL; +} + +int ras_mp1_get_bank_count(struct ras_core_context *ras_core, + enum ras_err_type type, u32 *count) +{ + struct ras_mp1 *mp1 = &ras_core->ras_mp1; + + return mp1->ip_func->get_valid_bank_count(ras_core, type, count); +} + +int ras_mp1_dump_bank(struct ras_core_context *ras_core, + enum ras_err_type type, u32 idx, u32 reg_idx, u64 *val) +{ + struct ras_mp1 *mp1 = &ras_core->ras_mp1; + + return mp1->ip_func->dump_valid_bank(ras_core, type, idx, reg_idx, val); +} + +int ras_mp1_hw_init(struct ras_core_context *ras_core) +{ + struct ras_mp1 *mp1 = &ras_core->ras_mp1; + + mp1->mp1_ip_version = ras_core->config->mp1_ip_version; + mp1->sys_func = ras_core->config->mp1_cfg.mp1_sys_fn; + if (!mp1->sys_func) { + RAS_DEV_ERR(ras_core->dev, "RAS mp1 sys function not configured!\n"); + return -EINVAL; + } + + mp1->ip_func = ras_mp1_get_ip_funcs(ras_core, mp1->mp1_ip_version); + + return mp1->ip_func ? RAS_CORE_OK : -EINVAL; +} + +int ras_mp1_hw_fini(struct ras_core_context *ras_core) +{ + return 0; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_mp1.h b/drivers/gpu/drm/amd/ras/rascore/ras_mp1.h new file mode 100644 index 000000000000..de1d08286f41 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_mp1.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __RAS_MP1_H__ +#define __RAS_MP1_H__ +#include "ras.h" + +enum ras_err_type; +struct ras_mp1_ip_func { + int (*get_valid_bank_count)(struct ras_core_context *ras_core, + enum ras_err_type type, u32 *count); + int (*dump_valid_bank)(struct ras_core_context *ras_core, + enum ras_err_type type, u32 idx, u32 reg_idx, u64 *val); +}; + +struct ras_mp1 { + uint32_t mp1_ip_version; + const struct ras_mp1_ip_func *ip_func; + const struct ras_mp1_sys_func *sys_func; +}; + +int ras_mp1_hw_init(struct ras_core_context *ras_core); +int ras_mp1_hw_fini(struct ras_core_context *ras_core); + +int ras_mp1_get_bank_count(struct ras_core_context *ras_core, + enum ras_err_type type, u32 *count); + +int ras_mp1_dump_bank(struct ras_core_context *ras_core, + u32 ecc_type, u32 idx, u32 reg_idx, u64 *val); +#endif -- cgit v1.2.3 From fa4fe20f4502290ce4aa618f396eb918b2b1e041 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:25:59 +0800 Subject: drm/amd/ras: Add nbio v7_9 ras functions Add nbio v7_9 ras functions. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.c | 123 ++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.h | 31 ++++++ 2 files changed, 154 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.c b/drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.c new file mode 100644 index 000000000000..f17d708ec668 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "ras.h" +#include "ras_nbio_v7_9.h" + +#define BIF_BX0_BIF_DOORBELL_INT_CNTL__RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR__SHIFT 0x12 +#define BIF_BX0_BIF_DOORBELL_INT_CNTL__RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR_MASK 0x00040000L +#define BIF_BX0_BIF_DOORBELL_INT_CNTL__RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS__SHIFT 0x2 +#define BIF_BX0_BIF_DOORBELL_INT_CNTL__RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS_MASK 0x00000004L +#define BIF_BX0_BIF_DOORBELL_INT_CNTL__RAS_CNTLR_INTERRUPT_CLEAR__SHIFT 0x11 +#define BIF_BX0_BIF_DOORBELL_INT_CNTL__RAS_CNTLR_INTERRUPT_CLEAR_MASK 0x00020000L +#define BIF_BX0_BIF_DOORBELL_INT_CNTL__RAS_CNTLR_INTERRUPT_STATUS__SHIFT 0x1 +#define BIF_BX0_BIF_DOORBELL_INT_CNTL__RAS_CNTLR_INTERRUPT_STATUS_MASK 0x00000002L + +#define regBIF_BX0_BIF_DOORBELL_INT_CNTL_BASE_IDX 2 +#define regBIF_BX0_BIF_DOORBELL_INT_CNTL 0x00fe + +#define regBIF_BX0_BIF_INTR_CNTL 0x0101 +#define regBIF_BX0_BIF_INTR_CNTL_BASE_IDX 2 + +/* BIF_BX0_BIF_INTR_CNTL */ +#define BIF_BX0_BIF_INTR_CNTL__RAS_INTR_VEC_SEL__SHIFT 0x0 +#define BIF_BX0_BIF_INTR_CNTL__RAS_INTR_VEC_SEL_MASK 0x00000001L + +#define regBIF_BX_PF0_PARTITION_MEM_STATUS 0x0164 +#define regBIF_BX_PF0_PARTITION_MEM_STATUS_BASE_IDX 2 +/* BIF_BX_PF0_PARTITION_MEM_STATUS */ +#define BIF_BX_PF0_PARTITION_MEM_STATUS__CHANGE_STATUE__SHIFT 0x0 +#define BIF_BX_PF0_PARTITION_MEM_STATUS__NPS_MODE__SHIFT 0x4 +#define BIF_BX_PF0_PARTITION_MEM_STATUS__CHANGE_STATUE_MASK 0x0000000FL +#define BIF_BX_PF0_PARTITION_MEM_STATUS__NPS_MODE_MASK 0x00000FF0L + + +static int nbio_v7_9_handle_ras_controller_intr_no_bifring(struct ras_core_context *ras_core) +{ + uint32_t bif_doorbell_intr_cntl = 0; + + bif_doorbell_intr_cntl = + RAS_DEV_RREG32_SOC15(ras_core->dev, NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL); + + if (REG_GET_FIELD(bif_doorbell_intr_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_CNTLR_INTERRUPT_CLEAR, 1); + + RAS_DEV_WREG32_SOC15(ras_core->dev, + NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + + /* TODO: handle ras controller interrupt */ + } + + return 0; +} + +static int nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring(struct ras_core_context *ras_core) +{ + uint32_t bif_doorbell_intr_cntl = 0; + int ret = 0; + + bif_doorbell_intr_cntl = + RAS_DEV_RREG32_SOC15(ras_core->dev, NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL); + + if (REG_GET_FIELD(bif_doorbell_intr_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); + + RAS_DEV_WREG32_SOC15(ras_core->dev, + NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + + ret = ras_core_handle_fatal_error(ras_core); + } + + return ret; +} + +static uint32_t nbio_v7_9_get_memory_partition_mode(struct ras_core_context *ras_core) +{ + uint32_t mem_status; + uint32_t mem_mode; + + mem_status = + RAS_DEV_RREG32_SOC15(ras_core->dev, NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS); + + /* Each bit represents a mode 1-8*/ + mem_mode = REG_GET_FIELD(mem_status, BIF_BX_PF0_PARTITION_MEM_STATUS, NPS_MODE); + + return ffs(mem_mode); +} + +const struct ras_nbio_ip_func ras_nbio_v7_9 = { + .handle_ras_controller_intr_no_bifring = + 
nbio_v7_9_handle_ras_controller_intr_no_bifring, + .handle_ras_err_event_athub_intr_no_bifring = + nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring, + .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode, +}; diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.h b/drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.h new file mode 100644 index 000000000000..8711c82a927f --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RAS_NBIO_V7_9_H__ +#define __RAS_NBIO_V7_9_H__ +#include "ras_nbio.h" + +extern const struct ras_nbio_ip_func ras_nbio_v7_9; + +#endif -- cgit v1.2.3 From df2d8574c5a1c9a20ec3523e0b88556b778b6205 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:25:04 +0800 Subject: drm/amd/ras: Add nbio common ras functions Add nbio common ras functions. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_nbio.c | 95 ++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_nbio.h | 46 +++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_nbio.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_nbio.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_nbio.c b/drivers/gpu/drm/amd/ras/rascore/ras_nbio.c new file mode 100644 index 000000000000..8bf1f35d595e --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_nbio.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "ras.h" +#include "ras_nbio.h" +#include "ras_nbio_v7_9.h" + +static const struct ras_nbio_ip_func *ras_nbio_get_ip_funcs( + struct ras_core_context *ras_core, uint32_t ip_version) +{ + switch (ip_version) { + case IP_VERSION(7, 9, 0): + return &ras_nbio_v7_9; + default: + RAS_DEV_ERR(ras_core->dev, + "NBIO ip version(0x%x) is not supported!\n", ip_version); + break; + } + + return NULL; +} + +int ras_nbio_hw_init(struct ras_core_context *ras_core) +{ + struct ras_nbio *nbio = &ras_core->ras_nbio; + + nbio->nbio_ip_version = ras_core->config->nbio_ip_version; + nbio->sys_func = ras_core->config->nbio_cfg.nbio_sys_fn; + if (!nbio->sys_func) { + RAS_DEV_ERR(ras_core->dev, "RAS nbio sys function not configured!\n"); + return -EINVAL; + } + + nbio->ip_func = ras_nbio_get_ip_funcs(ras_core, nbio->nbio_ip_version); + if (!nbio->ip_func) + return -EINVAL; + + if (nbio->sys_func) { + if (nbio->sys_func->set_ras_controller_irq_state) + nbio->sys_func->set_ras_controller_irq_state(ras_core, true); + if (nbio->sys_func->set_ras_err_event_athub_irq_state) + nbio->sys_func->set_ras_err_event_athub_irq_state(ras_core, true); + } + + return 0; +} + +int ras_nbio_hw_fini(struct ras_core_context *ras_core) +{ + struct ras_nbio *nbio = &ras_core->ras_nbio; + + if (nbio->sys_func) { + if (nbio->sys_func->set_ras_controller_irq_state) + nbio->sys_func->set_ras_controller_irq_state(ras_core, false); + if (nbio->sys_func->set_ras_err_event_athub_irq_state) + nbio->sys_func->set_ras_err_event_athub_irq_state(ras_core, false); + } + + return 0; +} + +bool ras_nbio_handle_irq_error(struct ras_core_context *ras_core, void *data) +{ + struct ras_nbio *nbio = &ras_core->ras_nbio; + + if (nbio->ip_func) { + if (nbio->ip_func->handle_ras_controller_intr_no_bifring) + nbio->ip_func->handle_ras_controller_intr_no_bifring(ras_core); + if (nbio->ip_func->handle_ras_err_event_athub_intr_no_bifring) + nbio->ip_func->handle_ras_err_event_athub_intr_no_bifring(ras_core); + } + + return true; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_nbio.h b/drivers/gpu/drm/amd/ras/rascore/ras_nbio.h new file mode 100644 index 000000000000..0a1313e59a02 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_nbio.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RAS_NBIO_H__ +#define __RAS_NBIO_H__ +#include "ras.h" + +struct ras_core_context; + +struct ras_nbio_ip_func { + int (*handle_ras_controller_intr_no_bifring)(struct ras_core_context *ras_core); + int (*handle_ras_err_event_athub_intr_no_bifring)(struct ras_core_context *ras_core); + uint32_t (*get_memory_partition_mode)(struct ras_core_context *ras_core); +}; + +struct ras_nbio { + uint32_t nbio_ip_version; + const struct ras_nbio_ip_func *ip_func; + const struct ras_nbio_sys_func *sys_func; +}; + +int ras_nbio_hw_init(struct ras_core_context *ras_core); +int ras_nbio_hw_fini(struct ras_core_context *ras_core); +bool ras_nbio_handle_irq_error(struct ras_core_context *ras_core, void *data); +#endif -- cgit v1.2.3 From 8bd7fe95a49901fed07c172cb1344bd2643840fd Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:27:44 +0800 Subject: drm/amd/ras: Add umc v12_0 ras functions Add umc v12_0 ras functions. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c | 511 ++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.h | 314 +++++++++++++++ 2 files changed, 825 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c new file mode 100644 index 000000000000..5d9a11c17a86 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c @@ -0,0 +1,511 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "ras.h" +#include "ras_umc.h" +#include "ras_core_status.h" +#include "ras_umc_v12_0.h" + +#define NumDieInterleaved 4 + +static const uint32_t umc_v12_0_channel_idx_tbl[] + [UMC_V12_0_UMC_INSTANCE_NUM][UMC_V12_0_CHANNEL_INSTANCE_NUM] = { + {{3, 7, 11, 15, 2, 6, 10, 14}, {1, 5, 9, 13, 0, 4, 8, 12}, + {19, 23, 27, 31, 18, 22, 26, 30}, {17, 21, 25, 29, 16, 20, 24, 28}}, + {{47, 43, 39, 35, 46, 42, 38, 34}, {45, 41, 37, 33, 44, 40, 36, 32}, + {63, 59, 55, 51, 62, 58, 54, 50}, {61, 57, 53, 49, 60, 56, 52, 48}}, + {{79, 75, 71, 67, 78, 74, 70, 66}, {77, 73, 69, 65, 76, 72, 68, 64}, + {95, 91, 87, 83, 94, 90, 86, 82}, {93, 89, 85, 81, 92, 88, 84, 80}}, + {{99, 103, 107, 111, 98, 102, 106, 110}, {97, 101, 105, 109, 96, 100, 104, 108}, + {115, 119, 123, 127, 114, 118, 122, 126}, {113, 117, 121, 125, 112, 116, 120, 124}} +}; + +/* mapping of MCA error address to normalized address */ +static const uint32_t umc_v12_0_ma2na_mapping[] = { + 0, 5, 6, 8, 9, 14, 12, 13, + 10, 11, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, + 24, 7, 29, 30, +}; + +static bool umc_v12_0_bit_wise_xor(uint32_t val) +{ + bool result = 0; + int i; + + for (i = 0; i < 32; i++) + result = result ^ ((val >> i) & 0x1); + + return result; +} + +static void __get_nps_pa_flip_bits(struct ras_core_context *ras_core, + enum umc_memory_partition_mode nps, + struct umc_flip_bits *flip_bits) +{ + uint32_t vram_type = ras_core->ras_umc.umc_vram_type; + + /* default setting */ + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT; + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT; + flip_bits->flip_row_bit = 13; + flip_bits->bit_num = 4; + flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT; + + if (nps == UMC_MEMORY_PARTITION_MODE_NPS2) { + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT; + flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT; + } else if (nps == UMC_MEMORY_PARTITION_MODE_NPS4) { + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT; + flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT; + } + + switch (vram_type) { + case UMC_VRAM_TYPE_HBM: + /* other nps modes are taken as nps1 */ + if (nps == UMC_MEMORY_PARTITION_MODE_NPS2) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; + else if (nps == UMC_MEMORY_PARTITION_MODE_NPS4) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; + + break; + case UMC_VRAM_TYPE_HBM3E: + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; + flip_bits->flip_row_bit = 12; + + if (nps == UMC_MEMORY_PARTITION_MODE_NPS2) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; + else if (nps == UMC_MEMORY_PARTITION_MODE_NPS4) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT; + + break; + default: + RAS_DEV_WARN(ras_core->dev, + "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n"); + break; + } +} + +static uint64_t convert_nps_pa_to_row_pa(struct ras_core_context *ras_core, + uint64_t pa, enum umc_memory_partition_mode nps, bool zero_pfn_ok) +{ + struct umc_flip_bits flip_bits = {0}; + uint64_t row_pa; + int i; + + __get_nps_pa_flip_bits(ras_core, nps, &flip_bits); + + row_pa = pa; + /* clear loop bits in soc physical address */ + for (i = 0; i < flip_bits.bit_num; i++) + row_pa &= 
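umc_v12_0_bit_wise_xor() in this file reduces a 32-bit word to its parity one bit at a time. As a quick standalone check, assuming nothing beyond standard C, the bit-serial loop is equivalent to the usual logarithmic XOR fold:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Same bit-serial parity as umc_v12_0_bit_wise_xor(). */
static bool parity_loop(uint32_t val)
{
	bool result = 0;
	int i;

	for (i = 0; i < 32; i++)
		result ^= (val >> i) & 0x1;
	return result;
}

/* Equivalent log-time fold: XOR halves until one bit remains. */
static bool parity_fold(uint32_t v)
{
	v ^= v >> 16;
	v ^= v >> 8;
	v ^= v >> 4;
	v ^= v >> 2;
	v ^= v >> 1;
	return v & 0x1;
}

int main(void)
{
	uint32_t samples[] = { 0x0, 0x1, 0x11111, 0x22222, 0xffffffff };
	unsigned i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		assert(parity_loop(samples[i]) == parity_fold(samples[i]));
	return 0;
}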
~BIT_ULL(flip_bits.flip_bits_in_pa[i]); + + if (!zero_pfn_ok && !RAS_ADDR_TO_PFN(row_pa)) + row_pa |= BIT_ULL(flip_bits.flip_bits_in_pa[2]); + + return row_pa; +} + +static int lookup_bad_pages_in_a_row(struct ras_core_context *ras_core, + struct eeprom_umc_record *record, uint32_t nps, + uint64_t *pfns, uint32_t num, + uint64_t seq_no, bool dump) +{ + uint32_t col, col_lower, row, row_lower, idx, row_high; + uint64_t soc_pa, row_pa, column, err_addr; + uint64_t retired_addr = RAS_PFN_TO_ADDR(record->cur_nps_retired_row_pfn); + struct umc_flip_bits flip_bits = {0}; + uint32_t retire_unit; + uint32_t i; + + __get_nps_pa_flip_bits(ras_core, nps, &flip_bits); + + row_pa = convert_nps_pa_to_row_pa(ras_core, retired_addr, nps, true); + + err_addr = record->address; + /* get column bit 0 and 1 in mca address */ + col_lower = (err_addr >> 1) & 0x3ULL; + /* MA_R13_BIT will be handled later */ + row_lower = (err_addr >> UMC_V12_0_MCA_R0_BIT) & 0x1fffULL; + row_lower &= ~BIT_ULL(flip_bits.flip_row_bit); + + if (ras_core->ras_gfx.gfx_ip_version >= IP_VERSION(9, 5, 0)) { + row_high = (row_pa >> flip_bits.r13_in_pa) & 0x3ULL; + /* it's 2.25GB in each channel, from MCA address to PA + * [R14 R13] is converted if the two bits value are 0x3, + * get them from PA instead of MCA address. + */ + row_lower |= (row_high << 13); + } + + idx = 0; + row = 0; + retire_unit = 0x1 << flip_bits.bit_num; + /* loop for all possibilities of retire bits */ + for (column = 0; column < retire_unit; column++) { + soc_pa = row_pa; + for (i = 0; i < flip_bits.bit_num; i++) + soc_pa |= (((column >> i) & 0x1ULL) << flip_bits.flip_bits_in_pa[i]); + + col = ((column & 0x7) << 2) | col_lower; + + /* add row bit 13 */ + if (flip_bits.bit_num == UMC_PA_FLIP_BITS_NUM) + row = ((column >> 3) << flip_bits.flip_row_bit) | row_lower; + + if (dump) + RAS_DEV_INFO(ras_core->dev, + "{%llu} Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + seq_no, soc_pa, row, col, + record->cur_nps_bank, record->mem_channel); + + + if (pfns && (idx < num)) + pfns[idx++] = RAS_ADDR_TO_PFN(soc_pa); + } + + return idx; +} + +static int umc_v12_convert_ma_to_pa(struct ras_core_context *ras_core, + struct umc_mca_addr *addr_in, struct umc_phy_addr *addr_out, + uint32_t nps) +{ + uint32_t i, na_shift; + uint64_t soc_pa, na, na_nps; + uint32_t bank_hash0, bank_hash1, bank_hash2, bank_hash3, col, row; + uint32_t bank0, bank1, bank2, bank3, bank; + uint32_t ch_inst = addr_in->ch_inst; + uint32_t umc_inst = addr_in->umc_inst; + uint32_t node_inst = addr_in->node_inst; + uint32_t socket_id = addr_in->socket_id; + uint32_t channel_index; + uint64_t err_addr = addr_in->err_addr; + + if (node_inst != UMC_INV_AID_NODE) { + if (ch_inst >= UMC_V12_0_CHANNEL_INSTANCE_NUM || + umc_inst >= UMC_V12_0_UMC_INSTANCE_NUM || + node_inst >= UMC_V12_0_AID_NUM_MAX || + socket_id >= UMC_V12_0_SOCKET_NUM_MAX) + return -EINVAL; + } else { + if (socket_id >= UMC_V12_0_SOCKET_NUM_MAX || + ch_inst >= UMC_V12_0_TOTAL_CHANNEL_NUM) + return -EINVAL; + } + + bank_hash0 = (err_addr >> UMC_V12_0_MCA_B0_BIT) & 0x1ULL; + bank_hash1 = (err_addr >> UMC_V12_0_MCA_B1_BIT) & 0x1ULL; + bank_hash2 = (err_addr >> UMC_V12_0_MCA_B2_BIT) & 0x1ULL; + bank_hash3 = (err_addr >> UMC_V12_0_MCA_B3_BIT) & 0x1ULL; + col = (err_addr >> 1) & 0x1fULL; + row = (err_addr >> 10) & 0x3fffULL; + + /* apply bank hash algorithm */ + bank0 = + bank_hash0 ^ (UMC_V12_0_XOR_EN0 & + (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR0) ^ + (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR0)))); + bank1 = + 
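lookup_bad_pages_in_a_row() walks all 2^bit_num combinations of the retire flip bits and ORs each combination into the row base address, so every page sharing the retired row is produced. A self-contained sketch of that enumeration, using the default NPS1 flip-bit positions from the header (C2=15, C3=16, C4=21, R13=35); the sample address is illustrative, not real telemetry:

#include <stdint.h>
#include <stdio.h>

#define FLIP_BITS 4

int main(void)
{
	/* Default NPS1 flip-bit positions: C2, C3, C4, R13. */
	const unsigned flip_pos[FLIP_BITS] = { 15, 16, 21, 35 };
	uint64_t pa = 0x8A01B8A5ULL;   /* sample address */
	uint64_t row_pa = pa;
	uint64_t column;
	int i;

	/* Clear the loop bits first, as convert_nps_pa_to_row_pa() does. */
	for (i = 0; i < FLIP_BITS; i++)
		row_pa &= ~(1ULL << flip_pos[i]);

	/* Then walk all 2^FLIP_BITS combinations, one page per combination. */
	for (column = 0; column < (1u << FLIP_BITS); column++) {
		uint64_t soc_pa = row_pa;

		for (i = 0; i < FLIP_BITS; i++)
			soc_pa |= ((column >> i) & 0x1ULL) << flip_pos[i];
		printf("page %2llu: pa 0x%llx\n", (unsigned long long)column,
		       (unsigned long long)soc_pa);
	}
	return 0;
}

With four flip bits this yields 16 pages per retired row, matching UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL.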
bank_hash1 ^ (UMC_V12_0_XOR_EN1 & + (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR1) ^ + (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR1)))); + bank2 = + bank_hash2 ^ (UMC_V12_0_XOR_EN2 & + (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR2) ^ + (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR2)))); + bank3 = + bank_hash3 ^ (UMC_V12_0_XOR_EN3 & + (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR3) ^ + (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR3)))); + + bank = bank0 | (bank1 << 1) | (bank2 << 2) | (bank3 << 3); + err_addr &= ~0x3c0ULL; + err_addr |= (bank << UMC_V12_0_MCA_B0_BIT); + + na_nps = 0x0; + /* convert mca error address to normalized address */ + for (i = 1; i < ARRAY_SIZE(umc_v12_0_ma2na_mapping); i++) + na_nps |= ((err_addr >> i) & 0x1ULL) << umc_v12_0_ma2na_mapping[i]; + + if (nps == UMC_MEMORY_PARTITION_MODE_NPS1) + na_shift = 8; + else if (nps == UMC_MEMORY_PARTITION_MODE_NPS2) + na_shift = 9; + else if (nps == UMC_MEMORY_PARTITION_MODE_NPS4) + na_shift = 10; + else if (nps == UMC_MEMORY_PARTITION_MODE_NPS8) + na_shift = 11; + else + return -EINVAL; + + na = ((na_nps >> na_shift) << 8) | (na_nps & 0xff); + + if (node_inst != UMC_INV_AID_NODE) + channel_index = + umc_v12_0_channel_idx_tbl[node_inst][umc_inst][ch_inst]; + else { + channel_index = ch_inst; + node_inst = channel_index / + (UMC_V12_0_UMC_INSTANCE_NUM * UMC_V12_0_CHANNEL_INSTANCE_NUM); + } + + /* translate umc channel address to soc pa, 3 parts are included */ + soc_pa = ADDR_OF_32KB_BLOCK(na) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(na); + + /* calc channel hash based on absolute address */ + soc_pa += socket_id * SOCKET_LFB_SIZE; + /* the umc channel bits are not original values, they are hashed */ + UMC_V12_0_SET_CHANNEL_HASH(channel_index, soc_pa); + /* restore pa */ + soc_pa -= socket_id * SOCKET_LFB_SIZE; + + /* get some channel bits from na_nps directly and + * add nps section offset + */ + if (nps == UMC_MEMORY_PARTITION_MODE_NPS2) { + soc_pa &= ~(0x1ULL << UMC_V12_0_PA_CH5_BIT); + soc_pa |= ((na_nps & 0x100) << 5); + soc_pa += (node_inst >> 1) * (SOCKET_LFB_SIZE >> 1); + } else if (nps == UMC_MEMORY_PARTITION_MODE_NPS4) { + soc_pa &= ~(0x3ULL << UMC_V12_0_PA_CH4_BIT); + soc_pa |= ((na_nps & 0x300) << 4); + soc_pa += node_inst * (SOCKET_LFB_SIZE >> 2); + } else if (nps == UMC_MEMORY_PARTITION_MODE_NPS8) { + soc_pa &= ~(0x7ULL << UMC_V12_0_PA_CH4_BIT); + soc_pa |= ((na_nps & 0x700) << 4); + soc_pa += node_inst * (SOCKET_LFB_SIZE >> 2) + + (channel_index >> 4) * (SOCKET_LFB_SIZE >> 3); + } + + addr_out->pa = soc_pa; + addr_out->bank = bank; + addr_out->channel_idx = channel_index; + + return 0; +} + +static int convert_ma_to_pa(struct ras_core_context *ras_core, + struct umc_mca_addr *addr_in, struct umc_phy_addr *addr_out, + uint32_t nps) +{ + int ret; + + if (ras_psp_check_supported_cmd(ras_core, RAS_TA_CMD_ID__QUERY_ADDRESS)) + ret = ras_umc_psp_convert_ma_to_pa(ras_core, + addr_in, addr_out, nps); + else + ret = umc_v12_convert_ma_to_pa(ras_core, + addr_in, addr_out, nps); + + return ret; +} + +static int convert_bank_to_nps_addr(struct ras_core_context *ras_core, + struct ras_bank_ecc *bank, struct umc_phy_addr *pa_addr, uint32_t nps) +{ + struct umc_mca_addr addr_in; + struct umc_phy_addr addr_out; + int ret; + + memset(&addr_in, 0, sizeof(addr_in)); + memset(&addr_out, 0, sizeof(addr_out)); + + addr_in.err_addr = ACA_ADDR_2_ERR_ADDR(bank->addr); + addr_in.ch_inst = ACA_IPID_2_UMC_CH(bank->ipid); + addr_in.umc_inst = ACA_IPID_2_UMC_INST(bank->ipid); + addr_in.node_inst = 
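The MA-to-PA path compacts the normalized address by dropping na_shift partition-interleave bits while preserving the low 256-byte offset: na = ((na_nps >> na_shift) << 8) | (na_nps & 0xff), with na_shift of 8/9/10/11 for NPS1/2/4/8. A tiny worked sketch:

#include <stdint.h>
#include <stdio.h>

/* Shift amounts from the switch above: NPS1=8, NPS2=9, NPS4=10, NPS8=11. */
static uint64_t compact_na(uint64_t na_nps, unsigned na_shift)
{
	/* Keep the low 256-byte offset, splice the rest above bit 8. */
	return ((na_nps >> na_shift) << 8) | (na_nps & 0xff);
}

int main(void)
{
	uint64_t na_nps = 0x12345;
	unsigned shift;

	/* shift 8 (NPS1) is the identity: no interleave bit is dropped. */
	for (shift = 8; shift <= 11; shift++)
		printf("shift %u: 0x%llx -> 0x%llx\n", shift,
		       (unsigned long long)na_nps,
		       (unsigned long long)compact_na(na_nps, shift));
	return 0;
}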
ACA_IPID_2_DIE_ID(bank->ipid); + addr_in.socket_id = ACA_IPID_2_SOCKET_ID(bank->ipid); + + ret = convert_ma_to_pa(ras_core, &addr_in, &addr_out, nps); + if (!ret) { + pa_addr->pa = + convert_nps_pa_to_row_pa(ras_core, addr_out.pa, nps, false); + pa_addr->channel_idx = addr_out.channel_idx; + pa_addr->bank = addr_out.bank; + } + + return ret; +} + +static int umc_v12_0_bank_to_eeprom_record(struct ras_core_context *ras_core, + struct ras_bank_ecc *bank, struct eeprom_umc_record *record) +{ + struct umc_phy_addr nps_addr; + int ret; + + memset(&nps_addr, 0, sizeof(nps_addr)); + + ret = convert_bank_to_nps_addr(ras_core, bank, + &nps_addr, bank->nps); + if (ret) + return ret; + + ras_umc_fill_eeprom_record(ras_core, + ACA_ADDR_2_ERR_ADDR(bank->addr), ACA_IPID_2_UMC_INST(bank->ipid), + &nps_addr, bank->nps, record); + + lookup_bad_pages_in_a_row(ras_core, record, + bank->nps, NULL, 0, bank->seq_no, true); + + return 0; +} + +static int convert_eeprom_record_to_nps_addr(struct ras_core_context *ras_core, + struct eeprom_umc_record *record, uint64_t *pa, uint32_t nps) +{ + struct device_system_info dev_info = {0}; + struct umc_mca_addr addr_in; + struct umc_phy_addr addr_out; + int ret; + + memset(&addr_in, 0, sizeof(addr_in)); + memset(&addr_out, 0, sizeof(addr_out)); + + ras_core_get_device_system_info(ras_core, &dev_info); + + addr_in.err_addr = record->address; + addr_in.ch_inst = record->mem_channel; + addr_in.umc_inst = record->mcumc_id; + addr_in.node_inst = UMC_INV_AID_NODE; + addr_in.socket_id = dev_info.socket_id; + + ret = convert_ma_to_pa(ras_core, &addr_in, &addr_out, nps); + if (ret) + return ret; + + *pa = convert_nps_pa_to_row_pa(ras_core, addr_out.pa, nps, false); + + return 0; +} + +static int umc_v12_0_eeprom_record_to_nps_record(struct ras_core_context *ras_core, + struct eeprom_umc_record *record, uint32_t nps) +{ + uint64_t pa = 0; + int ret = 0; + + if (nps == EEPROM_RECORD_UMC_NPS_MODE(record)) { + record->cur_nps_retired_row_pfn = EEPROM_RECORD_UMC_ADDR_PFN(record); + } else { + ret = convert_eeprom_record_to_nps_addr(ras_core, + record, &pa, nps); + if (!ret) + record->cur_nps_retired_row_pfn = RAS_ADDR_TO_PFN(pa); + } + + record->cur_nps = nps; + + return ret; +} + +static int umc_v12_0_eeprom_record_to_nps_pages(struct ras_core_context *ras_core, + struct eeprom_umc_record *record, uint32_t nps, + uint64_t *pfns, uint32_t num) +{ + return lookup_bad_pages_in_a_row(ras_core, + record, nps, pfns, num, 0, false); +} + +static int umc_12_0_soc_pa_to_bank(struct ras_core_context *ras_core, + uint64_t soc_pa, + struct umc_bank_addr *bank_addr) +{ + + int channel_hashed = 0; + int channel_real = 0; + int channel_reversed = 0; + int i = 0; + + bank_addr->stack_id = UMC_V12_0_SOC_PA_TO_SID(soc_pa); + bank_addr->bank_group = 0; /* This is a combination of SID & Bank. Needed?? */ + bank_addr->bank = UMC_V12_0_SOC_PA_TO_BANK(soc_pa); + bank_addr->row = UMC_V12_0_SOC_PA_TO_ROW(soc_pa); + bank_addr->column = UMC_V12_0_SOC_PA_TO_COL(soc_pa); + + /* Channel bits 4-6 are hashed. 
Bruteforce reverse the hash */ + channel_hashed = (soc_pa >> UMC_V12_0_PA_CH4_BIT) & 0x7; + + for (i = 0; i < 8; i++) { + channel_reversed = 0; + channel_reversed |= UMC_V12_0_CHANNEL_HASH_CH4((i << 4), soc_pa); + channel_reversed |= (UMC_V12_0_CHANNEL_HASH_CH5((i << 4), soc_pa) << 1); + channel_reversed |= (UMC_V12_0_CHANNEL_HASH_CH6((i << 4), soc_pa) << 2); + if (channel_reversed == channel_hashed) + channel_real = ((i << 4)) | ((soc_pa >> UMC_V12_0_PA_CH0_BIT) & 0xf); + } + + bank_addr->channel = channel_real; + bank_addr->subchannel = UMC_V12_0_SOC_PA_TO_PC(soc_pa); + + return 0; +} + +static int umc_12_0_bank_to_soc_pa(struct ras_core_context *ras_core, + struct umc_bank_addr bank_addr, + uint64_t *soc_pa) +{ + uint64_t na = 0; + uint64_t tmp_pa = 0; + *soc_pa = 0; + + tmp_pa |= UMC_V12_0_SOC_SID_TO_PA(bank_addr.stack_id); + tmp_pa |= UMC_V12_0_SOC_BANK_TO_PA(bank_addr.bank); + tmp_pa |= UMC_V12_0_SOC_ROW_TO_PA(bank_addr.row); + tmp_pa |= UMC_V12_0_SOC_COL_TO_PA(bank_addr.column); + tmp_pa |= UMC_V12_0_SOC_CH_TO_PA(bank_addr.channel); + tmp_pa |= UMC_V12_0_SOC_PC_TO_PA(bank_addr.subchannel); + + /* Get the NA */ + na = ((tmp_pa >> UMC_V12_0_PA_C2_BIT) << UMC_V12_0_NA_C2_BIT); + na |= tmp_pa & 0xff; + + /* translate umc channel address to soc pa, 3 parts are included */ + tmp_pa = ADDR_OF_32KB_BLOCK(na) | + ADDR_OF_256B_BLOCK(bank_addr.channel) | + OFFSET_IN_256B_BLOCK(na); + + /* the umc channel bits are not original values, they are hashed */ + UMC_V12_0_SET_CHANNEL_HASH(bank_addr.channel, tmp_pa); + + *soc_pa = tmp_pa; + + return 0; +} + +const struct ras_umc_ip_func ras_umc_func_v12_0 = { + .bank_to_eeprom_record = umc_v12_0_bank_to_eeprom_record, + .eeprom_record_to_nps_record = umc_v12_0_eeprom_record_to_nps_record, + .eeprom_record_to_nps_pages = umc_v12_0_eeprom_record_to_nps_pages, + .bank_to_soc_pa = umc_12_0_bank_to_soc_pa, + .soc_pa_to_bank = umc_12_0_soc_pa_to_bank, +}; + diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.h b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.h new file mode 100644 index 000000000000..8a35ad856165 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.h @@ -0,0 +1,314 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
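umc_12_0_soc_pa_to_bank() recovers the real channel bits by brute force: it hashes every candidate and keeps the one whose hash matches the bits found in the PA. Because the hash XORs the channel bits with a PA-derived constant, exactly one candidate matches. A toy stand-in, where the 3-bit hash is illustrative rather than the real CHANNEL_HASH macros:

#include <stdint.h>
#include <stdio.h>

/* Toy 3-bit channel hash: candidate channel bits XORed with a
 * PA-derived constant (stand-in for CHANNEL_HASH_CH4..CH6). */
static unsigned hash3(unsigned ch_high, uint64_t pa)
{
	return (ch_high ^ (unsigned)((pa >> 20) ^ (pa >> 27) ^ (pa >> 34))) & 0x7;
}

int main(void)
{
	uint64_t pa = 0x123456789ULL;
	unsigned real = 5;                  /* channel bits to recover */
	unsigned hashed = hash3(real, pa);  /* what actually lands in the PA */
	unsigned found = 0;
	unsigned i;

	/* Try all 8 candidates, keep the match, as the loop above does. */
	for (i = 0; i < 8; i++)
		if (hash3(i, pa) == hashed)
			found = i;

	printf("recovered channel bits: %u\n", found);  /* prints 5 */
	return 0;
}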
+ * + */ +#ifndef __RAS_UMC_V12_0_H__ +#define __RAS_UMC_V12_0_H__ +#include "ras.h" + +/* MCA_UMC_UMC0_MCUMC_ADDRT0 */ +#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__Reserved__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr_MASK 0x00FFFFFFFFFFFFFFL +#define MCA_UMC_UMC0_MCUMC_ADDRT0__Reserved_MASK 0xFF00000000000000L + +/* MCMP1_IPIDT0 */ +#define MCMP1_IPIDT0__InstanceIdLo__SHIFT 0x0 +#define MCMP1_IPIDT0__HardwareID__SHIFT 0x20 +#define MCMP1_IPIDT0__InstanceIdHi__SHIFT 0x2c +#define MCMP1_IPIDT0__McaType__SHIFT 0x30 + +#define MCMP1_IPIDT0__InstanceIdLo_MASK 0x00000000FFFFFFFFL +#define MCMP1_IPIDT0__HardwareID_MASK 0x00000FFF00000000L +#define MCMP1_IPIDT0__InstanceIdHi_MASK 0x0000F00000000000L +#define MCMP1_IPIDT0__McaType_MASK 0xFFFF000000000000L + +/* number of umc channel instance with memory map register access */ +#define UMC_V12_0_CHANNEL_INSTANCE_NUM 8 +/* number of umc instance with memory map register access */ +#define UMC_V12_0_UMC_INSTANCE_NUM 4 + +/* one piece of normalized address is mapped to 8 pieces of physical address */ +#define UMC_V12_0_NA_MAP_PA_NUM 8 + +/* bank bits in MCA error address */ +#define UMC_V12_0_MCA_B0_BIT 6 +#define UMC_V12_0_MCA_B1_BIT 7 +#define UMC_V12_0_MCA_B2_BIT 8 +#define UMC_V12_0_MCA_B3_BIT 9 + +/* row bits in MCA address */ +#define UMC_V12_0_MCA_R0_BIT 10 + +/* Stack ID bits in SOC physical address */ +#define UMC_V12_0_PA_SID1_BIT 37 +#define UMC_V12_0_PA_SID0_BIT 36 + +/* bank bits in SOC physical address */ +#define UMC_V12_0_PA_B3_BIT 18 +#define UMC_V12_0_PA_B2_BIT 17 +#define UMC_V12_0_PA_B1_BIT 20 +#define UMC_V12_0_PA_B0_BIT 19 + +/* row bits in SOC physical address */ +#define UMC_V12_0_PA_R13_BIT 35 +#define UMC_V12_0_PA_R12_BIT 34 +#define UMC_V12_0_PA_R11_BIT 33 +#define UMC_V12_0_PA_R10_BIT 32 +#define UMC_V12_0_PA_R9_BIT 31 +#define UMC_V12_0_PA_R8_BIT 30 +#define UMC_V12_0_PA_R7_BIT 29 +#define UMC_V12_0_PA_R6_BIT 28 +#define UMC_V12_0_PA_R5_BIT 27 +#define UMC_V12_0_PA_R4_BIT 26 +#define UMC_V12_0_PA_R3_BIT 25 +#define UMC_V12_0_PA_R2_BIT 24 +#define UMC_V12_0_PA_R1_BIT 23 +#define UMC_V12_0_PA_R0_BIT 22 + +/* column bits in SOC physical address */ +#define UMC_V12_0_PA_C4_BIT 21 +#define UMC_V12_0_PA_C3_BIT 16 +#define UMC_V12_0_PA_C2_BIT 15 +#define UMC_V12_0_PA_C1_BIT 6 +#define UMC_V12_0_PA_C0_BIT 5 + +/* channel index bits in SOC physical address */ +#define UMC_V12_0_PA_CH6_BIT 14 +#define UMC_V12_0_PA_CH5_BIT 13 +#define UMC_V12_0_PA_CH4_BIT 12 +#define UMC_V12_0_PA_CH3_BIT 11 +#define UMC_V12_0_PA_CH2_BIT 10 +#define UMC_V12_0_PA_CH1_BIT 9 +#define UMC_V12_0_PA_CH0_BIT 8 + +/* Pseudochannel index bits in SOC physical address */ +#define UMC_V12_0_PA_PC0_BIT 7 + +#define UMC_V12_0_NA_C2_BIT 8 + +#define UMC_V12_0_SOC_PA_TO_SID(pa) \ + ((((pa >> UMC_V12_0_PA_SID0_BIT) & 0x1ULL) << 0ULL) | \ + (((pa >> UMC_V12_0_PA_SID1_BIT) & 0x1ULL) << 1ULL)) + +#define UMC_V12_0_SOC_PA_TO_BANK(pa) \ + ((((pa >> UMC_V12_0_PA_B0_BIT) & 0x1ULL) << 0ULL) | \ + (((pa >> UMC_V12_0_PA_B1_BIT) & 0x1ULL) << 1ULL) | \ + (((pa >> UMC_V12_0_PA_B2_BIT) & 0x1ULL) << 2ULL) | \ + (((pa >> UMC_V12_0_PA_B3_BIT) & 0x1ULL) << 3ULL)) + +#define UMC_V12_0_SOC_PA_TO_ROW(pa) \ + ((((pa >> UMC_V12_0_PA_R0_BIT) & 0x1ULL) << 0ULL) | \ + (((pa >> UMC_V12_0_PA_R1_BIT) & 0x1ULL) << 1ULL) | \ + (((pa >> UMC_V12_0_PA_R2_BIT) & 0x1ULL) << 2ULL) | \ + (((pa >> UMC_V12_0_PA_R3_BIT) & 0x1ULL) << 3ULL) | \ + (((pa >> UMC_V12_0_PA_R4_BIT) & 0x1ULL) << 4ULL) | \ + (((pa >> UMC_V12_0_PA_R5_BIT) & 0x1ULL) << 5ULL) 
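Each SOC_PA_TO_x macro in this header is a bit gather (collect scattered PA bits into a dense field) and each x_TO_PA macro is the inverse scatter. A generic, self-contained helper pair that reproduces the pattern from a position table and asserts the round trip, using the column bit positions C0..C4 defined above:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Gather: PA bit pos[i] -> field bit i (the SOC_PA_TO_x direction). */
static uint64_t gather(uint64_t pa, const unsigned *pos, int n)
{
	uint64_t field = 0;
	int i;

	for (i = 0; i < n; i++)
		field |= ((pa >> pos[i]) & 0x1ULL) << i;
	return field;
}

/* Scatter: field bit i -> PA bit pos[i] (the x_TO_PA direction). */
static uint64_t scatter(uint64_t field, const unsigned *pos, int n)
{
	uint64_t pa = 0;
	int i;

	for (i = 0; i < n; i++)
		pa |= ((field >> i) & 0x1ULL) << pos[i];
	return pa;
}

int main(void)
{
	/* Column bit positions C0..C4 from this header. */
	const unsigned col_pos[] = { 5, 6, 15, 16, 21 };
	uint64_t col;

	for (col = 0; col < 32; col++)
		assert(gather(scatter(col, col_pos, 5), col_pos, 5) == col);

	printf("column gather/scatter round-trip ok\n");
	return 0;
}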
| \ + (((pa >> UMC_V12_0_PA_R6_BIT) & 0x1ULL) << 6ULL) | \ + (((pa >> UMC_V12_0_PA_R7_BIT) & 0x1ULL) << 7ULL) | \ + (((pa >> UMC_V12_0_PA_R8_BIT) & 0x1ULL) << 8ULL) | \ + (((pa >> UMC_V12_0_PA_R9_BIT) & 0x1ULL) << 9ULL) | \ + (((pa >> UMC_V12_0_PA_R10_BIT) & 0x1ULL) << 10ULL) | \ + (((pa >> UMC_V12_0_PA_R11_BIT) & 0x1ULL) << 11ULL) | \ + (((pa >> UMC_V12_0_PA_R12_BIT) & 0x1ULL) << 12ULL) | \ + (((pa >> UMC_V12_0_PA_R13_BIT) & 0x1ULL) << 13ULL)) + +#define UMC_V12_0_SOC_PA_TO_COL(pa) \ + ((((pa >> UMC_V12_0_PA_C0_BIT) & 0x1ULL) << 0ULL) | \ + (((pa >> UMC_V12_0_PA_C1_BIT) & 0x1ULL) << 1ULL) | \ + (((pa >> UMC_V12_0_PA_C2_BIT) & 0x1ULL) << 2ULL) | \ + (((pa >> UMC_V12_0_PA_C3_BIT) & 0x1ULL) << 3ULL) | \ + (((pa >> UMC_V12_0_PA_C4_BIT) & 0x1ULL) << 4ULL)) + +#define UMC_V12_0_SOC_PA_TO_CH(pa) \ + ((((pa >> UMC_V12_0_PA_CH0_BIT) & 0x1ULL) << 0ULL) | \ + (((pa >> UMC_V12_0_PA_CH1_BIT) & 0x1ULL) << 1ULL) | \ + (((pa >> UMC_V12_0_PA_CH2_BIT) & 0x1ULL) << 2ULL) | \ + (((pa >> UMC_V12_0_PA_CH3_BIT) & 0x1ULL) << 3ULL) | \ + (((pa >> UMC_V12_0_PA_CH4_BIT) & 0x1ULL) << 4ULL) | \ + (((pa >> UMC_V12_0_PA_CH5_BIT) & 0x1ULL) << 5ULL) | \ + (((pa >> UMC_V12_0_PA_CH6_BIT) & 0x1ULL) << 6ULL)) + +#define UMC_V12_0_SOC_PA_TO_PC(pa) (((pa >> UMC_V12_0_PA_PC0_BIT) & 0x1ULL) << 0ULL) + +#define UMC_V12_0_SOC_SID_TO_PA(sid) \ + ((((sid >> 0ULL) & 0x1ULL) << UMC_V12_0_PA_SID0_BIT) | \ + (((sid >> 1ULL) & 0x1ULL) << UMC_V12_0_PA_SID1_BIT)) + +#define UMC_V12_0_SOC_BANK_TO_PA(bank) \ + ((((bank >> 0ULL) & 0x1ULL) << UMC_V12_0_PA_B0_BIT) | \ + (((bank >> 1ULL) & 0x1ULL) << UMC_V12_0_PA_B1_BIT) | \ + (((bank >> 2ULL) & 0x1ULL) << UMC_V12_0_PA_B2_BIT) | \ + (((bank >> 3ULL) & 0x1ULL) << UMC_V12_0_PA_B3_BIT)) + +#define UMC_V12_0_SOC_ROW_TO_PA(row) \ + ((((row >> 0ULL) & 0x1ULL) << UMC_V12_0_PA_R0_BIT) | \ + (((row >> 1ULL) & 0x1ULL) << UMC_V12_0_PA_R1_BIT) | \ + (((row >> 2ULL) & 0x1ULL) << UMC_V12_0_PA_R2_BIT) | \ + (((row >> 3ULL) & 0x1ULL) << UMC_V12_0_PA_R3_BIT) | \ + (((row >> 4ULL) & 0x1ULL) << UMC_V12_0_PA_R4_BIT) | \ + (((row >> 5ULL) & 0x1ULL) << UMC_V12_0_PA_R5_BIT) | \ + (((row >> 6ULL) & 0x1ULL) << UMC_V12_0_PA_R6_BIT) | \ + (((row >> 7ULL) & 0x1ULL) << UMC_V12_0_PA_R7_BIT) | \ + (((row >> 8ULL) & 0x1ULL) << UMC_V12_0_PA_R8_BIT) | \ + (((row >> 9ULL) & 0x1ULL) << UMC_V12_0_PA_R9_BIT) | \ + (((row >> 10ULL) & 0x1ULL) << UMC_V12_0_PA_R10_BIT) | \ + (((row >> 11ULL) & 0x1ULL) << UMC_V12_0_PA_R11_BIT) | \ + (((row >> 12ULL) & 0x1ULL) << UMC_V12_0_PA_R12_BIT) | \ + (((row >> 13ULL) & 0x1ULL) << UMC_V12_0_PA_R13_BIT)) + +#define UMC_V12_0_SOC_COL_TO_PA(col) \ + ((((col >> 0ULL) & 0x1ULL) << UMC_V12_0_PA_C0_BIT) | \ + (((col >> 1ULL) & 0x1ULL) << UMC_V12_0_PA_C1_BIT) | \ + (((col >> 2ULL) & 0x1ULL) << UMC_V12_0_PA_C2_BIT) | \ + (((col >> 3ULL) & 0x1ULL) << UMC_V12_0_PA_C3_BIT) | \ + (((col >> 4ULL) & 0x1ULL) << UMC_V12_0_PA_C4_BIT)) + +#define UMC_V12_0_SOC_CH_TO_PA(ch) \ + ((((ch >> 0ULL) & 0x1ULL) << UMC_V12_0_PA_CH0_BIT) | \ + (((ch >> 1ULL) & 0x1ULL) << UMC_V12_0_PA_CH1_BIT) | \ + (((ch >> 2ULL) & 0x1ULL) << UMC_V12_0_PA_CH2_BIT) | \ + (((ch >> 3ULL) & 0x1ULL) << UMC_V12_0_PA_CH3_BIT) | \ + (((ch >> 4ULL) & 0x1ULL) << UMC_V12_0_PA_CH4_BIT) | \ + (((ch >> 5ULL) & 0x1ULL) << UMC_V12_0_PA_CH5_BIT) | \ + (((ch >> 6ULL) & 0x1ULL) << UMC_V12_0_PA_CH6_BIT)) + +#define UMC_V12_0_SOC_PC_TO_PA(pc) (((pc >> 0ULL) & 0x1ULL) << UMC_V12_0_PA_PC0_BIT) + +/* bank hash settings */ +#define UMC_V12_0_XOR_EN0 1 +#define UMC_V12_0_XOR_EN1 1 +#define UMC_V12_0_XOR_EN2 1 +#define UMC_V12_0_XOR_EN3 1 +#define UMC_V12_0_COL_XOR0 0x0 
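With these enables and the COL/ROW XOR masks that follow, umc_v12_convert_ma_to_pa() rederives each bank bit as bank_hash ^ (XOR_EN & (parity(col & COL_XOR) ^ parity(row & ROW_XOR))). A compact standalone rendering of one such bit, with sample operand values rather than real telemetry:

#include <stdint.h>
#include <stdio.h>

/* Bit-serial parity, same idea as umc_v12_0_bit_wise_xor(). */
static unsigned parity(uint32_t v)
{
	unsigned p = 0;

	while (v) {
		p ^= v & 1u;
		v >>= 1;
	}
	return p;
}

/* One hashed bank bit, mirroring bankN = bank_hashN ^ (XOR_ENn & ...). */
static unsigned bank_bit(unsigned bank_hash, unsigned xor_en,
			 uint32_t col, uint32_t col_xor,
			 uint32_t row, uint32_t row_xor)
{
	return bank_hash ^
	       (xor_en & (parity(col & col_xor) ^ parity(row & row_xor)));
}

int main(void)
{
	/* Sample operands in the ROW_XOR0 mask style; prints 0 here
	 * because parity(0x2a5a & 0x11111) is 1 and flips the hash bit. */
	printf("bank0 = %u\n", bank_bit(1, 1, 0x1f, 0x0, 0x2a5a, 0x11111));
	return 0;
}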
+#define UMC_V12_0_COL_XOR1 0x0 +#define UMC_V12_0_COL_XOR2 0x800 +#define UMC_V12_0_COL_XOR3 0x1000 +#define UMC_V12_0_ROW_XOR0 0x11111 +#define UMC_V12_0_ROW_XOR1 0x22222 +#define UMC_V12_0_ROW_XOR2 0x4444 +#define UMC_V12_0_ROW_XOR3 0x8888 + +/* channel hash settings */ +#define UMC_V12_0_HASH_4K 0 +#define UMC_V12_0_HASH_64K 1 +#define UMC_V12_0_HASH_2M 1 +#define UMC_V12_0_HASH_1G 1 +#define UMC_V12_0_HASH_1T 1 + +/* XOR some bits of PA into CH4~CH6 bits (bits 12~14 of PA), + * hash bit is only effective when related setting is enabled + */ +#define UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) ((((channel_idx) >> 5) & 0x1) ^ \ + (((pa) >> 20) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ + (((pa) >> 27) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ + (((pa) >> 34) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ + (((pa) >> 41) & 0x1ULL & UMC_V12_0_HASH_1T)) +#define UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) ((((channel_idx) >> 6) & 0x1) ^ \ + (((pa) >> 21) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ + (((pa) >> 28) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ + (((pa) >> 35) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ + (((pa) >> 42) & 0x1ULL & UMC_V12_0_HASH_1T)) +#define UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) ((((channel_idx) >> 4) & 0x1) ^ \ + (((pa) >> 19) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ + (((pa) >> 26) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ + (((pa) >> 33) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ + (((pa) >> 40) & 0x1ULL & UMC_V12_0_HASH_1T) ^ \ + (((pa) >> 47) & 0x1ULL & UMC_V12_0_HASH_1T)) +#define UMC_V12_0_SET_CHANNEL_HASH(channel_idx, pa) do { \ + (pa) &= ~(0x7ULL << UMC_V12_0_PA_CH4_BIT); \ + (pa) |= (UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) << UMC_V12_0_PA_CH4_BIT); \ + (pa) |= (UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) << UMC_V12_0_PA_CH5_BIT); \ + (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ + } while (0) + + +/* + * (addr / 256) * 4096, the higher 26 bits in ErrorAddr + * is the index of 4KB block + */ +#define ADDR_OF_4KB_BLOCK(addr) (((addr) & ~0xffULL) << 4) +/* + * (addr / 256) * 8192, the higher 26 bits in ErrorAddr + * is the index of 8KB block + */ +#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) +/* + * (addr / 256) * 32768, the higher 26 bits in ErrorAddr + * is the index of 8KB block + */ +#define ADDR_OF_32KB_BLOCK(addr) (((addr) & ~0xffULL) << 7) +/* channel index is the index of 256B block */ +#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8) +/* offset in 256B block */ +#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL) + + +#define UMC_V12_ADDR_MASK_BAD_COLS(addr) \ + ((addr) & ~((0x3ULL << UMC_V12_0_PA_C2_BIT) | \ + (0x1ULL << UMC_V12_0_PA_C4_BIT) | \ + (0x1ULL << UMC_V12_0_PA_R13_BIT))) + +#define ACA_IPID_HI_2_UMC_AID(_ipid_hi) (((_ipid_hi) >> 2) & 0x3) +#define ACA_IPID_LO_2_UMC_CH(_ipid_lo) \ + (((((_ipid_lo) >> 20) & 0x1) * 4) + (((_ipid_lo) >> 12) & 0xF)) +#define ACA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7) + +#define ACA_IPID_2_DIE_ID(ipid) ((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) >> 2) & 0x03) +#define ACA_IPID_2_UMC_CH(ipid) \ + (ACA_IPID_LO_2_UMC_CH(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo))) + +#define ACA_IPID_2_UMC_INST(ipid) \ + (ACA_IPID_LO_2_UMC_INST(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo))) + +#define ACA_IPID_2_SOCKET_ID(ipid) \ + (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \ + (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) + +#define ACA_ADDR_2_ERR_ADDR(addr) \ + REG_GET_FIELD(addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr) + +/* R13 bit shift should be considered, 
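The soc-pa assembly ADDR_OF_32KB_BLOCK(na) | ADDR_OF_256B_BLOCK(ch) | OFFSET_IN_256B_BLOCK(na) treats the upper NA bits as a 32 KB block index, the channel as a 256 B block index, and keeps the low 8 bits as the byte offset. Worked numerically in a standalone sketch that copies the macros from this header:

#include <stdint.h>
#include <stdio.h>

#define ADDR_OF_32KB_BLOCK(addr)	(((addr) & ~0xffULL) << 7)
#define ADDR_OF_256B_BLOCK(channel_index)	((channel_index) << 8)
#define OFFSET_IN_256B_BLOCK(addr)	((addr) & 0xffULL)

int main(void)
{
	uint64_t na = 0x1234;	/* normalized address */
	uint64_t ch = 5;	/* channel index */
	uint64_t pa = ADDR_OF_32KB_BLOCK(na) |
		      ADDR_OF_256B_BLOCK(ch) |
		      OFFSET_IN_256B_BLOCK(na);

	/* 0x1234 -> block 0x12 at 32KB granularity, offset 0x34, channel 5:
	 * pa = (0x1200 << 7) | (5 << 8) | 0x34 = 0x90534 */
	printf("pa = 0x%llx\n", (unsigned long long)pa);
	return 0;
}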
double the number */ +#define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2) + + +/* C2, C3, C4, R13, four MCA bits are looped in page retirement */ +#define UMC_V12_0_RETIRE_LOOP_BITS 4 + +/* invalid node instance value */ +#define UMC_INV_AID_NODE 0xffff + +#define UMC_V12_0_AID_NUM_MAX 4 +#define UMC_V12_0_SOCKET_NUM_MAX 8 + +#define UMC_V12_0_TOTAL_CHANNEL_NUM \ + (UMC_V12_0_AID_NUM_MAX * UMC_V12_0_UMC_INSTANCE_NUM * UMC_V12_0_CHANNEL_INSTANCE_NUM) + +/* one device has 192GB HBM */ +#define SOCKET_LFB_SIZE 0x3000000000ULL + +extern const struct ras_umc_ip_func ras_umc_func_v12_0; + +int ras_umc_get_badpage_count(struct ras_core_context *ras_core); +int ras_umc_get_badpage_record(struct ras_core_context *ras_core, uint32_t index, void *record); +#endif + -- cgit v1.2.3 From 7a3f9c0992c4f8e3d9c0c340ec46e4a8ebd1c7d5 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:26:49 +0800 Subject: drm/amd/ras: Add umc common ras functions Add umc common ras functions. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_umc.c | 706 ++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_umc.h | 166 +++++++ 2 files changed, 872 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_umc.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_umc.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c new file mode 100644 index 000000000000..4067359bb299 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c @@ -0,0 +1,706 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "ras.h" +#include "ras_umc.h" +#include "ras_umc_v12_0.h" + +#define MAX_ECC_NUM_PER_RETIREMENT 16 + +/* bad page timestamp format + * yy[31:27] mm[26:23] day[22:17] hh[16:12] mm[11:6] ss[5:0] + */ +#define EEPROM_TIMESTAMP_MINUTE 6 +#define EEPROM_TIMESTAMP_HOUR 12 +#define EEPROM_TIMESTAMP_DAY 17 +#define EEPROM_TIMESTAMP_MONTH 23 +#define EEPROM_TIMESTAMP_YEAR 27 + +static uint64_t ras_umc_get_eeprom_timestamp(struct ras_core_context *ras_core) +{ + struct ras_time tm = {0}; + uint64_t utc_timestamp = 0; + uint64_t eeprom_timestamp = 0; + + utc_timestamp = ras_core_get_utc_second_timestamp(ras_core); + if (!utc_timestamp) + return utc_timestamp; + + ras_core_convert_timestamp_to_time(ras_core, utc_timestamp, &tm); + + /* the year range is 2000 ~ 2031, set the year if not in the range */ + if (tm.tm_year < 2000) + tm.tm_year = 2000; + if (tm.tm_year > 2031) + tm.tm_year = 2031; + + tm.tm_year -= 2000; + + eeprom_timestamp = tm.tm_sec + (tm.tm_min << EEPROM_TIMESTAMP_MINUTE) + + (tm.tm_hour << EEPROM_TIMESTAMP_HOUR) + + (tm.tm_mday << EEPROM_TIMESTAMP_DAY) + + (tm.tm_mon << EEPROM_TIMESTAMP_MONTH) + + (tm.tm_year << EEPROM_TIMESTAMP_YEAR); + eeprom_timestamp &= 0xffffffff; + + return eeprom_timestamp; +} + +static const struct ras_umc_ip_func *ras_umc_get_ip_func( + struct ras_core_context *ras_core, uint32_t ip_version) +{ + switch (ip_version) { + case IP_VERSION(12, 0, 0): + return &ras_umc_func_v12_0; + default: + RAS_DEV_ERR(ras_core->dev, + "UMC ip version(0x%x) is not supported!\n", ip_version); + break; + } + + return NULL; +} + +int ras_umc_psp_convert_ma_to_pa(struct ras_core_context *ras_core, + struct umc_mca_addr *in, struct umc_phy_addr *out, + uint32_t nps) +{ + struct ras_ta_query_address_input addr_in; + struct ras_ta_query_address_output addr_out; + int ret; + + if (!in) + return -EINVAL; + + memset(&addr_in, 0, sizeof(addr_in)); + memset(&addr_out, 0, sizeof(addr_out)); + + addr_in.ma.err_addr = in->err_addr; + addr_in.ma.ch_inst = in->ch_inst; + addr_in.ma.umc_inst = in->umc_inst; + addr_in.ma.node_inst = in->node_inst; + addr_in.ma.socket_id = in->socket_id; + + addr_in.addr_type = RAS_TA_MCA_TO_PA; + + ret = ras_psp_query_address(ras_core, &addr_in, &addr_out); + if (ret) { + RAS_DEV_WARN(ras_core->dev, + "Failed to query RAS physical address for 0x%llx, ret:%d", + in->err_addr, ret); + return -EREMOTEIO; + } + + if (out) { + out->pa = addr_out.pa.pa; + out->bank = addr_out.pa.bank; + out->channel_idx = addr_out.pa.channel_idx; + } + + return 0; +} + +static int ras_umc_log_ecc(struct ras_core_context *ras_core, + unsigned long idx, void *data) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + int ret; + + mutex_lock(&ras_umc->tree_lock); + ret = radix_tree_insert(&ras_umc->root, idx, data); + if (!ret) + radix_tree_tag_set(&ras_umc->root, idx, UMC_ECC_NEW_DETECTED_TAG); + mutex_unlock(&ras_umc->tree_lock); + + return ret; +} + +int ras_umc_clear_logged_ecc(struct ras_core_context *ras_core) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + uint64_t buf[8] = {0}; + void **slot; + void *data; + void *iter = buf; + + mutex_lock(&ras_umc->tree_lock); + radix_tree_for_each_slot(slot, &ras_umc->root, iter, 0) { + data = ras_radix_tree_delete_iter(&ras_umc->root, iter); + kfree(data); + } + mutex_unlock(&ras_umc->tree_lock); + + return 0; +} + +static void ras_umc_reserve_eeprom_record(struct ras_core_context *ras_core, + struct eeprom_umc_record *record) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + uint64_t page_pfn[16]; + int count = 0, 
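ras_umc_get_eeprom_timestamp() packs the calendar fields into one 32-bit word using the shift constants from the format comment: ss in [5:0], min in [11:6], hh in [16:12], day in [22:17], mon in [26:23], and yy in [31:27] as years since 2000. A standalone pack/unpack sketch:

#include <stdint.h>
#include <stdio.h>

#define TS_MINUTE 6
#define TS_HOUR   12
#define TS_DAY    17
#define TS_MONTH  23
#define TS_YEAR   27

static uint32_t ts_pack(unsigned yy, unsigned mon, unsigned day,
			unsigned hh, unsigned min, unsigned ss)
{
	/* yy is years since 2000, clamped to 0..31 by the caller. */
	return ss | (min << TS_MINUTE) | (hh << TS_HOUR) |
	       (day << TS_DAY) | (mon << TS_MONTH) | (yy << TS_YEAR);
}

int main(void)
{
	uint32_t ts = ts_pack(25, 3, 17, 17, 25, 4);  /* 2025-03-17 17:25:04 */

	printf("packed ts = 0x%08x\n", ts);
	printf("year = %u\n", 2000 + (ts >> TS_YEAR));
	printf("sec  = %u\n", ts & 0x3f);
	return 0;
}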
i; + + memset(page_pfn, 0, sizeof(page_pfn)); + if (ras_umc->ip_func && ras_umc->ip_func->eeprom_record_to_nps_pages) { + count = ras_umc->ip_func->eeprom_record_to_nps_pages(ras_core, + record, record->cur_nps, page_pfn, ARRAY_SIZE(page_pfn)); + if (count <= 0) { + RAS_DEV_ERR(ras_core->dev, + "Fail to convert error address! count:%d\n", count); + return; + } + } + + /* Reserve memory */ + for (i = 0; i < count; i++) + ras_core_event_notify(ras_core, + RAS_EVENT_ID__RESERVE_BAD_PAGE, &page_pfn[i]); +} + +/* When gpu reset is ongoing, ecc logging operations will be pended. + */ +int ras_umc_log_bad_bank_pending(struct ras_core_context *ras_core, struct ras_bank_ecc *bank) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + struct ras_bank_ecc_node *ecc_node; + + ecc_node = kzalloc(sizeof(*ecc_node), GFP_KERNEL); + if (!ecc_node) + return -ENOMEM; + + memcpy(&ecc_node->ecc, bank, sizeof(ecc_node->ecc)); + + mutex_lock(&ras_umc->pending_ecc_lock); + list_add_tail(&ecc_node->node, &ras_umc->pending_ecc_list); + mutex_unlock(&ras_umc->pending_ecc_lock); + + return 0; +} + +/* After gpu reset is complete, re-log the pending error banks. + */ +int ras_umc_log_pending_bad_bank(struct ras_core_context *ras_core) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + struct ras_bank_ecc_node *ecc_node, *tmp; + + mutex_lock(&ras_umc->pending_ecc_lock); + list_for_each_entry_safe(ecc_node, + tmp, &ras_umc->pending_ecc_list, node){ + if (ecc_node && !ras_umc_log_bad_bank(ras_core, &ecc_node->ecc)) { + list_del(&ecc_node->node); + kfree(ecc_node); + } + } + mutex_unlock(&ras_umc->pending_ecc_lock); + + return 0; +} + +int ras_umc_log_bad_bank(struct ras_core_context *ras_core, struct ras_bank_ecc *bank) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + struct eeprom_umc_record umc_rec; + struct eeprom_umc_record *err_rec; + int ret; + + memset(&umc_rec, 0, sizeof(umc_rec)); + + mutex_lock(&ras_umc->bank_log_lock); + ret = ras_umc->ip_func->bank_to_eeprom_record(ras_core, bank, &umc_rec); + if (ret) + goto out; + + err_rec = kzalloc(sizeof(*err_rec), GFP_KERNEL); + if (!err_rec) { + ret = -ENOMEM; + goto out; + } + + memcpy(err_rec, &umc_rec, sizeof(umc_rec)); + ret = ras_umc_log_ecc(ras_core, err_rec->cur_nps_retired_row_pfn, err_rec); + if (ret) { + if (ret == -EEXIST) { + RAS_DEV_INFO(ras_core->dev, "The bad pages have been logged before.\n"); + ret = 0; + } + + kfree(err_rec); + goto out; + } + + ras_umc_reserve_eeprom_record(ras_core, err_rec); + + ret = ras_core_event_notify(ras_core, + RAS_EVENT_ID__BAD_PAGE_DETECTED, NULL); + +out: + mutex_unlock(&ras_umc->bank_log_lock); + return ret; +} + +static int ras_umc_get_new_records(struct ras_core_context *ras_core, + struct eeprom_umc_record *records, u32 num) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + struct eeprom_umc_record *entries[MAX_ECC_NUM_PER_RETIREMENT]; + u32 entry_num = num < MAX_ECC_NUM_PER_RETIREMENT ? 
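New ECC records are inserted into a radix tree keyed by retired-row PFN and tagged UMC_ECC_NEW_DETECTED_TAG; ras_umc_get_new_records() then gang-looks-up the tag and clears it, so each record is retired exactly once. The radix tree is kernel-only, so here is a userspace analogue with a flagged array that shows the same consume-once pattern:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the tagged radix tree: records plus a
 * "new" flag playing the role of UMC_ECC_NEW_DETECTED_TAG. */
struct rec { uint64_t pfn; bool is_new; };

static int harvest_new(struct rec *pool, int n, uint64_t *out, int max)
{
	int count = 0;
	int i;

	for (i = 0; i < n && count < max; i++) {
		if (!pool[i].is_new)
			continue;
		out[count++] = pool[i].pfn;
		pool[i].is_new = false;	/* clear the tag: consume once */
	}
	return count;
}

int main(void)
{
	struct rec pool[] = { { 0x100, true }, { 0x200, false }, { 0x300, true } };
	uint64_t out[4];

	printf("first pass: %d new\n", harvest_new(pool, 3, out, 4));   /* 2 */
	printf("second pass: %d new\n", harvest_new(pool, 3, out, 4));  /* 0 */
	return 0;
}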
num : MAX_ECC_NUM_PER_RETIREMENT; + int count = 0; + int new_detected, i; + + mutex_lock(&ras_umc->tree_lock); + new_detected = radix_tree_gang_lookup_tag(&ras_umc->root, (void **)entries, + 0, entry_num, UMC_ECC_NEW_DETECTED_TAG); + for (i = 0; i < new_detected; i++) { + if (!entries[i]) + continue; + + memcpy(&records[i], entries[i], sizeof(struct eeprom_umc_record)); + count++; + radix_tree_tag_clear(&ras_umc->root, + entries[i]->cur_nps_retired_row_pfn, UMC_ECC_NEW_DETECTED_TAG); + } + mutex_unlock(&ras_umc->tree_lock); + + return count; +} + +static bool ras_umc_check_retired_record(struct ras_core_context *ras_core, + struct eeprom_umc_record *record, bool from_eeprom) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + struct eeprom_store_record *data = &ras_umc->umc_err_data.rom_data; + uint32_t nps = 0; + int i, ret; + + if (from_eeprom) { + nps = ras_umc->umc_err_data.umc_nps_mode; + if (ras_umc->ip_func && ras_umc->ip_func->eeprom_record_to_nps_record) { + ret = ras_umc->ip_func->eeprom_record_to_nps_record(ras_core, record, nps); + if (ret) + RAS_DEV_WARN(ras_core->dev, + "Failed to adjust eeprom record, ret:%d", ret); + } + return false; + } + + for (i = 0; i < data->count; i++) { + if ((data->bps[i].retired_row_pfn == record->retired_row_pfn) && + (data->bps[i].cur_nps_retired_row_pfn == record->cur_nps_retired_row_pfn)) + return true; + } + + return false; +} + +/* alloc/realloc bps array */ +static int ras_umc_realloc_err_data_space(struct ras_core_context *ras_core, + struct eeprom_store_record *data, int pages) +{ + unsigned int old_space = data->count + data->space_left; + unsigned int new_space = old_space + pages; + unsigned int align_space = ALIGN(new_space, 512); + void *bps = kzalloc(align_space * sizeof(*data->bps), GFP_KERNEL); + + if (!bps) + return -ENOMEM; + + if (data->bps) { + memcpy(bps, data->bps, + data->count * sizeof(*data->bps)); + kfree(data->bps); + } + + data->bps = bps; + data->space_left += align_space - old_space; + return 0; +} + +static int ras_umc_update_eeprom_rom_data(struct ras_core_context *ras_core, + struct eeprom_umc_record *bps) +{ + struct eeprom_store_record *data = &ras_core->ras_umc.umc_err_data.rom_data; + + if (!data->space_left && + ras_umc_realloc_err_data_space(ras_core, data, 256)) { + return -ENOMEM; + } + + memcpy(&data->bps[data->count], bps, sizeof(*data->bps)); + data->count++; + data->space_left--; + return 0; +} + +static int ras_umc_update_eeprom_ram_data(struct ras_core_context *ras_core, + struct eeprom_umc_record *bps) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + struct eeprom_store_record *data = &ras_umc->umc_err_data.ram_data; + uint64_t page_pfn[16]; + int count = 0, j; + + if (!data->space_left && + ras_umc_realloc_err_data_space(ras_core, data, 256)) { + return -ENOMEM; + } + + memset(page_pfn, 0, sizeof(page_pfn)); + if (ras_umc->ip_func && ras_umc->ip_func->eeprom_record_to_nps_pages) + count = ras_umc->ip_func->eeprom_record_to_nps_pages(ras_core, + bps, bps->cur_nps, page_pfn, ARRAY_SIZE(page_pfn)); + + if (count > 0) { + for (j = 0; j < count; j++) { + bps->cur_nps_retired_row_pfn = page_pfn[j]; + memcpy(&data->bps[data->count], bps, sizeof(*data->bps)); + data->count++; + data->space_left--; + } + } else { + memcpy(&data->bps[data->count], bps, sizeof(*data->bps)); + data->count++; + data->space_left--; + } + + return 0; +} + +/* it deal with vram only. 
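ras_umc_realloc_err_data_space() grows the bps array to the next multiple of 512 entries and tracks space_left, so repeated single-record appends do not reallocate every time. A standalone userspace sketch of the same growth rule:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

struct store { uint64_t *bps; int count; int space_left; };

static int store_grow(struct store *s, int pages)
{
	unsigned old_space = s->count + s->space_left;
	unsigned align_space = ALIGN_UP(old_space + pages, 512);
	uint64_t *bps = calloc(align_space, sizeof(*bps));

	if (!bps)
		return -1;
	if (s->bps) {
		/* carry existing records over, as the driver does */
		memcpy(bps, s->bps, s->count * sizeof(*bps));
		free(s->bps);
	}
	s->bps = bps;
	s->space_left += align_space - old_space;
	return 0;
}

int main(void)
{
	struct store s = { 0 };

	store_grow(&s, 1);	/* rounds capacity up to 512 entries */
	printf("space_left = %d\n", s.space_left);
	free(s.bps);
	return 0;
}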
*/ +static int ras_umc_add_bad_pages(struct ras_core_context *ras_core, + struct eeprom_umc_record *bps, + int pages, bool from_eeprom) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + struct ras_umc_err_data *data = &ras_umc->umc_err_data; + int i, ret = 0; + + if (!bps || pages <= 0) + return 0; + + mutex_lock(&ras_umc->umc_lock); + for (i = 0; i < pages; i++) { + if (ras_umc_check_retired_record(ras_core, &bps[i], from_eeprom)) + continue; + + ret = ras_umc_update_eeprom_rom_data(ras_core, &bps[i]); + if (ret) + goto out; + + if (data->last_retired_pfn == bps[i].cur_nps_retired_row_pfn) + continue; + + data->last_retired_pfn = bps[i].cur_nps_retired_row_pfn; + + if (from_eeprom) + ras_umc_reserve_eeprom_record(ras_core, &bps[i]); + + ret = ras_umc_update_eeprom_ram_data(ras_core, &bps[i]); + if (ret) + goto out; + } +out: + mutex_unlock(&ras_umc->umc_lock); + + return ret; +} + +/* + * read error record array in eeprom and reserve enough space for + * storing new bad pages + */ +int ras_umc_load_bad_pages(struct ras_core_context *ras_core) +{ + struct eeprom_umc_record *bps; + uint32_t ras_num_recs; + int ret; + + ras_num_recs = ras_eeprom_get_record_count(ras_core); + /* no bad page record, skip eeprom access */ + if (!ras_num_recs || + ras_core->ras_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE) + return 0; + + bps = kcalloc(ras_num_recs, sizeof(*bps), GFP_KERNEL); + if (!bps) + return -ENOMEM; + + ret = ras_eeprom_read(ras_core, bps, ras_num_recs); + if (ret) { + RAS_DEV_ERR(ras_core->dev, "Failed to load EEPROM table records!"); + } else { + ras_core->ras_umc.umc_err_data.last_retired_pfn = UMC_INV_MEM_PFN; + ret = ras_umc_add_bad_pages(ras_core, bps, ras_num_recs, true); + } + + kfree(bps); + return ret; +} + +/* + * write error record array to eeprom, the function should be + * protected by recovery_lock + * new_cnt: new added UE count, excluding reserved bad pages, can be NULL + */ +static int ras_umc_save_bad_pages(struct ras_core_context *ras_core) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + struct eeprom_store_record *data = &ras_umc->umc_err_data.rom_data; + uint32_t eeprom_record_num; + int save_count; + int ret = 0; + + if (!data->bps) + return 0; + + eeprom_record_num = ras_eeprom_get_record_count(ras_core); + mutex_lock(&ras_umc->umc_lock); + save_count = data->count - eeprom_record_num; + /* only new entries are saved */ + if (save_count > 0) { + if (ras_eeprom_append(ras_core, + &data->bps[eeprom_record_num], + save_count)) { + RAS_DEV_ERR(ras_core->dev, "Failed to save EEPROM table data!"); + ret = -EIO; + goto exit; + } + + RAS_DEV_INFO(ras_core->dev, "Saved %d pages to EEPROM table.\n", save_count); + } + +exit: + mutex_unlock(&ras_umc->umc_lock); + return ret; +} + +int ras_umc_handle_bad_pages(struct ras_core_context *ras_core, void *data) +{ + struct eeprom_umc_record records[MAX_ECC_NUM_PER_RETIREMENT]; + int count, ret; + + memset(records, 0, sizeof(records)); + count = ras_umc_get_new_records(ras_core, records, ARRAY_SIZE(records)); + if (count <= 0) + return -ENODATA; + + ret = ras_umc_add_bad_pages(ras_core, records, count, false); + if (ret) { + RAS_DEV_ERR(ras_core->dev, "Failed to add ras bad page!\n"); + return -EINVAL; + } + + ret = ras_umc_save_bad_pages(ras_core); + if (ret) { + RAS_DEV_ERR(ras_core->dev, "Failed to save ras bad page\n"); + return -EINVAL; + } + + return 0; +} + +int ras_umc_sw_init(struct ras_core_context *ras_core) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + + memset(ras_umc, 0, sizeof(*ras_umc)); + + 
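ras_umc_save_bad_pages() flushes only the tail of rom_data that the EEPROM has not yet seen: save_count = data->count - eeprom_record_num, and nothing is written when the delta is not positive. The arithmetic in isolation:

#include <stdio.h>

/* Mirror of the "only new entries are saved" computation above. */
static int records_to_append(int ram_count, int eeprom_count)
{
	int save_count = ram_count - eeprom_count;

	return save_count > 0 ? save_count : 0;
}

int main(void)
{
	printf("%d\n", records_to_append(10, 7));  /* 3 new records to flush */
	printf("%d\n", records_to_append(7, 7));   /* nothing to do */
	return 0;
}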
INIT_LIST_HEAD(&ras_umc->pending_ecc_list); + + INIT_RADIX_TREE(&ras_umc->root, GFP_KERNEL); + + mutex_init(&ras_umc->tree_lock); + mutex_init(&ras_umc->pending_ecc_lock); + mutex_init(&ras_umc->umc_lock); + mutex_init(&ras_umc->bank_log_lock); + + return 0; +} + +int ras_umc_sw_fini(struct ras_core_context *ras_core) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + struct ras_umc_err_data *umc_err_data = &ras_umc->umc_err_data; + struct ras_bank_ecc_node *ecc_node, *tmp; + + mutex_destroy(&ras_umc->umc_lock); + mutex_destroy(&ras_umc->bank_log_lock); + + if (umc_err_data->rom_data.bps) { + umc_err_data->rom_data.count = 0; + kfree(umc_err_data->rom_data.bps); + umc_err_data->rom_data.bps = NULL; + umc_err_data->rom_data.space_left = 0; + } + + if (umc_err_data->ram_data.bps) { + umc_err_data->ram_data.count = 0; + kfree(umc_err_data->ram_data.bps); + umc_err_data->ram_data.bps = NULL; + umc_err_data->ram_data.space_left = 0; + } + + ras_umc_clear_logged_ecc(ras_core); + + mutex_lock(&ras_umc->pending_ecc_lock); + list_for_each_entry_safe(ecc_node, + tmp, &ras_umc->pending_ecc_list, node){ + list_del(&ecc_node->node); + kfree(ecc_node); + } + mutex_unlock(&ras_umc->pending_ecc_lock); + + mutex_destroy(&ras_umc->tree_lock); + mutex_destroy(&ras_umc->pending_ecc_lock); + + return 0; +} + +int ras_umc_hw_init(struct ras_core_context *ras_core) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + uint32_t nps; + + nps = ras_core_get_curr_nps_mode(ras_core); + + if (!nps || (nps >= UMC_MEMORY_PARTITION_MODE_UNKNOWN)) { + RAS_DEV_ERR(ras_core->dev, "Invalid memory NPS mode: %u!\n", nps); + return -ENODATA; + } + + ras_umc->umc_err_data.umc_nps_mode = nps; + + ras_umc->umc_vram_type = ras_core->config->umc_cfg.umc_vram_type; + if (!ras_umc->umc_vram_type) { + RAS_DEV_ERR(ras_core->dev, "Invalid UMC VRAM Type: %u!\n", + ras_umc->umc_vram_type); + return -ENODATA; + } + + ras_umc->umc_ip_version = ras_core->config->umc_ip_version; + ras_umc->ip_func = ras_umc_get_ip_func(ras_core, ras_umc->umc_ip_version); + if (!ras_umc->ip_func) + return -EINVAL; + + return 0; +} + +int ras_umc_hw_fini(struct ras_core_context *ras_core) +{ + return 0; +} + +int ras_umc_clean_badpage_data(struct ras_core_context *ras_core) +{ + struct ras_umc_err_data *data = &ras_core->ras_umc.umc_err_data; + + mutex_lock(&ras_core->ras_umc.umc_lock); + + kfree(data->rom_data.bps); + kfree(data->ram_data.bps); + + memset(data, 0, sizeof(*data)); + mutex_unlock(&ras_core->ras_umc.umc_lock); + + return 0; +} + +int ras_umc_fill_eeprom_record(struct ras_core_context *ras_core, + uint64_t err_addr, uint32_t umc_inst, struct umc_phy_addr *cur_nps_addr, + enum umc_memory_partition_mode cur_nps, struct eeprom_umc_record *record) +{ + struct eeprom_umc_record *err_rec = record; + + /* Set bad page pfn and nps mode */ + EEPROM_RECORD_SETUP_UMC_ADDR_AND_NPS(err_rec, + RAS_ADDR_TO_PFN(cur_nps_addr->pa), cur_nps); + + err_rec->address = err_addr; + err_rec->ts = ras_umc_get_eeprom_timestamp(ras_core); + err_rec->err_type = RAS_EEPROM_ERR_NON_RECOVERABLE; + err_rec->cu = 0; + err_rec->mem_channel = cur_nps_addr->channel_idx; + err_rec->mcumc_id = umc_inst; + err_rec->cur_nps_retired_row_pfn = RAS_ADDR_TO_PFN(cur_nps_addr->pa); + err_rec->cur_nps_bank = cur_nps_addr->bank; + err_rec->cur_nps = cur_nps; + return 0; +} + +int ras_umc_get_saved_eeprom_count(struct ras_core_context *ras_core) +{ + struct ras_umc_err_data *err_data = &ras_core->ras_umc.umc_err_data; + + return err_data->rom_data.count; +} + +int ras_umc_get_badpage_count(struct 
ras_core_context *ras_core) +{ + struct eeprom_store_record *data = &ras_core->ras_umc.umc_err_data.ram_data; + + return data->count; +} + +int ras_umc_get_badpage_record(struct ras_core_context *ras_core, uint32_t index, void *record) +{ + struct eeprom_store_record *data = &ras_core->ras_umc.umc_err_data.ram_data; + + if (index >= data->count) + return -EINVAL; + + memcpy(record, &data->bps[index], sizeof(struct eeprom_umc_record)); + return 0; +} + +bool ras_umc_check_retired_addr(struct ras_core_context *ras_core, uint64_t addr) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + struct eeprom_store_record *data = &ras_umc->umc_err_data.ram_data; + uint64_t page_pfn = RAS_ADDR_TO_PFN(addr); + int i, ret = false; + + mutex_lock(&ras_umc->umc_lock); + for (i = 0; i < data->count; i++) { + if (data->bps[i].cur_nps_retired_row_pfn == page_pfn) { + ret = true; + break; + } + } + mutex_unlock(&ras_umc->umc_lock); + + return ret; +} + +int ras_umc_translate_soc_pa_and_bank(struct ras_core_context *ras_core, + uint64_t *soc_pa, struct umc_bank_addr *bank_addr, bool bank_to_pa) +{ + struct ras_umc *ras_umc = &ras_core->ras_umc; + int ret = 0; + + if (bank_to_pa) + ret = ras_umc->ip_func->bank_to_soc_pa(ras_core, *bank_addr, soc_pa); + else + ret = ras_umc->ip_func->soc_pa_to_bank(ras_core, *soc_pa, bank_addr); + + return ret; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.h b/drivers/gpu/drm/amd/ras/rascore/ras_umc.h new file mode 100644 index 000000000000..7d9e779d8c4c --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
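A caller enumerates retired pages through ras_umc_get_badpage_count() and ras_umc_get_badpage_record() defined above. Those need a live ras_core_context, so this is a hypothetical stand-in with the same count/index/bounds-check shape, purely for illustration:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins; a real caller would use the
 * ras_umc_get_badpage_count()/_record() pair above instead. */
struct demo_record { uint64_t cur_nps_retired_row_pfn; };

static struct demo_record demo_tbl[] = { { 0x100 }, { 0x2a0 } };

static int demo_count(void)
{
	return 2;
}

static int demo_get_record(uint32_t idx, struct demo_record *r)
{
	if (idx >= 2)
		return -1;	/* mirrors the -EINVAL bounds check */
	*r = demo_tbl[idx];
	return 0;
}

int main(void)
{
	struct demo_record r;
	uint32_t i;

	for (i = 0; i < (uint32_t)demo_count(); i++)
		if (!demo_get_record(i, &r))
			printf("bad page pfn 0x%llx\n",
			       (unsigned long long)r.cur_nps_retired_row_pfn);
	return 0;
}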
+ * + */ + +#ifndef __RAS_UMC_H__ +#define __RAS_UMC_H__ +#include "ras.h" +#include "ras_eeprom.h" +#include "ras_cmd.h" + +#define UMC_VRAM_TYPE_UNKNOWN 0 +#define UMC_VRAM_TYPE_GDDR1 1 +#define UMC_VRAM_TYPE_DDR2 2 +#define UMC_VRAM_TYPE_GDDR3 3 +#define UMC_VRAM_TYPE_GDDR4 4 +#define UMC_VRAM_TYPE_GDDR5 5 +#define UMC_VRAM_TYPE_HBM 6 +#define UMC_VRAM_TYPE_DDR3 7 +#define UMC_VRAM_TYPE_DDR4 8 +#define UMC_VRAM_TYPE_GDDR6 9 +#define UMC_VRAM_TYPE_DDR5 10 +#define UMC_VRAM_TYPE_LPDDR4 11 +#define UMC_VRAM_TYPE_LPDDR5 12 +#define UMC_VRAM_TYPE_HBM3E 13 + +#define UMC_ECC_NEW_DETECTED_TAG 0x1 +#define UMC_INV_MEM_PFN (0xFFFFFFFFFFFFFFFF) + +/* three column bits and one row bit in MCA address flip + * in bad page retirement + */ +#define UMC_PA_FLIP_BITS_NUM 4 + +enum umc_memory_partition_mode { + UMC_MEMORY_PARTITION_MODE_NONE = 0, + UMC_MEMORY_PARTITION_MODE_NPS1 = 1, + UMC_MEMORY_PARTITION_MODE_NPS2 = 2, + UMC_MEMORY_PARTITION_MODE_NPS3 = 3, + UMC_MEMORY_PARTITION_MODE_NPS4 = 4, + UMC_MEMORY_PARTITION_MODE_NPS6 = 6, + UMC_MEMORY_PARTITION_MODE_NPS8 = 8, + UMC_MEMORY_PARTITION_MODE_UNKNOWN +}; + +struct ras_core_context; +struct ras_bank_ecc; + +struct umc_flip_bits { + uint32_t flip_bits_in_pa[UMC_PA_FLIP_BITS_NUM]; + uint32_t flip_row_bit; + uint32_t r13_in_pa; + uint32_t bit_num; +}; + +struct umc_mca_addr { + uint64_t err_addr; + uint32_t ch_inst; + uint32_t umc_inst; + uint32_t node_inst; + uint32_t socket_id; +}; + +struct umc_phy_addr { + uint64_t pa; + uint32_t bank; + uint32_t channel_idx; +}; + +struct umc_bank_addr { + uint32_t stack_id; /* SID */ + uint32_t bank_group; + uint32_t bank; + uint32_t row; + uint32_t column; + uint32_t channel; + uint32_t subchannel; /* Also called Pseudochannel (PC) */ +}; + +struct ras_umc_ip_func { + int (*bank_to_eeprom_record)(struct ras_core_context *ras_core, + struct ras_bank_ecc *bank, struct eeprom_umc_record *record); + int (*eeprom_record_to_nps_record)(struct ras_core_context *ras_core, + struct eeprom_umc_record *record, uint32_t nps); + int (*eeprom_record_to_nps_pages)(struct ras_core_context *ras_core, + struct eeprom_umc_record *record, uint32_t nps, + uint64_t *pfns, uint32_t num); + int (*bank_to_soc_pa)(struct ras_core_context *ras_core, + struct umc_bank_addr bank_addr, uint64_t *soc_pa); + int (*soc_pa_to_bank)(struct ras_core_context *ras_core, + uint64_t soc_pa, struct umc_bank_addr *bank_addr); +}; + +struct eeprom_store_record { + /* point to data records array */ + struct eeprom_umc_record *bps; + /* the count of entries */ + int count; + /* the space can place new entries */ + int space_left; +}; + +struct ras_umc_err_data { + struct eeprom_store_record rom_data; + struct eeprom_store_record ram_data; + enum umc_memory_partition_mode umc_nps_mode; + uint64_t last_retired_pfn; +}; + +struct ras_umc { + u32 umc_ip_version; + u32 umc_vram_type; + const struct ras_umc_ip_func *ip_func; + struct radix_tree_root root; + struct mutex tree_lock; + struct mutex umc_lock; + struct mutex bank_log_lock; + struct mutex pending_ecc_lock; + struct ras_umc_err_data umc_err_data; + struct list_head pending_ecc_list; +}; + +int ras_umc_sw_init(struct ras_core_context *ras); +int ras_umc_sw_fini(struct ras_core_context *ras); +int ras_umc_hw_init(struct ras_core_context *ras); +int ras_umc_hw_fini(struct ras_core_context *ras); +int ras_umc_psp_convert_ma_to_pa(struct ras_core_context *ras_core, + struct umc_mca_addr *in, struct umc_phy_addr *out, + uint32_t nps); +int ras_umc_handle_bad_pages(struct ras_core_context *ras_core, void 
*data); +int ras_umc_log_bad_bank(struct ras_core_context *ras, struct ras_bank_ecc *bank); +int ras_umc_log_bad_bank_pending(struct ras_core_context *ras_core, struct ras_bank_ecc *bank); +int ras_umc_log_pending_bad_bank(struct ras_core_context *ras_core); +int ras_umc_clear_logged_ecc(struct ras_core_context *ras_core); +int ras_umc_load_bad_pages(struct ras_core_context *ras_core); +int ras_umc_get_saved_eeprom_count(struct ras_core_context *ras_core); +int ras_umc_clean_badpage_data(struct ras_core_context *ras_core); +int ras_umc_fill_eeprom_record(struct ras_core_context *ras_core, + uint64_t err_addr, uint32_t umc_inst, struct umc_phy_addr *cur_nps_addr, + enum umc_memory_partition_mode cur_nps, struct eeprom_umc_record *record); + +int ras_umc_get_badpage_count(struct ras_core_context *ras_core); +int ras_umc_get_badpage_record(struct ras_core_context *ras_core, uint32_t index, void *record); +bool ras_umc_check_retired_addr(struct ras_core_context *ras_core, uint64_t addr); +int ras_umc_translate_soc_pa_and_bank(struct ras_core_context *ras_core, + uint64_t *soc_pa, struct umc_bank_addr *bank_addr, bool bank_to_pa); +#endif -- cgit v1.2.3 From 4b23ebf7a079c0b7542db2aa276ea2e4d183cee8 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:29:02 +0800 Subject: drm/amd/ras: Add gfx v9_0 ras functions Add gfx v9_0 ras functions. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.c | 426 +++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.h | 259 +++++++++++++++ 2 files changed, 685 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.c b/drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.c new file mode 100644 index 000000000000..6213d3f125be --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.c @@ -0,0 +1,426 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "ras.h" +#include "ras_gfx_v9_0.h" +#include "ras_core_status.h" + +enum ta_gfx_v9_subblock { + /*CPC*/ + TA_GFX_V9__GFX_CPC_INDEX_START = 0, + TA_GFX_V9__GFX_CPC_SCRATCH = TA_GFX_V9__GFX_CPC_INDEX_START, + TA_GFX_V9__GFX_CPC_UCODE, + TA_GFX_V9__GFX_DC_STATE_ME1, + TA_GFX_V9__GFX_DC_CSINVOC_ME1, + TA_GFX_V9__GFX_DC_RESTORE_ME1, + TA_GFX_V9__GFX_DC_STATE_ME2, + TA_GFX_V9__GFX_DC_CSINVOC_ME2, + TA_GFX_V9__GFX_DC_RESTORE_ME2, + TA_GFX_V9__GFX_CPC_INDEX_END = TA_GFX_V9__GFX_DC_RESTORE_ME2, + /* CPF*/ + TA_GFX_V9__GFX_CPF_INDEX_START, + TA_GFX_V9__GFX_CPF_ROQ_ME2 = TA_GFX_V9__GFX_CPF_INDEX_START, + TA_GFX_V9__GFX_CPF_ROQ_ME1, + TA_GFX_V9__GFX_CPF_TAG, + TA_GFX_V9__GFX_CPF_INDEX_END = TA_GFX_V9__GFX_CPF_TAG, + /* CPG*/ + TA_GFX_V9__GFX_CPG_INDEX_START, + TA_GFX_V9__GFX_CPG_DMA_ROQ = TA_GFX_V9__GFX_CPG_INDEX_START, + TA_GFX_V9__GFX_CPG_DMA_TAG, + TA_GFX_V9__GFX_CPG_TAG, + TA_GFX_V9__GFX_CPG_INDEX_END = TA_GFX_V9__GFX_CPG_TAG, + /* GDS*/ + TA_GFX_V9__GFX_GDS_INDEX_START, + TA_GFX_V9__GFX_GDS_MEM = TA_GFX_V9__GFX_GDS_INDEX_START, + TA_GFX_V9__GFX_GDS_INPUT_QUEUE, + TA_GFX_V9__GFX_GDS_OA_PHY_CMD_RAM_MEM, + TA_GFX_V9__GFX_GDS_OA_PHY_DATA_RAM_MEM, + TA_GFX_V9__GFX_GDS_OA_PIPE_MEM, + TA_GFX_V9__GFX_GDS_INDEX_END = TA_GFX_V9__GFX_GDS_OA_PIPE_MEM, + /* SPI*/ + TA_GFX_V9__GFX_SPI_SR_MEM, + /* SQ*/ + TA_GFX_V9__GFX_SQ_INDEX_START, + TA_GFX_V9__GFX_SQ_SGPR = TA_GFX_V9__GFX_SQ_INDEX_START, + TA_GFX_V9__GFX_SQ_LDS_D, + TA_GFX_V9__GFX_SQ_LDS_I, + TA_GFX_V9__GFX_SQ_VGPR, /* VGPR = SP*/ + TA_GFX_V9__GFX_SQ_INDEX_END = TA_GFX_V9__GFX_SQ_VGPR, + /* SQC (3 ranges)*/ + TA_GFX_V9__GFX_SQC_INDEX_START, + /* SQC range 0*/ + TA_GFX_V9__GFX_SQC_INDEX0_START = TA_GFX_V9__GFX_SQC_INDEX_START, + TA_GFX_V9__GFX_SQC_INST_UTCL1_LFIFO = + TA_GFX_V9__GFX_SQC_INDEX0_START, + TA_GFX_V9__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, + TA_GFX_V9__GFX_SQC_DATA_CU0_UTCL1_LFIFO, + TA_GFX_V9__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, + TA_GFX_V9__GFX_SQC_DATA_CU1_UTCL1_LFIFO, + TA_GFX_V9__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, + TA_GFX_V9__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + TA_GFX_V9__GFX_SQC_INDEX0_END = + TA_GFX_V9__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + /* SQC range 1*/ + TA_GFX_V9__GFX_SQC_INDEX1_START, + TA_GFX_V9__GFX_SQC_INST_BANKA_TAG_RAM = + TA_GFX_V9__GFX_SQC_INDEX1_START, + TA_GFX_V9__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, + TA_GFX_V9__GFX_SQC_INST_BANKA_MISS_FIFO, + TA_GFX_V9__GFX_SQC_INST_BANKA_BANK_RAM, + TA_GFX_V9__GFX_SQC_DATA_BANKA_TAG_RAM, + TA_GFX_V9__GFX_SQC_DATA_BANKA_HIT_FIFO, + TA_GFX_V9__GFX_SQC_DATA_BANKA_MISS_FIFO, + TA_GFX_V9__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, + TA_GFX_V9__GFX_SQC_DATA_BANKA_BANK_RAM, + TA_GFX_V9__GFX_SQC_INDEX1_END = + TA_GFX_V9__GFX_SQC_DATA_BANKA_BANK_RAM, + /* SQC range 2*/ + TA_GFX_V9__GFX_SQC_INDEX2_START, + TA_GFX_V9__GFX_SQC_INST_BANKB_TAG_RAM = + TA_GFX_V9__GFX_SQC_INDEX2_START, + TA_GFX_V9__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, + TA_GFX_V9__GFX_SQC_INST_BANKB_MISS_FIFO, + TA_GFX_V9__GFX_SQC_INST_BANKB_BANK_RAM, + TA_GFX_V9__GFX_SQC_DATA_BANKB_TAG_RAM, + TA_GFX_V9__GFX_SQC_DATA_BANKB_HIT_FIFO, + TA_GFX_V9__GFX_SQC_DATA_BANKB_MISS_FIFO, + TA_GFX_V9__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, + TA_GFX_V9__GFX_SQC_DATA_BANKB_BANK_RAM, + TA_GFX_V9__GFX_SQC_INDEX2_END = + TA_GFX_V9__GFX_SQC_DATA_BANKB_BANK_RAM, + TA_GFX_V9__GFX_SQC_INDEX_END = TA_GFX_V9__GFX_SQC_INDEX2_END, + /* TA*/ + TA_GFX_V9__GFX_TA_INDEX_START, + TA_GFX_V9__GFX_TA_FS_DFIFO = TA_GFX_V9__GFX_TA_INDEX_START, + TA_GFX_V9__GFX_TA_FS_AFIFO, + TA_GFX_V9__GFX_TA_FL_LFIFO, + TA_GFX_V9__GFX_TA_FX_LFIFO, + TA_GFX_V9__GFX_TA_FS_CFIFO, + 
TA_GFX_V9__GFX_TA_INDEX_END = TA_GFX_V9__GFX_TA_FS_CFIFO, + /* TCA*/ + TA_GFX_V9__GFX_TCA_INDEX_START, + TA_GFX_V9__GFX_TCA_HOLE_FIFO = TA_GFX_V9__GFX_TCA_INDEX_START, + TA_GFX_V9__GFX_TCA_REQ_FIFO, + TA_GFX_V9__GFX_TCA_INDEX_END = TA_GFX_V9__GFX_TCA_REQ_FIFO, + /* TCC (5 sub-ranges)*/ + TA_GFX_V9__GFX_TCC_INDEX_START, + /* TCC range 0*/ + TA_GFX_V9__GFX_TCC_INDEX0_START = TA_GFX_V9__GFX_TCC_INDEX_START, + TA_GFX_V9__GFX_TCC_CACHE_DATA = TA_GFX_V9__GFX_TCC_INDEX0_START, + TA_GFX_V9__GFX_TCC_CACHE_DATA_BANK_0_1, + TA_GFX_V9__GFX_TCC_CACHE_DATA_BANK_1_0, + TA_GFX_V9__GFX_TCC_CACHE_DATA_BANK_1_1, + TA_GFX_V9__GFX_TCC_CACHE_DIRTY_BANK_0, + TA_GFX_V9__GFX_TCC_CACHE_DIRTY_BANK_1, + TA_GFX_V9__GFX_TCC_HIGH_RATE_TAG, + TA_GFX_V9__GFX_TCC_LOW_RATE_TAG, + TA_GFX_V9__GFX_TCC_INDEX0_END = TA_GFX_V9__GFX_TCC_LOW_RATE_TAG, + /* TCC range 1*/ + TA_GFX_V9__GFX_TCC_INDEX1_START, + TA_GFX_V9__GFX_TCC_IN_USE_DEC = TA_GFX_V9__GFX_TCC_INDEX1_START, + TA_GFX_V9__GFX_TCC_IN_USE_TRANSFER, + TA_GFX_V9__GFX_TCC_INDEX1_END = + TA_GFX_V9__GFX_TCC_IN_USE_TRANSFER, + /* TCC range 2*/ + TA_GFX_V9__GFX_TCC_INDEX2_START, + TA_GFX_V9__GFX_TCC_RETURN_DATA = TA_GFX_V9__GFX_TCC_INDEX2_START, + TA_GFX_V9__GFX_TCC_RETURN_CONTROL, + TA_GFX_V9__GFX_TCC_UC_ATOMIC_FIFO, + TA_GFX_V9__GFX_TCC_WRITE_RETURN, + TA_GFX_V9__GFX_TCC_WRITE_CACHE_READ, + TA_GFX_V9__GFX_TCC_SRC_FIFO, + TA_GFX_V9__GFX_TCC_SRC_FIFO_NEXT_RAM, + TA_GFX_V9__GFX_TCC_CACHE_TAG_PROBE_FIFO, + TA_GFX_V9__GFX_TCC_INDEX2_END = + TA_GFX_V9__GFX_TCC_CACHE_TAG_PROBE_FIFO, + /* TCC range 3*/ + TA_GFX_V9__GFX_TCC_INDEX3_START, + TA_GFX_V9__GFX_TCC_LATENCY_FIFO = TA_GFX_V9__GFX_TCC_INDEX3_START, + TA_GFX_V9__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + TA_GFX_V9__GFX_TCC_INDEX3_END = + TA_GFX_V9__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + /* TCC range 4*/ + TA_GFX_V9__GFX_TCC_INDEX4_START, + TA_GFX_V9__GFX_TCC_WRRET_TAG_WRITE_RETURN = + TA_GFX_V9__GFX_TCC_INDEX4_START, + TA_GFX_V9__GFX_TCC_ATOMIC_RETURN_BUFFER, + TA_GFX_V9__GFX_TCC_INDEX4_END = + TA_GFX_V9__GFX_TCC_ATOMIC_RETURN_BUFFER, + TA_GFX_V9__GFX_TCC_INDEX_END = TA_GFX_V9__GFX_TCC_INDEX4_END, + /* TCI*/ + TA_GFX_V9__GFX_TCI_WRITE_RAM, + /* TCP*/ + TA_GFX_V9__GFX_TCP_INDEX_START, + TA_GFX_V9__GFX_TCP_CACHE_RAM = TA_GFX_V9__GFX_TCP_INDEX_START, + TA_GFX_V9__GFX_TCP_LFIFO_RAM, + TA_GFX_V9__GFX_TCP_CMD_FIFO, + TA_GFX_V9__GFX_TCP_VM_FIFO, + TA_GFX_V9__GFX_TCP_DB_RAM, + TA_GFX_V9__GFX_TCP_UTCL1_LFIFO0, + TA_GFX_V9__GFX_TCP_UTCL1_LFIFO1, + TA_GFX_V9__GFX_TCP_INDEX_END = TA_GFX_V9__GFX_TCP_UTCL1_LFIFO1, + /* TD*/ + TA_GFX_V9__GFX_TD_INDEX_START, + TA_GFX_V9__GFX_TD_SS_FIFO_LO = TA_GFX_V9__GFX_TD_INDEX_START, + TA_GFX_V9__GFX_TD_SS_FIFO_HI, + TA_GFX_V9__GFX_TD_CS_FIFO, + TA_GFX_V9__GFX_TD_INDEX_END = TA_GFX_V9__GFX_TD_CS_FIFO, + /* EA (3 sub-ranges)*/ + TA_GFX_V9__GFX_EA_INDEX_START, + /* EA range 0*/ + TA_GFX_V9__GFX_EA_INDEX0_START = TA_GFX_V9__GFX_EA_INDEX_START, + TA_GFX_V9__GFX_EA_DRAMRD_CMDMEM = TA_GFX_V9__GFX_EA_INDEX0_START, + TA_GFX_V9__GFX_EA_DRAMWR_CMDMEM, + TA_GFX_V9__GFX_EA_DRAMWR_DATAMEM, + TA_GFX_V9__GFX_EA_RRET_TAGMEM, + TA_GFX_V9__GFX_EA_WRET_TAGMEM, + TA_GFX_V9__GFX_EA_GMIRD_CMDMEM, + TA_GFX_V9__GFX_EA_GMIWR_CMDMEM, + TA_GFX_V9__GFX_EA_GMIWR_DATAMEM, + TA_GFX_V9__GFX_EA_INDEX0_END = TA_GFX_V9__GFX_EA_GMIWR_DATAMEM, + /* EA range 1*/ + TA_GFX_V9__GFX_EA_INDEX1_START, + TA_GFX_V9__GFX_EA_DRAMRD_PAGEMEM = TA_GFX_V9__GFX_EA_INDEX1_START, + TA_GFX_V9__GFX_EA_DRAMWR_PAGEMEM, + TA_GFX_V9__GFX_EA_IORD_CMDMEM, + TA_GFX_V9__GFX_EA_IOWR_CMDMEM, + TA_GFX_V9__GFX_EA_IOWR_DATAMEM, + TA_GFX_V9__GFX_EA_GMIRD_PAGEMEM, + 
TA_GFX_V9__GFX_EA_GMIWR_PAGEMEM, + TA_GFX_V9__GFX_EA_INDEX1_END = TA_GFX_V9__GFX_EA_GMIWR_PAGEMEM, + /* EA range 2*/ + TA_GFX_V9__GFX_EA_INDEX2_START, + TA_GFX_V9__GFX_EA_MAM_D0MEM = TA_GFX_V9__GFX_EA_INDEX2_START, + TA_GFX_V9__GFX_EA_MAM_D1MEM, + TA_GFX_V9__GFX_EA_MAM_D2MEM, + TA_GFX_V9__GFX_EA_MAM_D3MEM, + TA_GFX_V9__GFX_EA_INDEX2_END = TA_GFX_V9__GFX_EA_MAM_D3MEM, + TA_GFX_V9__GFX_EA_INDEX_END = TA_GFX_V9__GFX_EA_INDEX2_END, + /* UTC VM L2 bank*/ + TA_GFX_V9__UTC_VML2_BANK_CACHE, + /* UTC VM walker*/ + TA_GFX_V9__UTC_VML2_WALKER, + /* UTC ATC L2 2MB cache*/ + TA_GFX_V9__UTC_ATCL2_CACHE_2M_BANK, + /* UTC ATC L2 4KB cache*/ + TA_GFX_V9__UTC_ATCL2_CACHE_4K_BANK, + TA_GFX_V9__GFX_MAX +}; + +struct ras_gfx_subblock_t { + unsigned char *name; + int ta_subblock; + int hw_supported_error_type; + int sw_supported_error_type; +}; + +#define RAS_GFX_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ + [RAS_GFX_V9__##subblock] = { \ + #subblock, \ + TA_GFX_V9__##subblock, \ + ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ + (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ + } + +const struct ras_gfx_subblock_t ras_gfx_v9_0_subblocks[] = { + RAS_GFX_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), + RAS_GFX_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), + RAS_GFX_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), + RAS_GFX_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), + RAS_GFX_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), + RAS_GFX_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), + RAS_GFX_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), + RAS_GFX_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), + RAS_GFX_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), + RAS_GFX_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, + 1), + RAS_GFX_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, + 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 
1, 1, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, + 0, 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, + 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, + 0, 0), + RAS_GFX_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), + RAS_GFX_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), + RAS_GFX_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), + RAS_GFX_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, + 1), + RAS_GFX_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, + 1), + RAS_GFX_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, + 1), + RAS_GFX_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, + 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, + 0), + RAS_GFX_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), + RAS_GFX_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), + RAS_GFX_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), + 
RAS_GFX_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
+ RAS_GFX_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
+ RAS_GFX_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
+ RAS_GFX_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
+};
+
+static int gfx_v9_0_get_ta_subblock(struct ras_core_context *ras_core,
+ uint32_t error_type, uint32_t subblock, uint32_t *ta_subblock)
+{
+ const struct ras_gfx_subblock_t *gfx_subblock;
+
+ if (subblock >= ARRAY_SIZE(ras_gfx_v9_0_subblocks))
+ return -EINVAL;
+
+ gfx_subblock = &ras_gfx_v9_0_subblocks[subblock];
+ if (!gfx_subblock->name)
+ return -EPERM;
+
+ if (!(gfx_subblock->hw_supported_error_type & error_type)) {
+ RAS_DEV_ERR(ras_core->dev, "GFX Subblock %s, hardware does not support type 0x%x\n",
+ gfx_subblock->name, error_type);
+ return -EPERM;
+ }
+
+ if (!(gfx_subblock->sw_supported_error_type & error_type)) {
+ RAS_DEV_ERR(ras_core->dev, "GFX Subblock %s, driver does not support type 0x%x\n",
+ gfx_subblock->name, error_type);
+ return -EPERM;
+ }
+
+ *ta_subblock = gfx_subblock->ta_subblock;
+
+ return 0;
+}
+
+const struct ras_gfx_ip_func gfx_ras_func_v9_0 = {
+ .get_ta_subblock = gfx_v9_0_get_ta_subblock,
+};
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.h b/drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.h
new file mode 100644
index 000000000000..659b56619747
--- /dev/null
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.h
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __RAS_GFX_V9_0_H__ +#define __RAS_GFX_V9_0_H__ + +enum ras_gfx_v9_subblock { + /* CPC */ + RAS_GFX_V9__GFX_CPC_INDEX_START = 0, + RAS_GFX_V9__GFX_CPC_SCRATCH = + RAS_GFX_V9__GFX_CPC_INDEX_START, + RAS_GFX_V9__GFX_CPC_UCODE, + RAS_GFX_V9__GFX_DC_STATE_ME1, + RAS_GFX_V9__GFX_DC_CSINVOC_ME1, + RAS_GFX_V9__GFX_DC_RESTORE_ME1, + RAS_GFX_V9__GFX_DC_STATE_ME2, + RAS_GFX_V9__GFX_DC_CSINVOC_ME2, + RAS_GFX_V9__GFX_DC_RESTORE_ME2, + RAS_GFX_V9__GFX_CPC_INDEX_END = + RAS_GFX_V9__GFX_DC_RESTORE_ME2, + /* CPF */ + RAS_GFX_V9__GFX_CPF_INDEX_START, + RAS_GFX_V9__GFX_CPF_ROQ_ME2 = + RAS_GFX_V9__GFX_CPF_INDEX_START, + RAS_GFX_V9__GFX_CPF_ROQ_ME1, + RAS_GFX_V9__GFX_CPF_TAG, + RAS_GFX_V9__GFX_CPF_INDEX_END = RAS_GFX_V9__GFX_CPF_TAG, + /* CPG */ + RAS_GFX_V9__GFX_CPG_INDEX_START, + RAS_GFX_V9__GFX_CPG_DMA_ROQ = + RAS_GFX_V9__GFX_CPG_INDEX_START, + RAS_GFX_V9__GFX_CPG_DMA_TAG, + RAS_GFX_V9__GFX_CPG_TAG, + RAS_GFX_V9__GFX_CPG_INDEX_END = RAS_GFX_V9__GFX_CPG_TAG, + /* GDS */ + RAS_GFX_V9__GFX_GDS_INDEX_START, + RAS_GFX_V9__GFX_GDS_MEM = RAS_GFX_V9__GFX_GDS_INDEX_START, + RAS_GFX_V9__GFX_GDS_INPUT_QUEUE, + RAS_GFX_V9__GFX_GDS_OA_PHY_CMD_RAM_MEM, + RAS_GFX_V9__GFX_GDS_OA_PHY_DATA_RAM_MEM, + RAS_GFX_V9__GFX_GDS_OA_PIPE_MEM, + RAS_GFX_V9__GFX_GDS_INDEX_END = + RAS_GFX_V9__GFX_GDS_OA_PIPE_MEM, + /* SPI */ + RAS_GFX_V9__GFX_SPI_SR_MEM, + /* SQ */ + RAS_GFX_V9__GFX_SQ_INDEX_START, + RAS_GFX_V9__GFX_SQ_SGPR = RAS_GFX_V9__GFX_SQ_INDEX_START, + RAS_GFX_V9__GFX_SQ_LDS_D, + RAS_GFX_V9__GFX_SQ_LDS_I, + RAS_GFX_V9__GFX_SQ_VGPR, + RAS_GFX_V9__GFX_SQ_INDEX_END = RAS_GFX_V9__GFX_SQ_VGPR, + /* SQC (3 ranges) */ + RAS_GFX_V9__GFX_SQC_INDEX_START, + /* SQC range 0 */ + RAS_GFX_V9__GFX_SQC_INDEX0_START = + RAS_GFX_V9__GFX_SQC_INDEX_START, + RAS_GFX_V9__GFX_SQC_INST_UTCL1_LFIFO = + RAS_GFX_V9__GFX_SQC_INDEX0_START, + RAS_GFX_V9__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, + RAS_GFX_V9__GFX_SQC_DATA_CU0_UTCL1_LFIFO, + RAS_GFX_V9__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, + RAS_GFX_V9__GFX_SQC_DATA_CU1_UTCL1_LFIFO, + RAS_GFX_V9__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, + RAS_GFX_V9__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + RAS_GFX_V9__GFX_SQC_INDEX0_END = + RAS_GFX_V9__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + /* SQC range 1 */ + RAS_GFX_V9__GFX_SQC_INDEX1_START, + RAS_GFX_V9__GFX_SQC_INST_BANKA_TAG_RAM = + RAS_GFX_V9__GFX_SQC_INDEX1_START, + RAS_GFX_V9__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, + RAS_GFX_V9__GFX_SQC_INST_BANKA_MISS_FIFO, + RAS_GFX_V9__GFX_SQC_INST_BANKA_BANK_RAM, + 
RAS_GFX_V9__GFX_SQC_DATA_BANKA_TAG_RAM, + RAS_GFX_V9__GFX_SQC_DATA_BANKA_HIT_FIFO, + RAS_GFX_V9__GFX_SQC_DATA_BANKA_MISS_FIFO, + RAS_GFX_V9__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, + RAS_GFX_V9__GFX_SQC_DATA_BANKA_BANK_RAM, + RAS_GFX_V9__GFX_SQC_INDEX1_END = + RAS_GFX_V9__GFX_SQC_DATA_BANKA_BANK_RAM, + /* SQC range 2 */ + RAS_GFX_V9__GFX_SQC_INDEX2_START, + RAS_GFX_V9__GFX_SQC_INST_BANKB_TAG_RAM = + RAS_GFX_V9__GFX_SQC_INDEX2_START, + RAS_GFX_V9__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, + RAS_GFX_V9__GFX_SQC_INST_BANKB_MISS_FIFO, + RAS_GFX_V9__GFX_SQC_INST_BANKB_BANK_RAM, + RAS_GFX_V9__GFX_SQC_DATA_BANKB_TAG_RAM, + RAS_GFX_V9__GFX_SQC_DATA_BANKB_HIT_FIFO, + RAS_GFX_V9__GFX_SQC_DATA_BANKB_MISS_FIFO, + RAS_GFX_V9__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, + RAS_GFX_V9__GFX_SQC_DATA_BANKB_BANK_RAM, + RAS_GFX_V9__GFX_SQC_INDEX2_END = + RAS_GFX_V9__GFX_SQC_DATA_BANKB_BANK_RAM, + RAS_GFX_V9__GFX_SQC_INDEX_END = + RAS_GFX_V9__GFX_SQC_INDEX2_END, + /* TA */ + RAS_GFX_V9__GFX_TA_INDEX_START, + RAS_GFX_V9__GFX_TA_FS_DFIFO = + RAS_GFX_V9__GFX_TA_INDEX_START, + RAS_GFX_V9__GFX_TA_FS_AFIFO, + RAS_GFX_V9__GFX_TA_FL_LFIFO, + RAS_GFX_V9__GFX_TA_FX_LFIFO, + RAS_GFX_V9__GFX_TA_FS_CFIFO, + RAS_GFX_V9__GFX_TA_INDEX_END = RAS_GFX_V9__GFX_TA_FS_CFIFO, + /* TCA */ + RAS_GFX_V9__GFX_TCA_INDEX_START, + RAS_GFX_V9__GFX_TCA_HOLE_FIFO = + RAS_GFX_V9__GFX_TCA_INDEX_START, + RAS_GFX_V9__GFX_TCA_REQ_FIFO, + RAS_GFX_V9__GFX_TCA_INDEX_END = + RAS_GFX_V9__GFX_TCA_REQ_FIFO, + /* TCC (5 sub-ranges) */ + RAS_GFX_V9__GFX_TCC_INDEX_START, + /* TCC range 0 */ + RAS_GFX_V9__GFX_TCC_INDEX0_START = + RAS_GFX_V9__GFX_TCC_INDEX_START, + RAS_GFX_V9__GFX_TCC_CACHE_DATA = + RAS_GFX_V9__GFX_TCC_INDEX0_START, + RAS_GFX_V9__GFX_TCC_CACHE_DATA_BANK_0_1, + RAS_GFX_V9__GFX_TCC_CACHE_DATA_BANK_1_0, + RAS_GFX_V9__GFX_TCC_CACHE_DATA_BANK_1_1, + RAS_GFX_V9__GFX_TCC_CACHE_DIRTY_BANK_0, + RAS_GFX_V9__GFX_TCC_CACHE_DIRTY_BANK_1, + RAS_GFX_V9__GFX_TCC_HIGH_RATE_TAG, + RAS_GFX_V9__GFX_TCC_LOW_RATE_TAG, + RAS_GFX_V9__GFX_TCC_INDEX0_END = + RAS_GFX_V9__GFX_TCC_LOW_RATE_TAG, + /* TCC range 1 */ + RAS_GFX_V9__GFX_TCC_INDEX1_START, + RAS_GFX_V9__GFX_TCC_IN_USE_DEC = + RAS_GFX_V9__GFX_TCC_INDEX1_START, + RAS_GFX_V9__GFX_TCC_IN_USE_TRANSFER, + RAS_GFX_V9__GFX_TCC_INDEX1_END = + RAS_GFX_V9__GFX_TCC_IN_USE_TRANSFER, + /* TCC range 2 */ + RAS_GFX_V9__GFX_TCC_INDEX2_START, + RAS_GFX_V9__GFX_TCC_RETURN_DATA = + RAS_GFX_V9__GFX_TCC_INDEX2_START, + RAS_GFX_V9__GFX_TCC_RETURN_CONTROL, + RAS_GFX_V9__GFX_TCC_UC_ATOMIC_FIFO, + RAS_GFX_V9__GFX_TCC_WRITE_RETURN, + RAS_GFX_V9__GFX_TCC_WRITE_CACHE_READ, + RAS_GFX_V9__GFX_TCC_SRC_FIFO, + RAS_GFX_V9__GFX_TCC_SRC_FIFO_NEXT_RAM, + RAS_GFX_V9__GFX_TCC_CACHE_TAG_PROBE_FIFO, + RAS_GFX_V9__GFX_TCC_INDEX2_END = + RAS_GFX_V9__GFX_TCC_CACHE_TAG_PROBE_FIFO, + /* TCC range 3 */ + RAS_GFX_V9__GFX_TCC_INDEX3_START, + RAS_GFX_V9__GFX_TCC_LATENCY_FIFO = + RAS_GFX_V9__GFX_TCC_INDEX3_START, + RAS_GFX_V9__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + RAS_GFX_V9__GFX_TCC_INDEX3_END = + RAS_GFX_V9__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + /* TCC range 4 */ + RAS_GFX_V9__GFX_TCC_INDEX4_START, + RAS_GFX_V9__GFX_TCC_WRRET_TAG_WRITE_RETURN = + RAS_GFX_V9__GFX_TCC_INDEX4_START, + RAS_GFX_V9__GFX_TCC_ATOMIC_RETURN_BUFFER, + RAS_GFX_V9__GFX_TCC_INDEX4_END = + RAS_GFX_V9__GFX_TCC_ATOMIC_RETURN_BUFFER, + RAS_GFX_V9__GFX_TCC_INDEX_END = + RAS_GFX_V9__GFX_TCC_INDEX4_END, + /* TCI */ + RAS_GFX_V9__GFX_TCI_WRITE_RAM, + /* TCP */ + RAS_GFX_V9__GFX_TCP_INDEX_START, + RAS_GFX_V9__GFX_TCP_CACHE_RAM = + RAS_GFX_V9__GFX_TCP_INDEX_START, + RAS_GFX_V9__GFX_TCP_LFIFO_RAM, + RAS_GFX_V9__GFX_TCP_CMD_FIFO, + 
RAS_GFX_V9__GFX_TCP_VM_FIFO, + RAS_GFX_V9__GFX_TCP_DB_RAM, + RAS_GFX_V9__GFX_TCP_UTCL1_LFIFO0, + RAS_GFX_V9__GFX_TCP_UTCL1_LFIFO1, + RAS_GFX_V9__GFX_TCP_INDEX_END = + RAS_GFX_V9__GFX_TCP_UTCL1_LFIFO1, + /* TD */ + RAS_GFX_V9__GFX_TD_INDEX_START, + RAS_GFX_V9__GFX_TD_SS_FIFO_LO = + RAS_GFX_V9__GFX_TD_INDEX_START, + RAS_GFX_V9__GFX_TD_SS_FIFO_HI, + RAS_GFX_V9__GFX_TD_CS_FIFO, + RAS_GFX_V9__GFX_TD_INDEX_END = RAS_GFX_V9__GFX_TD_CS_FIFO, + /* EA (3 sub-ranges) */ + RAS_GFX_V9__GFX_EA_INDEX_START, + /* EA range 0 */ + RAS_GFX_V9__GFX_EA_INDEX0_START = + RAS_GFX_V9__GFX_EA_INDEX_START, + RAS_GFX_V9__GFX_EA_DRAMRD_CMDMEM = + RAS_GFX_V9__GFX_EA_INDEX0_START, + RAS_GFX_V9__GFX_EA_DRAMWR_CMDMEM, + RAS_GFX_V9__GFX_EA_DRAMWR_DATAMEM, + RAS_GFX_V9__GFX_EA_RRET_TAGMEM, + RAS_GFX_V9__GFX_EA_WRET_TAGMEM, + RAS_GFX_V9__GFX_EA_GMIRD_CMDMEM, + RAS_GFX_V9__GFX_EA_GMIWR_CMDMEM, + RAS_GFX_V9__GFX_EA_GMIWR_DATAMEM, + RAS_GFX_V9__GFX_EA_INDEX0_END = + RAS_GFX_V9__GFX_EA_GMIWR_DATAMEM, + /* EA range 1 */ + RAS_GFX_V9__GFX_EA_INDEX1_START, + RAS_GFX_V9__GFX_EA_DRAMRD_PAGEMEM = + RAS_GFX_V9__GFX_EA_INDEX1_START, + RAS_GFX_V9__GFX_EA_DRAMWR_PAGEMEM, + RAS_GFX_V9__GFX_EA_IORD_CMDMEM, + RAS_GFX_V9__GFX_EA_IOWR_CMDMEM, + RAS_GFX_V9__GFX_EA_IOWR_DATAMEM, + RAS_GFX_V9__GFX_EA_GMIRD_PAGEMEM, + RAS_GFX_V9__GFX_EA_GMIWR_PAGEMEM, + RAS_GFX_V9__GFX_EA_INDEX1_END = + RAS_GFX_V9__GFX_EA_GMIWR_PAGEMEM, + /* EA range 2 */ + RAS_GFX_V9__GFX_EA_INDEX2_START, + RAS_GFX_V9__GFX_EA_MAM_D0MEM = + RAS_GFX_V9__GFX_EA_INDEX2_START, + RAS_GFX_V9__GFX_EA_MAM_D1MEM, + RAS_GFX_V9__GFX_EA_MAM_D2MEM, + RAS_GFX_V9__GFX_EA_MAM_D3MEM, + RAS_GFX_V9__GFX_EA_INDEX2_END = + RAS_GFX_V9__GFX_EA_MAM_D3MEM, + RAS_GFX_V9__GFX_EA_INDEX_END = + RAS_GFX_V9__GFX_EA_INDEX2_END, + /* UTC VM L2 bank */ + RAS_GFX_V9__UTC_VML2_BANK_CACHE, + /* UTC VM walker */ + RAS_GFX_V9__UTC_VML2_WALKER, + /* UTC ATC L2 2MB cache */ + RAS_GFX_V9__UTC_ATCL2_CACHE_2M_BANK, + /* UTC ATC L2 4KB cache */ + RAS_GFX_V9__UTC_ATCL2_CACHE_4K_BANK, + RAS_GFX_V9__GFX_MAX +}; + +extern const struct ras_gfx_ip_func gfx_ras_func_v9_0; + +#endif -- cgit v1.2.3 From a8f2352a41f3ed8e1f037bfd722c2856dcf53840 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:28:14 +0800 Subject: drm/amd/ras: Add gfx common ras functions Add gfx common ras functions. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_gfx.c | 70 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_gfx.h | 43 +++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_gfx.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_gfx.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_gfx.c b/drivers/gpu/drm/amd/ras/rascore/ras_gfx.c new file mode 100644 index 000000000000..f5ce28777705 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_gfx.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "ras.h" +#include "ras_gfx_v9_0.h" +#include "ras_gfx.h" +#include "ras_core_status.h" + +static const struct ras_gfx_ip_func *ras_gfx_get_ip_funcs( + struct ras_core_context *ras_core, uint32_t ip_version) +{ + switch (ip_version) { + case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): + return &gfx_ras_func_v9_0; + default: + RAS_DEV_ERR(ras_core->dev, + "GFX ip version(0x%x) is not supported!\n", ip_version); + break; + } + + return NULL; +} + +int ras_gfx_get_ta_subblock(struct ras_core_context *ras_core, + uint32_t error_type, uint32_t subblock, uint32_t *ta_subblock) +{ + struct ras_gfx *gfx = &ras_core->ras_gfx; + + return gfx->ip_func->get_ta_subblock(ras_core, + error_type, subblock, ta_subblock); +} + +int ras_gfx_hw_init(struct ras_core_context *ras_core) +{ + struct ras_gfx *gfx = &ras_core->ras_gfx; + + gfx->gfx_ip_version = ras_core->config->gfx_ip_version; + + gfx->ip_func = ras_gfx_get_ip_funcs(ras_core, gfx->gfx_ip_version); + + return gfx->ip_func ? RAS_CORE_OK : -EINVAL; +} + +int ras_gfx_hw_fini(struct ras_core_context *ras_core) +{ + return 0; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_gfx.h b/drivers/gpu/drm/amd/ras/rascore/ras_gfx.h new file mode 100644 index 000000000000..8a42d69fb0ad --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_gfx.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __RAS_GFX_H__ +#define __RAS_GFX_H__ + +struct ras_gfx_ip_func { + int (*get_ta_subblock)(struct ras_core_context *ras_core, + uint32_t error_type, uint32_t subblock, uint32_t *ta_subblock); +}; + +struct ras_gfx { + uint32_t gfx_ip_version; + const struct ras_gfx_ip_func *ip_func; +}; + +int ras_gfx_hw_init(struct ras_core_context *ras_core); +int ras_gfx_hw_fini(struct ras_core_context *ras_core); + +int ras_gfx_get_ta_subblock(struct ras_core_context *ras_core, + uint32_t error_type, uint32_t subblock, uint32_t *ta_subblock); + +#endif -- cgit v1.2.3 From 5c3be5defc926ba8970bc3d1c26a14adad2b8a7a Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:30:48 +0800 Subject: drm/amd/ras: Add eeprom ras functions Add eeprom ras functions. V5: Remove duplicate data structure definition. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c | 1368 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_eeprom.h | 197 ++++ 2 files changed, 1565 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_eeprom.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c new file mode 100644 index 000000000000..9e0a4f605db0 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c @@ -0,0 +1,1368 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "ras_eeprom.h" +#include "ras.h" + +/* These are memory addresses as would be seen by one or more EEPROM + * chips strung on the I2C bus, usually by manipulating pins 1-3 of a + * set of EEPROM devices. They form a continuous memory space. + * + * The I2C device address includes the device type identifier, 1010b, + * which is a reserved value and indicates that this is an I2C EEPROM + * device. It also includes the top 3 bits of the 19 bit EEPROM memory + * address, namely bits 18, 17, and 16. 
This makes up the 7 bit
+ * address sent on the I2C bus with bit 0 being the direction bit,
+ * which is not represented here, and sent by the hardware directly.
+ *
+ * For instance,
+ * 50h = 1010000b => device type identifier 1010b, bits 18:16 = 000b, address 0.
+ * 54h = 1010100b => --"--, bits 18:16 = 100b, address 40000h.
+ * 56h = 1010110b => --"--, bits 18:16 = 110b, address 60000h.
+ * Depending on the size of the I2C EEPROM device(s), bits 18:16 may
+ * address memory in a device or a device on the I2C bus, depending on
+ * the status of pins 1-3.
+ *
+ * The RAS table lives either at address 0 or address 40000h of EEPROM.
+ */
+#define EEPROM_I2C_MADDR_0 0x0
+#define EEPROM_I2C_MADDR_4 0x40000
+
+#define EEPROM_PAGE_BITS 8
+#define EEPROM_PAGE_SIZE (1U << EEPROM_PAGE_BITS)
+#define EEPROM_PAGE_MASK (EEPROM_PAGE_SIZE - 1)
+
+#define EEPROM_OFFSET_SIZE 2
+#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 0xF))
+
+/*
+ * The 2 macros below represent the actual size in bytes that
+ * those entities occupy in the EEPROM memory.
+ * RAS_TABLE_RECORD_SIZE is different from sizeof(eeprom_umc_record) which
+ * uses uint64 to store 6-byte fields such as retired_page.
+ */
+#define RAS_TABLE_HEADER_SIZE 20
+#define RAS_TABLE_RECORD_SIZE 24
+
+/* Table hdr is 'AMDR' */
+#define RAS_TABLE_HDR_VAL 0x414d4452
+
+/* Bad GPU tag 'BADG' */
+#define RAS_TABLE_HDR_BAD 0x42414447
+
+/*
+ * EEPROM Table structure v1
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE HEADER |
+ * | ( size 20 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | BAD PAGE RECORD AREA |
+ * | |
+ * ---------------------------------
+ */
+
+/* Assume 2-Mbit size EEPROM and take up the whole space. */
+#define RAS_TBL_SIZE_BYTES (256 * 1024)
+#define RAS_TABLE_START 0
+#define RAS_HDR_START RAS_TABLE_START
+#define RAS_RECORD_START (RAS_HDR_START + RAS_TABLE_HEADER_SIZE)
+#define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
+ / RAS_TABLE_RECORD_SIZE)
+
+/*
+ * EEPROM Table structure v2.1
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE HEADER |
+ * | ( size 20 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE RAS INFO |
+ * | (available info size 4 Bytes) |
+ * | ( reserved size 252 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | BAD PAGE RECORD AREA |
+ * | |
+ * ---------------------------------
+ */
+
+/* EEPROM Table V2_1 */
+#define RAS_TABLE_V2_1_INFO_SIZE 256
+#define RAS_TABLE_V2_1_INFO_START RAS_TABLE_HEADER_SIZE
+#define RAS_RECORD_START_V2_1 (RAS_HDR_START + RAS_TABLE_HEADER_SIZE + \
+ RAS_TABLE_V2_1_INFO_SIZE)
+#define RAS_MAX_RECORD_COUNT_V2_1 ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE - \
+ RAS_TABLE_V2_1_INFO_SIZE) \
+ / RAS_TABLE_RECORD_SIZE)
+
+/* Given a zero-based index of an EEPROM RAS record, yields the EEPROM
+ * offset off of RAS_TABLE_START. That is, this is something you can
+ * add to control->i2c_address, and then tell the I2C layer to read
+ * from/write to there. _N is the so called absolute index,
+ * because it starts right after the table header.
+ */
+#define RAS_INDEX_TO_OFFSET(_C, _N) ((_C)->ras_record_offset + \
+ (_N) * RAS_TABLE_RECORD_SIZE)
+
+#define RAS_OFFSET_TO_INDEX(_C, _O) (((_O) - \
+ (_C)->ras_record_offset) / RAS_TABLE_RECORD_SIZE)
+
+/* Given a 0-based relative record index, 0, 1, 2, ..., etc., off
+ * of "fri", return the absolute record index off of the end of
+ * the table header.
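+ * For example (illustrative values only): with ras_fri = 5 and
+ * ras_max_record_count = 100, relative index 2 maps to absolute
+ * index (2 + 5) % 100 = 7, while with ras_fri = 98, relative
+ * index 3 wraps around to (3 + 98) % 100 = 1.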
+ */
+#define RAS_RI_TO_AI(_C, _I) (((_I) + (_C)->ras_fri) % \
+ (_C)->ras_max_record_count)
+
+#define RAS_NUM_RECS(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
+ RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE)
+
+#define RAS_NUM_RECS_V2_1(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
+ RAS_TABLE_HEADER_SIZE - \
+ RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE)
+
+#define to_ras_core_context(x) (container_of(x, struct ras_core_context, ras_eeprom))
+
+static bool __is_ras_eeprom_supported(struct ras_core_context *ras_core)
+{
+ return ras_core->ras_eeprom_supported;
+}
+
+static bool __get_eeprom_i2c_addr(struct ras_core_context *ras_core,
+ struct ras_eeprom_control *control)
+{
+ int ret = -EINVAL;
+
+ if (control->sys_func &&
+ control->sys_func->update_eeprom_i2c_config)
+ ret = control->sys_func->update_eeprom_i2c_config(ras_core);
+ else
+ RAS_DEV_WARN(ras_core->dev,
+ "No eeprom i2c system config!\n");
+
+ return !ret;
+}
+
+static int __ras_eeprom_xfer(struct ras_core_context *ras_core, u32 eeprom_addr,
+ u8 *eeprom_buf, u32 buf_size, bool read)
+{
+ struct ras_eeprom_control *control = &ras_core->ras_eeprom;
+ int ret;
+
+ if (control->sys_func && control->sys_func->eeprom_i2c_xfer) {
+ ret = control->sys_func->eeprom_i2c_xfer(ras_core,
+ eeprom_addr, eeprom_buf, buf_size, read);
+
+ if ((ret > 0) && !read) {
+ /* According to EEPROM specs the length of the
+ * self-writing cycle, tWR (tW), is 10 ms.
+ *
+ * TODO: Use polling on ACK, aka Acknowledge
+ * Polling, to minimize waiting for the
+ * internal write cycle to complete, as it is
+ * usually smaller than tWR (tW).
+ */
+ msleep(10);
+ }
+
+ return ret;
+ }
+
+ RAS_DEV_ERR(ras_core->dev, "Error: No eeprom i2c system xfer function!\n");
+ return -EINVAL;
+}
+
+/**
+ * __eeprom_xfer -- Read/write from/to an I2C EEPROM device
+ * @ras_core: pointer to the RAS core context
+ * @eeprom_addr: EEPROM address from which to read/write
+ * @eeprom_buf: pointer to data buffer to read into/write from
+ * @buf_size: the size of @eeprom_buf
+ * @read: True if reading from the EEPROM, false if writing
+ *
+ * Returns the number of bytes read/written; -errno on error.
+ */
+static int __eeprom_xfer(struct ras_core_context *ras_core, u32 eeprom_addr,
+ u8 *eeprom_buf, u32 buf_size, bool read)
+{
+ u16 limit;
+ u16 ps; /* Partial size */
+ int res = 0, r;
+
+ if (read)
+ limit = ras_core->ras_eeprom.max_read_len;
+ else
+ limit = ras_core->ras_eeprom.max_write_len;
+
+ if (limit && (limit <= EEPROM_OFFSET_SIZE)) {
+ RAS_DEV_ERR(ras_core->dev,
+ "maddr:0x%04X size:0x%02X:quirk max_%s_len must be > %d",
+ eeprom_addr, buf_size,
+ read ? "read" : "write", EEPROM_OFFSET_SIZE);
+ return -EINVAL;
+ }
+
+ ras_core_down_gpu_reset_lock(ras_core);
+
+ if (limit == 0) {
+ res = __ras_eeprom_xfer(ras_core, eeprom_addr,
+ eeprom_buf, buf_size, read);
+ } else {
+ /* The "limit" includes all data bytes sent/received,
+ * which would include the EEPROM_OFFSET_SIZE bytes.
+ * Account for them here.
+ */
+ limit -= EEPROM_OFFSET_SIZE;
+ for ( ; buf_size > 0;
+ buf_size -= ps, eeprom_addr += ps, eeprom_buf += ps) {
+ ps = (buf_size < limit) ?
buf_size : limit; + + r = __ras_eeprom_xfer(ras_core, eeprom_addr, + eeprom_buf, ps, read); + if (r < 0) + break; + + res += r; + } + } + + ras_core_up_gpu_reset_lock(ras_core); + + return res; +} + +static int __eeprom_read(struct ras_core_context *ras_core, + u32 eeprom_addr, u8 *eeprom_buf, u32 bytes) +{ + return __eeprom_xfer(ras_core, eeprom_addr, + eeprom_buf, bytes, true); +} + +static int __eeprom_write(struct ras_core_context *ras_core, + u32 eeprom_addr, u8 *eeprom_buf, u32 bytes) +{ + return __eeprom_xfer(ras_core, eeprom_addr, + eeprom_buf, bytes, false); +} + +static void +__encode_table_header_to_buf(struct ras_eeprom_table_header *hdr, + unsigned char *buf) +{ + u32 *pp = (uint32_t *)buf; + + pp[0] = cpu_to_le32(hdr->header); + pp[1] = cpu_to_le32(hdr->version); + pp[2] = cpu_to_le32(hdr->first_rec_offset); + pp[3] = cpu_to_le32(hdr->tbl_size); + pp[4] = cpu_to_le32(hdr->checksum); +} + +static void +__decode_table_header_from_buf(struct ras_eeprom_table_header *hdr, + unsigned char *buf) +{ + u32 *pp = (uint32_t *)buf; + + hdr->header = le32_to_cpu(pp[0]); + hdr->version = le32_to_cpu(pp[1]); + hdr->first_rec_offset = le32_to_cpu(pp[2]); + hdr->tbl_size = le32_to_cpu(pp[3]); + hdr->checksum = le32_to_cpu(pp[4]); +} + +static int __write_table_header(struct ras_eeprom_control *control) +{ + u8 buf[RAS_TABLE_HEADER_SIZE]; + struct ras_core_context *ras_core = to_ras_core_context(control); + int res; + + memset(buf, 0, sizeof(buf)); + __encode_table_header_to_buf(&control->tbl_hdr, buf); + + /* i2c may be unstable in gpu reset */ + res = __eeprom_write(ras_core, + control->i2c_address + + control->ras_header_offset, + buf, RAS_TABLE_HEADER_SIZE); + + if (res < 0) { + RAS_DEV_ERR(ras_core->dev, + "Failed to write EEPROM table header:%d\n", res); + } else if (res < RAS_TABLE_HEADER_SIZE) { + RAS_DEV_ERR(ras_core->dev, + "Short write:%d out of %d\n", res, RAS_TABLE_HEADER_SIZE); + res = -EIO; + } else { + res = 0; + } + + return res; +} + +static void +__encode_table_ras_info_to_buf(struct ras_eeprom_table_ras_info *rai, + unsigned char *buf) +{ + u32 *pp = (uint32_t *)buf; + u32 tmp; + + tmp = ((uint32_t)(rai->rma_status) & 0xFF) | + (((uint32_t)(rai->health_percent) << 8) & 0xFF00) | + (((uint32_t)(rai->ecc_page_threshold) << 16) & 0xFFFF0000); + pp[0] = cpu_to_le32(tmp); +} + +static void +__decode_table_ras_info_from_buf(struct ras_eeprom_table_ras_info *rai, + unsigned char *buf) +{ + u32 *pp = (uint32_t *)buf; + u32 tmp; + + tmp = le32_to_cpu(pp[0]); + rai->rma_status = tmp & 0xFF; + rai->health_percent = (tmp >> 8) & 0xFF; + rai->ecc_page_threshold = (tmp >> 16) & 0xFFFF; +} + +static int __write_table_ras_info(struct ras_eeprom_control *control) +{ + struct ras_core_context *ras_core = to_ras_core_context(control); + u8 *buf; + int res; + + buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL); + if (!buf) { + RAS_DEV_ERR(ras_core->dev, + "Failed to alloc buf to write table ras info\n"); + return -ENOMEM; + } + + __encode_table_ras_info_to_buf(&control->tbl_rai, buf); + + /* i2c may be unstable in gpu reset */ + res = __eeprom_write(ras_core, + control->i2c_address + + control->ras_info_offset, + buf, RAS_TABLE_V2_1_INFO_SIZE); + + if (res < 0) { + RAS_DEV_ERR(ras_core->dev, + "Failed to write EEPROM table ras info:%d\n", res); + } else if (res < RAS_TABLE_V2_1_INFO_SIZE) { + RAS_DEV_ERR(ras_core->dev, + "Short write:%d out of %d\n", res, RAS_TABLE_V2_1_INFO_SIZE); + res = -EIO; + } else { + res = 0; + } + + kfree(buf); + + return res; +} + +static u8 
__calc_hdr_byte_sum(const struct ras_eeprom_control *control)
+{
+ int ii;
+ u8 *pp, csum;
+ u32 sz;
+
+ /* Header checksum, skip checksum field in the calculation */
+ sz = sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum);
+ pp = (u8 *) &control->tbl_hdr;
+ csum = 0;
+ for (ii = 0; ii < sz; ii++, pp++)
+ csum += *pp;
+
+ return csum;
+}
+
+static u8 __calc_ras_info_byte_sum(const struct ras_eeprom_control *control)
+{
+ int ii;
+ u8 *pp, csum;
+ u32 sz;
+
+ sz = sizeof(control->tbl_rai);
+ pp = (u8 *) &control->tbl_rai;
+ csum = 0;
+ for (ii = 0; ii < sz; ii++, pp++)
+ csum += *pp;
+
+ return csum;
+}
+
+static int ras_eeprom_correct_header_tag(
+ struct ras_eeprom_control *control,
+ uint32_t header)
+{
+ struct ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ u8 *hh;
+ int res;
+ u8 csum;
+
+ csum = -hdr->checksum;
+
+ hh = (void *) &hdr->header;
+ csum -= (hh[0] + hh[1] + hh[2] + hh[3]);
+ hh = (void *) &header;
+ csum += hh[0] + hh[1] + hh[2] + hh[3];
+ csum = -csum;
+ mutex_lock(&control->ras_tbl_mutex);
+ hdr->header = header;
+ hdr->checksum = csum;
+ res = __write_table_header(control);
+ mutex_unlock(&control->ras_tbl_mutex);
+
+ return res;
+}
+
+static void ras_set_eeprom_table_version(struct ras_eeprom_control *control)
+{
+ struct ras_eeprom_table_header *hdr = &control->tbl_hdr;
+
+ hdr->version = RAS_TABLE_VER_V3;
+}
+
+/**
+ * ras_eeprom_reset_table -- Reset the RAS EEPROM table
+ * @ras_core: pointer to the RAS core context
+ *
+ * Reset the contents of the header of the RAS EEPROM table.
+ * Return 0 on success, -errno on error.
+ */
+int ras_eeprom_reset_table(struct ras_core_context *ras_core)
+{
+ struct ras_eeprom_control *control = &ras_core->ras_eeprom;
+ struct ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct ras_eeprom_table_ras_info *rai = &control->tbl_rai;
+ u8 csum;
+ int res;
+
+ mutex_lock(&control->ras_tbl_mutex);
+
+ hdr->header = RAS_TABLE_HDR_VAL;
+ ras_set_eeprom_table_version(control);
+
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
+ hdr->first_rec_offset = RAS_RECORD_START_V2_1;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE;
+ rai->rma_status = RAS_GPU_HEALTH_USABLE;
+ /**
+ * GPU health represented as a percentage.
+ * 0 means worst health, 100 means fully healthy.
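+ * (Illustrative example: with record_threshold_count = 100 and
+ * 25 bad pages saved, ras_eeprom_update_header() below would
+ * later recompute this as ((100 - 25) * 100) / 100 = 75.)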
+ */
+ rai->health_percent = 100;
+ /* ecc_page_threshold = 0 means disable bad page retirement */
+ rai->ecc_page_threshold = control->record_threshold_count;
+ } else {
+ hdr->first_rec_offset = RAS_RECORD_START;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
+ }
+
+ csum = __calc_hdr_byte_sum(control);
+ if (hdr->version >= RAS_TABLE_VER_V2_1)
+ csum += __calc_ras_info_byte_sum(control);
+ csum = -csum;
+ hdr->checksum = csum;
+ res = __write_table_header(control);
+ if (!res && hdr->version > RAS_TABLE_VER_V1)
+ res = __write_table_ras_info(control);
+
+ control->ras_num_recs = 0;
+ control->ras_fri = 0;
+
+ control->bad_channel_bitmap = 0;
+ ras_core_event_notify(ras_core, RAS_EVENT_ID__UPDATE_BAD_PAGE_NUM,
+ &control->ras_num_recs);
+ ras_core_event_notify(ras_core, RAS_EVENT_ID__UPDATE_BAD_CHANNEL_BITMAP,
+ &control->bad_channel_bitmap);
+ control->update_channel_flag = false;
+
+ mutex_unlock(&control->ras_tbl_mutex);
+
+ return res;
+}
+
+static void
+__encode_table_record_to_buf(struct ras_eeprom_control *control,
+ struct eeprom_umc_record *record,
+ unsigned char *buf)
+{
+ __le64 tmp = 0;
+ int i = 0;
+
+ /* Next are all record fields according to EEPROM page spec in LE format */
+ buf[i++] = record->err_type;
+
+ buf[i++] = record->bank;
+
+ tmp = cpu_to_le64(record->ts);
+ memcpy(buf + i, &tmp, 8);
+ i += 8;
+
+ tmp = cpu_to_le64((record->offset & 0xffffffffffff));
+ memcpy(buf + i, &tmp, 6);
+ i += 6;
+
+ buf[i++] = record->mem_channel;
+ buf[i++] = record->mcumc_id;
+
+ tmp = cpu_to_le64((record->retired_row_pfn & 0xffffffffffff));
+ memcpy(buf + i, &tmp, 6);
+}
+
+static void
+__decode_table_record_from_buf(struct ras_eeprom_control *control,
+ struct eeprom_umc_record *record,
+ unsigned char *buf)
+{
+ __le64 tmp = 0;
+ int i = 0;
+
+ /* Next are all record fields according to EEPROM page spec in LE format */
+ record->err_type = buf[i++];
+
+ record->bank = buf[i++];
+
+ memcpy(&tmp, buf + i, 8);
+ record->ts = le64_to_cpu(tmp);
+ i += 8;
+
+ memcpy(&tmp, buf + i, 6);
+ record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
+ i += 6;
+
+ record->mem_channel = buf[i++];
+ record->mcumc_id = buf[i++];
+
+ memcpy(&tmp, buf + i, 6);
+ record->retired_row_pfn = (le64_to_cpu(tmp) & 0xffffffffffff);
+}
+
+bool ras_eeprom_check_safety_watermark(struct ras_core_context *ras_core)
+{
+ struct ras_eeprom_control *control = &ras_core->ras_eeprom;
+ bool ret = false;
+ int bad_page_count;
+
+ if (!__is_ras_eeprom_supported(ras_core) ||
+ !control->record_threshold_config)
+ return false;
+
+ bad_page_count = ras_umc_get_badpage_count(ras_core);
+ if (control->tbl_hdr.header == RAS_TABLE_HDR_BAD) {
+ if (bad_page_count > control->record_threshold_count)
+ RAS_DEV_WARN(ras_core->dev, "RAS records:%d exceed threshold:%d",
+ bad_page_count, control->record_threshold_count);
+
+ if ((control->record_threshold_config == WARN_NONSTOP_OVER_THRESHOLD) ||
+ (control->record_threshold_config == NONSTOP_OVER_THRESHOLD)) {
+ RAS_DEV_WARN(ras_core->dev,
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n");
+ ret = false;
+ } else {
+ ras_core->is_rma = true;
+ RAS_DEV_WARN(ras_core->dev,
+ "Please consider adjusting the customized threshold.\n");
+ ret = true;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * __ras_eeprom_write -- write indexed from buffer to EEPROM
+ * @control: pointer to control structure
+ * @buf: pointer to buffer containing data to write
+ * @fri: start writing at this index
+ * @num: number of records to write
+ *
+ * The caller must hold the table mutex
in @control.
+ * Return 0 on success, -errno otherwise.
+ */
+static int __ras_eeprom_write(struct ras_eeprom_control *control,
+ u8 *buf, const u32 fri, const u32 num)
+{
+ struct ras_core_context *ras_core = to_ras_core_context(control);
+ u32 buf_size;
+ int res;
+
+ /* i2c may be unstable in gpu reset */
+ buf_size = num * RAS_TABLE_RECORD_SIZE;
+ res = __eeprom_write(ras_core,
+ control->i2c_address + RAS_INDEX_TO_OFFSET(control, fri),
+ buf, buf_size);
+ if (res < 0) {
+ RAS_DEV_ERR(ras_core->dev,
+ "Writing %d EEPROM table records error:%d\n", num, res);
+ } else if (res < buf_size) {
+ /* Short write, return error. */
+ RAS_DEV_ERR(ras_core->dev,
+ "Wrote %d records out of %d\n",
+ (res/RAS_TABLE_RECORD_SIZE), num);
+ res = -EIO;
+ } else {
+ res = 0;
+ }
+
+ return res;
+}
+
+static int ras_eeprom_append_table(struct ras_eeprom_control *control,
+ struct eeprom_umc_record *record,
+ const u32 num)
+{
+ u32 a, b, i;
+ u8 *buf, *pp;
+ int res;
+
+ buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Encode all of them in one go.
+ */
+ pp = buf;
+ for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
+ __encode_table_record_to_buf(control, &record[i], pp);
+
+ /* update bad channel bitmap */
+ if ((record[i].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) &&
+ !(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
+ control->bad_channel_bitmap |= 1 << record[i].mem_channel;
+ control->update_channel_flag = true;
+ }
+ }
+
+ /* a, first record index to write into.
+ * b, last record index to write into.
+ * a = first index to read (fri) + number of records in the table,
+ * b = a + @num - 1.
+ * Let N = control->ras_max_record_count, then we have,
+ * case 0: 0 <= a <= b < N,
+ * just append @num records starting at a;
+ * case 1: 0 <= a < N <= b,
+ * append (N - a) records starting at a, and
+ * append the remainder, b % N + 1, starting at 0.
+ * case 2: 0 <= fri < N <= a <= b, then modulo N we get two subcases,
+ * case 2a: 0 <= a <= b < N
+ * append num records starting at a; and fix fri if b overwrote it,
+ * and since a <= b, if b overwrote it then a must've also,
+ * and if b didn't overwrite it, then a didn't also.
+ * case 2b: 0 <= b < a < N
+ * write num records starting at a, which wraps around 0=N
+ * and overwrite fri unconditionally. Now from case 2a,
+ * this means that b eclipsed fri to overwrite it and wrap
+ * around 0 again, i.e. b = 2N+r pre modulo N, so we unconditionally
+ * set fri = b + 1 (mod N).
+ * Now, since fri is updated in every case, except the trivial case 0,
+ * the number of records present in the table after writing, is,
+ * num_recs - 1 = b - fri (mod N), and we take the positive value,
+ * by adding an arbitrary multiple of N before taking the modulo N
+ * as shown below.
+ */
+ a = control->ras_fri + control->ras_num_recs;
+ b = a + num - 1;
+ if (b < control->ras_max_record_count) {
+ res = __ras_eeprom_write(control, buf, a, num);
+ } else if (a < control->ras_max_record_count) {
+ u32 g0, g1;
+
+ g0 = control->ras_max_record_count - a;
+ g1 = b % control->ras_max_record_count + 1;
+ res = __ras_eeprom_write(control, buf, a, g0);
+ if (res)
+ goto Out;
+ res = __ras_eeprom_write(control,
+ buf + g0 * RAS_TABLE_RECORD_SIZE,
+ 0, g1);
+ if (res)
+ goto Out;
+ if (g1 > control->ras_fri)
+ control->ras_fri = g1 % control->ras_max_record_count;
+ } else {
+ a %= control->ras_max_record_count;
+ b %= control->ras_max_record_count;
+
+ if (a <= b) {
+ /* Note that, b - a + 1 = num.
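+ * Illustrative numbers for this subcase (case 2a): N = 10,
+ * ras_fri = 2, ras_num_recs = 9 and num = 3 give a = 11 and
+ * b = 13, i.e. a % N = 1 <= b % N = 3; the records land at
+ * indices 1..3 and, since b >= fri, fri advances to
+ * (3 + 1) % 10 = 4, leaving ras_num_recs = 1 + (10 + 3 - 4) % 10 = 10.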
+ */
+ res = __ras_eeprom_write(control, buf, a, num);
+ if (res)
+ goto Out;
+ if (b >= control->ras_fri)
+ control->ras_fri = (b + 1) % control->ras_max_record_count;
+ } else {
+ u32 g0, g1;
+
+ /* b < a, which means, we write from
+ * a to the end of the table, and from
+ * the start of the table to b.
+ */
+ g0 = control->ras_max_record_count - a;
+ g1 = b + 1;
+ res = __ras_eeprom_write(control, buf, a, g0);
+ if (res)
+ goto Out;
+ res = __ras_eeprom_write(control,
+ buf + g0 * RAS_TABLE_RECORD_SIZE, 0, g1);
+ if (res)
+ goto Out;
+ control->ras_fri = g1 % control->ras_max_record_count;
+ }
+ }
+ control->ras_num_recs = 1 +
+ (control->ras_max_record_count + b - control->ras_fri)
+ % control->ras_max_record_count;
+Out:
+ kfree(buf);
+ return res;
+}
+
+static int ras_eeprom_update_header(struct ras_eeprom_control *control)
+{
+ struct ras_core_context *ras_core = to_ras_core_context(control);
+ int threshold_config = control->record_threshold_config;
+ u8 *buf, *pp, csum;
+ u32 buf_size;
+ int bad_page_count;
+ int res;
+
+ bad_page_count = ras_umc_get_badpage_count(ras_core);
+ /* Modify the header if the bad page count exceeds the threshold.
+ */
+ if (threshold_config != 0 &&
+ bad_page_count > control->record_threshold_count) {
+ RAS_DEV_WARN(ras_core->dev,
+ "Saved bad pages %d reach threshold value %d\n",
+ bad_page_count, control->record_threshold_count);
+ control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) {
+ control->tbl_rai.rma_status = RAS_GPU_RETIRED__ECC_REACH_THRESHOLD;
+ control->tbl_rai.health_percent = 0;
+ }
+
+ if ((threshold_config != WARN_NONSTOP_OVER_THRESHOLD) &&
+ (threshold_config != NONSTOP_OVER_THRESHOLD))
+ ras_core->is_rma = true;
+
+ /* ignore the -ENOTSUPP return value */
+ ras_core_event_notify(ras_core, RAS_EVENT_ID__DEVICE_RMA, NULL);
+ }
+
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ else
+ control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ control->tbl_hdr.checksum = 0;
+
+ buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ buf = kcalloc(control->ras_num_recs, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
+ if (!buf) {
+ RAS_DEV_ERR(ras_core->dev,
+ "allocating memory for table of size %d bytes failed\n",
+ control->tbl_hdr.tbl_size);
+ res = -ENOMEM;
+ goto Out;
+ }
+
+ res = __eeprom_read(ras_core,
+ control->i2c_address +
+ control->ras_record_offset,
+ buf, buf_size);
+ if (res < 0) {
+ RAS_DEV_ERR(ras_core->dev,
+ "EEPROM failed reading records:%d\n", res);
+ goto Out;
+ } else if (res < buf_size) {
+ RAS_DEV_ERR(ras_core->dev,
+ "EEPROM read %d out of %d bytes\n", res, buf_size);
+ res = -EIO;
+ goto Out;
+ }
+
+ /**
+ * bad page records have been stored in eeprom,
+ * now calculate gpu health percent
+ */
+ if (threshold_config != 0 &&
+ control->tbl_hdr.version >= RAS_TABLE_VER_V2_1 &&
+ bad_page_count <= control->record_threshold_count)
+ control->tbl_rai.health_percent = ((control->record_threshold_count -
+ bad_page_count) * 100) / control->record_threshold_count;
+
+ /* Recalc the checksum.
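+ * The checksum byte is chosen so that every byte of the table,
+ * including the checksum itself, sums to 0 mod 256. Illustrative
+ * numbers: if the other bytes sum to 0x1a7, then csum becomes
+ * (u8)-0xa7 = 0x59, and 0xa7 + 0x59 = 0x100 == 0 mod 256.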
+	 */
+	csum = 0;
+	for (pp = buf; pp < buf + buf_size; pp++)
+		csum += *pp;
+
+	csum += __calc_hdr_byte_sum(control);
+	if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+		csum += __calc_ras_info_byte_sum(control);
+	/* avoid sign extension when assigning to "checksum" */
+	csum = -csum;
+	control->tbl_hdr.checksum = csum;
+	res = __write_table_header(control);
+	if (!res && control->tbl_hdr.version > RAS_TABLE_VER_V1)
+		res = __write_table_ras_info(control);
+Out:
+	kfree(buf);
+	return res;
+}
+
+/**
+ * ras_eeprom_append -- append records to the EEPROM RAS table
+ * @ras_core: pointer to the RAS core context
+ * @record: array of records to append
+ * @num: number of records in @record array
+ *
+ * Append @num records to the table, calculate the checksum and write
+ * the table back to EEPROM. At most
+ * control->ras_max_record_count - control->ras_num_recs records can
+ * be appended; requests that would overflow the table are rejected.
+ *
+ * Return 0 on success or if EEPROM is not supported, -errno on error.
+ */
+int ras_eeprom_append(struct ras_core_context *ras_core,
+		      struct eeprom_umc_record *record, const u32 num)
+{
+	struct ras_eeprom_control *control = &ras_core->ras_eeprom;
+	int res;
+
+	if (!__is_ras_eeprom_supported(ras_core))
+		return 0;
+
+	if (num == 0) {
+		RAS_DEV_ERR(ras_core->dev, "will not append 0 records\n");
+		return -EINVAL;
+	} else if ((num + control->ras_num_recs) > control->ras_max_record_count) {
+		RAS_DEV_ERR(ras_core->dev,
+			    "cannot append %d records, table capacity is %d\n",
+			    num, control->ras_max_record_count);
+		return -EINVAL;
+	}
+
+	mutex_lock(&control->ras_tbl_mutex);
+	res = ras_eeprom_append_table(control, record, num);
+	if (!res)
+		res = ras_eeprom_update_header(control);
+
+	mutex_unlock(&control->ras_tbl_mutex);
+
+	return res;
+}
+
+/**
+ * __ras_eeprom_read -- read records from EEPROM into a buffer
+ * @control: pointer to control structure
+ * @buf: pointer to buffer to read into
+ * @fri: absolute index of the first record to read
+ * @num: number of records to read
+ *
+ * The caller must hold the table mutex in @control.
+ * Return 0 on success, -errno otherwise.
+ */
+static int __ras_eeprom_read(struct ras_eeprom_control *control,
+			     u8 *buf, const u32 fri, const u32 num)
+{
+	struct ras_core_context *ras_core = to_ras_core_context(control);
+	u32 buf_size;
+	int res;
+
+	/* i2c may be unstable in gpu reset */
+	buf_size = num * RAS_TABLE_RECORD_SIZE;
+	res = __eeprom_read(ras_core,
+			    control->i2c_address +
+			    RAS_INDEX_TO_OFFSET(control, fri),
+			    buf, buf_size);
+	if (res < 0) {
+		RAS_DEV_ERR(ras_core->dev,
+			    "Reading %d EEPROM table records error:%d\n", num, res);
+	} else if (res < buf_size) {
+		/* Short read, return error.
+		 */
+		RAS_DEV_ERR(ras_core->dev,
+			    "Read %d records out of %d\n",
+			    (res/RAS_TABLE_RECORD_SIZE), num);
+		res = -EIO;
+	} else {
+		res = 0;
+	}
+
+	return res;
+}
+
+/**
+ * ras_eeprom_read -- read records from the EEPROM RAS table
+ * @ras_core: pointer to the RAS core context
+ * @record: array of records to read into
+ * @num: number of records in @record
+ *
+ * Reads @num records from the RAS table in EEPROM and
+ * writes the data into the @record array.
+ *
+ * Returns 0 on success, -errno on error.
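+ *
+ * Note that records are returned oldest-first: reading starts at the
+ * table's first-record index (ras_fri) and wraps past the end of the
+ * table when needed.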
+ */ +int ras_eeprom_read(struct ras_core_context *ras_core, + struct eeprom_umc_record *record, const u32 num) +{ + struct ras_eeprom_control *control = &ras_core->ras_eeprom; + int i, res; + u8 *buf, *pp; + u32 g0, g1; + + if (!__is_ras_eeprom_supported(ras_core)) + return 0; + + if (num == 0) { + RAS_DEV_ERR(ras_core->dev, "will not read 0 records\n"); + return -EINVAL; + } else if (num > control->ras_num_recs) { + RAS_DEV_ERR(ras_core->dev, + "too many records to read:%d available:%d\n", + num, control->ras_num_recs); + return -EINVAL; + } + + buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Determine how many records to read, from the first record + * index, fri, to the end of the table, and from the beginning + * of the table, such that the total number of records is + * @num, and we handle wrap around when fri > 0 and + * fri + num > RAS_MAX_RECORD_COUNT. + * + * First we compute the index of the last element + * which would be fetched from each region, + * g0 is in [fri, fri + num - 1], and + * g1 is in [0, RAS_MAX_RECORD_COUNT - 1]. + * Then, if g0 < RAS_MAX_RECORD_COUNT, the index of + * the last element to fetch, we set g0 to _the number_ + * of elements to fetch, @num, since we know that the last + * indexed to be fetched does not exceed the table. + * + * If, however, g0 >= RAS_MAX_RECORD_COUNT, then + * we set g0 to the number of elements to read + * until the end of the table, and g1 to the number of + * elements to read from the beginning of the table. + */ + g0 = control->ras_fri + num - 1; + g1 = g0 % control->ras_max_record_count; + if (g0 < control->ras_max_record_count) { + g0 = num; + g1 = 0; + } else { + g0 = control->ras_max_record_count - control->ras_fri; + g1 += 1; + } + + mutex_lock(&control->ras_tbl_mutex); + res = __ras_eeprom_read(control, buf, control->ras_fri, g0); + if (res) + goto Out; + if (g1) { + res = __ras_eeprom_read(control, + buf + g0 * RAS_TABLE_RECORD_SIZE, 0, g1); + if (res) + goto Out; + } + + res = 0; + + /* Read up everything? Then transform. + */ + pp = buf; + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) { + __decode_table_record_from_buf(control, &record[i], pp); + + /* update bad channel bitmap */ + if ((record[i].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) && + !(control->bad_channel_bitmap & (1 << record[i].mem_channel))) { + control->bad_channel_bitmap |= 1 << record[i].mem_channel; + control->update_channel_flag = true; + } + } +Out: + kfree(buf); + mutex_unlock(&control->ras_tbl_mutex); + + return res; +} + +uint32_t ras_eeprom_max_record_count(struct ras_core_context *ras_core) +{ + struct ras_eeprom_control *control = &ras_core->ras_eeprom; + + /* get available eeprom table version first before eeprom table init */ + ras_set_eeprom_table_version(control); + + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) + return RAS_MAX_RECORD_COUNT_V2_1; + else + return RAS_MAX_RECORD_COUNT; +} + +/** + * __verify_ras_table_checksum -- verify the RAS EEPROM table checksum + * @control: pointer to control structure + * + * Check the checksum of the stored in EEPROM RAS table. + * + * Return 0 if the checksum is correct, + * positive if it is not correct, and + * -errno on I/O error. 
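+ *
+ * Because the stored checksum byte is the two's complement of the sum
+ * of all other table bytes, an intact table sums to zero, which is
+ * why a zero return means the checksum is correct.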
+ */ +static int __verify_ras_table_checksum(struct ras_eeprom_control *control) +{ + struct ras_core_context *ras_core = to_ras_core_context(control); + int buf_size, res; + u8 csum, *buf, *pp; + + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) + buf_size = RAS_TABLE_HEADER_SIZE + + RAS_TABLE_V2_1_INFO_SIZE + + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + else + buf_size = RAS_TABLE_HEADER_SIZE + + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) { + RAS_DEV_ERR(ras_core->dev, + "Out of memory checking RAS table checksum.\n"); + return -ENOMEM; + } + + res = __eeprom_read(ras_core, + control->i2c_address + + control->ras_header_offset, + buf, buf_size); + if (res < buf_size) { + RAS_DEV_ERR(ras_core->dev, + "Partial read for checksum, res:%d\n", res); + /* On partial reads, return -EIO. + */ + if (res >= 0) + res = -EIO; + goto Out; + } + + csum = 0; + for (pp = buf; pp < buf + buf_size; pp++) + csum += *pp; +Out: + kfree(buf); + return res < 0 ? res : csum; +} + +static int __read_table_ras_info(struct ras_eeprom_control *control) +{ + struct ras_eeprom_table_ras_info *rai = &control->tbl_rai; + struct ras_core_context *ras_core = to_ras_core_context(control); + unsigned char *buf; + int res; + + buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL); + if (!buf) { + RAS_DEV_ERR(ras_core->dev, + "Failed to alloc buf to read EEPROM table ras info\n"); + return -ENOMEM; + } + + /** + * EEPROM table V2_1 supports ras info, + * read EEPROM table ras info + */ + res = __eeprom_read(ras_core, + control->i2c_address + control->ras_info_offset, + buf, RAS_TABLE_V2_1_INFO_SIZE); + if (res < RAS_TABLE_V2_1_INFO_SIZE) { + RAS_DEV_ERR(ras_core->dev, + "Failed to read EEPROM table ras info, res:%d\n", res); + res = res >= 0 ? -EIO : res; + goto Out; + } + + __decode_table_ras_info_from_buf(rai, buf); + +Out: + kfree(buf); + return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res; +} + +static int __check_ras_table_status(struct ras_core_context *ras_core) +{ + struct ras_eeprom_control *control = &ras_core->ras_eeprom; + unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 }; + struct ras_eeprom_table_header *hdr; + int res; + + hdr = &control->tbl_hdr; + + if (!__is_ras_eeprom_supported(ras_core)) + return 0; + + if (!__get_eeprom_i2c_addr(ras_core, control)) + return -EINVAL; + + control->ras_header_offset = RAS_HDR_START; + control->ras_info_offset = RAS_TABLE_V2_1_INFO_START; + mutex_init(&control->ras_tbl_mutex); + + /* Read the table header from EEPROM address */ + res = __eeprom_read(ras_core, + control->i2c_address + control->ras_header_offset, + buf, RAS_TABLE_HEADER_SIZE); + if (res < RAS_TABLE_HEADER_SIZE) { + RAS_DEV_ERR(ras_core->dev, + "Failed to read EEPROM table header, res:%d\n", res); + return res >= 0 ? 
-EIO : res; + } + + __decode_table_header_from_buf(hdr, buf); + + if (hdr->header != RAS_TABLE_HDR_VAL && + hdr->header != RAS_TABLE_HDR_BAD) { + RAS_DEV_INFO(ras_core->dev, "Creating a new EEPROM table"); + return ras_eeprom_reset_table(ras_core); + } + + switch (hdr->version) { + case RAS_TABLE_VER_V2_1: + case RAS_TABLE_VER_V3: + control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr); + control->ras_record_offset = RAS_RECORD_START_V2_1; + control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1; + break; + case RAS_TABLE_VER_V1: + control->ras_num_recs = RAS_NUM_RECS(hdr); + control->ras_record_offset = RAS_RECORD_START; + control->ras_max_record_count = RAS_MAX_RECORD_COUNT; + break; + default: + RAS_DEV_ERR(ras_core->dev, + "RAS header invalid, unsupported version: %u", + hdr->version); + return -EINVAL; + } + + if (control->ras_num_recs > control->ras_max_record_count) { + RAS_DEV_ERR(ras_core->dev, + "RAS header invalid, records in header: %u max allowed :%u", + control->ras_num_recs, control->ras_max_record_count); + return -EINVAL; + } + + control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); + + return 0; +} + +int ras_eeprom_check_storage_status(struct ras_core_context *ras_core) +{ + struct ras_eeprom_control *control = &ras_core->ras_eeprom; + struct ras_eeprom_table_header *hdr; + int bad_page_count; + int res = 0; + + if (!__is_ras_eeprom_supported(ras_core)) + return 0; + + if (!__get_eeprom_i2c_addr(ras_core, control)) + return -EINVAL; + + hdr = &control->tbl_hdr; + + bad_page_count = ras_umc_get_badpage_count(ras_core); + if (hdr->header == RAS_TABLE_HDR_VAL) { + RAS_DEV_INFO(ras_core->dev, + "Found existing EEPROM table with %d records\n", + bad_page_count); + + if (hdr->version >= RAS_TABLE_VER_V2_1) { + res = __read_table_ras_info(control); + if (res) + return res; + } + + res = __verify_ras_table_checksum(control); + if (res) + RAS_DEV_ERR(ras_core->dev, + "RAS table incorrect checksum or error:%d\n", res); + + /* Warn if we are at 90% of the threshold or above + */ + if (10 * bad_page_count >= 9 * control->record_threshold_count) + RAS_DEV_WARN(ras_core->dev, + "RAS records:%u exceeds 90%% of threshold:%d\n", + bad_page_count, + control->record_threshold_count); + + } else if (hdr->header == RAS_TABLE_HDR_BAD && + control->record_threshold_config != 0) { + if (hdr->version >= RAS_TABLE_VER_V2_1) { + res = __read_table_ras_info(control); + if (res) + return res; + } + + res = __verify_ras_table_checksum(control); + if (res) + RAS_DEV_ERR(ras_core->dev, + "RAS Table incorrect checksum or error:%d\n", res); + + if (control->record_threshold_count >= bad_page_count) { + /* This means that, the threshold was increased since + * the last time the system was booted, and now, + * ras->record_threshold_count - control->num_recs > 0, + * so that at least one more record can be saved, + * before the page count threshold is reached. 
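+			 * (In terms of the fields here: bad_page_count
+			 * is still <= control->record_threshold_count,
+			 * so the BAD header tag can be cleared.)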
+			 */
+			RAS_DEV_INFO(ras_core->dev,
+				     "records:%d threshold:%d, resetting RAS table header signature",
+				     bad_page_count,
+				     control->record_threshold_count);
+			res = ras_eeprom_correct_header_tag(control, RAS_TABLE_HDR_VAL);
+		} else {
+			RAS_DEV_ERR(ras_core->dev, "RAS records:%d exceed threshold:%d",
+				    bad_page_count, control->record_threshold_count);
+			if ((control->record_threshold_config == WARN_NONSTOP_OVER_THRESHOLD) ||
+			    (control->record_threshold_config == NONSTOP_OVER_THRESHOLD)) {
+				RAS_DEV_WARN(ras_core->dev,
+					     "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n");
+				res = 0;
+			} else {
+				ras_core->is_rma = true;
+				RAS_DEV_ERR(ras_core->dev,
+					    "User-defined threshold is set, runtime services will be halted when the threshold is reached\n");
+			}
+		}
+	}
+
+	return res < 0 ? res : 0;
+}
+
+int ras_eeprom_hw_init(struct ras_core_context *ras_core)
+{
+	struct ras_eeprom_control *control;
+	struct ras_eeprom_config *eeprom_cfg;
+
+	if (!ras_core)
+		return -EINVAL;
+
+	ras_core->is_rma = false;
+
+	control = &ras_core->ras_eeprom;
+
+	memset(control, 0, sizeof(*control));
+
+	eeprom_cfg = &ras_core->config->eeprom_cfg;
+	control->record_threshold_config =
+		eeprom_cfg->eeprom_record_threshold_config;
+
+	control->record_threshold_count = ras_eeprom_max_record_count(ras_core);
+	if (eeprom_cfg->eeprom_record_threshold_count <
+	    control->record_threshold_count)
+		control->record_threshold_count =
+			eeprom_cfg->eeprom_record_threshold_count;
+
+	control->sys_func = eeprom_cfg->eeprom_sys_fn;
+	control->max_read_len = eeprom_cfg->max_i2c_read_len;
+	control->max_write_len = eeprom_cfg->max_i2c_write_len;
+	control->i2c_adapter = eeprom_cfg->eeprom_i2c_adapter;
+	control->i2c_port = eeprom_cfg->eeprom_i2c_port;
+	control->i2c_address = eeprom_cfg->eeprom_i2c_addr;
+
+	control->update_channel_flag = false;
+
+	return __check_ras_table_status(ras_core);
+}
+
+int ras_eeprom_hw_fini(struct ras_core_context *ras_core)
+{
+	struct ras_eeprom_control *control;
+
+	if (!ras_core)
+		return -EINVAL;
+
+	control = &ras_core->ras_eeprom;
+	mutex_destroy(&control->ras_tbl_mutex);
+
+	return 0;
+}
+
+uint32_t ras_eeprom_get_record_count(struct ras_core_context *ras_core)
+{
+	if (!ras_core)
+		return 0;
+
+	return ras_core->ras_eeprom.ras_num_recs;
+}
+
+void ras_eeprom_sync_info(struct ras_core_context *ras_core)
+{
+	struct ras_eeprom_control *control;
+
+	if (!ras_core)
+		return;
+
+	control = &ras_core->ras_eeprom;
+	ras_core_event_notify(ras_core, RAS_EVENT_ID__UPDATE_BAD_PAGE_NUM,
+			      &control->ras_num_recs);
+	ras_core_event_notify(ras_core, RAS_EVENT_ID__UPDATE_BAD_CHANNEL_BITMAP,
+			      &control->bad_channel_bitmap);
+}
+
+enum ras_gpu_health_status
+	ras_eeprom_check_gpu_status(struct ras_core_context *ras_core)
+{
+	struct ras_eeprom_control *control = &ras_core->ras_eeprom;
+	struct ras_eeprom_table_ras_info *rai = &control->tbl_rai;
+
+	if (!__is_ras_eeprom_supported(ras_core) ||
+	    !control->record_threshold_config)
+		return RAS_GPU_HEALTH_NONE;
+
+	if (control->tbl_hdr.header == RAS_TABLE_HDR_BAD)
+		return RAS_GPU_IN_BAD_STATUS;
+
+	return rai->rma_status;
+}
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom.h b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom.h
new file mode 100644
index 000000000000..2abe566c18b6
--- /dev/null
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RAS_EEPROM_H__ +#define __RAS_EEPROM_H__ +#include "ras_sys.h" + +#define RAS_TABLE_VER_V1 0x00010000 +#define RAS_TABLE_VER_V2_1 0x00021000 +#define RAS_TABLE_VER_V3 0x00030000 + +#define NONSTOP_OVER_THRESHOLD -2 +#define WARN_NONSTOP_OVER_THRESHOLD -1 +#define DISABLE_RETIRE_PAGE 0 + +/* + * Bad address pfn : eeprom_umc_record.retired_row_pfn[39:0], + * nps mode: eeprom_umc_record.retired_row_pfn[47:40] + */ +#define EEPROM_RECORD_UMC_ADDR_MASK 0xFFFFFFFFFFULL +#define EEPROM_RECORD_UMC_NPS_MASK 0xFF0000000000ULL +#define EEPROM_RECORD_UMC_NPS_SHIFT 40 + +#define EEPROM_RECORD_UMC_NPS_MODE(RECORD) \ + (((RECORD)->retired_row_pfn & EEPROM_RECORD_UMC_NPS_MASK) >> \ + EEPROM_RECORD_UMC_NPS_SHIFT) + +#define EEPROM_RECORD_UMC_ADDR_PFN(RECORD) \ + ((RECORD)->retired_row_pfn & EEPROM_RECORD_UMC_ADDR_MASK) + +#define EEPROM_RECORD_SETUP_UMC_ADDR_AND_NPS(RECORD, ADDR, NPS) \ +do { \ + uint64_t tmp = (NPS); \ + tmp = ((tmp << EEPROM_RECORD_UMC_NPS_SHIFT) & EEPROM_RECORD_UMC_NPS_MASK); \ + tmp |= (ADDR) & EEPROM_RECORD_UMC_ADDR_MASK; \ + (RECORD)->retired_row_pfn = tmp; \ +} while (0) + +enum ras_gpu_health_status { + RAS_GPU_HEALTH_NONE = 0, + RAS_GPU_HEALTH_USABLE = 1, + RAS_GPU_RETIRED__ECC_REACH_THRESHOLD = 2, + RAS_GPU_IN_BAD_STATUS = 3, +}; + +enum ras_eeprom_err_type { + RAS_EEPROM_ERR_NA, + RAS_EEPROM_ERR_RECOVERABLE, + RAS_EEPROM_ERR_NON_RECOVERABLE, + RAS_EEPROM_ERR_COUNT, +}; + +struct ras_eeprom_table_header { + uint32_t header; + uint32_t version; + uint32_t first_rec_offset; + uint32_t tbl_size; + uint32_t checksum; +} __packed; + +struct ras_eeprom_table_ras_info { + u8 rma_status; + u8 health_percent; + u16 ecc_page_threshold; + u32 padding[64 - 1]; +} __packed; + +struct ras_eeprom_control { + struct ras_eeprom_table_header tbl_hdr; + struct ras_eeprom_table_ras_info tbl_rai; + + /* record threshold */ + int record_threshold_config; + uint32_t record_threshold_count; + bool update_channel_flag; + + const struct ras_eeprom_sys_func *sys_func; + void *i2c_adapter; + u32 i2c_port; + u16 max_read_len; + u16 max_write_len; + + /* Base I2C EEPPROM 19-bit memory address, + * where the table is located. For more information, + * see top of amdgpu_eeprom.c. + */ + u32 i2c_address; + + /* The byte offset off of @i2c_address + * where the table header is found, + * and where the records start--always + * right after the header. 
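+	 * For v2.1+ tables the ras info block sits between the header
+	 * and the records; ras_info_offset and ras_record_offset below
+	 * hold the exact offsets.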
+ */ + u32 ras_header_offset; + u32 ras_info_offset; + u32 ras_record_offset; + + /* Number of records in the table. + */ + u32 ras_num_recs; + + /* First record index to read, 0-based. + * Range is [0, num_recs-1]. This is + * an absolute index, starting right after + * the table header. + */ + u32 ras_fri; + + /* Maximum possible number of records + * we could store, i.e. the maximum capacity + * of the table. + */ + u32 ras_max_record_count; + + /* Protect table access via this mutex. + */ + struct mutex ras_tbl_mutex; + + /* Record channel info which occurred bad pages + */ + u32 bad_channel_bitmap; +}; + +/* + * Represents single table record. Packed to be easily serialized into byte + * stream. + */ +struct eeprom_umc_record { + + union { + uint64_t address; + uint64_t offset; + }; + + uint64_t retired_row_pfn; + uint64_t ts; + + enum ras_eeprom_err_type err_type; + + union { + unsigned char bank; + unsigned char cu; + }; + + unsigned char mem_channel; + unsigned char mcumc_id; + + /* The following variables will not be saved to eeprom. + */ + uint64_t cur_nps_retired_row_pfn; + uint32_t cur_nps_bank; + uint32_t cur_nps; +}; + +struct ras_core_context; +int ras_eeprom_hw_init(struct ras_core_context *ras_core); +int ras_eeprom_hw_fini(struct ras_core_context *ras_core); + +int ras_eeprom_reset_table(struct ras_core_context *ras_core); + +bool ras_eeprom_check_safety_watermark(struct ras_core_context *ras_core); + +int ras_eeprom_read(struct ras_core_context *ras_core, + struct eeprom_umc_record *records, const u32 num); + +int ras_eeprom_append(struct ras_core_context *ras_core, + struct eeprom_umc_record *records, const u32 num); + +uint32_t ras_eeprom_max_record_count(struct ras_core_context *ras_core); +uint32_t ras_eeprom_get_record_count(struct ras_core_context *ras_core); +void ras_eeprom_sync_info(struct ras_core_context *ras_core); + +int ras_eeprom_check_storage_status(struct ras_core_context *ras_core); +enum ras_gpu_health_status + ras_eeprom_check_gpu_status(struct ras_core_context *ras_core); +#endif -- cgit v1.2.3 From 9f3083dc9f1484ebe1a68512972037227967c1c3 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Thu, 5 Jun 2025 17:46:08 +0800 Subject: drm/amd/ras: Add psp v13_0 ras functions Add psp v13_0 ras functions. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.c | 46 +++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.h | 31 +++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.c b/drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.c new file mode 100644 index 000000000000..626cf39b75ac --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "ras.h" +#include "ras_psp_v13_0.h" + +#define regMP0_SMN_C2PMSG_67 0x0083 +#define regMP0_SMN_C2PMSG_67_BASE_IDX 0 + +static uint32_t ras_psp_v13_0_ring_wptr_get(struct ras_core_context *ras_core) +{ + return RAS_DEV_RREG32_SOC15(ras_core->dev, MP0, 0, regMP0_SMN_C2PMSG_67); +} + +static int ras_psp_v13_0_ring_wptr_set(struct ras_core_context *ras_core, uint32_t value) +{ + RAS_DEV_WREG32_SOC15(ras_core->dev, MP0, 0, regMP0_SMN_C2PMSG_67, value); + + return 0; +} + +const struct ras_psp_ip_func ras_psp_v13_0 = { + .psp_ras_ring_wptr_get = ras_psp_v13_0_ring_wptr_get, + .psp_ras_ring_wptr_set = ras_psp_v13_0_ring_wptr_set, +}; diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.h b/drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.h new file mode 100644 index 000000000000..b705ffe38a12 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __RAS_PSP_V13_0_H__ +#define __RAS_PSP_V13_0_H__ +#include "ras_psp.h" + +extern const struct ras_psp_ip_func ras_psp_v13_0; + +#endif -- cgit v1.2.3 From c49ef01183b0362bb7cbdd2647441f8d70b9ac93 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Thu, 5 Jun 2025 17:46:51 +0800 Subject: drm/amd/ras: Add psp ras common functions Add psp ras common functions. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_psp.c | 750 ++++++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_psp.h | 145 ++++++ drivers/gpu/drm/amd/ras/rascore/ras_ta_if.h | 231 +++++++++ 3 files changed, 1126 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_psp.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_psp.h create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_ta_if.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_psp.c b/drivers/gpu/drm/amd/ras/rascore/ras_psp.c new file mode 100644 index 000000000000..c94effd4b114 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_psp.c @@ -0,0 +1,750 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
+#include "ras.h"
+#include "ras_ta_if.h"
+#include "ras_psp.h"
+#include "ras_psp_v13_0.h"
+
+/* position of the instance value in sub_block_index of
+ * ras_ta_trigger_error_input; the sub block uses the lower 12 bits
+ */
+#define RAS_TA_INST_MASK	0xfffff000
+#define RAS_TA_INST_SHIFT	0xc
+
+static const struct ras_psp_ip_func *ras_psp_get_ip_funcs(
+		struct ras_core_context *ras_core, uint32_t ip_version)
+{
+	switch (ip_version) {
+	case IP_VERSION(13, 0, 6):
+	case IP_VERSION(13, 0, 14):
+	case IP_VERSION(13, 0, 12):
+		return &ras_psp_v13_0;
+	default:
+		RAS_DEV_ERR(ras_core->dev,
+			    "psp ip version(0x%x) is not supported!\n", ip_version);
+		break;
+	}
+
+	return NULL;
+}
+
+static int ras_psp_sync_system_ras_psp_status(struct ras_core_context *ras_core)
+{
+	struct ras_psp *psp = &ras_core->ras_psp;
+	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
+	struct ras_psp_ctx *psp_ctx = &ras_core->ras_psp.psp_ctx;
+	struct ras_psp_sys_status status = {0};
+	int ret;
+
+	if (psp->sys_func && psp->sys_func->get_ras_psp_system_status) {
+		ret = psp->sys_func->get_ras_psp_system_status(ras_core, &status);
+		if (ret)
+			return ret;
+
+		if (status.initialized) {
+			ta_ctx->preload_ras_ta_enabled = true;
+			ta_ctx->ras_ta_initialized = status.initialized;
+			ta_ctx->session_id = status.session_id;
+		}
+
+		psp_ctx->external_mutex = status.psp_cmd_mutex;
+	}
+
+	return 0;
+}
+
+static int ras_psp_get_ras_ta_init_param(struct ras_core_context *ras_core,
+		struct ras_ta_init_param *ras_ta_param)
+{
+	struct ras_psp *psp = &ras_core->ras_psp;
+
+	if (psp->sys_func && psp->sys_func->get_ras_ta_init_param)
+		return psp->sys_func->get_ras_ta_init_param(ras_core, ras_ta_param);
+
+	RAS_DEV_ERR(ras_core->dev, "get_ras_ta_init_param API is not configured!\n");
+	return -EACCES;
+}
+
+static struct gpu_mem_block *ras_psp_get_gpu_mem(struct ras_core_context *ras_core,
+		enum gpu_mem_type mem_type)
+{
+	struct ras_psp *psp = &ras_core->ras_psp;
+	struct gpu_mem_block *gpu_mem = NULL;
+	int ret;
+
+	switch (mem_type) {
+	case GPU_MEM_TYPE_RAS_PSP_RING:
+		gpu_mem = &psp->psp_ring.ras_ring_gpu_mem;
+		break;
+	case GPU_MEM_TYPE_RAS_PSP_CMD:
+		gpu_mem = &psp->psp_ctx.psp_cmd_gpu_mem;
+		break;
+	case GPU_MEM_TYPE_RAS_PSP_FENCE:
+		gpu_mem = &psp->psp_ctx.out_fence_gpu_mem;
+		break;
+	case GPU_MEM_TYPE_RAS_TA_FW:
+		gpu_mem = &psp->ta_ctx.fw_gpu_mem;
+		break;
+	case GPU_MEM_TYPE_RAS_TA_CMD:
+		gpu_mem = &psp->ta_ctx.cmd_gpu_mem;
+		break;
+	default:
+		return NULL;
+	}
+
+	if (!gpu_mem->ref_count) {
+		ret = ras_core_get_gpu_mem(ras_core, mem_type, gpu_mem);
+		if (ret)
+			return NULL;
+		gpu_mem->mem_type = mem_type;
+	}
+
+	gpu_mem->ref_count++;
+
+	return gpu_mem;
+}
+
+static int ras_psp_put_gpu_mem(struct ras_core_context *ras_core,
+		struct gpu_mem_block *gpu_mem)
+{
+	if (!gpu_mem)
+		return 0;
+
+	gpu_mem->ref_count--;
+
+	if (gpu_mem->ref_count > 0) {
+		return 0;
+	} else if (gpu_mem->ref_count < 0) {
+		RAS_DEV_WARN(ras_core->dev,
+			     "Duplicate free of gpu memory %u\n", gpu_mem->mem_type);
+	} else {
+		ras_core_put_gpu_mem(ras_core, gpu_mem->mem_type, gpu_mem);
+		memset(gpu_mem, 0, sizeof(*gpu_mem));
+	}
+
+	return 0;
+}
+
+static void __acquire_psp_cmd_lock(struct ras_core_context *ras_core)
+{
+	struct ras_psp_ctx *psp_ctx = &ras_core->ras_psp.psp_ctx;
+
+	if (psp_ctx->external_mutex)
+		mutex_lock(psp_ctx->external_mutex);
+	else
+		mutex_lock(&psp_ctx->internal_mutex);
+}
+
+static void __release_psp_cmd_lock(struct ras_core_context *ras_core)
+{
+	struct ras_psp_ctx *psp_ctx = &ras_core->ras_psp.psp_ctx;
+
+	if 
(psp_ctx->external_mutex) + mutex_unlock(psp_ctx->external_mutex); + else + mutex_unlock(&psp_ctx->internal_mutex); +} + +static uint32_t __get_ring_frame_slot(struct ras_core_context *ras_core) +{ + struct ras_psp *psp = &ras_core->ras_psp; + uint32_t ras_ring_wptr_dw; + + ras_ring_wptr_dw = psp->ip_func->psp_ras_ring_wptr_get(ras_core); + + return (ras_ring_wptr_dw << 2) / sizeof(struct psp_gfx_rb_frame); +} + +static int __set_ring_frame_slot(struct ras_core_context *ras_core, + uint32_t slot) +{ + struct ras_psp *psp = &ras_core->ras_psp; + + return psp->ip_func->psp_ras_ring_wptr_set(ras_core, + (slot * sizeof(struct psp_gfx_rb_frame)) >> 2); +} + +static int write_frame_to_ras_psp_ring(struct ras_core_context *ras_core, + struct psp_gfx_rb_frame *frame) +{ + struct gpu_mem_block *ring_mem; + struct psp_gfx_rb_frame *rb_frame; + uint32_t max_frame_slot; + uint32_t slot_idx; + uint32_t write_flush_read_back = 0; + int ret = 0; + + ring_mem = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_PSP_RING); + if (!ring_mem) + return -ENOMEM; + + max_frame_slot = + ring_mem->mem_size / sizeof(struct psp_gfx_rb_frame); + + rb_frame = + (struct psp_gfx_rb_frame *)ring_mem->mem_cpu_addr; + + slot_idx = __get_ring_frame_slot(ras_core); + if (slot_idx >= max_frame_slot) + slot_idx = 0; + + memcpy(&rb_frame[slot_idx], frame, sizeof(*frame)); + + /* Do a read to force the write of the frame before writing + * write pointer. + */ + write_flush_read_back = rb_frame[slot_idx].fence_value; + if (write_flush_read_back != frame->fence_value) { + RAS_DEV_ERR(ras_core->dev, + "Failed to submit ring cmd! cmd:0x%x:0x%x, fence:0x%x:0x%x value:%u, expected:%u\n", + rb_frame[slot_idx].cmd_buf_addr_hi, + rb_frame[slot_idx].cmd_buf_addr_lo, + rb_frame[slot_idx].fence_addr_hi, + rb_frame[slot_idx].fence_addr_lo, + write_flush_read_back, frame->fence_value); + ret = -EACCES; + goto err; + } + + slot_idx++; + + if (slot_idx >= max_frame_slot) + slot_idx = 0; + + __set_ring_frame_slot(ras_core, slot_idx); + +err: + ras_psp_put_gpu_mem(ras_core, ring_mem); + return ret; +} + +static int send_psp_cmd(struct ras_core_context *ras_core, + enum psp_gfx_cmd_id gfx_cmd_id, void *cmd_data, + uint32_t cmd_size, struct psp_cmd_resp *resp) +{ + struct ras_psp_ctx *psp_ctx = &ras_core->ras_psp.psp_ctx; + struct gpu_mem_block *psp_cmd_buf = NULL; + struct gpu_mem_block *psp_fence_buf = NULL; + struct psp_gfx_cmd_resp *gfx_cmd; + struct psp_gfx_rb_frame rb_frame; + int ret = 0; + int timeout = 1000; + + if (!cmd_data || (cmd_size > sizeof(union psp_gfx_commands)) || !resp) { + RAS_DEV_ERR(ras_core->dev, "Invalid RAS PSP command, id: %u\n", gfx_cmd_id); + return -EINVAL; + } + + __acquire_psp_cmd_lock(ras_core); + + psp_cmd_buf = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_PSP_CMD); + if (!psp_cmd_buf) { + ret = -ENOMEM; + goto exit; + } + + psp_fence_buf = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_PSP_FENCE); + if (!psp_fence_buf) { + ret = -ENOMEM; + goto exit; + } + + gfx_cmd = (struct psp_gfx_cmd_resp *)psp_cmd_buf->mem_cpu_addr; + memset(gfx_cmd, 0, sizeof(*gfx_cmd)); + gfx_cmd->cmd_id = gfx_cmd_id; + memcpy(&gfx_cmd->cmd, cmd_data, cmd_size); + + psp_ctx->in_fence_value++; + + memset(&rb_frame, 0, sizeof(rb_frame)); + rb_frame.cmd_buf_addr_hi = upper_32_bits(psp_cmd_buf->mem_mc_addr); + rb_frame.cmd_buf_addr_lo = lower_32_bits(psp_cmd_buf->mem_mc_addr); + rb_frame.fence_addr_hi = upper_32_bits(psp_fence_buf->mem_mc_addr); + rb_frame.fence_addr_lo = lower_32_bits(psp_fence_buf->mem_mc_addr); + rb_frame.fence_value = 
psp_ctx->in_fence_value;
+
+	ret = write_frame_to_ras_psp_ring(ras_core, &rb_frame);
+	if (ret) {
+		psp_ctx->in_fence_value--;
+		goto exit;
+	}
+
+	while (*((uint64_t *)psp_fence_buf->mem_cpu_addr) !=
+	       psp_ctx->in_fence_value) {
+		if (--timeout == 0)
+			break;
+		/*
+		 * Don't wait out the full timeout when err_event_athub
+		 * occurs: the gpu reset thread has been triggered, and
+		 * the lock must be released for the psp resume sequence.
+		 */
+		if (ras_core_ras_interrupt_detected(ras_core))
+			break;
+
+		msleep(2);
+	}
+
+	resp->status = gfx_cmd->resp.status;
+	resp->session_id = gfx_cmd->resp.session_id;
+
+exit:
+	ras_psp_put_gpu_mem(ras_core, psp_cmd_buf);
+	ras_psp_put_gpu_mem(ras_core, psp_fence_buf);
+
+	__release_psp_cmd_lock(ras_core);
+
+	return ret;
+}
+
+static void __check_ras_ta_cmd_resp(struct ras_core_context *ras_core,
+		struct ras_ta_cmd *ras_cmd)
+{
+	if (ras_cmd->ras_out_message.flags.err_inject_switch_disable_flag) {
+		RAS_DEV_WARN(ras_core->dev, "ECC switch disabled\n");
+		ras_cmd->ras_status = RAS_TA_STATUS__ERROR_RAS_NOT_AVAILABLE;
+	} else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
+		RAS_DEV_WARN(ras_core->dev, "RAS internal register access blocked\n");
+
+	switch (ras_cmd->ras_status) {
+	case RAS_TA_STATUS__ERROR_UNSUPPORTED_IP:
+		RAS_DEV_WARN(ras_core->dev,
+			     "RAS WARNING: cmd failed due to unsupported ip\n");
+		break;
+	case RAS_TA_STATUS__ERROR_UNSUPPORTED_ERROR_INJ:
+		RAS_DEV_WARN(ras_core->dev,
+			     "RAS WARNING: cmd failed due to unsupported error injection\n");
+		break;
+	case RAS_TA_STATUS__SUCCESS:
+		break;
+	case RAS_TA_STATUS__TEE_ERROR_ACCESS_DENIED:
+		if (ras_cmd->cmd_id == RAS_TA_CMD_ID__TRIGGER_ERROR)
+			RAS_DEV_WARN(ras_core->dev,
+				     "RAS WARNING: Inject error to critical region is not allowed\n");
+		break;
+	default:
+		RAS_DEV_WARN(ras_core->dev,
+			     "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status);
+		break;
+	}
+}
+
+static int send_ras_ta_runtime_cmd(struct ras_core_context *ras_core,
+		enum ras_ta_cmd_id cmd_id, void *in, uint32_t in_size,
+		void *out, uint32_t out_size)
+{
+	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
+	struct gpu_mem_block *cmd_mem;
+	struct ras_ta_cmd *ras_cmd;
+	struct psp_gfx_cmd_invoke_cmd invoke_cmd = {0};
+	struct psp_cmd_resp resp = {0};
+	int ret = 0;
+
+	if (!in || (in_size > sizeof(union ras_ta_cmd_input)) ||
+	    (cmd_id >= MAX_RAS_TA_CMD_ID)) {
+		RAS_DEV_ERR(ras_core->dev, "Invalid RAS TA command, id: %u\n", cmd_id);
+		return -EINVAL;
+	}
+
+	ras_psp_sync_system_ras_psp_status(ras_core);
+
+	cmd_mem = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_TA_CMD);
+	if (!cmd_mem)
+		return -ENOMEM;
+
+	if (!ras_core_down_trylock_gpu_reset_lock(ras_core)) {
+		ret = -EACCES;
+		goto out;
+	}
+
+	ras_cmd = (struct ras_ta_cmd *)cmd_mem->mem_cpu_addr;
+
+	mutex_lock(&ta_ctx->ta_mutex);
+
+	memset(ras_cmd, 0, sizeof(*ras_cmd));
+	ras_cmd->cmd_id = cmd_id;
+	memcpy(&ras_cmd->ras_in_message, in, in_size);
+
+	invoke_cmd.ta_cmd_id = cmd_id;
+	invoke_cmd.session_id = ta_ctx->session_id;
+
+	ret = send_psp_cmd(ras_core, GFX_CMD_ID_INVOKE_CMD,
+			   &invoke_cmd, sizeof(invoke_cmd), &resp);
+
+	/* If err_event_athub occurs, the error injection was successful;
+	 * however, the return status from the TA is no longer reliable.
+	 */
+	if (ras_core_ras_interrupt_detected(ras_core)) {
+		ret = 0;
+		goto unlock;
+	}
+
+	if (ret || resp.status) {
+		RAS_DEV_ERR(ras_core->dev,
+			    "RAS: Failed to send psp cmd! 
ret:%d, status:%u\n", + ret, resp.status); + ret = -ESTRPIPE; + goto unlock; + } + + if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) { + RAS_DEV_WARN(ras_core->dev, "RAS: Unsupported Interface\n"); + ret = -EINVAL; + goto unlock; + } + + if (!ras_cmd->ras_status && out && out_size) + memcpy(out, &ras_cmd->ras_out_message, out_size); + + __check_ras_ta_cmd_resp(ras_core, ras_cmd); + +unlock: + mutex_unlock(&ta_ctx->ta_mutex); + ras_core_up_gpu_reset_lock(ras_core); +out: + ras_psp_put_gpu_mem(ras_core, cmd_mem); + return ret; +} + +static int trigger_ras_ta_error(struct ras_core_context *ras_core, + struct ras_ta_trigger_error_input *info, uint32_t instance_mask) +{ + uint32_t dev_mask = 0; + + switch (info->block_id) { + case RAS_TA_BLOCK__GFX: + if (ras_gfx_get_ta_subblock(ras_core, info->inject_error_type, + info->sub_block_index, &info->sub_block_index)) + return -EINVAL; + + dev_mask = RAS_GET_MASK(ras_core->dev, GC, instance_mask); + break; + case RAS_TA_BLOCK__SDMA: + dev_mask = RAS_GET_MASK(ras_core->dev, SDMA0, instance_mask); + break; + case RAS_TA_BLOCK__VCN: + case RAS_TA_BLOCK__JPEG: + dev_mask = RAS_GET_MASK(ras_core->dev, VCN, instance_mask); + break; + default: + dev_mask = instance_mask; + break; + } + + /* reuse sub_block_index for backward compatibility */ + dev_mask <<= RAS_TA_INST_SHIFT; + dev_mask &= RAS_TA_INST_MASK; + info->sub_block_index |= dev_mask; + + return send_ras_ta_runtime_cmd(ras_core, RAS_TA_CMD_ID__TRIGGER_ERROR, + info, sizeof(*info), NULL, 0); +} + +static int send_load_ta_fw_cmd(struct ras_core_context *ras_core, + struct ras_ta_ctx *ta_ctx) +{ + struct ras_ta_fw_bin *fw_bin = &ta_ctx->fw_bin; + struct gpu_mem_block *fw_mem; + struct gpu_mem_block *cmd_mem; + struct ras_ta_cmd *ta_cmd; + struct ras_ta_init_flags *ta_init_flags; + struct psp_gfx_cmd_load_ta psp_load_ta_cmd; + struct psp_cmd_resp resp = {0}; + struct ras_ta_image_header *fw_hdr = NULL; + int ret; + + fw_mem = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_TA_FW); + if (!fw_mem) + return -ENOMEM; + + cmd_mem = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_TA_CMD); + if (!cmd_mem) { + ret = -ENOMEM; + goto err; + } + + ret = ras_psp_get_ras_ta_init_param(ras_core, &ta_ctx->init_param); + if (ret) + goto err; + + if (!ras_core_down_trylock_gpu_reset_lock(ras_core)) { + ret = -EACCES; + goto err; + } + + /* copy ras ta binary to shared gpu memory */ + memcpy(fw_mem->mem_cpu_addr, fw_bin->bin_addr, fw_bin->bin_size); + fw_mem->mem_size = fw_bin->bin_size; + + /* Initialize ras ta startup parameter */ + ta_cmd = (struct ras_ta_cmd *)cmd_mem->mem_cpu_addr; + ta_init_flags = &ta_cmd->ras_in_message.init_flags; + + ta_init_flags->poison_mode_en = ta_ctx->init_param.poison_mode_en; + ta_init_flags->dgpu_mode = ta_ctx->init_param.dgpu_mode; + ta_init_flags->xcc_mask = ta_ctx->init_param.xcc_mask; + ta_init_flags->channel_dis_num = ta_ctx->init_param.channel_dis_num; + ta_init_flags->nps_mode = ta_ctx->init_param.nps_mode; + ta_init_flags->active_umc_mask = ta_ctx->init_param.active_umc_mask; + + /* Setup load ras ta command */ + memset(&psp_load_ta_cmd, 0, sizeof(psp_load_ta_cmd)); + psp_load_ta_cmd.app_phy_addr_lo = lower_32_bits(fw_mem->mem_mc_addr); + psp_load_ta_cmd.app_phy_addr_hi = upper_32_bits(fw_mem->mem_mc_addr); + psp_load_ta_cmd.app_len = fw_mem->mem_size; + psp_load_ta_cmd.cmd_buf_phy_addr_lo = lower_32_bits(cmd_mem->mem_mc_addr); + psp_load_ta_cmd.cmd_buf_phy_addr_hi = upper_32_bits(cmd_mem->mem_mc_addr); + psp_load_ta_cmd.cmd_buf_len = cmd_mem->mem_size; + + ret = 
send_psp_cmd(ras_core, GFX_CMD_ID_LOAD_TA, + &psp_load_ta_cmd, sizeof(psp_load_ta_cmd), &resp); + if (!ret && !resp.status) { + /* Read TA version at FW offset 0x60 if TA version not found*/ + fw_hdr = (struct ras_ta_image_header *)fw_bin->bin_addr; + RAS_DEV_INFO(ras_core->dev, "PSP: RAS TA(version:%X.%X.%X.%X) is loaded.\n", + (fw_hdr->image_version >> 24) & 0xFF, (fw_hdr->image_version >> 16) & 0xFF, + (fw_hdr->image_version >> 8) & 0xFF, fw_hdr->image_version & 0xFF); + ta_ctx->ta_version = fw_hdr->image_version; + ta_ctx->session_id = resp.session_id; + ta_ctx->ras_ta_initialized = true; + } else { + RAS_DEV_ERR(ras_core->dev, + "Failed to load RAS TA! ret:%d, status:%d\n", ret, resp.status); + } + + ras_core_up_gpu_reset_lock(ras_core); + +err: + ras_psp_put_gpu_mem(ras_core, fw_mem); + ras_psp_put_gpu_mem(ras_core, cmd_mem); + return ret; +} + +static int load_ras_ta_firmware(struct ras_core_context *ras_core, + struct ras_psp_ta_load *ras_ta_load) +{ + struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx; + struct ras_ta_fw_bin *fw_bin = &ta_ctx->fw_bin; + int ret; + + fw_bin->bin_addr = ras_ta_load->bin_addr; + fw_bin->bin_size = ras_ta_load->bin_size; + fw_bin->fw_version = ras_ta_load->fw_version; + fw_bin->feature_version = ras_ta_load->feature_version; + + ret = send_load_ta_fw_cmd(ras_core, ta_ctx); + if (!ret) { + ras_ta_load->out_session_id = ta_ctx->session_id; + ras_ta_load->out_loaded_ta_version = ta_ctx->ta_version; + } + + return ret; +} + +static int unload_ras_ta_firmware(struct ras_core_context *ras_core, + struct ras_psp_ta_unload *ras_ta_unload) +{ + struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx; + struct psp_gfx_cmd_unload_ta cmd_unload_ta = {0}; + struct psp_cmd_resp resp = {0}; + int ret; + + if (!ras_core_down_trylock_gpu_reset_lock(ras_core)) + return -EACCES; + + cmd_unload_ta.session_id = ta_ctx->session_id; + ret = send_psp_cmd(ras_core, GFX_CMD_ID_UNLOAD_TA, + &cmd_unload_ta, sizeof(cmd_unload_ta), &resp); + if (ret || resp.status) { + RAS_DEV_ERR(ras_core->dev, + "Failed to unload RAS TA! 
ret:%d, status:%u\n", + ret, resp.status); + goto unlock; + } + + kfree(ta_ctx->fw_bin.bin_addr); + memset(&ta_ctx->fw_bin, 0, sizeof(ta_ctx->fw_bin)); + ta_ctx->ta_version = 0; + ta_ctx->ras_ta_initialized = false; + ta_ctx->session_id = 0; + +unlock: + ras_core_up_gpu_reset_lock(ras_core); + + return ret; +} + +int ras_psp_load_firmware(struct ras_core_context *ras_core, + struct ras_psp_ta_load *ras_ta_load) +{ + struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx; + struct ras_psp_ta_unload ras_ta_unload = {0}; + int ret; + + if (ta_ctx->preload_ras_ta_enabled) + return 0; + + if (!ras_ta_load) + return -EINVAL; + + if (ta_ctx->ras_ta_initialized) { + ras_ta_unload.ras_session_id = ta_ctx->session_id; + ret = unload_ras_ta_firmware(ras_core, &ras_ta_unload); + if (ret) + return ret; + } + + return load_ras_ta_firmware(ras_core, ras_ta_load); +} + +int ras_psp_unload_firmware(struct ras_core_context *ras_core, + struct ras_psp_ta_unload *ras_ta_unload) +{ + struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx; + + if (ta_ctx->preload_ras_ta_enabled) + return 0; + + if ((!ras_ta_unload) || + (ras_ta_unload->ras_session_id != ta_ctx->session_id)) + return -EINVAL; + + return unload_ras_ta_firmware(ras_core, ras_ta_unload); +} + +int ras_psp_trigger_error(struct ras_core_context *ras_core, + struct ras_ta_trigger_error_input *info, uint32_t instance_mask) +{ + struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx; + + if (!ta_ctx->preload_ras_ta_enabled && !ta_ctx->ras_ta_initialized) { + RAS_DEV_ERR(ras_core->dev, "RAS: ras firmware not initialized!"); + return -ENOEXEC; + } + + if (!info) + return -EINVAL; + + return trigger_ras_ta_error(ras_core, info, instance_mask); +} + +int ras_psp_query_address(struct ras_core_context *ras_core, + struct ras_ta_query_address_input *addr_in, + struct ras_ta_query_address_output *addr_out) +{ + struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx; + + if (!ta_ctx->preload_ras_ta_enabled && + !ta_ctx->ras_ta_initialized) { + RAS_DEV_ERR(ras_core->dev, "RAS: ras firmware not initialized!"); + return -ENOEXEC; + } + + if (!addr_in || !addr_out) + return -EINVAL; + + return send_ras_ta_runtime_cmd(ras_core, RAS_TA_CMD_ID__QUERY_ADDRESS, + addr_in, sizeof(*addr_in), addr_out, sizeof(*addr_out)); +} + +int ras_psp_sw_init(struct ras_core_context *ras_core) +{ + struct ras_psp *psp = &ras_core->ras_psp; + + memset(psp, 0, sizeof(*psp)); + + psp->sys_func = ras_core->config->psp_cfg.psp_sys_fn; + if (!psp->sys_func) { + RAS_DEV_ERR(ras_core->dev, "RAS psp sys function not configured!\n"); + return -EINVAL; + } + + mutex_init(&psp->psp_ctx.internal_mutex); + mutex_init(&psp->ta_ctx.ta_mutex); + + return 0; +} + +int ras_psp_sw_fini(struct ras_core_context *ras_core) +{ + struct ras_psp *psp = &ras_core->ras_psp; + + mutex_destroy(&psp->psp_ctx.internal_mutex); + mutex_destroy(&psp->ta_ctx.ta_mutex); + + memset(psp, 0, sizeof(*psp)); + + return 0; +} + +int ras_psp_hw_init(struct ras_core_context *ras_core) +{ + struct ras_psp *psp = &ras_core->ras_psp; + + psp->psp_ip_version = ras_core->config->psp_ip_version; + + psp->ip_func = ras_psp_get_ip_funcs(ras_core, psp->psp_ip_version); + if (!psp->ip_func) + return -EINVAL; + + /* After GPU reset, the system RAS PSP status may change. + * therefore, it is necessary to synchronize the system status again. 
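+	 * (For example, the RAS TA may have been reloaded by the host
+	 * driver, so the cached session id must be refreshed.)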
+ */ + ras_psp_sync_system_ras_psp_status(ras_core); + + return 0; +} + +int ras_psp_hw_fini(struct ras_core_context *ras_core) +{ + return 0; +} + +bool ras_psp_check_supported_cmd(struct ras_core_context *ras_core, + enum ras_ta_cmd_id cmd_id) +{ + struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx; + bool ret = false; + + if (!ta_ctx->preload_ras_ta_enabled && !ta_ctx->ras_ta_initialized) + return false; + + switch (cmd_id) { + case RAS_TA_CMD_ID__QUERY_ADDRESS: + /* Currently, querying the address from RAS TA is only supported + * when the RAS TA firmware is loaded during driver installation. + */ + if (ta_ctx->preload_ras_ta_enabled) + ret = true; + break; + case RAS_TA_CMD_ID__TRIGGER_ERROR: + ret = true; + break; + default: + ret = false; + break; + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_psp.h b/drivers/gpu/drm/amd/ras/rascore/ras_psp.h new file mode 100644 index 000000000000..71776fecfd66 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_psp.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __RAS_PSP_H__ +#define __RAS_PSP_H__ +#include "ras.h" +#include "ras_ta_if.h" + +struct ras_core_context; +struct ras_ta_trigger_error_input; +struct ras_ta_query_address_input; +struct ras_ta_query_address_output; +enum ras_ta_cmd_id; + +struct ras_ta_image_header { + uint32_t reserved1[24]; + uint32_t image_version; /* [0x60] Off Chip Firmware Version */ + uint32_t reserved2[39]; +}; + +struct ras_psp_sys_status { + bool initialized; + uint32_t session_id; + void *psp_cmd_mutex; +}; + +struct ras_ta_init_param { + uint8_t poison_mode_en; + uint8_t dgpu_mode; + uint16_t xcc_mask; + uint8_t channel_dis_num; + uint8_t nps_mode; + uint32_t active_umc_mask; +}; + +struct gpu_mem_block { + uint32_t mem_type; + void *mem_bo; + uint64_t mem_mc_addr; + void *mem_cpu_addr; + uint32_t mem_size; + int ref_count; + void *private; +}; + +struct ras_psp_ip_func { + uint32_t (*psp_ras_ring_wptr_get)(struct ras_core_context *ras_core); + int (*psp_ras_ring_wptr_set)(struct ras_core_context *ras_core, uint32_t wptr); +}; + +struct ras_psp_ring { + struct gpu_mem_block ras_ring_gpu_mem; +}; + +struct psp_cmd_resp { + uint32_t status; + uint32_t session_id; +}; + +struct ras_psp_ctx { + void *external_mutex; + struct mutex internal_mutex; + uint64_t in_fence_value; + struct gpu_mem_block psp_cmd_gpu_mem; + struct gpu_mem_block out_fence_gpu_mem; +}; + +struct ras_ta_fw_bin { + uint32_t fw_version; + uint32_t feature_version; + uint32_t bin_size; + uint8_t *bin_addr; +}; + +struct ras_ta_ctx { + bool preload_ras_ta_enabled; + bool ras_ta_initialized; + uint32_t session_id; + uint32_t resp_status; + uint32_t ta_version; + struct mutex ta_mutex; + struct ras_ta_fw_bin fw_bin; + struct ras_ta_init_param init_param; + struct gpu_mem_block fw_gpu_mem; + struct gpu_mem_block cmd_gpu_mem; +}; + +struct ras_psp { + uint32_t psp_ip_version; + struct ras_psp_ring psp_ring; + struct ras_psp_ctx psp_ctx; + struct ras_ta_ctx ta_ctx; + const struct ras_psp_ip_func *ip_func; + const struct ras_psp_sys_func *sys_func; +}; + +struct ras_psp_ta_load { + uint32_t fw_version; + uint32_t feature_version; + uint32_t bin_size; + uint8_t *bin_addr; + uint64_t out_session_id; + uint32_t out_loaded_ta_version; +}; + +struct ras_psp_ta_unload { + uint64_t ras_session_id; +}; + +int ras_psp_sw_init(struct ras_core_context *ras_core); +int ras_psp_sw_fini(struct ras_core_context *ras_core); +int ras_psp_hw_init(struct ras_core_context *ras_core); +int ras_psp_hw_fini(struct ras_core_context *ras_core); +int ras_psp_load_firmware(struct ras_core_context *ras_core, + struct ras_psp_ta_load *ras_ta_load); +int ras_psp_unload_firmware(struct ras_core_context *ras_core, + struct ras_psp_ta_unload *ras_ta_unload); +int ras_psp_trigger_error(struct ras_core_context *ras_core, + struct ras_ta_trigger_error_input *info, uint32_t instance_mask); +int ras_psp_query_address(struct ras_core_context *ras_core, + struct ras_ta_query_address_input *addr_in, + struct ras_ta_query_address_output *addr_out); +bool ras_psp_check_supported_cmd(struct ras_core_context *ras_core, + enum ras_ta_cmd_id cmd_id); +#endif diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_ta_if.h b/drivers/gpu/drm/amd/ras/rascore/ras_ta_if.h new file mode 100644 index 000000000000..0921e36d3274 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_ta_if.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _RAS_TA_IF_H +#define _RAS_TA_IF_H +#include "ras.h" + +#define RAS_TA_HOST_IF_VER 0 + +/* Responses have bit 31 set */ +#define RSP_ID_MASK (1U << 31) +#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) + +/* invalid node instance value */ +#define RAS_TA_INV_NODE 0xffff + +/* RAS related enumerations */ +/**********************************************************/ +enum ras_ta_cmd_id { + RAS_TA_CMD_ID__ENABLE_FEATURES = 0, + RAS_TA_CMD_ID__DISABLE_FEATURES, + RAS_TA_CMD_ID__TRIGGER_ERROR, + RAS_TA_CMD_ID__QUERY_BLOCK_INFO, + RAS_TA_CMD_ID__QUERY_SUB_BLOCK_INFO, + RAS_TA_CMD_ID__QUERY_ADDRESS, + MAX_RAS_TA_CMD_ID +}; + +enum ras_ta_status { + RAS_TA_STATUS__SUCCESS = 0x0000, + RAS_TA_STATUS__RESET_NEEDED = 0xA001, + RAS_TA_STATUS__ERROR_INVALID_PARAMETER = 0xA002, + RAS_TA_STATUS__ERROR_RAS_NOT_AVAILABLE = 0xA003, + RAS_TA_STATUS__ERROR_RAS_DUPLICATE_CMD = 0xA004, + RAS_TA_STATUS__ERROR_INJECTION_FAILED = 0xA005, + RAS_TA_STATUS__ERROR_ASD_READ_WRITE = 0xA006, + RAS_TA_STATUS__ERROR_TOGGLE_DF_CSTATE = 0xA007, + RAS_TA_STATUS__ERROR_TIMEOUT = 0xA008, + RAS_TA_STATUS__ERROR_BLOCK_DISABLED = 0XA009, + RAS_TA_STATUS__ERROR_GENERIC = 0xA00A, + RAS_TA_STATUS__ERROR_RAS_MMHUB_INIT = 0xA00B, + RAS_TA_STATUS__ERROR_GET_DEV_INFO = 0xA00C, + RAS_TA_STATUS__ERROR_UNSUPPORTED_DEV = 0xA00D, + RAS_TA_STATUS__ERROR_NOT_INITIALIZED = 0xA00E, + RAS_TA_STATUS__ERROR_TEE_INTERNAL = 0xA00F, + RAS_TA_STATUS__ERROR_UNSUPPORTED_FUNCTION = 0xA010, + RAS_TA_STATUS__ERROR_SYS_DRV_REG_ACCESS = 0xA011, + RAS_TA_STATUS__ERROR_RAS_READ_WRITE = 0xA012, + RAS_TA_STATUS__ERROR_NULL_PTR = 0xA013, + RAS_TA_STATUS__ERROR_UNSUPPORTED_IP = 0xA014, + RAS_TA_STATUS__ERROR_PCS_STATE_QUIET = 0xA015, + RAS_TA_STATUS__ERROR_PCS_STATE_ERROR = 0xA016, + RAS_TA_STATUS__ERROR_PCS_STATE_HANG = 0xA017, + RAS_TA_STATUS__ERROR_PCS_STATE_UNKNOWN = 0xA018, + RAS_TA_STATUS__ERROR_UNSUPPORTED_ERROR_INJ = 0xA019, + RAS_TA_STATUS__TEE_ERROR_ACCESS_DENIED = 0xA01A +}; + +enum ras_ta_block { + RAS_TA_BLOCK__UMC = 0, + RAS_TA_BLOCK__SDMA, + RAS_TA_BLOCK__GFX, + RAS_TA_BLOCK__MMHUB, + RAS_TA_BLOCK__ATHUB, + RAS_TA_BLOCK__PCIE_BIF, + RAS_TA_BLOCK__HDP, + RAS_TA_BLOCK__XGMI_WAFL, + RAS_TA_BLOCK__DF, + RAS_TA_BLOCK__SMN, + RAS_TA_BLOCK__SEM, + RAS_TA_BLOCK__MP0, + RAS_TA_BLOCK__MP1, + RAS_TA_BLOCK__FUSE, + RAS_TA_BLOCK__MCA, + RAS_TA_BLOCK__VCN, + RAS_TA_BLOCK__JPEG, + RAS_TA_BLOCK__IH, + RAS_TA_BLOCK__MPIO, + RAS_TA_BLOCK__MMSCH, + 
RAS_TA_NUM_BLOCK_MAX +}; + +enum ras_ta_mca_block { + RAS_TA_MCA_BLOCK__MP0 = 0, + RAS_TA_MCA_BLOCK__MP1 = 1, + RAS_TA_MCA_BLOCK__MPIO = 2, + RAS_TA_MCA_BLOCK__IOHC = 3, + RAS_TA_MCA_NUM_BLOCK_MAX +}; + +enum ras_ta_error_type { + RAS_TA_ERROR__NONE = 0, + RAS_TA_ERROR__PARITY = 1, + RAS_TA_ERROR__SINGLE_CORRECTABLE = 2, + RAS_TA_ERROR__MULTI_UNCORRECTABLE = 4, + RAS_TA_ERROR__POISON = 8, +}; + +enum ras_ta_address_type { + RAS_TA_MCA_TO_PA, + RAS_TA_PA_TO_MCA, +}; + +enum ras_ta_nps_mode { + RAS_TA_UNKNOWN_MODE = 0, + RAS_TA_NPS1_MODE = 1, + RAS_TA_NPS2_MODE = 2, + RAS_TA_NPS4_MODE = 4, + RAS_TA_NPS8_MODE = 8, +}; + +/* Input/output structures for RAS commands */ +/**********************************************************/ + +struct ras_ta_enable_features_input { + enum ras_ta_block block_id; + enum ras_ta_error_type error_type; +}; + +struct ras_ta_disable_features_input { + enum ras_ta_block block_id; + enum ras_ta_error_type error_type; +}; + +struct ras_ta_trigger_error_input { + /* ras-block. i.e. umc, gfx */ + enum ras_ta_block block_id; + + /* type of error. i.e. single_correctable */ + enum ras_ta_error_type inject_error_type; + + /* mem block. i.e. hbm, sram etc. */ + uint32_t sub_block_index; + + /* explicit address of error */ + uint64_t address; + + /* method if error injection. i.e persistent, coherent etc. */ + uint64_t value; +}; + +struct ras_ta_init_flags { + uint8_t poison_mode_en; + uint8_t dgpu_mode; + uint16_t xcc_mask; + uint8_t channel_dis_num; + uint8_t nps_mode; + uint32_t active_umc_mask; +}; + +struct ras_ta_mca_addr { + uint64_t err_addr; + uint32_t ch_inst; + uint32_t umc_inst; + uint32_t node_inst; + uint32_t socket_id; +}; + +struct ras_ta_phy_addr { + uint64_t pa; + uint32_t bank; + uint32_t channel_idx; +}; + +struct ras_ta_query_address_input { + enum ras_ta_address_type addr_type; + struct ras_ta_mca_addr ma; + struct ras_ta_phy_addr pa; +}; + +struct ras_ta_output_flags { + uint8_t ras_init_success_flag; + uint8_t err_inject_switch_disable_flag; + uint8_t reg_access_failure_flag; +}; + +struct ras_ta_query_address_output { + /* don't use the flags here */ + struct ras_ta_output_flags flags; + struct ras_ta_mca_addr ma; + struct ras_ta_phy_addr pa; +}; + +/* Common input structure for RAS callbacks */ +/**********************************************************/ +union ras_ta_cmd_input { + struct ras_ta_init_flags init_flags; + struct ras_ta_enable_features_input enable_features; + struct ras_ta_disable_features_input disable_features; + struct ras_ta_trigger_error_input trigger_error; + struct ras_ta_query_address_input address; + uint32_t reserve_pad[256]; +}; + +union ras_ta_cmd_output { + struct ras_ta_output_flags flags; + struct ras_ta_query_address_output address; + uint32_t reserve_pad[256]; +}; + +struct ras_ta_cmd { + uint32_t cmd_id; + uint32_t resp_id; + uint32_t ras_status; + uint32_t if_version; + union ras_ta_cmd_input ras_in_message; + union ras_ta_cmd_output ras_out_message; +}; + +#endif -- cgit v1.2.3 From 19030244e1f99f3e9f8617ba4cb90ce49276eab0 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:31:24 +0800 Subject: drm/amd/ras: Add ras ioctl command handler Add ras ioctl command handler. V2: Remove ras global device list. 
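The per-device handle exposed to callers is the context pointer XOR'ed
with a magic constant, so user space never sees a raw kernel address.
A minimal sketch of the inverse lookup, assuming only the XOR scheme
visible in ras_cmd_add_device() in the diff below (the helper name is
hypothetical):

    static struct ras_core_context *ras_cmd_handle_to_core(uint64_t dev_handle)
    {
            /* undo: dev_handle = (uint64_t)ras_core ^ RAS_CMD_DEV_HANDLE_MAGIC */
            return (struct ras_core_context *)(dev_handle ^ RAS_CMD_DEV_HANDLE_MAGIC);
    }

Since XOR with a constant is its own inverse, applying the magic twice
round-trips the pointer.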
Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_cmd.c | 527 ++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_cmd.h | 425 ++++++++++++++++++++++++ 2 files changed, 952 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_cmd.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_cmd.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c new file mode 100644 index 000000000000..697619b30ea2 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c @@ -0,0 +1,527 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "ras.h" +#include "ras_cmd.h" + +#define RAS_CMD_MAJOR_VERSION 6 +#define RAS_CMD_MINOR_VERSION 0 +#define RAS_CMD_VERSION (((RAS_CMD_MAJOR_VERSION) << 10) | (RAS_CMD_MINOR_VERSION)) + +static int ras_cmd_add_device(struct ras_core_context *ras_core) +{ + INIT_LIST_HEAD(&ras_core->ras_cmd.head); + ras_core->ras_cmd.ras_core = ras_core; + ras_core->ras_cmd.dev_handle = (uint64_t)ras_core ^ RAS_CMD_DEV_HANDLE_MAGIC; + return 0; +} + +static int ras_cmd_remove_device(struct ras_core_context *ras_core) +{ + memset(&ras_core->ras_cmd, 0, sizeof(ras_core->ras_cmd)); + return 0; +} + +static int ras_get_block_ecc_info(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + struct ras_cmd_block_ecc_info_req *input_data = + (struct ras_cmd_block_ecc_info_req *)cmd->input_buff_raw; + struct ras_cmd_block_ecc_info_rsp *output_data = + (struct ras_cmd_block_ecc_info_rsp *)cmd->output_buff_raw; + struct ras_ecc_count err_data; + int ret; + + if (cmd->input_size != sizeof(struct ras_cmd_block_ecc_info_req)) + return RAS_CMD__ERROR_INVALID_INPUT_SIZE; + + memset(&err_data, 0, sizeof(err_data)); + ret = ras_aca_get_block_ecc_count(ras_core, input_data->block_id, &err_data); + if (ret) + return RAS_CMD__ERROR_GENERIC; + + output_data->ce_count = err_data.total_ce_count; + output_data->ue_count = err_data.total_ue_count; + output_data->de_count = err_data.total_de_count; + + cmd->output_size = sizeof(struct ras_cmd_block_ecc_info_rsp); + return RAS_CMD__SUCCESS; +} + +static void ras_cmd_update_bad_page_info(struct ras_cmd_bad_page_record *ras_cmd_record, + struct eeprom_umc_record *record) +{ + ras_cmd_record->retired_page = record->cur_nps_retired_row_pfn; + ras_cmd_record->ts = record->ts; + ras_cmd_record->err_type = record->err_type; + ras_cmd_record->mem_channel = record->mem_channel; + ras_cmd_record->mcumc_id = record->mcumc_id; + ras_cmd_record->address = record->address; + ras_cmd_record->bank = record->bank; + ras_cmd_record->valid = 1; +} + +static int ras_cmd_get_group_bad_pages(struct ras_core_context *ras_core, + uint32_t group_index, struct ras_cmd_bad_pages_info_rsp *output_data) +{ + struct eeprom_umc_record record; + struct ras_cmd_bad_page_record *ras_cmd_record; + uint32_t i = 0, bp_cnt = 0, group_cnt = 0; + + output_data->bp_in_group = 0; + output_data->group_index = 0; + + bp_cnt = ras_umc_get_badpage_count(ras_core); + if (bp_cnt) { + output_data->group_index = group_index; + group_cnt = bp_cnt / RAS_CMD_MAX_BAD_PAGES_PER_GROUP + + ((bp_cnt % RAS_CMD_MAX_BAD_PAGES_PER_GROUP) ? 
1 : 0); + + if (group_index >= group_cnt) + return RAS_CMD__ERROR_INVALID_INPUT_DATA; + + i = group_index * RAS_CMD_MAX_BAD_PAGES_PER_GROUP; + for (; + i < bp_cnt && output_data->bp_in_group < RAS_CMD_MAX_BAD_PAGES_PER_GROUP; + i++) { + if (ras_umc_get_badpage_record(ras_core, i, &record)) + return RAS_CMD__ERROR_GENERIC; + + ras_cmd_record = &output_data->records[i % RAS_CMD_MAX_BAD_PAGES_PER_GROUP]; + + memset(ras_cmd_record, 0, sizeof(*ras_cmd_record)); + ras_cmd_update_bad_page_info(ras_cmd_record, &record); + output_data->bp_in_group++; + } + } + output_data->bp_total_cnt = bp_cnt; + return RAS_CMD__SUCCESS; +} + +static int ras_cmd_get_bad_pages(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + struct ras_cmd_bad_pages_info_req *input_data = + (struct ras_cmd_bad_pages_info_req *)cmd->input_buff_raw; + struct ras_cmd_bad_pages_info_rsp *output_data = + (struct ras_cmd_bad_pages_info_rsp *)cmd->output_buff_raw; + int ret; + + if (cmd->input_size != sizeof(struct ras_cmd_bad_pages_info_req)) + return RAS_CMD__ERROR_INVALID_INPUT_SIZE; + + ret = ras_cmd_get_group_bad_pages(ras_core, input_data->group_index, output_data); + if (ret) + return RAS_CMD__ERROR_GENERIC; + + output_data->version = 0; + + cmd->output_size = sizeof(struct ras_cmd_bad_pages_info_rsp); + return RAS_CMD__SUCCESS; +} + +static int ras_cmd_clear_bad_page_info(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + if (cmd->input_size != sizeof(struct ras_cmd_dev_handle)) + return RAS_CMD__ERROR_INVALID_INPUT_SIZE; + + if (ras_eeprom_reset_table(ras_core)) + return RAS_CMD__ERROR_GENERIC; + + if (ras_umc_clean_badpage_data(ras_core)) + return RAS_CMD__ERROR_GENERIC; + + return RAS_CMD__SUCCESS; +} + +static int ras_cmd_reset_all_error_counts(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + if (cmd->input_size != sizeof(struct ras_cmd_dev_handle)) + return RAS_CMD__ERROR_INVALID_INPUT_SIZE; + + if (ras_aca_clear_all_blocks_ecc_count(ras_core)) + return RAS_CMD__ERROR_GENERIC; + + if (ras_umc_clear_logged_ecc(ras_core)) + return RAS_CMD__ERROR_GENERIC; + + return RAS_CMD__SUCCESS; +} + +static int ras_cmd_get_cper_snapshot(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + struct ras_cmd_cper_snapshot_rsp *output_data = + (struct ras_cmd_cper_snapshot_rsp *)cmd->output_buff_raw; + struct ras_log_batch_overview overview; + + if (cmd->input_size != sizeof(struct ras_cmd_cper_snapshot_req)) + return RAS_CMD__ERROR_INVALID_INPUT_SIZE; + + ras_log_ring_get_batch_overview(ras_core, &overview); + + output_data->total_cper_num = overview.logged_batch_count; + output_data->start_cper_id = overview.first_batch_id; + output_data->latest_cper_id = overview.last_batch_id; + + output_data->version = 0; + + cmd->output_size = sizeof(struct ras_cmd_cper_snapshot_rsp); + return RAS_CMD__SUCCESS; +} + +static int ras_cmd_get_cper_records(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + struct ras_cmd_cper_record_req *req = + (struct ras_cmd_cper_record_req *)cmd->input_buff_raw; + struct ras_cmd_cper_record_rsp *rsp = + (struct ras_cmd_cper_record_rsp *)cmd->output_buff_raw; + struct ras_log_info *trace[MAX_RECORD_PER_BATCH] = {0}; + struct ras_log_batch_overview overview; + uint32_t offset = 0, real_data_len = 0; + uint64_t batch_id; + uint8_t *buffer; + int ret = 0, i, count; + + if (cmd->input_size != sizeof(struct ras_cmd_cper_record_req)) + return RAS_CMD__ERROR_INVALID_INPUT_SIZE; + + 
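+	/* Validate the caller-supplied user buffer before staging CPER data. */
+	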
if (!req->buf_size || !req->buf_ptr || !req->cper_num) + return RAS_CMD__ERROR_INVALID_INPUT_DATA; + + if (!access_ok((void *)req->buf_ptr, req->buf_size)) { + RAS_DEV_ERR(ras_core->dev, "Invalid cper buffer memory!\n"); + return RAS_CMD__ERROR_INVALID_INPUT_DATA; + } + + buffer = kzalloc(req->buf_size, GFP_KERNEL); + if (!buffer) + return RAS_CMD__ERROR_GENERIC; + + ras_log_ring_get_batch_overview(ras_core, &overview); + for (i = 0; i < req->cper_num; i++) { + batch_id = req->cper_start_id + i; + if (batch_id >= overview.last_batch_id) + break; + + count = ras_log_ring_get_batch_records(ras_core, batch_id, trace, + ARRAY_SIZE(trace)); + if (count > 0) { + ret = ras_cper_generate_cper(ras_core, trace, count, + &buffer[offset], req->buf_size - offset, &real_data_len); + if (ret) + break; + + offset += real_data_len; + } + } + + if ((ret && (ret != -ENOMEM)) || + copy_to_user((void *)req->buf_ptr, buffer, offset)) { + kfree(buffer); + return RAS_CMD__ERROR_GENERIC; + } + + rsp->real_data_size = offset; + rsp->real_cper_num = i; + rsp->remain_num = (ret == -ENOMEM) ? (req->cper_num - i) : 0; + rsp->version = 0; + + cmd->output_size = sizeof(struct ras_cmd_cper_record_rsp); + + kfree(buffer); + + return RAS_CMD__SUCCESS; +} + +static int ras_cmd_get_batch_trace_snapshot(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + struct ras_cmd_batch_trace_snapshot_rsp *rsp = + (struct ras_cmd_batch_trace_snapshot_rsp *)cmd->output_buff_raw; + struct ras_log_batch_overview overview; + + + if (cmd->input_size != sizeof(struct ras_cmd_batch_trace_snapshot_req)) + return RAS_CMD__ERROR_INVALID_INPUT_SIZE; + + ras_log_ring_get_batch_overview(ras_core, &overview); + + rsp->total_batch_num = overview.logged_batch_count; + rsp->start_batch_id = overview.first_batch_id; + rsp->latest_batch_id = overview.last_batch_id; + rsp->version = 0; + + cmd->output_size = sizeof(struct ras_cmd_batch_trace_snapshot_rsp); + return RAS_CMD__SUCCESS; +} + +static int ras_cmd_get_batch_trace_records(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + struct ras_cmd_batch_trace_record_req *input_data = + (struct ras_cmd_batch_trace_record_req *)cmd->input_buff_raw; + struct ras_cmd_batch_trace_record_rsp *output_data = + (struct ras_cmd_batch_trace_record_rsp *)cmd->output_buff_raw; + struct ras_log_batch_overview overview; + struct ras_log_info *trace_arry[MAX_RECORD_PER_BATCH] = {0}; + struct ras_log_info *record; + int i, j, count = 0, offset = 0; + uint64_t id; + bool completed = false; + + if (cmd->input_size != sizeof(struct ras_cmd_batch_trace_record_req)) + return RAS_CMD__ERROR_INVALID_INPUT_SIZE; + + if ((!input_data->batch_num) || (input_data->batch_num > RAS_CMD_MAX_BATCH_NUM)) + return RAS_CMD__ERROR_INVALID_INPUT_DATA; + + ras_log_ring_get_batch_overview(ras_core, &overview); + if ((input_data->start_batch_id < overview.first_batch_id) || + (input_data->start_batch_id >= overview.last_batch_id)) + return RAS_CMD__ERROR_INVALID_INPUT_SIZE; + + for (i = 0; i < input_data->batch_num; i++) { + id = input_data->start_batch_id + i; + if (id >= overview.last_batch_id) { + completed = true; + break; + } + + count = ras_log_ring_get_batch_records(ras_core, + id, trace_arry, ARRAY_SIZE(trace_arry)); + if (count > 0) { + if ((offset + count) > RAS_CMD_MAX_TRACE_NUM) + break; + for (j = 0; j < count; j++) { + record = &output_data->records[offset + j]; + record->seqno = trace_arry[j]->seqno; + record->timestamp = trace_arry[j]->timestamp; + record->event = 
trace_arry[j]->event; + memcpy(&record->aca_reg, + &trace_arry[j]->aca_reg, sizeof(trace_arry[j]->aca_reg)); + } + } else { + count = 0; + } + + output_data->batchs[i].batch_id = id; + output_data->batchs[i].offset = offset; + output_data->batchs[i].trace_num = count; + offset += count; + } + + output_data->start_batch_id = input_data->start_batch_id; + output_data->real_batch_num = i; + output_data->remain_num = completed ? 0 : (input_data->batch_num - i); + output_data->version = 0; + + cmd->output_size = sizeof(struct ras_cmd_batch_trace_record_rsp); + + return RAS_CMD__SUCCESS; +} + +static enum ras_ta_block __get_ras_ta_block(enum ras_block_id block) +{ + switch (block) { + case RAS_BLOCK_ID__UMC: + return RAS_TA_BLOCK__UMC; + case RAS_BLOCK_ID__SDMA: + return RAS_TA_BLOCK__SDMA; + case RAS_BLOCK_ID__GFX: + return RAS_TA_BLOCK__GFX; + case RAS_BLOCK_ID__MMHUB: + return RAS_TA_BLOCK__MMHUB; + case RAS_BLOCK_ID__ATHUB: + return RAS_TA_BLOCK__ATHUB; + case RAS_BLOCK_ID__PCIE_BIF: + return RAS_TA_BLOCK__PCIE_BIF; + case RAS_BLOCK_ID__HDP: + return RAS_TA_BLOCK__HDP; + case RAS_BLOCK_ID__XGMI_WAFL: + return RAS_TA_BLOCK__XGMI_WAFL; + case RAS_BLOCK_ID__DF: + return RAS_TA_BLOCK__DF; + case RAS_BLOCK_ID__SMN: + return RAS_TA_BLOCK__SMN; + case RAS_BLOCK_ID__SEM: + return RAS_TA_BLOCK__SEM; + case RAS_BLOCK_ID__MP0: + return RAS_TA_BLOCK__MP0; + case RAS_BLOCK_ID__MP1: + return RAS_TA_BLOCK__MP1; + case RAS_BLOCK_ID__FUSE: + return RAS_TA_BLOCK__FUSE; + case RAS_BLOCK_ID__MCA: + return RAS_TA_BLOCK__MCA; + case RAS_BLOCK_ID__VCN: + return RAS_TA_BLOCK__VCN; + case RAS_BLOCK_ID__JPEG: + return RAS_TA_BLOCK__JPEG; + default: + return RAS_TA_BLOCK__UMC; + } +} + +static enum ras_ta_error_type __get_ras_ta_err_type(enum ras_ecc_err_type error) +{ + switch (error) { + case RAS_ECC_ERR__NONE: + return RAS_TA_ERROR__NONE; + case RAS_ECC_ERR__PARITY: + return RAS_TA_ERROR__PARITY; + case RAS_ECC_ERR__SINGLE_CORRECTABLE: + return RAS_TA_ERROR__SINGLE_CORRECTABLE; + case RAS_ECC_ERR__MULTI_UNCORRECTABLE: + return RAS_TA_ERROR__MULTI_UNCORRECTABLE; + case RAS_ECC_ERR__POISON: + return RAS_TA_ERROR__POISON; + default: + return RAS_TA_ERROR__NONE; + } +} + +static int ras_cmd_inject_error(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + struct ras_cmd_inject_error_req *req = + (struct ras_cmd_inject_error_req *)cmd->input_buff_raw; + struct ras_cmd_inject_error_rsp *output_data = + (struct ras_cmd_inject_error_rsp *)cmd->output_buff_raw; + int ret = 0; + struct ras_ta_trigger_error_input block_info = { + .block_id = __get_ras_ta_block(req->block_id), + .sub_block_index = req->subblock_id, + .inject_error_type = __get_ras_ta_err_type(req->error_type), + .address = req->address, + .value = req->method, + }; + + ret = ras_psp_trigger_error(ras_core, &block_info, req->instance_mask); + if (!ret) { + output_data->version = 0; + output_data->address = block_info.address; + cmd->output_size = sizeof(struct ras_cmd_inject_error_rsp); + } else { + RAS_DEV_ERR(ras_core->dev, "ras inject block %u failed %d\n", req->block_id, ret); + ret = RAS_CMD__ERROR_ACCESS_DENIED; + } + + return ret; +} + +static struct ras_cmd_func_map ras_cmd_maps[] = { + {RAS_CMD__INJECT_ERROR, ras_cmd_inject_error}, + {RAS_CMD__GET_BLOCK_ECC_STATUS, ras_get_block_ecc_info}, + {RAS_CMD__GET_BAD_PAGES, ras_cmd_get_bad_pages}, + {RAS_CMD__CLEAR_BAD_PAGE_INFO, ras_cmd_clear_bad_page_info}, + {RAS_CMD__RESET_ALL_ERROR_COUNTS, ras_cmd_reset_all_error_counts}, + {RAS_CMD__GET_CPER_SNAPSHOT, 
ras_cmd_get_cper_snapshot}, + {RAS_CMD__GET_CPER_RECORD, ras_cmd_get_cper_records}, + {RAS_CMD__GET_BATCH_TRACE_SNAPSHOT, ras_cmd_get_batch_trace_snapshot}, + {RAS_CMD__GET_BATCH_TRACE_RECORD, ras_cmd_get_batch_trace_records}, +}; + +int rascore_handle_cmd(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + struct ras_cmd_func_map *ras_cmd = NULL; + int i; + + for (i = 0; i < ARRAY_SIZE(ras_cmd_maps); i++) { + if (cmd->cmd_id == ras_cmd_maps[i].cmd_id) { + ras_cmd = &ras_cmd_maps[i]; + break; + } + } + + if (!ras_cmd) + return RAS_CMD__ERROR_UKNOWN_CMD; + + return ras_cmd->func(ras_core, cmd, data); +} + +int ras_cmd_init(struct ras_core_context *ras_core) +{ + return ras_cmd_add_device(ras_core); +} + +int ras_cmd_fini(struct ras_core_context *ras_core) +{ + ras_cmd_remove_device(ras_core); + return 0; +} + +int ras_cmd_query_interface_info(struct ras_core_context *ras_core, + struct ras_query_interface_info_rsp *rsp) +{ + rsp->ras_cmd_major_ver = RAS_CMD_MAJOR_VERSION; + rsp->ras_cmd_minor_ver = RAS_CMD_MINOR_VERSION; + + return 0; +} + +int ras_cmd_translate_soc_pa_to_bank(struct ras_core_context *ras_core, + uint64_t soc_pa, struct ras_fb_bank_addr *bank_addr) +{ + struct umc_bank_addr umc_bank = {0}; + int ret; + + ret = ras_umc_translate_soc_pa_and_bank(ras_core, &soc_pa, &umc_bank, false); + if (ret) + return RAS_CMD__ERROR_GENERIC; + + bank_addr->stack_id = umc_bank.stack_id; + bank_addr->bank_group = umc_bank.bank_group; + bank_addr->bank = umc_bank.bank; + bank_addr->row = umc_bank.row; + bank_addr->column = umc_bank.column; + bank_addr->channel = umc_bank.channel; + bank_addr->subchannel = umc_bank.subchannel; + + return 0; +} + +int ras_cmd_translate_bank_to_soc_pa(struct ras_core_context *ras_core, + struct ras_fb_bank_addr bank_addr, uint64_t *soc_pa) +{ + struct umc_bank_addr umc_bank = {0}; + + umc_bank.stack_id = bank_addr.stack_id; + umc_bank.bank_group = bank_addr.bank_group; + umc_bank.bank = bank_addr.bank; + umc_bank.row = bank_addr.row; + umc_bank.column = bank_addr.column; + umc_bank.channel = bank_addr.channel; + umc_bank.subchannel = bank_addr.subchannel; + + return ras_umc_translate_soc_pa_and_bank(ras_core, soc_pa, &umc_bank, true); +} + +uint64_t ras_cmd_get_dev_handle(struct ras_core_context *ras_core) +{ + return ras_core->ras_cmd.dev_handle; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_cmd.h b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.h new file mode 100644 index 000000000000..6df8c70f5ad8 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.h @@ -0,0 +1,425 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RAS_CMD_H__ +#define __RAS_CMD_H__ +#include "ras.h" +#include "ras_eeprom.h" +#include "ras_log_ring.h" +#include "ras_cper.h" + +#define RAS_CMD_DEV_HANDLE_MAGIC 0xFEEDAD00UL + +#define RAS_CMD_MAX_IN_SIZE 256 +#define RAS_CMD_MAX_GPU_NUM 32 +#define RAS_CMD_MAX_BAD_PAGES_PER_GROUP 32 + +/* position of instance value in sub_block_index of + * ta_ras_trigger_error_input, the sub block uses lower 12 bits + */ +#define RAS_TA_INST_MASK 0xfffff000 +#define RAS_TA_INST_SHIFT 0xc + +enum ras_cmd_interface_type { + RAS_CMD_INTERFACE_TYPE_NONE, + RAS_CMD_INTERFACE_TYPE_AMDGPU, + RAS_CMD_INTERFACE_TYPE_VF, + RAS_CMD_INTERFACE_TYPE_PF, +}; + +enum ras_cmd_id_range { + RAS_CMD_ID_COMMON_START = 0, + RAS_CMD_ID_COMMON_END = 0x10000, + RAS_CMD_ID_AMDGPU_START = RAS_CMD_ID_COMMON_END, + RAS_CMD_ID_AMDGPU_END = 0x20000, + RAS_CMD_ID_MXGPU_START = RAS_CMD_ID_AMDGPU_END, + RAS_CMD_ID_MXGPU_END = 0x30000, + RAS_CMD_ID_MXGPU_VF_START = RAS_CMD_ID_MXGPU_END, + RAS_CMD_ID_MXGPU_VF_END = 0x40000, +}; + +enum ras_cmd_id { + RAS_CMD__BEGIN = RAS_CMD_ID_COMMON_START, + RAS_CMD__QUERY_INTERFACE_INFO, + RAS_CMD__GET_DEVICES_INFO, + RAS_CMD__GET_BLOCK_ECC_STATUS, + RAS_CMD__INJECT_ERROR, + RAS_CMD__GET_BAD_PAGES, + RAS_CMD__CLEAR_BAD_PAGE_INFO, + RAS_CMD__RESET_ALL_ERROR_COUNTS, + RAS_CMD__GET_SAFE_FB_ADDRESS_RANGES, + RAS_CMD__TRANSLATE_FB_ADDRESS, + RAS_CMD__GET_LINK_TOPOLOGY, + RAS_CMD__GET_CPER_SNAPSHOT, + RAS_CMD__GET_CPER_RECORD, + RAS_CMD__GET_BATCH_TRACE_SNAPSHOT, + RAS_CMD__GET_BATCH_TRACE_RECORD, + RAS_CMD__SUPPORTED_MAX = RAS_CMD_ID_COMMON_END, +}; + +enum ras_cmd_response { + RAS_CMD__SUCCESS = 0, + RAS_CMD__SUCCESS_EXEED_BUFFER, + RAS_CMD__ERROR_UKNOWN_CMD, + RAS_CMD__ERROR_INVALID_CMD, + RAS_CMD__ERROR_VERSION, + RAS_CMD__ERROR_INVALID_INPUT_SIZE, + RAS_CMD__ERROR_INVALID_INPUT_DATA, + RAS_CMD__ERROR_DRV_INIT_FAIL, + RAS_CMD__ERROR_ACCESS_DENIED, + RAS_CMD__ERROR_GENERIC, + RAS_CMD__ERROR_TIMEOUT, +}; + +enum ras_error_type { + RAS_TYPE_ERROR__NONE = 0, + RAS_TYPE_ERROR__PARITY = 1, + RAS_TYPE_ERROR__SINGLE_CORRECTABLE = 2, + RAS_TYPE_ERROR__MULTI_UNCORRECTABLE = 4, + RAS_TYPE_ERROR__POISON = 8, +}; + +struct ras_core_context; +struct ras_cmd_ioctl; + +struct ras_cmd_mgr { + struct list_head head; + struct ras_core_context *ras_core; + uint64_t dev_handle; +}; + +struct ras_cmd_func_map { + uint32_t cmd_id; + int (*func)(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data); +}; + +struct ras_device_bdf { + union { + struct { + uint32_t function : 3; + uint32_t device : 5; + uint32_t bus : 8; + uint32_t domain : 16; + }; + uint32_t u32_all; + }; +}; + +struct ras_cmd_param { + uint32_t idx_vf; + void *data; +}; + +#pragma pack(push, 8) +struct ras_cmd_ioctl { + uint32_t magic; + union { + struct { + uint16_t ras_cmd_minor_ver : 10; + uint16_t ras_cmd_major_ver : 6; + }; + uint16_t ras_cmd_ver; + }; + union { + struct { + uint16_t plat_major_ver : 10; + uint16_t plat_minor_ver : 6; + }; + uint16_t plat_ver; + }; + uint32_t cmd_id; + uint32_t cmd_res; + uint32_t input_size; + uint32_t output_size; + uint32_t reserved[6]; + uint8_t input_buff_raw[RAS_CMD_MAX_IN_SIZE]; + uint8_t output_buff_raw[]; +}; + +struct ras_cmd_dev_handle { + uint64_t dev_handle; +}; + +struct 
ras_cmd_block_ecc_info_req { + struct ras_cmd_dev_handle dev; + uint32_t block_id; + uint32_t subblock_id; + uint32_t reserved[4]; +}; + +struct ras_cmd_block_ecc_info_rsp { + uint32_t version; + uint32_t ce_count; + uint32_t ue_count; + uint32_t de_count; + uint32_t reserved[6]; +}; + +struct ras_cmd_inject_error_req { + struct ras_cmd_dev_handle dev; + uint32_t block_id; + uint32_t subblock_id; + uint64_t address; + uint32_t error_type; + uint32_t instance_mask; + union { + struct { + /* vf index */ + uint64_t vf_idx : 6; + /* method of error injection. i.e persistent, coherent etc */ + uint64_t method : 10; + uint64_t rsv : 48; + }; + uint64_t value; + }; + uint32_t reserved[8]; +}; + +struct ras_cmd_inject_error_rsp { + uint32_t version; + uint32_t reserved[5]; + uint64_t address; +}; + +struct ras_cmd_dev_info { + uint64_t dev_handle; + uint32_t location_id; + uint32_t ecc_enabled; + uint32_t ecc_supported; + uint32_t vf_num; + uint32_t asic_type; + uint32_t oam_id; + uint32_t reserved[8]; +}; + +struct ras_cmd_devices_info_rsp { + uint32_t version; + uint32_t dev_num; + uint32_t reserved[6]; + struct ras_cmd_dev_info devs[RAS_CMD_MAX_GPU_NUM]; +}; + +struct ras_cmd_bad_page_record { + union { + uint64_t address; + uint64_t offset; + }; + uint64_t retired_page; + uint64_t ts; + + uint32_t err_type; + + union { + unsigned char bank; + unsigned char cu; + }; + + unsigned char mem_channel; + unsigned char mcumc_id; + + unsigned char valid; + unsigned char reserved[8]; +}; + +struct ras_cmd_bad_pages_info_req { + struct ras_cmd_dev_handle device; + uint32_t group_index; + uint32_t reserved[5]; +}; + +struct ras_cmd_bad_pages_info_rsp { + uint32_t version; + uint32_t group_index; + uint32_t bp_in_group; + uint32_t bp_total_cnt; + uint32_t reserved[4]; + struct ras_cmd_bad_page_record records[RAS_CMD_MAX_BAD_PAGES_PER_GROUP]; +}; + +struct ras_query_interface_info_req { + uint32_t reserved[8]; +}; + +struct ras_query_interface_info_rsp { + uint32_t version; + uint32_t ras_cmd_major_ver; + uint32_t ras_cmd_minor_ver; + uint32_t plat_major_ver; + uint32_t plat_minor_ver; + uint8_t interface_type; + uint8_t rsv[3]; + uint32_t reserved[8]; +}; + +#define RAS_MAX_NUM_SAFE_RANGES 64 +struct ras_cmd_ras_safe_fb_address_ranges_rsp { + uint32_t version; + uint32_t num_ranges; + uint32_t reserved[4]; + struct { + uint64_t start; + uint64_t size; + uint32_t idx; + uint32_t reserved[3]; + } range[RAS_MAX_NUM_SAFE_RANGES]; +}; + +enum ras_fb_addr_type { + RAS_FB_ADDR_SOC_PHY, /* SPA */ + RAS_FB_ADDR_BANK, + RAS_FB_ADDR_VF_PHY, /* GPA */ + RAS_FB_ADDR_UNKNOWN +}; + +struct ras_fb_bank_addr { + uint32_t stack_id; /* SID */ + uint32_t bank_group; + uint32_t bank; + uint32_t row; + uint32_t column; + uint32_t channel; + uint32_t subchannel; /* Also called Pseudochannel (PC) */ + uint32_t reserved[3]; +}; + +struct ras_fb_vf_phy_addr { + uint32_t vf_idx; + uint32_t reserved; + uint64_t addr; +}; + +union ras_translate_fb_address { + struct ras_fb_bank_addr bank_addr; + uint64_t soc_phy_addr; + struct ras_fb_vf_phy_addr vf_phy_addr; +}; + +struct ras_cmd_translate_fb_address_req { + struct ras_cmd_dev_handle dev; + enum ras_fb_addr_type src_addr_type; + enum ras_fb_addr_type dest_addr_type; + union ras_translate_fb_address trans_addr; +}; + +struct ras_cmd_translate_fb_address_rsp { + uint32_t version; + uint32_t reserved[5]; + union ras_translate_fb_address trans_addr; +}; + +struct ras_dev_link_topology_req { + struct ras_cmd_dev_handle src; + struct ras_cmd_dev_handle dst; +}; + +struct 
ras_dev_link_topology_rsp { + uint32_t version; + uint32_t link_status; /* HW status of the link */ + uint32_t link_type; /* type of the link */ + uint32_t num_hops; /* number of hops */ + uint32_t reserved[8]; +}; + +struct ras_cmd_cper_snapshot_req { + struct ras_cmd_dev_handle dev; +}; + +struct ras_cmd_cper_snapshot_rsp { + uint32_t version; + uint32_t reserved[4]; + uint32_t total_cper_num; + uint64_t start_cper_id; + uint64_t latest_cper_id; +}; + +struct ras_cmd_cper_record_req { + struct ras_cmd_dev_handle dev; + uint64_t cper_start_id; + uint32_t cper_num; + uint32_t buf_size; + uint64_t buf_ptr; + uint32_t reserved[4]; +}; + +struct ras_cmd_cper_record_rsp { + uint32_t version; + uint32_t real_data_size; + uint32_t real_cper_num; + uint32_t remain_num; + uint32_t reserved[4]; +}; + +struct ras_cmd_batch_trace_snapshot_req { + struct ras_cmd_dev_handle dev; +}; + +struct ras_cmd_batch_trace_snapshot_rsp { + uint32_t version; + uint32_t reserved[4]; + uint32_t total_batch_num; + uint64_t start_batch_id; + uint64_t latest_batch_id; +}; + +struct ras_cmd_batch_trace_record_req { + struct ras_cmd_dev_handle dev; + uint64_t start_batch_id; + uint32_t batch_num; + uint32_t reserved[5]; +}; + +struct batch_ras_trace_info { + uint64_t batch_id; + uint16_t offset; + uint8_t trace_num; + uint8_t rsv; + uint32_t reserved; +}; + +#define RAS_CMD_MAX_BATCH_NUM 300 +#define RAS_CMD_MAX_TRACE_NUM 300 +struct ras_cmd_batch_trace_record_rsp { + uint32_t version; + uint16_t real_batch_num; + uint16_t remain_num; + uint64_t start_batch_id; + uint32_t reserved[2]; + struct batch_ras_trace_info batchs[RAS_CMD_MAX_BATCH_NUM]; + struct ras_log_info records[RAS_CMD_MAX_TRACE_NUM]; +}; + +#pragma pack(pop) + +int ras_cmd_init(struct ras_core_context *ras_core); +int ras_cmd_fini(struct ras_core_context *ras_core); +int rascore_handle_cmd(struct ras_core_context *ras_core, struct ras_cmd_ioctl *cmd, void *data); +uint64_t ras_cmd_get_dev_handle(struct ras_core_context *ras_core); +int ras_cmd_query_interface_info(struct ras_core_context *ras_core, + struct ras_query_interface_info_rsp *rsp); +int ras_cmd_translate_soc_pa_to_bank(struct ras_core_context *ras_core, + uint64_t soc_pa, struct ras_fb_bank_addr *bank_addr); +int ras_cmd_translate_bank_to_soc_pa(struct ras_core_context *ras_core, + struct ras_fb_bank_addr bank_addr, uint64_t *soc_pa); +#endif -- cgit v1.2.3 From ea61341b9014bda9f59101f864692f0e26bfa326 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:33:58 +0800 Subject: drm/amd/ras: Add thread to handle ras events Add thread to handle ras events. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_process.c | 315 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_process.h | 53 +++++ 2 files changed, 368 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_process.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_process.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_process.c b/drivers/gpu/drm/amd/ras/rascore/ras_process.c new file mode 100644 index 000000000000..02f0657f78a3 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_process.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "ras.h"
+#include "ras_process.h"
+
+#define RAS_EVENT_FIFO_SIZE (128 * sizeof(struct ras_event_req))
+
+#define RAS_POLLING_ECC_TIMEOUT 300
+
+static int ras_process_put_event(struct ras_core_context *ras_core,
+		struct ras_event_req *req)
+{
+	struct ras_process *ras_proc = &ras_core->ras_proc;
+	int ret;
+
+	ret = kfifo_in_spinlocked(&ras_proc->event_fifo,
+			req, sizeof(*req), &ras_proc->fifo_spinlock);
+	if (!ret) {
+		RAS_DEV_ERR(ras_core->dev, "Poison message fifo is full!\n");
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
+static int ras_process_add_reset_gpu_event(struct ras_core_context *ras_core,
+		uint32_t reset_cause)
+{
+	struct ras_event_req req = {0};
+
+	req.reset = reset_cause;
+
+	return ras_process_put_event(ras_core, &req);
+}
+
+static int ras_process_get_event(struct ras_core_context *ras_core,
+		struct ras_event_req *req)
+{
+	struct ras_process *ras_proc = &ras_core->ras_proc;
+
+	return kfifo_out_spinlocked(&ras_proc->event_fifo,
+			req, sizeof(*req), &ras_proc->fifo_spinlock);
+}
+
+static void ras_process_clear_event_fifo(struct ras_core_context *ras_core)
+{
+	struct ras_event_req req;
+	int ret;
+
+	do {
+		ret = ras_process_get_event(ras_core, &req);
+	} while (ret);
+}
+
+#define AMDGPU_RAS_WAITING_DATA_READY 200
+static int ras_process_umc_event(struct ras_core_context *ras_core,
+		uint32_t event_count)
+{
+	struct ras_ecc_count ecc_data;
+	int ret = 0;
+	uint32_t timeout = 0;
+	uint32_t detected_de_count = 0;
+
+	do {
+		memset(&ecc_data, 0, sizeof(ecc_data));
+		ret = ras_core_update_ecc_info(ras_core);
+		if (ret)
+			return ret;
+
+		ret = ras_core_query_block_ecc_data(ras_core, RAS_BLOCK_ID__UMC, &ecc_data);
+		if (ret)
+			return ret;
+
+		if (ecc_data.new_de_count) {
+			detected_de_count += ecc_data.new_de_count;
+			timeout = 0;
+		} else {
+			if (!timeout && event_count)
+				timeout = AMDGPU_RAS_WAITING_DATA_READY;
+
+			if (timeout) {
+				if (!--timeout)
+					break;
+
+				msleep(1);
+			}
+		}
+	} while (detected_de_count < event_count);
+
+	if (detected_de_count && ras_core_gpu_is_rma(ras_core))
+		ras_process_add_reset_gpu_event(ras_core, GPU_RESET_CAUSE_RMA);
+
+	return 0;
+}
+
+static int ras_process_non_umc_event(struct ras_core_context *ras_core)
+{
+	struct ras_process *ras_proc = &ras_core->ras_proc;
+	struct ras_event_req req;
+	uint32_t event_count = kfifo_len(&ras_proc->event_fifo) / sizeof(req);
+	uint32_t reset_flags = 0;
+	int ret = 0, i;
+
+	for (i = 0; i < event_count; i++) {
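+		/* Pop one queued event; a zero return means the fifo is empty. */
+		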
memset(&req, 0, sizeof(req));
+		ret = ras_process_get_event(ras_core, &req);
+		if (!ret)
+			continue;
+
+		ras_core_event_notify(ras_core,
+				RAS_EVENT_ID__POISON_CONSUMPTION, &req);
+
+		reset_flags |= req.reset;
+
+		if (req.reset == GPU_RESET_CAUSE_RMA)
+			continue;
+
+		if (req.reset)
+			RAS_DEV_INFO(ras_core->dev,
+				"{%llu} GPU reset for %s RAS poison consumption is issued!\n",
+				req.seqno, ras_core_get_ras_block_name(req.block));
+		else
+			RAS_DEV_INFO(ras_core->dev,
+				"{%llu} %s RAS poison consumption is issued!\n",
+				req.seqno, ras_core_get_ras_block_name(req.block));
+	}
+
+	if (reset_flags) {
+		ret = ras_core_event_notify(ras_core,
+				RAS_EVENT_ID__RESET_GPU, &reset_flags);
+		if (!ret && (reset_flags & GPU_RESET_CAUSE_RMA))
+			return -RAS_CORE_GPU_IN_MODE1_RESET;
+	}
+
+	return ret;
+}
+
+int ras_process_handle_ras_event(struct ras_core_context *ras_core)
+{
+	struct ras_process *ras_proc = &ras_core->ras_proc;
+	uint32_t umc_event_count;
+	int ret;
+
+	ras_aca_clear_fatal_flag(ras_core);
+	ras_umc_log_pending_bad_bank(ras_core);
+
+	do {
+		umc_event_count = atomic_read(&ras_proc->umc_interrupt_count);
+		ret = ras_process_umc_event(ras_core, umc_event_count);
+		if (ret == -RAS_CORE_GPU_IN_MODE1_RESET)
+			break;
+
+		if (umc_event_count)
+			atomic_sub(umc_event_count, &ras_proc->umc_interrupt_count);
+	} while (atomic_read(&ras_proc->umc_interrupt_count));
+
+	if ((ret != -RAS_CORE_GPU_IN_MODE1_RESET) &&
+	    (kfifo_len(&ras_proc->event_fifo)))
+		ret = ras_process_non_umc_event(ras_core);
+
+	if (ret == -RAS_CORE_GPU_IN_MODE1_RESET) {
+		/* Clear poison fifo */
+		ras_process_clear_event_fifo(ras_core);
+		atomic_set(&ras_proc->umc_interrupt_count, 0);
+	}
+
+	return ret;
+}
+
+static int thread_wait_condition(void *param)
+{
+	struct ras_process *ras_proc = (struct ras_process *)param;
+
+	return (kthread_should_stop() ||
+		atomic_read(&ras_proc->ras_interrupt_req));
+}
+
+static int ras_process_thread(void *context)
+{
+	struct ras_core_context *ras_core = (struct ras_core_context *)context;
+	struct ras_process *ras_proc = &ras_core->ras_proc;
+
+	while (!kthread_should_stop()) {
+		ras_wait_event_interruptible_timeout(&ras_proc->ras_process_wq,
+			thread_wait_condition, ras_proc,
+			msecs_to_jiffies(RAS_POLLING_ECC_TIMEOUT));
+
+		if (kthread_should_stop())
+			break;
+
+		if (!ras_core->is_initialized)
+			continue;
+
+		atomic_set(&ras_proc->ras_interrupt_req, 0);
+
+		if (ras_core_gpu_in_reset(ras_core))
+			continue;
+
+		if (ras_core->sys_fn && ras_core->sys_fn->async_handle_ras_event)
+			ras_core->sys_fn->async_handle_ras_event(ras_core, NULL);
+		else
+			ras_process_handle_ras_event(ras_core);
+	}
+
+	return 0;
+}
+
+int ras_process_init(struct ras_core_context *ras_core)
+{
+	struct ras_process *ras_proc = &ras_core->ras_proc;
+	int ret;
+
+	ret = kfifo_alloc(&ras_proc->event_fifo, RAS_EVENT_FIFO_SIZE, GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	spin_lock_init(&ras_proc->fifo_spinlock);
+
+	init_waitqueue_head(&ras_proc->ras_process_wq);
+
+	ras_proc->ras_process_thread = kthread_run(ras_process_thread,
+			(void *)ras_core, "ras_process_thread");
+	if (IS_ERR(ras_proc->ras_process_thread)) {
+		ret = PTR_ERR(ras_proc->ras_process_thread);
+		ras_proc->ras_process_thread = NULL;
+		RAS_DEV_ERR(ras_core->dev, "Failed to create ras_process_thread.\n");
+		goto err;
+	}
+
+	return 0;
+
+err:
+	ras_process_fini(ras_core);
+	return ret;
+}
+
+int ras_process_fini(struct ras_core_context *ras_core)
+{
+	struct ras_process *ras_proc = &ras_core->ras_proc;
+
+	if (ras_proc->ras_process_thread) {
+		kthread_stop(ras_proc->ras_process_thread);
+		ras_proc->ras_process_thread = NULL;
+	}
+
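+	/* The worker thread is stopped; it is safe to free the event fifo. */
+	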
kfifo_free(&ras_proc->event_fifo); + + return 0; +} + +static int ras_process_add_umc_interrupt_req(struct ras_core_context *ras_core, + struct ras_event_req *req) +{ + struct ras_process *ras_proc = &ras_core->ras_proc; + + atomic_inc(&ras_proc->umc_interrupt_count); + atomic_inc(&ras_proc->ras_interrupt_req); + + wake_up(&ras_proc->ras_process_wq); + return 0; +} + +static int ras_process_add_non_umc_interrupt_req(struct ras_core_context *ras_core, + struct ras_event_req *req) +{ + struct ras_process *ras_proc = &ras_core->ras_proc; + int ret; + + ret = ras_process_put_event(ras_core, req); + if (!ret) { + atomic_inc(&ras_proc->ras_interrupt_req); + wake_up(&ras_proc->ras_process_wq); + } + + return ret; +} + +int ras_process_add_interrupt_req(struct ras_core_context *ras_core, + struct ras_event_req *req, bool is_umc) +{ + int ret; + + if (!ras_core) + return -EINVAL; + + if (!ras_core->is_initialized) + return -EPERM; + + if (is_umc) + ret = ras_process_add_umc_interrupt_req(ras_core, req); + else + ret = ras_process_add_non_umc_interrupt_req(ras_core, req); + + return ret; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_process.h b/drivers/gpu/drm/amd/ras/rascore/ras_process.h new file mode 100644 index 000000000000..28458b50510e --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_process.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __RAS_PROCESS_H__ +#define __RAS_PROCESS_H__ + +struct ras_event_req { + uint64_t seqno; + uint32_t idx_vf; + uint32_t block; + uint16_t pasid; + uint32_t reset; + void *pasid_fn; + void *data; +}; + +struct ras_process { + void *dev; + void *ras_process_thread; + wait_queue_head_t ras_process_wq; + atomic_t ras_interrupt_req; + atomic_t umc_interrupt_count; + struct kfifo event_fifo; + spinlock_t fifo_spinlock; +}; + +struct ras_core_context; +int ras_process_init(struct ras_core_context *ras_core); +int ras_process_fini(struct ras_core_context *ras_core); +int ras_process_handle_ras_event(struct ras_core_context *ras_core); +int ras_process_add_interrupt_req(struct ras_core_context *ras_core, + struct ras_event_req *req, bool is_umc); +#endif -- cgit v1.2.3 From 0ec9ed84fb57e216d4f5cc8ebc5e9f5d82c9b17e Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:37:35 +0800 Subject: drm/amd/ras: Use ring buffer to record ras ecc data Use ring buffer to record ras ecc data. 
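As a reference sketch of the indexing scheme introduced here (macro names as
defined by this patch), each radix-tree key packs a monotonically increasing
batch id with a per-batch sub-sequence number:

	/* Batch id lives in the high bits; the sub-sequence number
	 * (0 .. MAX_RECORD_PER_BATCH - 1) lives in the low 8 bits.
	 */
	idx = BATCH_IDX_TO_TREE_IDX(batch_id, sub_seqno); /* (batch_id << 8) | sub_seqno */
	batch_id = RAS_LOG_SEQNO_TO_BATCH_IDX(idx);       /* idx >> 8 */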
V3: Change commit message and rename the file and function names. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c | 310 +++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_log_ring.h | 93 ++++++++ 2 files changed, 403 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_log_ring.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c b/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c new file mode 100644 index 000000000000..bca094058f91 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
+#include "ras.h"
+#include "ras_core_status.h"
+#include "ras_log_ring.h"
+
+#define RAS_LOG_MAX_QUERY_SIZE 0xC000
+#define RAS_LOG_MEM_TEMP_SIZE 0x200
+#define RAS_LOG_MEMPOOL_SIZE \
+	(RAS_LOG_MAX_QUERY_SIZE + RAS_LOG_MEM_TEMP_SIZE)
+
+#define BATCH_IDX_TO_TREE_IDX(batch_idx, sn) (((batch_idx) << 8) | (sn))
+
+static const uint64_t ras_rma_aca_reg[ACA_REG_MAX_COUNT] = {
+	[ACA_REG_IDX__CTL] = 0x1,
+	[ACA_REG_IDX__STATUS] = 0xB000000000000137,
+	[ACA_REG_IDX__ADDR] = 0x0,
+	[ACA_REG_IDX__MISC0] = 0x0,
+	[ACA_REG_IDX__CONFG] = 0x1ff00000002,
+	[ACA_REG_IDX__IPID] = 0x9600000000,
+	[ACA_REG_IDX__SYND] = 0x0,
+};
+
+static uint64_t ras_log_ring_get_logged_ecc_count(struct ras_core_context *ras_core)
+{
+	struct ras_log_ring *log_ring = &ras_core->ras_log_ring;
+	uint64_t count = 0;
+
+	if (log_ring->logged_ecc_count < 0) {
+		RAS_DEV_WARN(ras_core->dev,
+			"Error: the logged ras count should not be less than 0!\n");
+		count = 0;
+	} else {
+		count = log_ring->logged_ecc_count;
+	}
+
+	if (count > RAS_LOG_MEMPOOL_SIZE)
+		RAS_DEV_WARN(ras_core->dev,
+			"Error: the logged ras count is out of range!\n");
+
+	return count;
+}
+
+static int ras_log_ring_add_data(struct ras_core_context *ras_core,
+		struct ras_log_info *log, struct ras_log_batch_tag *batch_tag)
+{
+	struct ras_log_ring *log_ring = &ras_core->ras_log_ring;
+	unsigned long flags = 0;
+	int ret;
+
+	if (batch_tag && (batch_tag->sub_seqno >= MAX_RECORD_PER_BATCH)) {
+		RAS_DEV_ERR(ras_core->dev,
+			"Invalid batch sub seqno:%d, batch:0x%llx\n",
+			batch_tag->sub_seqno, batch_tag->batch_id);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&log_ring->spin_lock, flags);
+	if (batch_tag) {
+		log->seqno =
+			BATCH_IDX_TO_TREE_IDX(batch_tag->batch_id, batch_tag->sub_seqno);
+		batch_tag->sub_seqno++;
+	} else {
+		log->seqno = BATCH_IDX_TO_TREE_IDX(log_ring->mono_upward_batch_id, 0);
+		log_ring->mono_upward_batch_id++;
+	}
+	ret = radix_tree_insert(&log_ring->ras_log_root, log->seqno, log);
+	if (!ret)
+		log_ring->logged_ecc_count++;
+	spin_unlock_irqrestore(&log_ring->spin_lock, flags);
+
+	if (ret) {
+		RAS_DEV_ERR(ras_core->dev,
+			"Failed to add ras log!
seqno:0x%llx, ret:%d\n", + log->seqno, ret); + mempool_free(log, log_ring->ras_log_mempool); + } + + return ret; +} + +static int ras_log_ring_delete_data(struct ras_core_context *ras_core, uint32_t count) +{ + struct ras_log_ring *log_ring = &ras_core->ras_log_ring; + unsigned long flags = 0; + uint32_t i = 0, j = 0; + uint64_t batch_id, idx; + void *data; + int ret = -ENODATA; + + if (count > ras_log_ring_get_logged_ecc_count(ras_core)) + return -EINVAL; + + spin_lock_irqsave(&log_ring->spin_lock, flags); + batch_id = log_ring->last_del_batch_id; + while (batch_id < log_ring->mono_upward_batch_id) { + for (j = 0; j < MAX_RECORD_PER_BATCH; j++) { + idx = BATCH_IDX_TO_TREE_IDX(batch_id, j); + data = radix_tree_delete(&log_ring->ras_log_root, idx); + if (data) { + mempool_free(data, log_ring->ras_log_mempool); + log_ring->logged_ecc_count--; + i++; + } + } + batch_id = ++log_ring->last_del_batch_id; + if (i >= count) { + ret = 0; + break; + } + } + spin_unlock_irqrestore(&log_ring->spin_lock, flags); + + return ret; +} + +static void ras_log_ring_clear_log_tree(struct ras_core_context *ras_core) +{ + struct ras_log_ring *log_ring = &ras_core->ras_log_ring; + uint64_t batch_id, idx; + unsigned long flags = 0; + void *data; + int j; + + if ((log_ring->mono_upward_batch_id <= log_ring->last_del_batch_id) && + !log_ring->logged_ecc_count) + return; + + spin_lock_irqsave(&log_ring->spin_lock, flags); + batch_id = log_ring->last_del_batch_id; + while (batch_id < log_ring->mono_upward_batch_id) { + for (j = 0; j < MAX_RECORD_PER_BATCH; j++) { + idx = BATCH_IDX_TO_TREE_IDX(batch_id, j); + data = radix_tree_delete(&log_ring->ras_log_root, idx); + if (data) { + mempool_free(data, log_ring->ras_log_mempool); + log_ring->logged_ecc_count--; + } + } + batch_id++; + } + spin_unlock_irqrestore(&log_ring->spin_lock, flags); + +} + +int ras_log_ring_sw_init(struct ras_core_context *ras_core) +{ + struct ras_log_ring *log_ring = &ras_core->ras_log_ring; + + memset(log_ring, 0, sizeof(*log_ring)); + + log_ring->ras_log_mempool = mempool_create_kmalloc_pool( + RAS_LOG_MEMPOOL_SIZE, sizeof(struct ras_log_info)); + if (!log_ring->ras_log_mempool) + return -ENOMEM; + + INIT_RADIX_TREE(&log_ring->ras_log_root, GFP_KERNEL); + + spin_lock_init(&log_ring->spin_lock); + + return 0; +} + +int ras_log_ring_sw_fini(struct ras_core_context *ras_core) +{ + struct ras_log_ring *log_ring = &ras_core->ras_log_ring; + + ras_log_ring_clear_log_tree(ras_core); + log_ring->logged_ecc_count = 0; + log_ring->last_del_batch_id = 0; + log_ring->mono_upward_batch_id = 0; + + mempool_destroy(log_ring->ras_log_mempool); + + return 0; +} + +struct ras_log_batch_tag *ras_log_ring_create_batch_tag(struct ras_core_context *ras_core) +{ + struct ras_log_ring *log_ring = &ras_core->ras_log_ring; + struct ras_log_batch_tag *batch_tag; + unsigned long flags; + + batch_tag = kzalloc(sizeof(*batch_tag), GFP_KERNEL); + if (!batch_tag) + return NULL; + + spin_lock_irqsave(&log_ring->spin_lock, flags); + batch_tag->batch_id = log_ring->mono_upward_batch_id; + log_ring->mono_upward_batch_id++; + spin_unlock_irqrestore(&log_ring->spin_lock, flags); + + batch_tag->sub_seqno = 0; + batch_tag->timestamp = ras_core_get_utc_second_timestamp(ras_core); + return batch_tag; +} + +void ras_log_ring_destroy_batch_tag(struct ras_core_context *ras_core, + struct ras_log_batch_tag *batch_tag) +{ + kfree(batch_tag); +} + +void ras_log_ring_add_log_event(struct ras_core_context *ras_core, + enum ras_log_event event, void *data, struct ras_log_batch_tag *batch_tag) 
+{ + struct ras_log_ring *log_ring = &ras_core->ras_log_ring; + struct ras_log_info *log; + void *obj; + + obj = mempool_alloc_preallocated(log_ring->ras_log_mempool); + if (!obj || + (ras_log_ring_get_logged_ecc_count(ras_core) >= RAS_LOG_MEMPOOL_SIZE)) { + ras_log_ring_delete_data(ras_core, RAS_LOG_MEM_TEMP_SIZE); + if (!obj) + obj = mempool_alloc_preallocated(log_ring->ras_log_mempool); + } + + if (!obj) { + RAS_DEV_ERR(ras_core->dev, "ERROR: Failed to alloc ras log buffer!\n"); + return; + } + + log = (struct ras_log_info *)obj; + + memset(log, 0, sizeof(*log)); + log->timestamp = + batch_tag ? batch_tag->timestamp : ras_core_get_utc_second_timestamp(ras_core); + log->event = event; + + if (data) + memcpy(&log->aca_reg, data, sizeof(log->aca_reg)); + + if (event == RAS_LOG_EVENT_RMA) + memcpy(&log->aca_reg, ras_rma_aca_reg, sizeof(log->aca_reg)); + + ras_log_ring_add_data(ras_core, log, batch_tag); +} + +static struct ras_log_info *ras_log_ring_lookup_data(struct ras_core_context *ras_core, + uint64_t idx) +{ + struct ras_log_ring *log_ring = &ras_core->ras_log_ring; + unsigned long flags = 0; + void *data; + + spin_lock_irqsave(&log_ring->spin_lock, flags); + data = radix_tree_lookup(&log_ring->ras_log_root, idx); + spin_unlock_irqrestore(&log_ring->spin_lock, flags); + + return (struct ras_log_info *)data; +} + +int ras_log_ring_get_batch_records(struct ras_core_context *ras_core, uint64_t batch_id, + struct ras_log_info **log_arr, uint32_t arr_num) +{ + struct ras_log_ring *log_ring = &ras_core->ras_log_ring; + uint32_t i, idx, count = 0; + void *data; + + if ((batch_id >= log_ring->mono_upward_batch_id) || + (batch_id < log_ring->last_del_batch_id)) + return -EINVAL; + + for (i = 0; i < MAX_RECORD_PER_BATCH; i++) { + idx = BATCH_IDX_TO_TREE_IDX(batch_id, i); + data = ras_log_ring_lookup_data(ras_core, idx); + if (data) { + log_arr[count++] = data; + if (count >= arr_num) + break; + } + } + + return count; +} + +int ras_log_ring_get_batch_overview(struct ras_core_context *ras_core, + struct ras_log_batch_overview *overview) +{ + struct ras_log_ring *log_ring = &ras_core->ras_log_ring; + + overview->logged_batch_count = + log_ring->mono_upward_batch_id - log_ring->last_del_batch_id; + overview->last_batch_id = log_ring->mono_upward_batch_id; + overview->first_batch_id = log_ring->last_del_batch_id; + + return 0; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.h b/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.h new file mode 100644 index 000000000000..0ff6cc35678d --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __RAS_LOG_RING_H__ +#define __RAS_LOG_RING_H__ +#include "ras_aca.h" + +#define MAX_RECORD_PER_BATCH 32 + +#define RAS_LOG_SEQNO_TO_BATCH_IDX(seqno) ((seqno) >> 8) + +enum ras_log_event { + RAS_LOG_EVENT_NONE, + RAS_LOG_EVENT_UE, + RAS_LOG_EVENT_DE, + RAS_LOG_EVENT_CE, + RAS_LOG_EVENT_POISON_CREATION, + RAS_LOG_EVENT_POISON_CONSUMPTION, + RAS_LOG_EVENT_RMA, + RAS_LOG_EVENT_COUNT_MAX, +}; + +struct ras_aca_reg { + uint64_t regs[ACA_REG_MAX_COUNT]; +}; + +struct ras_log_info { + uint64_t seqno; + uint64_t timestamp; + enum ras_log_event event; + union { + struct ras_aca_reg aca_reg; + }; +}; + +struct ras_log_batch_tag { + uint64_t batch_id; + uint64_t timestamp; + uint32_t sub_seqno; +}; + +struct ras_log_ring { + void *ras_log_mempool; + struct radix_tree_root ras_log_root; + spinlock_t spin_lock; + uint64_t mono_upward_batch_id; + uint64_t last_del_batch_id; + int logged_ecc_count; +}; + +struct ras_log_batch_overview { + uint64_t first_batch_id; + uint64_t last_batch_id; + uint32_t logged_batch_count; +}; + +struct ras_core_context; + +int ras_log_ring_sw_init(struct ras_core_context *ras_core); +int ras_log_ring_sw_fini(struct ras_core_context *ras_core); + +struct ras_log_batch_tag *ras_log_ring_create_batch_tag(struct ras_core_context *ras_core); +void ras_log_ring_destroy_batch_tag(struct ras_core_context *ras_core, + struct ras_log_batch_tag *tag); +void ras_log_ring_add_log_event(struct ras_core_context *ras_core, + enum ras_log_event event, void *data, struct ras_log_batch_tag *tag); + +int ras_log_ring_get_batch_records(struct ras_core_context *ras_core, uint64_t batch_idx, + struct ras_log_info **log_arr, uint32_t arr_num); + +int ras_log_ring_get_batch_overview(struct ras_core_context *ras_core, + struct ras_log_batch_overview *overview); +#endif -- cgit v1.2.3 From 54ad42c23d5e861b3e41988bd9666d1690cd48cf Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:42:17 +0800 Subject: drm/amd/ras: Add cper conversion function Add cper conversion function. V3: Change commit message and update the calling function. Signed-off-by: YiPeng Chai Signed-off-by: Xiang Liu Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/ras_cper.c | 310 +++++++++++++++++++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_cper.h | 304 ++++++++++++++++++++++++++++ 2 files changed, 614 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_cper.c create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_cper.h diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_cper.c b/drivers/gpu/drm/amd/ras/rascore/ras_cper.c new file mode 100644 index 000000000000..2343991adccf --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_cper.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "ras.h" +#include "ras_core_status.h" +#include "ras_log_ring.h" +#include "ras_cper.h" + +static const struct ras_cper_guid MCE = CPER_NOTIFY__MCE; +static const struct ras_cper_guid CMC = CPER_NOTIFY__CMC; +static const struct ras_cper_guid BOOT = BOOT__TYPE; + +static const struct ras_cper_guid CRASHDUMP = GPU__CRASHDUMP; +static const struct ras_cper_guid RUNTIME = GPU__NONSTANDARD_ERROR; + +static void cper_get_timestamp(struct ras_core_context *ras_core, + struct ras_cper_timestamp *timestamp, uint64_t utc_second_timestamp) +{ + struct ras_time tm = {0}; + + ras_core_convert_timestamp_to_time(ras_core, utc_second_timestamp, &tm); + timestamp->seconds = tm.tm_sec; + timestamp->minutes = tm.tm_min; + timestamp->hours = tm.tm_hour; + timestamp->flag = 0; + timestamp->day = tm.tm_mday; + timestamp->month = tm.tm_mon; + timestamp->year = tm.tm_year % 100; + timestamp->century = tm.tm_year / 100; +} + +static void fill_section_hdr(struct ras_core_context *ras_core, + struct cper_section_hdr *hdr, enum ras_cper_type type, + enum ras_cper_severity sev, struct ras_log_info *trace) +{ + struct device_system_info dev_info = {0}; + char record_id[16]; + + hdr->signature[0] = 'C'; + hdr->signature[1] = 'P'; + hdr->signature[2] = 'E'; + hdr->signature[3] = 'R'; + hdr->revision = CPER_HDR__REV_1; + hdr->signature_end = 0xFFFFFFFF; + hdr->error_severity = sev; + + hdr->valid_bits.platform_id = 1; + hdr->valid_bits.partition_id = 1; + hdr->valid_bits.timestamp = 1; + + ras_core_get_device_system_info(ras_core, &dev_info); + + cper_get_timestamp(ras_core, &hdr->timestamp, trace->timestamp); + + snprintf(record_id, 9, "%d:%llX", dev_info.socket_id, + RAS_LOG_SEQNO_TO_BATCH_IDX(trace->seqno)); + memcpy(hdr->record_id, record_id, 8); + + snprintf(hdr->platform_id, 16, "0x%04X:0x%04X", + dev_info.vendor_id, dev_info.device_id); + /* pmfw version should be part of creator_id according to CPER spec */ + snprintf(hdr->creator_id, 16, "%s", CPER_CREATOR_ID__AMDGPU); + + switch (type) { + case RAS_CPER_TYPE_BOOT: + hdr->notify_type = BOOT; + break; + case RAS_CPER_TYPE_FATAL: + case RAS_CPER_TYPE_RMA: + hdr->notify_type = MCE; + break; + case RAS_CPER_TYPE_RUNTIME: + if (sev == RAS_CPER_SEV_NON_FATAL_CE) + hdr->notify_type = CMC; + else + hdr->notify_type = MCE; + break; + default: + RAS_DEV_ERR(ras_core->dev, "Unknown CPER Type\n"); + break; + } +} + +static int fill_section_descriptor(struct ras_core_context 
*ras_core, + struct cper_section_descriptor *descriptor, + enum ras_cper_severity sev, + struct ras_cper_guid sec_type, + uint32_t section_offset, + uint32_t section_length) +{ + struct device_system_info dev_info = {0}; + + descriptor->revision_minor = CPER_SEC__MINOR_REV_1; + descriptor->revision_major = CPER_SEC__MAJOR_REV_22; + descriptor->sec_offset = section_offset; + descriptor->sec_length = section_length; + descriptor->valid_bits.fru_text = 1; + descriptor->flag_bits.primary = 1; + descriptor->severity = sev; + descriptor->sec_type = sec_type; + + ras_core_get_device_system_info(ras_core, &dev_info); + + snprintf(descriptor->fru_text, 20, "OAM%d", dev_info.socket_id); + + if (sev == RAS_CPER_SEV_RMA) + descriptor->flag_bits.exceed_err_threshold = 1; + + if (sev == RAS_CPER_SEV_NON_FATAL_UE) + descriptor->flag_bits.latent_err = 1; + + return 0; +} + +static int fill_section_fatal(struct ras_core_context *ras_core, + struct cper_section_fatal *fatal, struct ras_log_info *trace) +{ + fatal->data.reg_ctx_type = CPER_CTX_TYPE__CRASH; + fatal->data.reg_arr_size = sizeof(fatal->data.reg); + + fatal->data.reg.status = trace->aca_reg.regs[RAS_CPER_ACA_REG_STATUS]; + fatal->data.reg.addr = trace->aca_reg.regs[RAS_CPER_ACA_REG_ADDR]; + fatal->data.reg.ipid = trace->aca_reg.regs[RAS_CPER_ACA_REG_IPID]; + fatal->data.reg.synd = trace->aca_reg.regs[RAS_CPER_ACA_REG_SYND]; + + return 0; +} + +static int fill_section_runtime(struct ras_core_context *ras_core, + struct cper_section_runtime *runtime, struct ras_log_info *trace) +{ + runtime->hdr.valid_bits.err_info_cnt = 1; + runtime->hdr.valid_bits.err_context_cnt = 1; + + runtime->descriptor.error_type = RUNTIME; + runtime->descriptor.ms_chk_bits.err_type_valid = 1; + + runtime->reg.reg_ctx_type = CPER_CTX_TYPE__CRASH; + runtime->reg.reg_arr_size = sizeof(runtime->reg.reg_dump); + + runtime->reg.reg_dump[RAS_CPER_ACA_REG_CTL] = trace->aca_reg.regs[ACA_REG_IDX__CTL]; + runtime->reg.reg_dump[RAS_CPER_ACA_REG_STATUS] = trace->aca_reg.regs[ACA_REG_IDX__STATUS]; + runtime->reg.reg_dump[RAS_CPER_ACA_REG_ADDR] = trace->aca_reg.regs[ACA_REG_IDX__ADDR]; + runtime->reg.reg_dump[RAS_CPER_ACA_REG_MISC0] = trace->aca_reg.regs[ACA_REG_IDX__MISC0]; + runtime->reg.reg_dump[RAS_CPER_ACA_REG_CONFIG] = trace->aca_reg.regs[ACA_REG_IDX__CONFG]; + runtime->reg.reg_dump[RAS_CPER_ACA_REG_IPID] = trace->aca_reg.regs[ACA_REG_IDX__IPID]; + runtime->reg.reg_dump[RAS_CPER_ACA_REG_SYND] = trace->aca_reg.regs[ACA_REG_IDX__SYND]; + + return 0; +} + +static int cper_generate_runtime_record(struct ras_core_context *ras_core, + struct cper_section_hdr *hdr, struct ras_log_info **trace_arr, uint32_t arr_num, + enum ras_cper_severity sev) +{ + struct cper_section_descriptor *descriptor; + struct cper_section_runtime *runtime; + int i; + + fill_section_hdr(ras_core, hdr, RAS_CPER_TYPE_RUNTIME, sev, trace_arr[0]); + hdr->record_length = RAS_HDR_LEN + ((RAS_SEC_DESC_LEN + RAS_NONSTD_SEC_LEN) * arr_num); + hdr->sec_cnt = arr_num; + for (i = 0; i < arr_num; i++) { + descriptor = (struct cper_section_descriptor *)((uint8_t *)hdr + + RAS_SEC_DESC_OFFSET(i)); + runtime = (struct cper_section_runtime *)((uint8_t *)hdr + + RAS_NONSTD_SEC_OFFSET(hdr->sec_cnt, i)); + + fill_section_descriptor(ras_core, descriptor, sev, RUNTIME, + RAS_NONSTD_SEC_OFFSET(hdr->sec_cnt, i), + sizeof(struct cper_section_runtime)); + fill_section_runtime(ras_core, runtime, trace_arr[i]); + } + + return 0; +} + +static int cper_generate_fatal_record(struct ras_core_context *ras_core, + uint8_t *buffer, struct 
ras_log_info **trace_arr, uint32_t arr_num) +{ + struct ras_cper_fatal_record record = {0}; + int i = 0; + + for (i = 0; i < arr_num; i++) { + fill_section_hdr(ras_core, &record.hdr, RAS_CPER_TYPE_FATAL, + RAS_CPER_SEV_FATAL_UE, trace_arr[i]); + record.hdr.record_length = RAS_HDR_LEN + RAS_SEC_DESC_LEN + RAS_FATAL_SEC_LEN; + record.hdr.sec_cnt = 1; + + fill_section_descriptor(ras_core, &record.descriptor, RAS_CPER_SEV_FATAL_UE, + CRASHDUMP, offsetof(struct ras_cper_fatal_record, fatal), + sizeof(struct cper_section_fatal)); + + fill_section_fatal(ras_core, &record.fatal, trace_arr[i]); + + memcpy(buffer + (i * record.hdr.record_length), + &record, record.hdr.record_length); + } + + return 0; +} + +static int cper_get_record_size(enum ras_cper_type type, uint16_t section_count) +{ + int size = 0; + + size += RAS_HDR_LEN; + size += (RAS_SEC_DESC_LEN * section_count); + + switch (type) { + case RAS_CPER_TYPE_RUNTIME: + case RAS_CPER_TYPE_RMA: + size += (RAS_NONSTD_SEC_LEN * section_count); + break; + case RAS_CPER_TYPE_FATAL: + size += (RAS_FATAL_SEC_LEN * section_count); + size += (RAS_HDR_LEN * (section_count - 1)); + break; + case RAS_CPER_TYPE_BOOT: + size += (RAS_BOOT_SEC_LEN * section_count); + break; + default: + /* should never reach here */ + break; + } + + return size; +} + +static enum ras_cper_type cper_ras_log_event_to_cper_type(enum ras_log_event event) +{ + switch (event) { + case RAS_LOG_EVENT_UE: + return RAS_CPER_TYPE_FATAL; + case RAS_LOG_EVENT_DE: + case RAS_LOG_EVENT_CE: + case RAS_LOG_EVENT_POISON_CREATION: + case RAS_LOG_EVENT_POISON_CONSUMPTION: + return RAS_CPER_TYPE_RUNTIME; + case RAS_LOG_EVENT_RMA: + return RAS_CPER_TYPE_RMA; + default: + /* should never reach here */ + return RAS_CPER_TYPE_RUNTIME; + } +} + +int ras_cper_generate_cper(struct ras_core_context *ras_core, + struct ras_log_info **trace_list, uint32_t count, + uint8_t *buf, uint32_t buf_len, uint32_t *real_data_len) +{ + uint8_t *buffer = buf; + uint64_t buf_size = buf_len; + int record_size, saved_size = 0; + struct cper_section_hdr *hdr; + + /* All the batch traces share the same event */ + record_size = cper_get_record_size( + cper_ras_log_event_to_cper_type(trace_list[0]->event), count); + + if ((record_size + saved_size) > buf_size) + return -ENOMEM; + + hdr = (struct cper_section_hdr *)(buffer + saved_size); + + switch (trace_list[0]->event) { + case RAS_LOG_EVENT_RMA: + cper_generate_runtime_record(ras_core, hdr, trace_list, count, RAS_CPER_SEV_RMA); + break; + case RAS_LOG_EVENT_DE: + cper_generate_runtime_record(ras_core, + hdr, trace_list, count, RAS_CPER_SEV_NON_FATAL_UE); + break; + case RAS_LOG_EVENT_CE: + cper_generate_runtime_record(ras_core, + hdr, trace_list, count, RAS_CPER_SEV_NON_FATAL_CE); + break; + case RAS_LOG_EVENT_UE: + cper_generate_fatal_record(ras_core, buffer + saved_size, trace_list, count); + break; + default: + RAS_DEV_WARN(ras_core->dev, "Unprocessed trace event: %d\n", trace_list[0]->event); + break; + } + + saved_size += record_size; + + *real_data_len = saved_size; + return 0; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_cper.h b/drivers/gpu/drm/amd/ras/rascore/ras_cper.h new file mode 100644 index 000000000000..076c1883c1ce --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_cper.h @@ -0,0 +1,304 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __RAS_CPER_H__ +#define __RAS_CPER_H__ + +#define CPER_UUID_MAX_SIZE 16 +struct ras_cper_guid { + uint8_t b[CPER_UUID_MAX_SIZE]; +}; + +#define CPER_GUID__INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ + ((struct ras_cper_guid) \ + {{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + +#define CPER_HDR__REV_1 (0x100) +#define CPER_SEC__MINOR_REV_1 (0x01) +#define CPER_SEC__MAJOR_REV_22 (0x22) +#define CPER_OAM_MAX_COUNT (8) + +#define CPER_CTX_TYPE__CRASH (1) +#define CPER_CTX_TYPE__BOOT (9) + +#define CPER_CREATOR_ID__AMDGPU "amdgpu" + +#define CPER_NOTIFY__MCE \ + CPER_GUID__INIT(0xE8F56FFE, 0x919C, 0x4cc5, 0xBA, 0x88, 0x65, 0xAB, \ + 0xE1, 0x49, 0x13, 0xBB) +#define CPER_NOTIFY__CMC \ + CPER_GUID__INIT(0x2DCE8BB1, 0xBDD7, 0x450e, 0xB9, 0xAD, 0x9C, 0xF4, \ + 0xEB, 0xD4, 0xF8, 0x90) +#define BOOT__TYPE \ + CPER_GUID__INIT(0x3D61A466, 0xAB40, 0x409a, 0xA6, 0x98, 0xF3, 0x62, \ + 0xD4, 0x64, 0xB3, 0x8F) + +#define GPU__CRASHDUMP \ + CPER_GUID__INIT(0x32AC0C78, 0x2623, 0x48F6, 0xB0, 0xD0, 0x73, 0x65, \ + 0x72, 0x5F, 0xD6, 0xAE) +#define GPU__NONSTANDARD_ERROR \ + CPER_GUID__INIT(0x32AC0C78, 0x2623, 0x48F6, 0x81, 0xA2, 0xAC, 0x69, \ + 0x17, 0x80, 0x55, 0x1D) +#define PROC_ERR__SECTION_TYPE \ + CPER_GUID__INIT(0xDC3EA0B0, 0xA144, 0x4797, 0xB9, 0x5B, 0x53, 0xFA, \ + 0x24, 0x2B, 0x6E, 0x1D) + +enum ras_cper_type { + RAS_CPER_TYPE_RUNTIME, + RAS_CPER_TYPE_FATAL, + RAS_CPER_TYPE_BOOT, + RAS_CPER_TYPE_RMA, +}; + +enum ras_cper_severity { + RAS_CPER_SEV_NON_FATAL_UE = 0, + RAS_CPER_SEV_FATAL_UE = 1, + RAS_CPER_SEV_NON_FATAL_CE = 2, + RAS_CPER_SEV_RMA = 3, + + RAS_CPER_SEV_UNUSED = 10, +}; + +enum ras_cper_aca_reg { + RAS_CPER_ACA_REG_CTL = 0, + RAS_CPER_ACA_REG_STATUS = 1, + RAS_CPER_ACA_REG_ADDR = 2, + RAS_CPER_ACA_REG_MISC0 = 3, + RAS_CPER_ACA_REG_CONFIG = 4, + RAS_CPER_ACA_REG_IPID = 5, + RAS_CPER_ACA_REG_SYND = 6, + RAS_CPER_ACA_REG_DESTAT = 8, + RAS_CPER_ACA_REG_DEADDR = 9, + RAS_CPER_ACA_REG_MASK = 10, + + RAS_CPER_ACA_REG_COUNT = 16, +}; + +#pragma pack(push, 1) + +struct ras_cper_timestamp { + uint8_t seconds; + uint8_t minutes; + uint8_t hours; + uint8_t flag; + uint8_t day; + uint8_t month; + uint8_t year; + uint8_t century; +}; + +struct cper_section_hdr { + char signature[4]; /* "CPER" */ + uint16_t revision; + uint32_t signature_end; /* 0xFFFFFFFF 
*/ + uint16_t sec_cnt; + enum ras_cper_severity error_severity; + union { + struct { + uint32_t platform_id : 1; + uint32_t timestamp : 1; + uint32_t partition_id : 1; + uint32_t reserved : 29; + } valid_bits; + uint32_t valid_mask; + }; + uint32_t record_length; /* Total size of CPER Entry */ + struct ras_cper_timestamp timestamp; + char platform_id[16]; + struct ras_cper_guid partition_id; /* Reserved */ + char creator_id[16]; + struct ras_cper_guid notify_type; /* CMC, MCE */ + char record_id[8]; /* Unique CPER Entry ID */ + uint32_t flags; /* Reserved */ + uint64_t persistence_info; /* Reserved */ + uint8_t reserved[12]; /* Reserved */ +}; + +struct cper_section_descriptor { + uint32_t sec_offset; /* Offset from the start of CPER entry */ + uint32_t sec_length; + uint8_t revision_minor; /* CPER_SEC_MINOR_REV_1 */ + uint8_t revision_major; /* CPER_SEC_MAJOR_REV_22 */ + union { + struct { + uint8_t fru_id : 1; + uint8_t fru_text : 1; + uint8_t reserved : 6; + } valid_bits; + uint8_t valid_mask; + }; + uint8_t reserved; + union { + struct { + uint32_t primary : 1; + uint32_t reserved1 : 2; + uint32_t exceed_err_threshold : 1; + uint32_t latent_err : 1; + uint32_t reserved2 : 27; + } flag_bits; + uint32_t flag_mask; + }; + struct ras_cper_guid sec_type; + char fru_id[16]; + enum ras_cper_severity severity; + char fru_text[20]; +}; + +struct runtime_hdr { + union { + struct { + uint64_t apic_id : 1; + uint64_t fw_id : 1; + uint64_t err_info_cnt : 6; + uint64_t err_context_cnt : 6; + } valid_bits; + uint64_t valid_mask; + }; + uint64_t apic_id; + char fw_id[48]; +}; + +struct runtime_descriptor { + struct ras_cper_guid error_type; + union { + struct { + uint64_t ms_chk : 1; + uint64_t target_addr_id : 1; + uint64_t req_id : 1; + uint64_t resp_id : 1; + uint64_t instr_ptr : 1; + uint64_t reserved : 59; + } valid_bits; + uint64_t valid_mask; + }; + union { + struct { + uint64_t err_type_valid : 1; + uint64_t pcc_valid : 1; + uint64_t uncorr_valid : 1; + uint64_t precise_ip_valid : 1; + uint64_t restartable_ip_valid : 1; + uint64_t overflow_valid : 1; + uint64_t reserved1 : 10; + uint64_t err_type : 2; + uint64_t pcc : 1; + uint64_t uncorr : 1; + uint64_t precised_ip : 1; + uint64_t restartable_ip : 1; + uint64_t overflow : 1; + uint64_t reserved2 : 41; + } ms_chk_bits; + uint64_t ms_chk_mask; + }; + uint64_t target_addr_id; + uint64_t req_id; + uint64_t resp_id; + uint64_t instr_ptr; +}; + +struct runtime_error_reg { + uint16_t reg_ctx_type; + uint16_t reg_arr_size; + uint32_t msr_addr; + uint64_t mm_reg_addr; + uint64_t reg_dump[RAS_CPER_ACA_REG_COUNT]; +}; + +struct cper_section_runtime { + struct runtime_hdr hdr; + struct runtime_descriptor descriptor; + struct runtime_error_reg reg; +}; + +struct crashdump_hdr { + uint64_t reserved1; + uint64_t reserved2; + char fw_id[48]; + uint64_t reserved3[8]; +}; + +struct fatal_reg_info { + uint64_t status; + uint64_t addr; + uint64_t ipid; + uint64_t synd; +}; + +struct crashdump_fatal { + uint16_t reg_ctx_type; + uint16_t reg_arr_size; + uint32_t reserved1; + uint64_t reserved2; + struct fatal_reg_info reg; +}; + +struct crashdump_boot { + uint16_t reg_ctx_type; + uint16_t reg_arr_size; + uint32_t reserved1; + uint64_t reserved2; + uint64_t msg[CPER_OAM_MAX_COUNT]; +}; + +struct cper_section_fatal { + struct crashdump_hdr hdr; + struct crashdump_fatal data; +}; + +struct cper_section_boot { + struct crashdump_hdr hdr; + struct crashdump_boot data; +}; + +struct ras_cper_fatal_record { + struct cper_section_hdr hdr; + struct cper_section_descriptor 
descriptor;
+	struct cper_section_fatal fatal;
+};
+#pragma pack(pop)
+
+#define RAS_HDR_LEN (sizeof(struct cper_section_hdr))
+#define RAS_SEC_DESC_LEN (sizeof(struct cper_section_descriptor))
+
+#define RAS_BOOT_SEC_LEN (sizeof(struct cper_section_boot))
+#define RAS_FATAL_SEC_LEN (sizeof(struct cper_section_fatal))
+#define RAS_NONSTD_SEC_LEN (sizeof(struct cper_section_runtime))
+
+#define RAS_SEC_DESC_OFFSET(idx) (RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (idx)))
+
+#define RAS_BOOT_SEC_OFFSET(count, idx) \
+	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (count)) + (RAS_BOOT_SEC_LEN * (idx)))
+#define RAS_FATAL_SEC_OFFSET(count, idx) \
+	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (count)) + (RAS_FATAL_SEC_LEN * (idx)))
+#define RAS_NONSTD_SEC_OFFSET(count, idx) \
+	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (count)) + (RAS_NONSTD_SEC_LEN * (idx)))
+
+struct ras_core_context;
+struct ras_log_info;
+int ras_cper_generate_cper(struct ras_core_context *ras_core,
+		struct ras_log_info **trace_list, uint32_t count,
+		uint8_t *buf, uint32_t buf_len, uint32_t *real_data_len);
+#endif
--
cgit v1.2.3

From 13c91b5b4378b5d08dc9ae9121b1645a0beec0bb Mon Sep 17 00:00:00 2001
From: YiPeng Chai
Date: Mon, 17 Mar 2025 17:13:19 +0800
Subject: drm/amd/ras: Add rascore unified interface function

1. Complete the initialization calls of all sub-functions.
2. Export common interfaces.

V2: Remove the use of typedef to define function pointers.

Signed-off-by: YiPeng Chai
Reviewed-by: Tao Zhou
Reviewed-by: Hawking Zhang
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/ras/rascore/ras.h      | 368 ++++++++++++++++++
 drivers/gpu/drm/amd/ras/rascore/ras_core.c | 603 +++++++++++++++++++++++++++++
 2 files changed, 971 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras.h
 create mode 100644 drivers/gpu/drm/amd/ras/rascore/ras_core.c

diff --git a/drivers/gpu/drm/amd/ras/rascore/ras.h b/drivers/gpu/drm/amd/ras/rascore/ras.h
new file mode 100644
index 000000000000..fa224b36e3f2
--- /dev/null
+++ b/drivers/gpu/drm/amd/ras/rascore/ras.h
@@ -0,0 +1,368 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ * + */ + +#ifndef __RAS_H__ +#define __RAS_H__ +#include "ras_sys.h" +#include "ras_umc.h" +#include "ras_aca.h" +#include "ras_eeprom.h" +#include "ras_core_status.h" +#include "ras_process.h" +#include "ras_gfx.h" +#include "ras_cmd.h" +#include "ras_nbio.h" +#include "ras_mp1.h" +#include "ras_psp.h" +#include "ras_log_ring.h" + +#define RAS_HW_ERR "[Hardware Error]: " + +#define RAS_GPU_PAGE_SHIFT 12 +#define RAS_ADDR_TO_PFN(addr) ((addr) >> RAS_GPU_PAGE_SHIFT) +#define RAS_PFN_TO_ADDR(pfn) ((pfn) << RAS_GPU_PAGE_SHIFT) + +#define RAS_CORE_RESET_GPU 0x10000 + +#define GPU_RESET_CAUSE_POISON (RAS_CORE_RESET_GPU | 0x0001) +#define GPU_RESET_CAUSE_FATAL (RAS_CORE_RESET_GPU | 0x0002) +#define GPU_RESET_CAUSE_RMA (RAS_CORE_RESET_GPU | 0x0004) + +enum ras_block_id { + RAS_BLOCK_ID__UMC = 0, + RAS_BLOCK_ID__SDMA, + RAS_BLOCK_ID__GFX, + RAS_BLOCK_ID__MMHUB, + RAS_BLOCK_ID__ATHUB, + RAS_BLOCK_ID__PCIE_BIF, + RAS_BLOCK_ID__HDP, + RAS_BLOCK_ID__XGMI_WAFL, + RAS_BLOCK_ID__DF, + RAS_BLOCK_ID__SMN, + RAS_BLOCK_ID__SEM, + RAS_BLOCK_ID__MP0, + RAS_BLOCK_ID__MP1, + RAS_BLOCK_ID__FUSE, + RAS_BLOCK_ID__MCA, + RAS_BLOCK_ID__VCN, + RAS_BLOCK_ID__JPEG, + RAS_BLOCK_ID__IH, + RAS_BLOCK_ID__MPIO, + + RAS_BLOCK_ID__LAST +}; + +enum ras_ecc_err_type { + RAS_ECC_ERR__NONE = 0, + RAS_ECC_ERR__PARITY = 1, + RAS_ECC_ERR__SINGLE_CORRECTABLE = 2, + RAS_ECC_ERR__MULTI_UNCORRECTABLE = 4, + RAS_ECC_ERR__POISON = 8, +}; + +enum ras_err_type { + RAS_ERR_TYPE__UE = 0, + RAS_ERR_TYPE__CE, + RAS_ERR_TYPE__DE, + RAS_ERR_TYPE__LAST +}; + +enum ras_seqno_type { + RAS_SEQNO_TYPE_INVALID = 0, + RAS_SEQNO_TYPE_UE, + RAS_SEQNO_TYPE_CE, + RAS_SEQNO_TYPE_DE, + RAS_SEQNO_TYPE_POISON_CONSUMPTION, + RAS_SEQNO_TYPE_COUNT_MAX, +}; + +enum ras_seqno_fifo { + SEQNO_FIFO_INVALID = 0, + SEQNO_FIFO_POISON_CREATION, + SEQNO_FIFO_POISON_CONSUMPTION, + SEQNO_FIFO_COUNT_MAX +}; + +enum ras_notify_event { + RAS_EVENT_ID__NONE, + RAS_EVENT_ID__BAD_PAGE_DETECTED, + RAS_EVENT_ID__POISON_CONSUMPTION, + RAS_EVENT_ID__RESERVE_BAD_PAGE, + RAS_EVENT_ID__DEVICE_RMA, + RAS_EVENT_ID__UPDATE_BAD_PAGE_NUM, + RAS_EVENT_ID__UPDATE_BAD_CHANNEL_BITMAP, + RAS_EVENT_ID__FATAL_ERROR_DETECTED, + RAS_EVENT_ID__RESET_GPU, + RAS_EVENT_ID__RESET_VF, +}; + +enum ras_gpu_status { + RAS_GPU_STATUS__NOT_READY = 0, + RAS_GPU_STATUS__READY = 0x1, + RAS_GPU_STATUS__IN_RESET = 0x2, + RAS_GPU_STATUS__IS_RMA = 0x4, + RAS_GPU_STATUS__IS_VF = 0x8, +}; + +struct ras_core_context; +struct ras_bank_ecc; +struct ras_umc; +struct ras_aca; +struct ras_process; +struct ras_nbio; +struct ras_log_ring; +struct ras_psp; + +struct ras_mp1_sys_func { + int (*mp1_get_valid_bank_count)(struct ras_core_context *ras_core, + u32 msg, u32 *count); + int (*mp1_dump_valid_bank)(struct ras_core_context *ras_core, + u32 msg, u32 idx, u32 reg_idx, u64 *val); +}; + +struct ras_eeprom_sys_func { + int (*eeprom_i2c_xfer)(struct ras_core_context *ras_core, + u32 eeprom_addr, u8 *eeprom_buf, u32 buf_size, bool read); + int (*update_eeprom_i2c_config)(struct ras_core_context *ras_core); +}; + +struct ras_nbio_sys_func { + int (*set_ras_controller_irq_state)(struct ras_core_context *ras_core, + bool state); + int (*set_ras_err_event_athub_irq_state)(struct ras_core_context *ras_core, + bool state); +}; + +struct ras_time { + int tm_sec; + int tm_min; + int tm_hour; + int tm_mday; + int tm_mon; + long tm_year; +}; + +struct device_system_info { + uint32_t device_id; + uint32_t vendor_id; + uint32_t socket_id; +}; + +enum gpu_mem_type { + GPU_MEM_TYPE_DEFAULT, + GPU_MEM_TYPE_RAS_PSP_RING, + 
GPU_MEM_TYPE_RAS_PSP_CMD, + GPU_MEM_TYPE_RAS_PSP_FENCE, + GPU_MEM_TYPE_RAS_TA_FW, + GPU_MEM_TYPE_RAS_TA_CMD, +}; + +struct ras_psp_sys_func { + int (*get_ras_psp_system_status)(struct ras_core_context *ras_core, + struct ras_psp_sys_status *status); + int (*get_ras_ta_init_param)(struct ras_core_context *ras_core, + struct ras_ta_init_param *ras_ta_param); +}; + +struct ras_sys_func { + int (*gpu_reset_lock)(struct ras_core_context *ras_core, + bool down, bool try); + int (*check_gpu_status)(struct ras_core_context *ras_core, + uint32_t *status); + int (*gen_seqno)(struct ras_core_context *ras_core, + enum ras_seqno_type seqno_type, uint64_t *seqno); + int (*async_handle_ras_event)(struct ras_core_context *ras_core, void *data); + int (*ras_notifier)(struct ras_core_context *ras_core, + enum ras_notify_event event_id, void *data); + u64 (*get_utc_second_timestamp)(struct ras_core_context *ras_core); + int (*get_device_system_info)(struct ras_core_context *ras_core, + struct device_system_info *dev_info); + bool (*detect_ras_interrupt)(struct ras_core_context *ras_core); + int (*get_gpu_mem)(struct ras_core_context *ras_core, + enum gpu_mem_type mem_type, struct gpu_mem_block *gpu_mem); + int (*put_gpu_mem)(struct ras_core_context *ras_core, + enum gpu_mem_type mem_type, struct gpu_mem_block *gpu_mem); +}; + +struct ras_ecc_count { + uint64_t new_ce_count; + uint64_t total_ce_count; + uint64_t new_ue_count; + uint64_t total_ue_count; + uint64_t new_de_count; + uint64_t total_de_count; +}; + +struct ras_bank_ecc { + uint32_t nps; + uint64_t seq_no; + uint64_t status; + uint64_t ipid; + uint64_t addr; +}; + +struct ras_bank_ecc_node { + struct list_head node; + struct ras_bank_ecc ecc; +}; + +struct ras_aca_config { + u32 socket_num_per_hive; + u32 aid_num_per_socket; + u32 xcd_num_per_aid; +}; + +struct ras_mp1_config { + const struct ras_mp1_sys_func *mp1_sys_fn; +}; + +struct ras_nbio_config { + const struct ras_nbio_sys_func *nbio_sys_fn; +}; + +struct ras_psp_config { + const struct ras_psp_sys_func *psp_sys_fn; +}; + +struct ras_umc_config { + uint32_t umc_vram_type; +}; + +struct ras_eeprom_config { + const struct ras_eeprom_sys_func *eeprom_sys_fn; + int eeprom_record_threshold_config; + uint32_t eeprom_record_threshold_count; + void *eeprom_i2c_adapter; + u32 eeprom_i2c_addr; + u32 eeprom_i2c_port; + u16 max_i2c_read_len; + u16 max_i2c_write_len; +}; + +struct ras_core_config { + u32 aca_ip_version; + u32 umc_ip_version; + u32 mp1_ip_version; + u32 gfx_ip_version; + u32 nbio_ip_version; + u32 psp_ip_version; + + bool poison_supported; + bool ras_eeprom_supported; + const struct ras_sys_func *sys_fn; + + struct ras_aca_config aca_cfg; + struct ras_mp1_config mp1_cfg; + struct ras_nbio_config nbio_cfg; + struct ras_psp_config psp_cfg; + struct ras_eeprom_config eeprom_cfg; + struct ras_umc_config umc_cfg; +}; + +struct ras_core_context { + void *dev; + struct ras_core_config *config; + u32 socket_num_per_hive; + u32 aid_num_per_socket; + u32 xcd_num_per_aid; + int max_ue_banks_per_query; + int max_ce_banks_per_query; + struct ras_aca ras_aca; + + bool ras_eeprom_supported; + struct ras_eeprom_control ras_eeprom; + + struct ras_psp ras_psp; + struct ras_umc ras_umc; + struct ras_nbio ras_nbio; + struct ras_gfx ras_gfx; + struct ras_mp1 ras_mp1; + struct ras_process ras_proc; + struct ras_cmd_mgr ras_cmd; + struct ras_log_ring ras_log_ring; + + const struct ras_sys_func *sys_fn; + + /* is poison mode supported */ + bool poison_supported; + + bool is_rma; + bool is_initialized; + + struct 
kfifo de_seqno_fifo; + struct kfifo consumption_seqno_fifo; + spinlock_t seqno_lock; + + bool ras_core_enabled; +}; + +struct ras_core_context *ras_core_create(struct ras_core_config *init_config); +void ras_core_destroy(struct ras_core_context *ras_core); +int ras_core_sw_init(struct ras_core_context *ras_core); +int ras_core_sw_fini(struct ras_core_context *ras_core); +int ras_core_hw_init(struct ras_core_context *ras_core); +int ras_core_hw_fini(struct ras_core_context *ras_core); +bool ras_core_is_ready(struct ras_core_context *ras_core); +uint64_t ras_core_gen_seqno(struct ras_core_context *ras_core, + enum ras_seqno_type seqno_type); +uint64_t ras_core_get_seqno(struct ras_core_context *ras_core, + enum ras_seqno_type seqno_type, bool pop); + +int ras_core_put_seqno(struct ras_core_context *ras_core, + enum ras_seqno_type seqno_type, uint64_t seqno); + +int ras_core_update_ecc_info(struct ras_core_context *ras_core); +int ras_core_query_block_ecc_data(struct ras_core_context *ras_core, + enum ras_block_id block, struct ras_ecc_count *ecc_count); + +bool ras_core_gpu_in_reset(struct ras_core_context *ras_core); +bool ras_core_gpu_is_rma(struct ras_core_context *ras_core); +bool ras_core_gpu_is_vf(struct ras_core_context *ras_core); +bool ras_core_handle_nbio_irq(struct ras_core_context *ras_core, void *data); +int ras_core_handle_fatal_error(struct ras_core_context *ras_core); + +uint32_t ras_core_get_curr_nps_mode(struct ras_core_context *ras_core); +const char *ras_core_get_ras_block_name(enum ras_block_id block_id); +int ras_core_convert_timestamp_to_time(struct ras_core_context *ras_core, + uint64_t timestamp, struct ras_time *tm); + +int ras_core_set_status(struct ras_core_context *ras_core, bool enable); +bool ras_core_is_enabled(struct ras_core_context *ras_core); +uint64_t ras_core_get_utc_second_timestamp(struct ras_core_context *ras_core); +int ras_core_translate_soc_pa_and_bank(struct ras_core_context *ras_core, + uint64_t *soc_pa, struct umc_bank_addr *bank_addr, bool bank_to_pa); +bool ras_core_ras_interrupt_detected(struct ras_core_context *ras_core); +int ras_core_get_gpu_mem(struct ras_core_context *ras_core, + enum gpu_mem_type mem_type, struct gpu_mem_block *gpu_mem); +int ras_core_put_gpu_mem(struct ras_core_context *ras_core, + enum gpu_mem_type mem_type, struct gpu_mem_block *gpu_mem); +bool ras_core_check_safety_watermark(struct ras_core_context *ras_core); +int ras_core_down_trylock_gpu_reset_lock(struct ras_core_context *ras_core); +void ras_core_down_gpu_reset_lock(struct ras_core_context *ras_core); +void ras_core_up_gpu_reset_lock(struct ras_core_context *ras_core); +int ras_core_event_notify(struct ras_core_context *ras_core, + enum ras_notify_event event_id, void *data); +int ras_core_get_device_system_info(struct ras_core_context *ras_core, + struct device_system_info *dev_info); +#endif diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c b/drivers/gpu/drm/amd/ras/rascore/ras_core.c new file mode 100644 index 000000000000..41fc9f0d84e4 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c @@ -0,0 +1,603 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "ras.h" +#include "ras_core_status.h" + +#define RAS_SEQNO_FIFO_SIZE (128 * sizeof(uint64_t)) + +#define IS_LEAP_YEAR(x) ((x % 4 == 0 && x % 100 != 0) || x % 400 == 0) + +static const char * const ras_block_name[] = { + "umc", + "sdma", + "gfx", + "mmhub", + "athub", + "pcie_bif", + "hdp", + "xgmi_wafl", + "df", + "smn", + "sem", + "mp0", + "mp1", + "fuse", + "mca", + "vcn", + "jpeg", + "ih", + "mpio", +}; + +const char *ras_core_get_ras_block_name(enum ras_block_id block_id) +{ + if (block_id >= ARRAY_SIZE(ras_block_name)) + return ""; + + return ras_block_name[block_id]; +} + +int ras_core_convert_timestamp_to_time(struct ras_core_context *ras_core, + uint64_t timestamp, struct ras_time *tm) +{ + int days_in_month[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + uint64_t year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0; + int seconds_per_day = 24 * 60 * 60; + int seconds_per_hour = 60 * 60; + int seconds_per_minute = 60; + int days, remaining_seconds; + + days = timestamp / seconds_per_day; + remaining_seconds = timestamp % seconds_per_day; + + /* utc_timestamp follows the Unix epoch */ + year = 1970; + while (days >= 365) { + if (IS_LEAP_YEAR(year)) { + if (days < 366) + break; + days -= 366; + } else { + days -= 365; + } + year++; + } + + days_in_month[1] += IS_LEAP_YEAR(year); + + month = 0; + while (days >= days_in_month[month]) { + days -= days_in_month[month]; + month++; + } + month++; + day = days + 1; + + if (remaining_seconds) { + hour = remaining_seconds / seconds_per_hour; + minute = (remaining_seconds % seconds_per_hour) / seconds_per_minute; + second = remaining_seconds % seconds_per_minute; + } + + tm->tm_year = year; + tm->tm_mon = month; + tm->tm_mday = day; + tm->tm_hour = hour; + tm->tm_min = minute; + tm->tm_sec = second; + + return 0; +} + +bool ras_core_gpu_in_reset(struct ras_core_context *ras_core) +{ + uint32_t status = 0; + + if (ras_core->sys_fn && + ras_core->sys_fn->check_gpu_status) + ras_core->sys_fn->check_gpu_status(ras_core, &status); + + return (status & RAS_GPU_STATUS__IN_RESET) ? true : false; +} + +bool ras_core_gpu_is_vf(struct ras_core_context *ras_core) +{ + uint32_t status = 0; + + if (ras_core->sys_fn && + ras_core->sys_fn->check_gpu_status) + ras_core->sys_fn->check_gpu_status(ras_core, &status); + + return (status & RAS_GPU_STATUS__IS_VF) ? 
true : false; +} + +bool ras_core_gpu_is_rma(struct ras_core_context *ras_core) +{ + if (!ras_core) + return false; + + return ras_core->is_rma; +} + +static int ras_core_seqno_fifo_write(struct ras_core_context *ras_core, + enum ras_seqno_fifo fifo_type, uint64_t seqno) +{ + int ret = 0; + struct kfifo *seqno_fifo = NULL; + + if (fifo_type == SEQNO_FIFO_POISON_CREATION) + seqno_fifo = &ras_core->de_seqno_fifo; + else if (fifo_type == SEQNO_FIFO_POISON_CONSUMPTION) + seqno_fifo = &ras_core->consumption_seqno_fifo; + + if (seqno_fifo) + ret = kfifo_in_spinlocked(seqno_fifo, + &seqno, sizeof(seqno), &ras_core->seqno_lock); + + return ret ? 0 : -EINVAL; +} + +static int ras_core_seqno_fifo_read(struct ras_core_context *ras_core, + enum ras_seqno_fifo fifo_type, uint64_t *seqno, bool pop) +{ + int ret = 0; + struct kfifo *seqno_fifo = NULL; + + if (fifo_type == SEQNO_FIFO_POISON_CREATION) + seqno_fifo = &ras_core->de_seqno_fifo; + else if (fifo_type == SEQNO_FIFO_POISON_CONSUMPTION) + seqno_fifo = &ras_core->consumption_seqno_fifo; + + if (seqno_fifo) { + if (pop) + ret = kfifo_out_spinlocked(seqno_fifo, + seqno, sizeof(*seqno), &ras_core->seqno_lock); + else + ret = kfifo_out_peek(seqno_fifo, seqno, sizeof(*seqno)); + } + + return ret ? 0 : -EINVAL; +} + +uint64_t ras_core_gen_seqno(struct ras_core_context *ras_core, + enum ras_seqno_type type) +{ + uint64_t seqno = 0; + + if (ras_core->sys_fn && + ras_core->sys_fn->gen_seqno) + ras_core->sys_fn->gen_seqno(ras_core, type, &seqno); + + return seqno; +} + +int ras_core_put_seqno(struct ras_core_context *ras_core, + enum ras_seqno_type seqno_type, uint64_t seqno) +{ + int ret = 0; + + if (seqno_type >= RAS_SEQNO_TYPE_COUNT_MAX) + return -EINVAL; + + if (seqno_type == RAS_SEQNO_TYPE_DE) + ret = ras_core_seqno_fifo_write(ras_core, + SEQNO_FIFO_POISON_CREATION, seqno); + else if (seqno_type == RAS_SEQNO_TYPE_POISON_CONSUMPTION) + ret = ras_core_seqno_fifo_write(ras_core, + SEQNO_FIFO_POISON_CONSUMPTION, seqno); + else + ret = -EINVAL; + + return ret; +} + +uint64_t ras_core_get_seqno(struct ras_core_context *ras_core, + enum ras_seqno_type seqno_type, bool pop) +{ + uint64_t seq_no; + int ret = -ENODATA; + + if (seqno_type >= RAS_SEQNO_TYPE_COUNT_MAX) + return 0; + + if (seqno_type == RAS_SEQNO_TYPE_DE) + ret = ras_core_seqno_fifo_read(ras_core, + SEQNO_FIFO_POISON_CREATION, &seq_no, pop); + else if (seqno_type == RAS_SEQNO_TYPE_POISON_CONSUMPTION) + ret = ras_core_seqno_fifo_read(ras_core, + SEQNO_FIFO_POISON_CONSUMPTION, &seq_no, pop); + + if (ret) + seq_no = ras_core_gen_seqno(ras_core, seqno_type); + + return seq_no; +} + +static int ras_core_eeprom_recovery(struct ras_core_context *ras_core) +{ + int count; + int ret; + + count = ras_eeprom_get_record_count(ras_core); + if (!count) + return 0; + + /* Avoid bad page to be loaded again after gpu reset */ + if (ras_umc_get_saved_eeprom_count(ras_core) >= count) + return 0; + + ret = ras_umc_load_bad_pages(ras_core); + if (ret) { + RAS_DEV_ERR(ras_core->dev, "ras_umc_load_bad_pages failed: %d\n", ret); + return ret; + } + + ras_eeprom_sync_info(ras_core); + + return ret; +} + +struct ras_core_context *ras_core_create(struct ras_core_config *init_config) +{ + struct ras_core_context *ras_core; + struct ras_core_config *config; + + ras_core = kzalloc(sizeof(*ras_core), GFP_KERNEL); + if (!ras_core) + return NULL; + + config = kzalloc(sizeof(*config), GFP_KERNEL); + if (!config) { + kfree(ras_core); + return NULL; + } + + memcpy(config, init_config, sizeof(*config)); + ras_core->config = config; + + 
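+	/* The context owns the config copy from here on; ras_core_destroy()
+	 * below releases the config first and then the context itself.
+	 */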
return ras_core;
+}
+
+void ras_core_destroy(struct ras_core_context *ras_core)
+{
+	if (ras_core)
+		kfree(ras_core->config);
+
+	kfree(ras_core);
+}
+
+int ras_core_sw_init(struct ras_core_context *ras_core)
+{
+	int ret;
+
+	if (!ras_core->config) {
+		RAS_DEV_ERR(ras_core->dev, "No ras core config!\n");
+		return -EINVAL;
+	}
+
+	ras_core->sys_fn = ras_core->config->sys_fn;
+	if (!ras_core->sys_fn)
+		return -EINVAL;
+
+	ret = kfifo_alloc(&ras_core->de_seqno_fifo,
+			RAS_SEQNO_FIFO_SIZE, GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	ret = kfifo_alloc(&ras_core->consumption_seqno_fifo,
+			RAS_SEQNO_FIFO_SIZE, GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	spin_lock_init(&ras_core->seqno_lock);
+
+	ret = ras_aca_sw_init(ras_core);
+	if (ret)
+		return ret;
+
+	ret = ras_umc_sw_init(ras_core);
+	if (ret)
+		return ret;
+
+	ret = ras_cmd_init(ras_core);
+	if (ret)
+		return ret;
+
+	ret = ras_log_ring_sw_init(ras_core);
+	if (ret)
+		return ret;
+
+	ret = ras_psp_sw_init(ras_core);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int ras_core_sw_fini(struct ras_core_context *ras_core)
+{
+	kfifo_free(&ras_core->de_seqno_fifo);
+	kfifo_free(&ras_core->consumption_seqno_fifo);
+
+	ras_psp_sw_fini(ras_core);
+	ras_log_ring_sw_fini(ras_core);
+	ras_cmd_fini(ras_core);
+	ras_umc_sw_fini(ras_core);
+	ras_aca_sw_fini(ras_core);
+
+	return 0;
+}
+
+int ras_core_hw_init(struct ras_core_context *ras_core)
+{
+	int ret;
+
+	ras_core->ras_eeprom_supported =
+		ras_core->config->ras_eeprom_supported;
+
+	ras_core->poison_supported = ras_core->config->poison_supported;
+
+	ret = ras_psp_hw_init(ras_core);
+	if (ret)
+		return ret;
+
+	ret = ras_aca_hw_init(ras_core);
+	if (ret)
+		goto init_err1;
+
+	ret = ras_mp1_hw_init(ras_core);
+	if (ret)
+		goto init_err2;
+
+	ret = ras_nbio_hw_init(ras_core);
+	if (ret)
+		goto init_err3;
+
+	ret = ras_umc_hw_init(ras_core);
+	if (ret)
+		goto init_err4;
+
+	ret = ras_gfx_hw_init(ras_core);
+	if (ret)
+		goto init_err5;
+
+	ret = ras_eeprom_hw_init(ras_core);
+	if (ret)
+		goto init_err6;
+
+	ret = ras_core_eeprom_recovery(ras_core);
+	if (ret) {
+		RAS_DEV_ERR(ras_core->dev,
+			"Failed to recover ras core, ret:%d\n", ret);
+		goto init_err7;
+	}
+
+	ret = ras_eeprom_check_storage_status(ras_core);
+	if (ret)
+		goto init_err7;
+
+	ret = ras_process_init(ras_core);
+	if (ret)
+		goto init_err7;
+
+	ras_core->is_initialized = true;
+
+	return 0;
+
+init_err7:
+	ras_eeprom_hw_fini(ras_core);
+init_err6:
+	ras_gfx_hw_fini(ras_core);
+init_err5:
+	ras_umc_hw_fini(ras_core);
+init_err4:
+	ras_nbio_hw_fini(ras_core);
+init_err3:
+	ras_mp1_hw_fini(ras_core);
+init_err2:
+	ras_aca_hw_fini(ras_core);
+init_err1:
+	ras_psp_hw_fini(ras_core);
+	return ret;
+}
+
+int ras_core_hw_fini(struct ras_core_context *ras_core)
+{
+	ras_core->is_initialized = false;
+
+	ras_process_fini(ras_core);
+	ras_eeprom_hw_fini(ras_core);
+	ras_gfx_hw_fini(ras_core);
+	ras_nbio_hw_fini(ras_core);
+	ras_umc_hw_fini(ras_core);
+	ras_mp1_hw_fini(ras_core);
+	ras_aca_hw_fini(ras_core);
+	ras_psp_hw_fini(ras_core);
+
+	return 0;
+}
+
+bool ras_core_handle_nbio_irq(struct ras_core_context *ras_core, void *data)
+{
+	return ras_nbio_handle_irq_error(ras_core, data);
+}
+
+int ras_core_handle_fatal_error(struct ras_core_context *ras_core)
+{
+	int ret = 0;
+
+	ras_aca_mark_fatal_flag(ras_core);
+
+	ret = ras_core_event_notify(ras_core,
+			RAS_EVENT_ID__FATAL_ERROR_DETECTED, NULL);
+
+	return ret;
+}
+
+uint32_t ras_core_get_curr_nps_mode(struct ras_core_context *ras_core)
+{
+	if (ras_core->ras_nbio.ip_func &&
ras_core->ras_nbio.ip_func->get_memory_partition_mode)
+		return ras_core->ras_nbio.ip_func->get_memory_partition_mode(ras_core);
+
+	RAS_DEV_ERR(ras_core->dev, "Failed to get gpu memory nps mode!\n");
+	return 0;
+}
+
+int ras_core_update_ecc_info(struct ras_core_context *ras_core)
+{
+	int ret;
+
+	ret = ras_aca_update_ecc(ras_core, RAS_ERR_TYPE__CE, NULL);
+	if (!ret)
+		ret = ras_aca_update_ecc(ras_core, RAS_ERR_TYPE__UE, NULL);
+
+	return ret;
+}
+
+int ras_core_query_block_ecc_data(struct ras_core_context *ras_core,
+		enum ras_block_id block, struct ras_ecc_count *ecc_count)
+{
+	int ret;
+
+	if (!ras_core || !ecc_count || (block >= RAS_BLOCK_ID__LAST))
+		return -EINVAL;
+
+	ret = ras_aca_get_block_ecc_count(ras_core, block, ecc_count);
+	if (!ret)
+		ras_aca_clear_block_new_ecc_count(ras_core, block);
+
+	return ret;
+}
+
+int ras_core_set_status(struct ras_core_context *ras_core, bool enable)
+{
+	ras_core->ras_core_enabled = enable;
+
+	return 0;
+}
+
+bool ras_core_is_enabled(struct ras_core_context *ras_core)
+{
+	return ras_core->ras_core_enabled;
+}
+
+uint64_t ras_core_get_utc_second_timestamp(struct ras_core_context *ras_core)
+{
+	if (ras_core && ras_core->sys_fn &&
+	    ras_core->sys_fn->get_utc_second_timestamp)
+		return ras_core->sys_fn->get_utc_second_timestamp(ras_core);
+
+	RAS_DEV_ERR(ras_core->dev, "Failed to get system timestamp!\n");
+	return 0;
+}
+
+int ras_core_translate_soc_pa_and_bank(struct ras_core_context *ras_core,
+		uint64_t *soc_pa, struct umc_bank_addr *bank_addr, bool bank_to_pa)
+{
+	if (!ras_core || !soc_pa || !bank_addr)
+		return -EINVAL;
+
+	return ras_umc_translate_soc_pa_and_bank(ras_core, soc_pa, bank_addr, bank_to_pa);
+}
+
+bool ras_core_ras_interrupt_detected(struct ras_core_context *ras_core)
+{
+	if (ras_core && ras_core->sys_fn &&
+	    ras_core->sys_fn->detect_ras_interrupt)
+		return ras_core->sys_fn->detect_ras_interrupt(ras_core);
+
+	RAS_DEV_ERR(ras_core->dev, "Failed to detect ras interrupt!\n");
+	return false;
+}
+
+int ras_core_get_gpu_mem(struct ras_core_context *ras_core,
+		enum gpu_mem_type mem_type, struct gpu_mem_block *gpu_mem)
+{
+	if (ras_core->sys_fn && ras_core->sys_fn->get_gpu_mem)
+		return ras_core->sys_fn->get_gpu_mem(ras_core, mem_type, gpu_mem);
+
+	RAS_DEV_ERR(ras_core->dev, "GPU memory get API is not configured!\n");
+	return -EACCES;
+}
+
+int ras_core_put_gpu_mem(struct ras_core_context *ras_core,
+		enum gpu_mem_type mem_type, struct gpu_mem_block *gpu_mem)
+{
+	if (ras_core->sys_fn && ras_core->sys_fn->put_gpu_mem)
+		return ras_core->sys_fn->put_gpu_mem(ras_core, mem_type, gpu_mem);
+
+	RAS_DEV_ERR(ras_core->dev, "GPU memory put API is not configured!\n");
+	return -EACCES;
+}
+
+bool ras_core_is_ready(struct ras_core_context *ras_core)
+{
+	return ras_core ?
ras_core->is_initialized : false; +} + +bool ras_core_check_safety_watermark(struct ras_core_context *ras_core) +{ + return ras_eeprom_check_safety_watermark(ras_core); +} + +int ras_core_down_trylock_gpu_reset_lock(struct ras_core_context *ras_core) +{ + if (ras_core->sys_fn && ras_core->sys_fn->gpu_reset_lock) + return ras_core->sys_fn->gpu_reset_lock(ras_core, true, true); + + return 1; +} + +void ras_core_down_gpu_reset_lock(struct ras_core_context *ras_core) +{ + if (ras_core->sys_fn && ras_core->sys_fn->gpu_reset_lock) + ras_core->sys_fn->gpu_reset_lock(ras_core, true, false); +} + +void ras_core_up_gpu_reset_lock(struct ras_core_context *ras_core) +{ + if (ras_core->sys_fn && ras_core->sys_fn->gpu_reset_lock) + ras_core->sys_fn->gpu_reset_lock(ras_core, false, false); +} + +int ras_core_event_notify(struct ras_core_context *ras_core, + enum ras_notify_event event_id, void *data) +{ + if (ras_core && ras_core->sys_fn && + ras_core->sys_fn->ras_notifier) + return ras_core->sys_fn->ras_notifier(ras_core, event_id, data); + + return -RAS_CORE_NOT_SUPPORTED; +} + +int ras_core_get_device_system_info(struct ras_core_context *ras_core, + struct device_system_info *dev_info) +{ + if (ras_core && ras_core->sys_fn && + ras_core->sys_fn->get_device_system_info) + return ras_core->sys_fn->get_device_system_info(ras_core, dev_info); + + return -RAS_CORE_NOT_SUPPORTED; +} -- cgit v1.2.3 From cef10272e796f5984fa1d3f9b48c5ffed9b9e6a8 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:55:37 +0800 Subject: drm/amd/ras: Add files to ras core Makefile Add files to ras core Makefile. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/rascore/Makefile | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/gpu/drm/amd/ras/rascore/Makefile b/drivers/gpu/drm/amd/ras/rascore/Makefile index e69de29bb2d1..e826a1f86424 100644 --- a/drivers/gpu/drm/amd/ras/rascore/Makefile +++ b/drivers/gpu/drm/amd/ras/rascore/Makefile @@ -0,0 +1,44 @@ +# +# Copyright 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
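+#
+# Each object listed in RAS_CORE_FILES below is built from the matching
+# rascore source file and folded into the amdgpu build through
+# AMD_GPU_RAS_FILES; a hypothetical new handler, say ras_foo.o, would
+# simply be appended to the list.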
+# +RAS_CORE_FILES = ras_core.o \ + ras_mp1.o \ + ras_mp1_v13_0.o \ + ras_aca.o \ + ras_aca_v1_0.o \ + ras_eeprom.o \ + ras_umc.o \ + ras_umc_v12_0.o \ + ras_cmd.o \ + ras_gfx.o \ + ras_gfx_v9_0.o \ + ras_process.o \ + ras_nbio.o \ + ras_nbio_v7_9.o \ + ras_log_ring.o \ + ras_cper.o \ + ras_psp.o \ + ras_psp_v13_0.o + + +RAS_CORE = $(addprefix $(AMD_GPU_RAS_PATH)/rascore/,$(RAS_CORE_FILES)) + +AMD_GPU_RAS_FILES += $(RAS_CORE) -- cgit v1.2.3 From 960cc53ca888891c2a16eb7de38a24dc5dc7f534 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 24 Mar 2025 15:12:07 +0800 Subject: drm/amd/ras: Add amdgpu nbio v7_9 configuration function Add amdgpu nbio v7_9 configuration function. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/ras_mgr/Makefile | 0 .../gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.c | 125 +++++++++++++++++++++ .../gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.h | 30 +++++ 3 files changed, 155 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/Makefile create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.c create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.h diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/Makefile b/drivers/gpu/drm/amd/ras/ras_mgr/Makefile new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.c new file mode 100644 index 000000000000..2783f5875c7c --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu_ras_mgr.h" +#include "amdgpu_ras_nbio_v7_9.h" +#include "nbio/nbio_7_9_0_offset.h" +#include "nbio/nbio_7_9_0_sh_mask.h" +#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" + +static int nbio_v7_9_set_ras_controller_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + /* Dummy function, there is no initialization operation in driver */ + + return 0; +} + +static int nbio_v7_9_process_ras_controller_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* By design, the ih cookie for ras_controller_irq should be written + * to BIFring instead of general iv ring. 
However, due to known bif ring
+	 * hw bug, it has to be disabled. There is no chance the process function
+	 * will be invoked. Just leave it as a dummy one.
+	 */
+	return 0;
+}
+
+static int nbio_v7_9_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *src,
+					unsigned int type,
+					enum amdgpu_interrupt_state state)
+{
+	/* Dummy function, there is no initialization operation in driver */
+
+	return 0;
+}
+
+static int nbio_v7_9_process_err_event_athub_irq(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					struct amdgpu_iv_entry *entry)
+{
+	/* By design, the ih cookie for err_event_athub_irq should be written
+	 * to BIFring instead of general iv ring. However, due to known bif ring
+	 * hw bug, it has to be disabled. There is no chance the process function
+	 * will be invoked. Just leave it as a dummy one.
+	 */
+	return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbio_v7_9_ras_controller_irq_funcs = {
+	.set = nbio_v7_9_set_ras_controller_irq_state,
+	.process = nbio_v7_9_process_ras_controller_irq,
+};
+
+static const struct amdgpu_irq_src_funcs nbio_v7_9_ras_err_event_athub_irq_funcs = {
+	.set = nbio_v7_9_set_ras_err_event_athub_irq_state,
+	.process = nbio_v7_9_process_err_event_athub_irq,
+};
+
+static int nbio_v7_9_init_ras_controller_interrupt(struct ras_core_context *ras_core, bool state)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev;
+	int r;
+
+	/* init the irq funcs */
+	adev->nbio.ras_controller_irq.funcs =
+		&nbio_v7_9_ras_controller_irq_funcs;
+	adev->nbio.ras_controller_irq.num_types = 1;
+
+	/* register ras controller interrupt */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+			NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT,
+			&adev->nbio.ras_controller_irq);
+
+	return r;
+}
+
+static int nbio_v7_9_init_ras_err_event_athub_interrupt(struct ras_core_context *ras_core,
+					bool state)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev;
+	int r;
+
+	/* init the irq funcs */
+	adev->nbio.ras_err_event_athub_irq.funcs =
+		&nbio_v7_9_ras_err_event_athub_irq_funcs;
+	adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+	/* register ras err event athub interrupt */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+			NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+			&adev->nbio.ras_err_event_athub_irq);
+
+	return r;
+}
+
+const struct ras_nbio_sys_func amdgpu_ras_nbio_sys_func_v7_9 = {
+	.set_ras_controller_irq_state = nbio_v7_9_init_ras_controller_interrupt,
+	.set_ras_err_event_athub_irq_state = nbio_v7_9_init_ras_err_event_athub_interrupt,
+};
diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.h
new file mode 100644
index 000000000000..272259e9a0e7
--- /dev/null
+++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_RAS_NBIO_V7_9_H__ +#define __AMDGPU_RAS_NBIO_V7_9_H__ + +extern const struct ras_nbio_sys_func amdgpu_ras_nbio_sys_func_v7_9; + +#endif -- cgit v1.2.3 From 585fe8f3b365eac2ed440566ad17faba8f4826dd Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 17:59:25 +0800 Subject: drm/amd/ras: Add amdgpu mp1 v13_0 configuration function Add amdgpu mp1 v13_0 configuration function. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.c | 94 ++++++++++++++++++++++ .../gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.h | 30 +++++++ 2 files changed, 124 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.c create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.h diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.c new file mode 100644 index 000000000000..79a51b1603ac --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu_smu.h" +#include "amdgpu_reset.h" +#include "amdgpu_ras_mp1_v13_0.h" + +#define RAS_MP1_MSG_QueryValidMcaCeCount 0x3A +#define RAS_MP1_MSG_McaBankCeDumpDW 0x3B + +static int mp1_v13_0_get_valid_bank_count(struct ras_core_context *ras_core, + u32 msg, u32 *count) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + u32 smu_msg; + int ret = 0; + + if (!count) + return -EINVAL; + + smu_msg = (msg == RAS_MP1_MSG_QueryValidMcaCeCount) ? 
+ SMU_MSG_QueryValidMcaCeCount : SMU_MSG_QueryValidMcaCount; + + if (down_read_trylock(&adev->reset_domain->sem)) { + ret = amdgpu_smu_ras_send_msg(adev, smu_msg, 0, count); + up_read(&adev->reset_domain->sem); + } else { + ret = -RAS_CORE_GPU_IN_MODE1_RESET; + } + + if (ret) + *count = 0; + + return ret; +} + +static int mp1_v13_0_dump_valid_bank(struct ras_core_context *ras_core, + u32 msg, u32 idx, u32 reg_idx, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + uint32_t data[2] = {0, 0}; + uint32_t param; + int ret = 0; + int i, offset; + u32 smu_msg = (msg == RAS_MP1_MSG_McaBankCeDumpDW) ? + SMU_MSG_McaBankCeDumpDW : SMU_MSG_McaBankDumpDW; + + if (down_read_trylock(&adev->reset_domain->sem)) { + offset = reg_idx * 8; + for (i = 0; i < ARRAY_SIZE(data); i++) { + param = ((idx & 0xffff) << 16) | ((offset + (i << 2)) & 0xfffc); + ret = amdgpu_smu_ras_send_msg(adev, smu_msg, param, &data[i]); + if (ret) { + RAS_DEV_ERR(adev, "ACA failed to read register[%d], offset:0x%x\n", + reg_idx, offset); + break; + } + } + up_read(&adev->reset_domain->sem); + + if (!ret) + *val = (uint64_t)data[1] << 32 | data[0]; + } else { + ret = -RAS_CORE_GPU_IN_MODE1_RESET; + } + + return ret; +} + +const struct ras_mp1_sys_func amdgpu_ras_mp1_sys_func_v13_0 = { + .mp1_get_valid_bank_count = mp1_v13_0_get_valid_bank_count, + .mp1_dump_valid_bank = mp1_v13_0_dump_valid_bank, +}; + diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.h new file mode 100644 index 000000000000..71c614ae1ae4 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __AMDGPU_RAS_MP1_V13_0_H__ +#define __AMDGPU_RAS_MP1_V13_0_H__ +#include "ras.h" + +extern const struct ras_mp1_sys_func amdgpu_ras_mp1_sys_func_v13_0; + +#endif -- cgit v1.2.3 From 764e868928072e5d742edd4a0898260b0250f70b Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 18:00:48 +0800 Subject: drm/amd/ras: Add amdgpu eeprom i2c configuration function Add amdgpu eeprom i2c configuration function. 
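
Like the nbio and mp1 callbacks in the previous patches, these helpers
are meant to be handed to rascore through the ras_eeprom_sys_func table
declared in ras.h. A minimal sketch of that wiring (the exported symbol
name amdgpu_ras_eeprom_sys_func is illustrative; only the ops struct
itself is defined by rascore):

const struct ras_eeprom_sys_func amdgpu_ras_eeprom_sys_func = {
	.eeprom_i2c_xfer = ras_eeprom_i2c_xfer,
	.update_eeprom_i2c_config = ras_eeprom_i2c_config,
};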
Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.c | 181 +++++++++++++++++++++ .../drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.h | 27 +++ 2 files changed, 208 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.c create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.h diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.c new file mode 100644 index 000000000000..1bb7b7001ec7 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "amdgpu.h" +#include "amdgpu_atomfirmware.h" +#include "amdgpu_ras_eeprom.h" +#include "amdgpu_ras_mgr.h" +#include "amdgpu_ras_eeprom_i2c.h" +#include "ras_eeprom.h" + +/* These are memory addresses as would be seen by one or more EEPROM + * chips strung on the I2C bus, usually by manipulating pins 1-3 of a + * set of EEPROM devices. They form a continuous memory space. + * + * The I2C device address includes the device type identifier, 1010b, + * which is a reserved value and indicates that this is an I2C EEPROM + * device. It also includes the top 3 bits of the 19 bit EEPROM memory + * address, namely bits 18, 17, and 16. This makes up the 7 bit + * address sent on the I2C bus with bit 0 being the direction bit, + * which is not represented here, and sent by the hardware directly. + * + * For instance, + * 50h = 1010000b => device type identifier 1010b, bits 18:16 = 000b, address 0. + * 54h = 1010100b => --"--, bits 18:16 = 100b, address 40000h. + * 56h = 1010110b => --"--, bits 18:16 = 110b, address 60000h. + * Depending on the size of the I2C EEPROM device(s), bits 18:16 may + * address memory in a device or a device on the I2C bus, depending on + * the status of pins 1-3. See top of amdgpu_eeprom.c. + * + * The RAS table lives either at address 0 or address 40000h of EEPROM. 
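+ *
+ * Worked example (illustrative): a table at EEPROM memory address
+ * 40000h is reached through I2C device address 1010100b = 54h
+ * (1010b | bits 18:16 = 100b), while the low 16 bits of the memory
+ * address travel as the two-byte offset in the first message of each
+ * I2C transaction.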
+ */ +#define EEPROM_I2C_MADDR_0 0x0 +#define EEPROM_I2C_MADDR_4 0x40000 + +#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 0xF)) +#define to_amdgpu_ras(x) (container_of(x, struct amdgpu_ras, eeprom_control)) + +#define EEPROM_PAGE_BITS 8 +#define EEPROM_PAGE_SIZE (1U << EEPROM_PAGE_BITS) +#define EEPROM_PAGE_MASK (EEPROM_PAGE_SIZE - 1) + +#define EEPROM_OFFSET_SIZE 2 + +static int ras_eeprom_i2c_config(struct ras_core_context *ras_core) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + struct ras_eeprom_control *control = &ras_core->ras_eeprom; + u8 i2c_addr; + + if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) { + /* The address given by VBIOS is an 8-bit, wire-format + * address, i.e. the most significant byte. + * + * Normalize it to a 19-bit EEPROM address. Remove the + * device type identifier and make it a 7-bit address; + * then make it a 19-bit EEPROM address. See top of + * amdgpu_eeprom.c. + */ + i2c_addr = (i2c_addr & 0x0F) >> 1; + control->i2c_address = ((u32) i2c_addr) << 16; + return 0; + } + + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { + case IP_VERSION(13, 0, 5): + case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 14): + control->i2c_address = EEPROM_I2C_MADDR_4; + return 0; + default: + return -ENODATA; + } + return -ENODATA; +} + +static int ras_eeprom_i2c_xfer(struct ras_core_context *ras_core, u32 eeprom_addr, + u8 *eeprom_buf, u32 buf_size, bool read) +{ + struct i2c_adapter *i2c_adap = ras_core->ras_eeprom.i2c_adapter; + u8 eeprom_offset_buf[EEPROM_OFFSET_SIZE]; + struct i2c_msg msgs[] = { + { + .flags = 0, + .len = EEPROM_OFFSET_SIZE, + .buf = eeprom_offset_buf, + }, + { + .flags = read ? I2C_M_RD : 0, + }, + }; + const u8 *p = eeprom_buf; + int r; + u16 len; + + for (r = 0; buf_size > 0; + buf_size -= len, eeprom_addr += len, eeprom_buf += len) { + /* Set the EEPROM address we want to write to/read from. + */ + msgs[0].addr = MAKE_I2C_ADDR(eeprom_addr); + msgs[1].addr = msgs[0].addr; + msgs[0].buf[0] = (eeprom_addr >> 8) & 0xff; + msgs[0].buf[1] = eeprom_addr & 0xff; + + if (!read) { + /* Write the maximum amount of data, without + * crossing the device's page boundary, as per + * its spec. Partial page writes are allowed, + * starting at any location within the page, + * so long as the page boundary isn't crossed + * over (actually the page pointer rolls + * over). + * + * As per the AT24CM02 EEPROM spec, after + * writing into a page, the I2C driver should + * terminate the transfer, i.e. in + * "i2c_transfer()" below, with a STOP + * condition, so that the self-timed write + * cycle begins. This is implied for the + * "i2c_transfer()" abstraction. + */ + len = min(EEPROM_PAGE_SIZE - (eeprom_addr & EEPROM_PAGE_MASK), + buf_size); + } else { + /* Reading from the EEPROM has no limitation + * on the number of bytes read from the EEPROM + * device--they are simply sequenced out. + * Keep in mind that i2c_msg.len is u16 type. + */ + len = min(U16_MAX, buf_size); + } + msgs[1].len = len; + msgs[1].buf = eeprom_buf; + + + /* This constitutes a START-STOP transaction. + */ + r = i2c_transfer(i2c_adap, msgs, ARRAY_SIZE(msgs)); + if (r != ARRAY_SIZE(msgs)) + break; + + if (!read) { + /* According to EEPROM specs the length of the + * self-writing cycle, tWR (tW), is 10 ms. + * + * TODO: Use polling on ACK, aka Acknowledge + * Polling, to minimize waiting for the + * internal write cycle to complete, as it is + * usually smaller than tWR (tW). + */ + msleep(10); + } + } + + return r < 0 ? 
r : eeprom_buf - p; +} + +const struct ras_eeprom_sys_func amdgpu_ras_eeprom_i2c_sys_func = { + .eeprom_i2c_xfer = ras_eeprom_i2c_xfer, + .update_eeprom_i2c_config = ras_eeprom_i2c_config, +}; diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.h new file mode 100644 index 000000000000..3b5878605411 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_RAS_EEPROM_I2C_H__ +#define __AMDGPU_RAS_EEPROM_I2C_H__ +#include "ras.h" + +extern const struct ras_eeprom_sys_func amdgpu_ras_eeprom_i2c_sys_func; +#endif -- cgit v1.2.3 From b658fadf1cbe3f64aacb95e09beda6c479fc2f6b Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 18:02:13 +0800 Subject: drm/amd/ras: Amdgpu handle ras ioctl command Amdgpu handle ras ioctl command. V2: Remove non-standard device information. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c | 363 +++++++++++++++++++++++ drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h | 55 ++++ 2 files changed, 418 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c new file mode 100644 index 000000000000..195ca51a96d5 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include "amdgpu.h" +#include "amdgpu_ras.h" +#include "ras_sys.h" +#include "amdgpu_ras_cmd.h" +#include "amdgpu_ras_mgr.h" + +/* inject address is 52 bits */ +#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52) + +#define AMDGPU_RAS_TYPE_RASCORE 0x1 +#define AMDGPU_RAS_TYPE_AMDGPU 0x2 +#define AMDGPU_RAS_TYPE_VF 0x3 + +static int amdgpu_ras_query_interface_info(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + struct ras_query_interface_info_rsp *output_data = + (struct ras_query_interface_info_rsp *)cmd->output_buff_raw; + int ret; + + if (cmd->input_size != sizeof(struct ras_query_interface_info_req)) + return RAS_CMD__ERROR_INVALID_INPUT_SIZE; + + ret = ras_cmd_query_interface_info(ras_core, output_data); + if (!ret) { + output_data->plat_major_ver = 0; + output_data->plat_minor_ver = 0; + + output_data->interface_type = amdgpu_sriov_vf(adev) ? + RAS_CMD_INTERFACE_TYPE_VF : RAS_CMD_INTERFACE_TYPE_AMDGPU; + + cmd->output_size = sizeof(struct ras_query_interface_info_rsp); + } + + return ret; +} + +static struct ras_core_context *ras_cmd_get_ras_core(uint64_t dev_handle) +{ + struct ras_core_context *ras_core; + + if (!dev_handle || (dev_handle == RAS_CMD_DEV_HANDLE_MAGIC)) + return NULL; + + ras_core = (struct ras_core_context *)(dev_handle ^ RAS_CMD_DEV_HANDLE_MAGIC); + + if (ras_cmd_get_dev_handle(ras_core) == dev_handle) + return ras_core; + + return NULL; +} + +static int amdgpu_ras_get_devices_info(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + struct ras_cmd_devices_info_rsp *output_data = + (struct ras_cmd_devices_info_rsp *)cmd->output_buff_raw; + struct ras_cmd_dev_info *dev_info; + + dev_info = &output_data->devs[0]; + dev_info->dev_handle = ras_cmd_get_dev_handle(ras_core); + dev_info->oam_id = adev->smuio.funcs->get_socket_id(adev); + dev_info->ecc_enabled = 1; + dev_info->ecc_supported = 1; + + output_data->dev_num = 1; + output_data->version = 0; + cmd->output_size = sizeof(struct ras_cmd_devices_info_rsp); + + return 0; +} + +static int amdgpu_ras_trigger_error_prepare(struct ras_core_context *ras_core, + struct ras_cmd_inject_error_req *block_info) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + int ret; + + if (block_info->block_id == TA_RAS_BLOCK__XGMI_WAFL) { + if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) + RAS_DEV_WARN(adev, "Failed to disallow df cstate"); + + ret = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DISALLOW); + if (ret && (ret != -EOPNOTSUPP)) + RAS_DEV_WARN(adev, "Failed to disallow XGMI power down"); + } + + return 0; +} + +static int amdgpu_ras_trigger_error_end(struct ras_core_context *ras_core, + struct ras_cmd_inject_error_req *block_info) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + int ret; + + if (block_info->block_id == TA_RAS_BLOCK__XGMI_WAFL) { + if 
(amdgpu_ras_intr_triggered()) + return 0; + + ret = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DEFAULT); + if (ret && (ret != -EOPNOTSUPP)) + RAS_DEV_WARN(adev, "Failed to allow XGMI power down"); + + if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) + RAS_DEV_WARN(adev, "Failed to allow df cstate"); + } + + return 0; +} + +static uint64_t local_addr_to_xgmi_global_addr(struct ras_core_context *ras_core, + uint64_t addr) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + struct amdgpu_xgmi *xgmi = &adev->gmc.xgmi; + + return (addr + xgmi->physical_node_id * xgmi->node_segment_size); +} + +static int amdgpu_ras_inject_error(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + struct ras_cmd_inject_error_req *req = + (struct ras_cmd_inject_error_req *)cmd->input_buff_raw; + int ret = RAS_CMD__ERROR_GENERIC; + + if (req->block_id == RAS_BLOCK_ID__UMC) { + if (amdgpu_ras_mgr_check_retired_addr(adev, req->address)) { + RAS_DEV_WARN(ras_core->dev, + "RAS WARN: inject: 0x%llx has already been marked as bad!\n", + req->address); + return RAS_CMD__ERROR_ACCESS_DENIED; + } + + if ((req->address >= adev->gmc.mc_vram_size && + adev->gmc.mc_vram_size) || + (req->address >= RAS_UMC_INJECT_ADDR_LIMIT)) { + RAS_DEV_WARN(adev, "RAS WARN: input address 0x%llx is invalid.", + req->address); + return RAS_CMD__ERROR_INVALID_INPUT_DATA; + } + + /* Calculate XGMI relative offset */ + if (adev->gmc.xgmi.num_physical_nodes > 1 && + req->block_id != RAS_BLOCK_ID__GFX) { + req->address = local_addr_to_xgmi_global_addr(ras_core, req->address); + } + } + + amdgpu_ras_trigger_error_prepare(ras_core, req); + ret = rascore_handle_cmd(ras_core, cmd, data); + amdgpu_ras_trigger_error_end(ras_core, req); + if (ret) { + RAS_DEV_ERR(adev, "ras inject block %u failed %d\n", req->block_id, ret); + ret = RAS_CMD__ERROR_ACCESS_DENIED; + } + + + return ret; +} + +static int amdgpu_ras_get_ras_safe_fb_addr_ranges(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + struct ras_cmd_dev_handle *input_data = + (struct ras_cmd_dev_handle *)cmd->input_buff_raw; + struct ras_cmd_ras_safe_fb_address_ranges_rsp *ranges = + (struct ras_cmd_ras_safe_fb_address_ranges_rsp *)cmd->output_buff_raw; + struct amdgpu_mem_partition_info *mem_ranges; + uint32_t i = 0; + + if (cmd->input_size != sizeof(*input_data)) + return RAS_CMD__ERROR_INVALID_INPUT_DATA; + + mem_ranges = adev->gmc.mem_partitions; + for (i = 0; i < adev->gmc.num_mem_partitions; i++) { + ranges->range[i].start = mem_ranges[i].range.fpfn << AMDGPU_GPU_PAGE_SHIFT; + ranges->range[i].size = mem_ranges[i].size; + ranges->range[i].idx = i; + } + + ranges->num_ranges = adev->gmc.num_mem_partitions; + + ranges->version = 0; + cmd->output_size = sizeof(struct ras_cmd_ras_safe_fb_address_ranges_rsp); + + return RAS_CMD__SUCCESS; +} + +static int ras_translate_fb_address(struct ras_core_context *ras_core, + enum ras_fb_addr_type src_type, + enum ras_fb_addr_type dest_type, + union ras_translate_fb_address *src_addr, + union ras_translate_fb_address *dest_addr) +{ + uint64_t soc_phy_addr; + int ret = RAS_CMD__SUCCESS; + + /* Does not need to be queued as event as this is a SW translation */ + switch (src_type) { + case RAS_FB_ADDR_SOC_PHY: + soc_phy_addr = src_addr->soc_phy_addr; + break; + case RAS_FB_ADDR_BANK: + ret = 
ras_cmd_translate_bank_to_soc_pa(ras_core, + src_addr->bank_addr, &soc_phy_addr); + if (ret) + return RAS_CMD__ERROR_GENERIC; + break; + default: + return RAS_CMD__ERROR_INVALID_CMD; + } + + switch (dest_type) { + case RAS_FB_ADDR_SOC_PHY: + dest_addr->soc_phy_addr = soc_phy_addr; + break; + case RAS_FB_ADDR_BANK: + ret = ras_cmd_translate_soc_pa_to_bank(ras_core, + soc_phy_addr, &dest_addr->bank_addr); + if (ret) + return RAS_CMD__ERROR_GENERIC; + break; + default: + return RAS_CMD__ERROR_INVALID_CMD; + } + + return ret; +} + +static int amdgpu_ras_translate_fb_address(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data) +{ + struct ras_cmd_translate_fb_address_req *req_buff = + (struct ras_cmd_translate_fb_address_req *)cmd->input_buff_raw; + struct ras_cmd_translate_fb_address_rsp *rsp_buff = + (struct ras_cmd_translate_fb_address_rsp *)cmd->output_buff_raw; + int ret = RAS_CMD__ERROR_GENERIC; + + if (cmd->input_size != sizeof(struct ras_cmd_translate_fb_address_req)) + return RAS_CMD__ERROR_INVALID_INPUT_SIZE; + + if ((req_buff->src_addr_type >= RAS_FB_ADDR_UNKNOWN) || + (req_buff->dest_addr_type >= RAS_FB_ADDR_UNKNOWN)) + return RAS_CMD__ERROR_INVALID_INPUT_DATA; + + ret = ras_translate_fb_address(ras_core, req_buff->src_addr_type, + req_buff->dest_addr_type, &req_buff->trans_addr, &rsp_buff->trans_addr); + if (ret) + return RAS_CMD__ERROR_GENERIC; + + rsp_buff->version = 0; + cmd->output_size = sizeof(struct ras_cmd_translate_fb_address_rsp); + + return RAS_CMD__SUCCESS; +} + +static struct ras_cmd_func_map amdgpu_ras_cmd_maps[] = { + {RAS_CMD__INJECT_ERROR, amdgpu_ras_inject_error}, + {RAS_CMD__GET_SAFE_FB_ADDRESS_RANGES, amdgpu_ras_get_ras_safe_fb_addr_ranges}, + {RAS_CMD__TRANSLATE_FB_ADDRESS, amdgpu_ras_translate_fb_address}, +}; + +int amdgpu_ras_handle_cmd(struct ras_core_context *ras_core, struct ras_cmd_ioctl *cmd, void *data) +{ + struct ras_cmd_func_map *ras_cmd = NULL; + int i, res; + + for (i = 0; i < ARRAY_SIZE(amdgpu_ras_cmd_maps); i++) { + if (cmd->cmd_id == amdgpu_ras_cmd_maps[i].cmd_id) { + ras_cmd = &amdgpu_ras_cmd_maps[i]; + break; + } + } + + if (ras_cmd) + res = ras_cmd->func(ras_core, cmd, NULL); + else + res = RAS_CMD__ERROR_UKNOWN_CMD; + + return res; +} + +int amdgpu_ras_cmd_ioctl_handler(struct ras_core_context *ras_core, + uint8_t *cmd_buf, uint32_t buf_size) +{ + struct ras_cmd_ioctl *cmd = (struct ras_cmd_ioctl *)cmd_buf; + struct ras_core_context *cmd_core = NULL; + struct ras_cmd_dev_handle *cmd_handle = NULL; + int timeout = 60; + int res; + + cmd->cmd_res = RAS_CMD__ERROR_INVALID_CMD; + cmd->output_size = 0; + + if (!ras_core_is_enabled(ras_core)) + return RAS_CMD__ERROR_ACCESS_DENIED; + + if (cmd->cmd_id == RAS_CMD__QUERY_INTERFACE_INFO) { + cmd->cmd_res = amdgpu_ras_query_interface_info(ras_core, cmd); + } else if (cmd->cmd_id == RAS_CMD__GET_DEVICES_INFO) { + cmd->cmd_res = amdgpu_ras_get_devices_info(ras_core, cmd); + } else { + cmd_handle = (struct ras_cmd_dev_handle *)cmd->input_buff_raw; + cmd_core = ras_cmd_get_ras_core(cmd_handle->dev_handle); + if (!cmd_core) + return RAS_CMD__ERROR_INVALID_INPUT_DATA; + + while (ras_core_gpu_in_reset(cmd_core)) { + msleep(1000); + if (!timeout--) + return RAS_CMD__ERROR_TIMEOUT; + } + + + if (!ras_core_is_enabled(cmd_core)) + return RAS_CMD__ERROR_ACCESS_DENIED; + + res = amdgpu_ras_handle_cmd(cmd_core, cmd, NULL); + if (res == RAS_CMD__ERROR_UKNOWN_CMD) + res = rascore_handle_cmd(cmd_core, cmd, NULL); + + cmd->cmd_res = res; + } + + if ((cmd->cmd_res == RAS_CMD__SUCCESS) && + 
((cmd->output_size + sizeof(*cmd)) > buf_size)) { + RAS_INFO("Insufficient command buffer size 0x%x!\n", buf_size); + return RAS_CMD__SUCCESS_EXEED_BUFFER; + } + + return RAS_CMD__SUCCESS; +} diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h new file mode 100644 index 000000000000..7017198f1bac --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __AMDGPU_RAS_CMD_H__ +#define __AMDGPU_RAS_CMD_H__ +#include "ras.h" + +enum amdgpu_ras_cmd_id { + RAS_CMD__AMDGPU_BEGIN = RAS_CMD_ID_AMDGPU_START, + RAS_CMD__TRANSLATE_MEMORY_FD, + RAS_CMD__AMDGPU_SUPPORTED_MAX = RAS_CMD_ID_AMDGPU_END, +}; + +struct ras_cmd_translate_memory_fd_req { + struct ras_cmd_dev_handle dev; + uint32_t type; + uint32_t fd; + uint64_t address; + uint32_t reserved[4]; +}; + +struct ras_cmd_translate_memory_fd_rsp { + uint32_t version; + uint32_t padding; + uint64_t start; + uint64_t size; + uint32_t reserved[2]; +}; + +int amdgpu_ras_handle_cmd(struct ras_core_context *ras_core, + struct ras_cmd_ioctl *cmd, void *data); +int amdgpu_ras_cmd_ioctl_handler(struct ras_core_context *ras_core, + uint8_t *cmd_buf, uint32_t buf_size); + +#endif -- cgit v1.2.3 From ffdab7f4e5707ac0827e5356893f25e7a1c1624c Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 24 Mar 2025 18:44:11 +0800 Subject: drm/amd/ras: Add amdgpu ras system functions Add amdgpu ras system functions. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c | 268 +++++++++++++++++++++++ drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h | 109 +++++++++ 2 files changed, 377 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c new file mode 100644 index 000000000000..40071b876333 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "ras_sys.h" +#include "amdgpu_ras_mgr.h" +#include "amdgpu_ras.h" +#include "amdgpu_reset.h" + +static int amdgpu_ras_sys_detect_fatal_event(struct ras_core_context *ras_core, void *data) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + uint64_t seq_no; + + seq_no = amdgpu_ras_mgr_gen_ras_event_seqno(adev, RAS_SEQNO_TYPE_UE); + RAS_DEV_INFO(adev, + "{%llu} Uncorrectable hardware error(ERREVENT_ATHUB_INTERRUPT) detected!\n", + seq_no); + + return amdgpu_ras_process_handle_unexpected_interrupt(adev, data); +} + +static int amdgpu_ras_sys_poison_consumption_event(struct ras_core_context *ras_core, + void *data) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + struct ras_event_req *req = (struct ras_event_req *)data; + pasid_notify pasid_fn; + + if (!req) + return -EINVAL; + + if (req->pasid_fn) { + pasid_fn = (pasid_notify)req->pasid_fn; + pasid_fn(adev, req->pasid, req->data); + } + + return 0; +} + +static int amdgpu_ras_sys_gen_seqno(struct ras_core_context *ras_core, + enum ras_seqno_type seqno_type, uint64_t *seqno) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + struct ras_event_manager *event_mgr; + struct ras_event_state *event_state; + struct amdgpu_hive_info *hive; + enum ras_event_type event_type; + uint64_t seq_no; + + if (!ras_mgr || !seqno || + (seqno_type >= RAS_SEQNO_TYPE_COUNT_MAX)) + return -EINVAL; + + switch (seqno_type) { + case RAS_SEQNO_TYPE_UE: + event_type = RAS_EVENT_TYPE_FATAL; + break; + case RAS_SEQNO_TYPE_CE: + case RAS_SEQNO_TYPE_DE: + event_type = RAS_EVENT_TYPE_POISON_CREATION; + break; + case RAS_SEQNO_TYPE_POISON_CONSUMPTION: + event_type = RAS_EVENT_TYPE_POISON_CONSUMPTION; + break; + default: + event_type = RAS_EVENT_TYPE_INVALID; + break; + } + + hive = amdgpu_get_xgmi_hive(adev); + event_mgr = hive ? 
&hive->event_mgr : &ras_mgr->ras_event_mgr; + event_state = &event_mgr->event_state[event_type]; + if ((event_type == RAS_EVENT_TYPE_FATAL) && amdgpu_ras_in_recovery(adev)) { + seq_no = event_state->last_seqno; + } else { + seq_no = atomic64_inc_return(&event_mgr->seqno); + event_state->last_seqno = seq_no; + atomic64_inc(&event_state->count); + } + amdgpu_put_xgmi_hive(hive); + + *seqno = seq_no; + return 0; + +} + +static int amdgpu_ras_sys_event_notifier(struct ras_core_context *ras_core, + enum ras_notify_event event_id, void *data) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(ras_core->dev); + int ret = 0; + + switch (event_id) { + case RAS_EVENT_ID__BAD_PAGE_DETECTED: + schedule_delayed_work(&ras_mgr->retire_page_dwork, 0); + break; + case RAS_EVENT_ID__POISON_CONSUMPTION: + amdgpu_ras_sys_poison_consumption_event(ras_core, data); + break; + case RAS_EVENT_ID__RESERVE_BAD_PAGE: + ret = amdgpu_ras_reserve_page(ras_core->dev, *(uint64_t *)data); + break; + case RAS_EVENT_ID__FATAL_ERROR_DETECTED: + ret = amdgpu_ras_sys_detect_fatal_event(ras_core, data); + break; + case RAS_EVENT_ID__UPDATE_BAD_PAGE_NUM: + ret = amdgpu_dpm_send_hbm_bad_pages_num(ras_core->dev, *(uint32_t *)data); + break; + case RAS_EVENT_ID__UPDATE_BAD_CHANNEL_BITMAP: + ret = amdgpu_dpm_send_hbm_bad_channel_flag(ras_core->dev, *(uint32_t *)data); + break; + case RAS_EVENT_ID__DEVICE_RMA: + ras_log_ring_add_log_event(ras_core, RAS_LOG_EVENT_RMA, NULL, NULL); + ret = amdgpu_dpm_send_rma_reason(ras_core->dev); + break; + case RAS_EVENT_ID__RESET_GPU: + ret = amdgpu_ras_mgr_reset_gpu(ras_core->dev, *(uint32_t *)data); + break; + default: + RAS_DEV_WARN(ras_core->dev, "Invalid ras notify event:%d\n", event_id); + break; + } + + return ret; +} + +static u64 amdgpu_ras_sys_get_utc_second_timestamp(struct ras_core_context *ras_core) +{ + return ktime_get_real_seconds(); +} + +static int amdgpu_ras_sys_check_gpu_status(struct ras_core_context *ras_core, + uint32_t *status) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + uint32_t gpu_status = 0; + + if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) + gpu_status |= RAS_GPU_STATUS__IN_RESET; + + if (amdgpu_sriov_vf(adev)) + gpu_status |= RAS_GPU_STATUS__IS_VF; + + *status = gpu_status; + + return 0; +} + +static int amdgpu_ras_sys_get_device_system_info(struct ras_core_context *ras_core, + struct device_system_info *dev_info) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + + dev_info->device_id = adev->pdev->device; + dev_info->vendor_id = adev->pdev->vendor; + dev_info->socket_id = adev->smuio.funcs->get_socket_id(adev); + + return 0; +} + +static int amdgpu_ras_sys_gpu_reset_lock(struct ras_core_context *ras_core, + bool down, bool try) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + int ret = 0; + + if (down && try) + ret = down_read_trylock(&adev->reset_domain->sem); + else if (down) + down_read(&adev->reset_domain->sem); + else + up_read(&adev->reset_domain->sem); + + return ret; +} + +static bool amdgpu_ras_sys_detect_ras_interrupt(struct ras_core_context *ras_core) +{ + return !!atomic_read(&amdgpu_ras_in_intr); +} + +static int amdgpu_ras_sys_get_gpu_mem(struct ras_core_context *ras_core, + enum gpu_mem_type mem_type, struct gpu_mem_block *gpu_mem) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + struct psp_context *psp = &adev->psp; + struct psp_ring *psp_ring; + struct ta_mem_context *mem_ctx; + + if (mem_type == 
GPU_MEM_TYPE_RAS_PSP_RING) { + psp_ring = &psp->km_ring; + gpu_mem->mem_bo = adev->firmware.rbuf; + gpu_mem->mem_size = psp_ring->ring_size; + gpu_mem->mem_mc_addr = psp_ring->ring_mem_mc_addr; + gpu_mem->mem_cpu_addr = psp_ring->ring_mem; + } else if (mem_type == GPU_MEM_TYPE_RAS_PSP_CMD) { + gpu_mem->mem_bo = psp->cmd_buf_bo; + gpu_mem->mem_size = PSP_CMD_BUFFER_SIZE; + gpu_mem->mem_mc_addr = psp->cmd_buf_mc_addr; + gpu_mem->mem_cpu_addr = psp->cmd_buf_mem; + } else if (mem_type == GPU_MEM_TYPE_RAS_PSP_FENCE) { + gpu_mem->mem_bo = psp->fence_buf_bo; + gpu_mem->mem_size = PSP_FENCE_BUFFER_SIZE; + gpu_mem->mem_mc_addr = psp->fence_buf_mc_addr; + gpu_mem->mem_cpu_addr = psp->fence_buf; + } else if (mem_type == GPU_MEM_TYPE_RAS_TA_FW) { + gpu_mem->mem_bo = psp->fw_pri_bo; + gpu_mem->mem_size = PSP_1_MEG; + gpu_mem->mem_mc_addr = psp->fw_pri_mc_addr; + gpu_mem->mem_cpu_addr = psp->fw_pri_buf; + } else if (mem_type == GPU_MEM_TYPE_RAS_TA_CMD) { + mem_ctx = &psp->ras_context.context.mem_context; + gpu_mem->mem_bo = mem_ctx->shared_bo; + gpu_mem->mem_size = mem_ctx->shared_mem_size; + gpu_mem->mem_mc_addr = mem_ctx->shared_mc_addr; + gpu_mem->mem_cpu_addr = mem_ctx->shared_buf; + } else { + return -EINVAL; + } + + if (!gpu_mem->mem_bo || !gpu_mem->mem_size || + !gpu_mem->mem_mc_addr || !gpu_mem->mem_cpu_addr) { + RAS_DEV_ERR(ras_core->dev, "The ras psp gpu memory is invalid!\n"); + return -ENOMEM; + } + + return 0; +} + +static int amdgpu_ras_sys_put_gpu_mem(struct ras_core_context *ras_core, + enum gpu_mem_type mem_type, struct gpu_mem_block *gpu_mem) +{ + + return 0; +} + +const struct ras_sys_func amdgpu_ras_sys_fn = { + .ras_notifier = amdgpu_ras_sys_event_notifier, + .get_utc_second_timestamp = amdgpu_ras_sys_get_utc_second_timestamp, + .gen_seqno = amdgpu_ras_sys_gen_seqno, + .check_gpu_status = amdgpu_ras_sys_check_gpu_status, + .get_device_system_info = amdgpu_ras_sys_get_device_system_info, + .gpu_reset_lock = amdgpu_ras_sys_gpu_reset_lock, + .detect_ras_interrupt = amdgpu_ras_sys_detect_ras_interrupt, + .get_gpu_mem = amdgpu_ras_sys_get_gpu_mem, + .put_gpu_mem = amdgpu_ras_sys_put_gpu_mem, +}; diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h b/drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h new file mode 100644 index 000000000000..c48ff26525d6 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __RAS_SYS_H__ +#define __RAS_SYS_H__ +#include +#include +#include +#include "amdgpu.h" + +#define RAS_DEV_ERR(device, fmt, ...) \ + do { \ + if (device) \ + dev_err(((struct amdgpu_device *)device)->dev, fmt, ##__VA_ARGS__); \ + else \ + printk(KERN_ERR fmt, ##__VA_ARGS__); \ + } while (0) + +#define RAS_DEV_WARN(device, fmt, ...) \ + do { \ + if (device) \ + dev_warn(((struct amdgpu_device *)device)->dev, fmt, ##__VA_ARGS__); \ + else \ + printk(KERN_WARNING fmt, ##__VA_ARGS__); \ + } while (0) + +#define RAS_DEV_INFO(device, fmt, ...) \ + do { \ + if (device) \ + dev_info(((struct amdgpu_device *)device)->dev, fmt, ##__VA_ARGS__); \ + else \ + printk(KERN_INFO fmt, ##__VA_ARGS__); \ + } while (0) + +#define RAS_DEV_DBG(device, fmt, ...) \ + do { \ + if (device) \ + dev_dbg(((struct amdgpu_device *)device)->dev, fmt, ##__VA_ARGS__); \ + else \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ + } while (0) + +#define RAS_INFO(fmt, ...) printk(KERN_INFO fmt, ##__VA_ARGS__) + +#define RAS_DEV_RREG32_SOC15(dev, ip, inst, reg) \ +({ \ + struct amdgpu_device *adev = (struct amdgpu_device *)dev; \ + __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ + 0, ip##_HWIP, inst); \ +}) + +#define RAS_DEV_WREG32_SOC15(dev, ip, inst, reg, value) \ +({ \ + struct amdgpu_device *adev = (struct amdgpu_device *)dev; \ + __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), \ + value, 0, ip##_HWIP, inst); \ +}) + +/* GET_INST returns the physical instance corresponding to a logical instance */ +#define RAS_GET_INST(dev, ip, inst) \ +({ \ + struct amdgpu_device *adev = (struct amdgpu_device *)dev; \ + adev->ip_map.logical_to_dev_inst ? \ + adev->ip_map.logical_to_dev_inst(adev, ip##_HWIP, inst) : inst; \ +}) + +#define RAS_GET_MASK(dev, ip, mask) \ +({ \ + struct amdgpu_device *adev = (struct amdgpu_device *)dev; \ + (adev->ip_map.logical_to_dev_mask ? \ + adev->ip_map.logical_to_dev_mask(adev, ip##_HWIP, mask) : mask); \ +}) + +static inline void *ras_radix_tree_delete_iter(struct radix_tree_root *root, void *iter) +{ + return radix_tree_delete(root, ((struct radix_tree_iter *)iter)->index); +} + +static inline long ras_wait_event_interruptible_timeout(void *wq_head, + int (*condition)(void *param), void *param, unsigned int timeout) +{ + return wait_event_interruptible_timeout(*(wait_queue_head_t *)wq_head, + condition(param), timeout); +} + +extern const struct ras_sys_func amdgpu_ras_sys_fn; + +#endif -- cgit v1.2.3 From e221ac6f4271689131765b9692327baedf343e38 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 24 Mar 2025 18:46:06 +0800 Subject: drm/amd/ras: Amdgpu preprocesses ras interrupts Amdgpu preprocesses ras interrupts. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c | 126 +++++++++++++++++++++ .../gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.h | 37 ++++++ 2 files changed, 163 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.h diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c new file mode 100644 index 000000000000..6727fc9a2b9b --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "amdgpu.h" +#include "amdgpu_reset.h" +#include "amdgpu_xgmi.h" +#include "ras_sys.h" +#include "amdgpu_ras_mgr.h" +#include "amdgpu_ras_process.h" + +#define RAS_MGR_RETIRE_PAGE_INTERVAL 100 + +static void ras_process_retire_page_dwork(struct work_struct *work) +{ + struct amdgpu_ras_mgr *ras_mgr = + container_of(work, struct amdgpu_ras_mgr, retire_page_dwork.work); + struct amdgpu_device *adev = ras_mgr->adev; + int ret; + + if (amdgpu_ras_is_rma(adev)) + return; + + /* If gpu reset is ongoing, delay retiring the bad pages */ + if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) { + schedule_delayed_work(&ras_mgr->retire_page_dwork, + msecs_to_jiffies(RAS_MGR_RETIRE_PAGE_INTERVAL * 3)); + return; + } + + ret = ras_umc_handle_bad_pages(ras_mgr->ras_core, NULL); + if (!ret) + schedule_delayed_work(&ras_mgr->retire_page_dwork, + msecs_to_jiffies(RAS_MGR_RETIRE_PAGE_INTERVAL)); +} + +int amdgpu_ras_process_init(struct amdgpu_device *adev) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + + INIT_DELAYED_WORK(&ras_mgr->retire_page_dwork, ras_process_retire_page_dwork); + + return 0; +} + +int amdgpu_ras_process_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + + /* Save all cached bad pages to eeprom */ + flush_delayed_work(&ras_mgr->retire_page_dwork); + cancel_delayed_work_sync(&ras_mgr->retire_page_dwork); + return 0; +} + +int amdgpu_ras_process_handle_umc_interrupt(struct amdgpu_device *adev, void *data) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + + if (!ras_mgr->ras_core) + return -EINVAL; + + return ras_process_add_interrupt_req(ras_mgr->ras_core, NULL, true); +} + +int amdgpu_ras_process_handle_unexpected_interrupt(struct amdgpu_device *adev, void *data) +{ + amdgpu_ras_set_fed(adev, true); + return amdgpu_ras_mgr_reset_gpu(adev, AMDGPU_RAS_GPU_RESET_MODE1_RESET); +} + +int amdgpu_ras_process_handle_consumption_interrupt(struct amdgpu_device *adev, void *data) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + struct ras_ih_info *ih_info = (struct ras_ih_info *)data; + struct ras_event_req req; + uint64_t seqno; + + if (!ih_info) + return -EINVAL; + + memset(&req, 0, sizeof(req)); + req.block = ih_info->block; + req.data = ih_info->data; + req.pasid = ih_info->pasid; + req.pasid_fn = ih_info->pasid_fn; + req.reset = ih_info->reset; + + seqno = 
ras_core_get_seqno(ras_mgr->ras_core, + RAS_SEQNO_TYPE_POISON_CONSUMPTION, false); + + /* When the ACA register cannot be read from FW, the poison + * consumption seqno in the fifo will not pop up, so it is + * necessary to check whether the seqno is the previous seqno. + */ + if (seqno == ras_mgr->last_poison_consumption_seqno) { + /* Pop and discard the previous seqno */ + ras_core_get_seqno(ras_mgr->ras_core, + RAS_SEQNO_TYPE_POISON_CONSUMPTION, true); + seqno = ras_core_get_seqno(ras_mgr->ras_core, + RAS_SEQNO_TYPE_POISON_CONSUMPTION, false); + } + ras_mgr->last_poison_consumption_seqno = seqno; + req.seqno = seqno; + + return ras_process_add_interrupt_req(ras_mgr->ras_core, &req, false); +} diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.h new file mode 100644 index 000000000000..b9502bd21beb --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (c) 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef __AMDGPU_RAS_PROCESS_H__ +#define __AMDGPU_RAS_PROCESS_H__ +#include "ras_process.h" +#include "amdgpu_ras_mgr.h" + +enum ras_ih_type; +int amdgpu_ras_process_init(struct amdgpu_device *adev); +int amdgpu_ras_process_fini(struct amdgpu_device *adev); +int amdgpu_ras_process_handle_umc_interrupt(struct amdgpu_device *adev, + void *data); +int amdgpu_ras_process_handle_unexpected_interrupt(struct amdgpu_device *adev, + void *data); +int amdgpu_ras_process_handle_consumption_interrupt(struct amdgpu_device *adev, + void *data); +#endif -- cgit v1.2.3 From fa0b203cd902608a214130c83e6a6d76fe04b708 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 18:03:28 +0800 Subject: drm/amd/ras: Add amdgpu ras management function. Add amdgpu system configuration parameters and functions needed by rascore. 
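The bad page threshold plumbing below maps the amdgpu_bad_page_threshold
module parameter onto an EEPROM record limit. A minimal standalone sketch of
the two formula-based modes (illustrative only; the macro bodies are copied
from this patch, the VRAM size is a made-up example):

  #include <stdio.h>
  #include <stdint.h>

  #define ESTIMATE_BAD_PAGE_THRESHOLD(size) ((size) / (100 * 1024 * 1024ULL))
  #define COUNT_BAD_PAGE_THRESHOLD(size)    (((size) >> 21) << 4)
  #define RAS_RESERVED_VRAM_SIZE_DEFAULT    (16ULL << 20)

  int main(void)
  {
          uint64_t vram = 192ULL << 30; /* hypothetical 192 GB VRAM part */

          /* ~1 bad page per 100 MB of VRAM -> 1966 records here */
          printf("estimated threshold: %llu\n",
                 (unsigned long long)ESTIMATE_BAD_PAGE_THRESHOLD(vram));

          /* derived from the 16 MB reserved VRAM region -> 128 records */
          printf("reserved-based threshold: %llu\n",
                 (unsigned long long)COUNT_BAD_PAGE_THRESHOLD(RAS_RESERVED_VRAM_SIZE_DEFAULT));
          return 0;
  }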
Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c | 560 +++++++++++++++++++++++ drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h | 73 +++ 2 files changed, 633 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c create mode 100644 drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c new file mode 100644 index 000000000000..a038c87c045d --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c @@ -0,0 +1,560 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_reset.h" +#include "amdgpu_xgmi.h" +#include "ras_sys.h" +#include "amdgpu_ras_mgr.h" +#include "amdgpu_ras_cmd.h" +#include "amdgpu_ras_process.h" +#include "amdgpu_ras_eeprom_i2c.h" +#include "amdgpu_ras_mp1_v13_0.h" +#include "amdgpu_ras_nbio_v7_9.h" + +#define MAX_SOCKET_NUM_PER_HIVE 8 +#define MAX_AID_NUM_PER_SOCKET 4 +#define MAX_XCD_NUM_PER_AID 2 + +/* typical ECC bad page rate is 1 bad page per 100MB VRAM */ +#define ESTIMATE_BAD_PAGE_THRESHOLD(size) ((size)/(100 * 1024 * 1024ULL)) + +#define COUNT_BAD_PAGE_THRESHOLD(size) (((size) >> 21) << 4) + +/* Reserve 8 physical dram row for possible retirement. + * In worst cases, it will lose 8 * 2MB memory in vram domain + */ +#define RAS_RESERVED_VRAM_SIZE_DEFAULT (16ULL << 20) + + +static void ras_mgr_init_event_mgr(struct ras_event_manager *mgr) +{ + struct ras_event_state *event_state; + int i; + + memset(mgr, 0, sizeof(*mgr)); + atomic64_set(&mgr->seqno, 0); + + for (i = 0; i < ARRAY_SIZE(mgr->event_state); i++) { + event_state = &mgr->event_state[i]; + event_state->last_seqno = RAS_EVENT_INVALID_ID; + atomic64_set(&event_state->count, 0); + } +} + +static void amdgpu_ras_mgr_init_event_mgr(struct ras_core_context *ras_core) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + struct ras_event_manager *event_mgr; + struct amdgpu_hive_info *hive; + + hive = amdgpu_get_xgmi_hive(adev); + event_mgr = hive ? 
&hive->event_mgr : &ras_mgr->ras_event_mgr;
+
+	/* init event manager with node 0 on xgmi system */
+	if (!amdgpu_reset_in_recovery(adev)) {
+		if (!hive || adev->gmc.xgmi.node_id == 0)
+			ras_mgr_init_event_mgr(event_mgr);
+	}
+
+	if (hive)
+		amdgpu_put_xgmi_hive(hive);
+}
+
+static int amdgpu_ras_mgr_init_aca_config(struct amdgpu_device *adev,
+		struct ras_core_config *config)
+{
+	struct ras_aca_config *aca_cfg = &config->aca_cfg;
+
+	aca_cfg->socket_num_per_hive = MAX_SOCKET_NUM_PER_HIVE;
+	aca_cfg->aid_num_per_socket = MAX_AID_NUM_PER_SOCKET;
+	aca_cfg->xcd_num_per_aid = MAX_XCD_NUM_PER_AID;
+
+	return 0;
+}
+
+static int amdgpu_ras_mgr_init_eeprom_config(struct amdgpu_device *adev,
+		struct ras_core_config *config)
+{
+	struct ras_eeprom_config *eeprom_cfg = &config->eeprom_cfg;
+
+	eeprom_cfg->eeprom_sys_fn = &amdgpu_ras_eeprom_i2c_sys_func;
+	eeprom_cfg->eeprom_i2c_adapter = adev->pm.ras_eeprom_i2c_bus;
+	if (eeprom_cfg->eeprom_i2c_adapter) {
+		const struct i2c_adapter_quirks *quirks =
+			((struct i2c_adapter *)eeprom_cfg->eeprom_i2c_adapter)->quirks;
+
+		if (quirks) {
+			eeprom_cfg->max_i2c_read_len = quirks->max_read_len;
+			eeprom_cfg->max_i2c_write_len = quirks->max_write_len;
+		}
+	}
+
+	/*
+	 * amdgpu_bad_page_threshold is used to configure
+	 * the threshold for the number of bad pages.
+	 * -1:	Threshold is set to default value
+	 *	Driver will issue a warning message when threshold is reached
+	 *	and continue runtime services.
+	 * 0:	Disable bad page retirement
+	 *	Driver will not retire bad pages,
+	 *	which is intended for debugging purposes.
+	 * -2:	Threshold is determined by a formula
+	 *	that assumes 1 bad page per 100M of local memory.
+	 *	Driver will continue runtime services when the threshold is reached.
+	 * 0 < threshold < max number of bad page records in EEPROM:
+	 *	A user-defined threshold is set.
+	 *	Driver will halt runtime services when this custom threshold is reached.
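+	 *	For example (illustrative): amdgpu_bad_page_threshold=256
+	 *	halts runtime services once 256 bad page records have been
+	 *	saved to EEPROM.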
+ */
+	if (amdgpu_bad_page_threshold == NONSTOP_OVER_THRESHOLD)
+		eeprom_cfg->eeprom_record_threshold_count =
+			ESTIMATE_BAD_PAGE_THRESHOLD(adev->gmc.mc_vram_size);
+	else if (amdgpu_bad_page_threshold == WARN_NONSTOP_OVER_THRESHOLD)
+		eeprom_cfg->eeprom_record_threshold_count =
+			COUNT_BAD_PAGE_THRESHOLD(RAS_RESERVED_VRAM_SIZE_DEFAULT);
+	else
+		eeprom_cfg->eeprom_record_threshold_count = amdgpu_bad_page_threshold;
+
+	eeprom_cfg->eeprom_record_threshold_config = amdgpu_bad_page_threshold;
+
+	return 0;
+}
+
+static int amdgpu_ras_mgr_init_mp1_config(struct amdgpu_device *adev,
+		struct ras_core_config *config)
+{
+	struct ras_mp1_config *mp1_cfg = &config->mp1_cfg;
+	int ret = 0;
+
+	switch (config->mp1_ip_version) {
+	case IP_VERSION(13, 0, 6):
+	case IP_VERSION(13, 0, 14):
+	case IP_VERSION(13, 0, 12):
+		mp1_cfg->mp1_sys_fn = &amdgpu_ras_mp1_sys_func_v13_0;
+		break;
+	default:
+		RAS_DEV_ERR(adev,
+			"The mp1(0x%x) ras config is not right!\n",
+			config->mp1_ip_version);
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int amdgpu_ras_mgr_init_nbio_config(struct amdgpu_device *adev,
+		struct ras_core_config *config)
+{
+	struct ras_nbio_config *nbio_cfg = &config->nbio_cfg;
+	int ret = 0;
+
+	switch (config->nbio_ip_version) {
+	case IP_VERSION(7, 9, 0):
+		nbio_cfg->nbio_sys_fn = &amdgpu_ras_nbio_sys_func_v7_9;
+		break;
+	default:
+		RAS_DEV_ERR(adev,
+			"The nbio(0x%x) ras config is not right!\n",
+			config->nbio_ip_version);
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int amdgpu_ras_mgr_get_ras_psp_system_status(struct ras_core_context *ras_core,
+		struct ras_psp_sys_status *status)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev;
+	struct ta_context *context = &adev->psp.ras_context.context;
+
+	status->initialized = context->initialized;
+	status->session_id = context->session_id;
+	status->psp_cmd_mutex = &adev->psp.mutex;
+
+	return 0;
+}
+
+static int amdgpu_ras_mgr_get_ras_ta_init_param(struct ras_core_context *ras_core,
+		struct ras_ta_init_param *ras_ta_param)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev;
+	uint32_t nps_mode;
+
+	if (amdgpu_ras_is_poison_mode_supported(adev))
+		ras_ta_param->poison_mode_en = 1;
+
+	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
+		ras_ta_param->dgpu_mode = 1;
+
+	ras_ta_param->xcc_mask = adev->gfx.xcc_mask;
+	ras_ta_param->channel_dis_num = hweight32(adev->gmc.m_half_use) * 2;
+
+	ras_ta_param->active_umc_mask = adev->umc.active_mask;
+
+	if (!amdgpu_ras_mgr_get_curr_nps_mode(adev, &nps_mode))
+		ras_ta_param->nps_mode = nps_mode;
+
+	return 0;
+}
+
+const struct ras_psp_sys_func amdgpu_ras_psp_sys_func = {
+	.get_ras_psp_system_status = amdgpu_ras_mgr_get_ras_psp_system_status,
+	.get_ras_ta_init_param = amdgpu_ras_mgr_get_ras_ta_init_param,
+};
+
+static int amdgpu_ras_mgr_init_psp_config(struct amdgpu_device *adev,
+		struct ras_core_config *config)
+{
+	struct ras_psp_config *psp_cfg = &config->psp_cfg;
+
+	psp_cfg->psp_sys_fn = &amdgpu_ras_psp_sys_func;
+
+	return 0;
+}
+
+static int amdgpu_ras_mgr_init_umc_config(struct amdgpu_device *adev,
+		struct ras_core_config *config)
+{
+	struct ras_umc_config *umc_cfg = &config->umc_cfg;
+
+	umc_cfg->umc_vram_type = adev->gmc.vram_type;
+
+	return 0;
+}
+
+static struct ras_core_context *amdgpu_ras_mgr_create_ras_core(struct amdgpu_device *adev)
+{
+	struct ras_core_config init_config;
+
+	memset(&init_config, 0, sizeof(init_config));
+
+	init_config.umc_ip_version = amdgpu_ip_version(adev, UMC_HWIP, 0);
+
init_config.mp1_ip_version = amdgpu_ip_version(adev, MP1_HWIP, 0); + init_config.gfx_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0); + init_config.nbio_ip_version = amdgpu_ip_version(adev, NBIO_HWIP, 0); + init_config.psp_ip_version = amdgpu_ip_version(adev, MP1_HWIP, 0); + + if (init_config.umc_ip_version == IP_VERSION(12, 0, 0)) + init_config.aca_ip_version = IP_VERSION(1, 0, 0); + + init_config.sys_fn = &amdgpu_ras_sys_fn; + init_config.ras_eeprom_supported = true; + init_config.poison_supported = + amdgpu_ras_is_poison_mode_supported(adev); + + amdgpu_ras_mgr_init_aca_config(adev, &init_config); + amdgpu_ras_mgr_init_eeprom_config(adev, &init_config); + amdgpu_ras_mgr_init_mp1_config(adev, &init_config); + amdgpu_ras_mgr_init_nbio_config(adev, &init_config); + amdgpu_ras_mgr_init_psp_config(adev, &init_config); + amdgpu_ras_mgr_init_umc_config(adev, &init_config); + + return ras_core_create(&init_config); +} + +static int amdgpu_ras_mgr_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_ras_mgr *ras_mgr; + int ret = 0; + + ras_mgr = kzalloc(sizeof(*ras_mgr), GFP_KERNEL); + if (!ras_mgr) + return -EINVAL; + + con->ras_mgr = ras_mgr; + ras_mgr->adev = adev; + + ras_mgr->ras_core = amdgpu_ras_mgr_create_ras_core(adev); + if (!ras_mgr->ras_core) { + RAS_DEV_ERR(adev, "Failed to create ras core!\n"); + ret = -EINVAL; + goto err; + } + + ras_mgr->ras_core->dev = adev; + + amdgpu_ras_process_init(adev); + ras_core_sw_init(ras_mgr->ras_core); + amdgpu_ras_mgr_init_event_mgr(ras_mgr->ras_core); + return 0; + +err: + kfree(ras_mgr); + return ret; +} + +static int amdgpu_ras_mgr_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_ras_mgr *ras_mgr = (struct amdgpu_ras_mgr *)con->ras_mgr; + + if (!ras_mgr) + return 0; + + amdgpu_ras_process_fini(adev); + ras_core_sw_fini(ras_mgr->ras_core); + ras_core_destroy(ras_mgr->ras_core); + ras_mgr->ras_core = NULL; + + kfree(con->ras_mgr); + con->ras_mgr = NULL; + + return 0; +} + +static int amdgpu_ras_mgr_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + int ret; + + if (!ras_mgr || !ras_mgr->ras_core) + return -EINVAL; + + ret = ras_core_hw_init(ras_mgr->ras_core); + if (ret) { + RAS_DEV_ERR(adev, "Failed to initialize ras core!\n"); + return ret; + } + + ras_mgr->ras_is_ready = true; + + RAS_DEV_INFO(adev, "AMDGPU RAS Is Ready.\n"); + return 0; +} + +static int amdgpu_ras_mgr_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + + if (!ras_mgr || !ras_mgr->ras_core) + return -EINVAL; + + ras_core_hw_fini(ras_mgr->ras_core); + + ras_mgr->ras_is_ready = false; + + return 0; +} + +struct amdgpu_ras_mgr *amdgpu_ras_mgr_get_context(struct amdgpu_device *adev) +{ + if (!adev || !adev->psp.ras_context.ras) + return NULL; + + return (struct amdgpu_ras_mgr *)adev->psp.ras_context.ras->ras_mgr; +} + +static const struct amd_ip_funcs ras_v1_0_ip_funcs = { + .name = "ras_v1_0", + .sw_init = amdgpu_ras_mgr_sw_init, + .sw_fini = amdgpu_ras_mgr_sw_fini, + .hw_init = amdgpu_ras_mgr_hw_init, + .hw_fini = amdgpu_ras_mgr_hw_fini, +}; + +int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable) +{ + struct 
amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + + if (!ras_mgr || !ras_mgr->ras_core) + return -EPERM; + + if (amdgpu_sriov_vf(adev)) + return -EPERM; + + RAS_DEV_INFO(adev, "Enable amdgpu unified ras!"); + return ras_core_set_status(ras_mgr->ras_core, enable); +} + +bool amdgpu_unified_ras_enabled(struct amdgpu_device *adev) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + + if (!ras_mgr || !ras_mgr->ras_core) + return false; + + if (amdgpu_sriov_vf(adev)) + return false; + + return ras_core_is_enabled(ras_mgr->ras_core); +} + +static bool amdgpu_ras_mgr_is_ready(struct amdgpu_device *adev) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + + if (ras_mgr && ras_mgr->ras_core && ras_mgr->ras_is_ready && + ras_core_is_ready(ras_mgr->ras_core)) + return true; + + return false; +} + +int amdgpu_ras_mgr_handle_fatal_interrupt(struct amdgpu_device *adev, void *data) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + + if (!amdgpu_ras_mgr_is_ready(adev)) + return -EPERM; + + return ras_core_handle_nbio_irq(ras_mgr->ras_core, data); +} + +uint64_t amdgpu_ras_mgr_gen_ras_event_seqno(struct amdgpu_device *adev, + enum ras_seqno_type seqno_type) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + int ret; + uint64_t seq_no; + + if (!amdgpu_ras_mgr_is_ready(adev) || + (seqno_type >= RAS_SEQNO_TYPE_COUNT_MAX)) + return 0; + + seq_no = ras_core_gen_seqno(ras_mgr->ras_core, seqno_type); + + if ((seqno_type == RAS_SEQNO_TYPE_DE) || + (seqno_type == RAS_SEQNO_TYPE_POISON_CONSUMPTION)) { + ret = ras_core_put_seqno(ras_mgr->ras_core, seqno_type, seq_no); + if (ret) + RAS_DEV_WARN(adev, "There are too many ras interrupts!"); + } + + return seq_no; +} + +int amdgpu_ras_mgr_handle_controller_interrupt(struct amdgpu_device *adev, void *data) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + struct ras_ih_info *ih_info = (struct ras_ih_info *)data; + uint64_t seq_no = 0; + int ret = 0; + + if (!amdgpu_ras_mgr_is_ready(adev)) + return -EPERM; + + if (ih_info && (ih_info->block == AMDGPU_RAS_BLOCK__UMC)) { + if (ras_mgr->ras_core->poison_supported) { + seq_no = amdgpu_ras_mgr_gen_ras_event_seqno(adev, RAS_SEQNO_TYPE_DE); + RAS_DEV_INFO(adev, + "{%llu} RAS poison is created, no user action is needed.\n", + seq_no); + } + + ret = amdgpu_ras_process_handle_umc_interrupt(adev, ih_info); + } else if (ras_mgr->ras_core->poison_supported) { + ret = amdgpu_ras_process_handle_unexpected_interrupt(adev, ih_info); + } else { + RAS_DEV_WARN(adev, + "No RAS interrupt handler for non-UMC block with poison disabled.\n"); + } + + return ret; +} + +int amdgpu_ras_mgr_handle_consumer_interrupt(struct amdgpu_device *adev, void *data) +{ + if (!amdgpu_ras_mgr_is_ready(adev)) + return -EPERM; + + return amdgpu_ras_process_handle_consumption_interrupt(adev, data); +} + +int amdgpu_ras_mgr_update_ras_ecc(struct amdgpu_device *adev) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + + if (!amdgpu_ras_mgr_is_ready(adev)) + return -EPERM; + + return ras_core_update_ecc_info(ras_mgr->ras_core); +} + +int amdgpu_ras_mgr_reset_gpu(struct amdgpu_device *adev, uint32_t flags) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!amdgpu_ras_mgr_is_ready(adev)) + return -EPERM; + + con->gpu_reset_flags |= flags; + return amdgpu_ras_reset_gpu(adev); +} + +bool amdgpu_ras_mgr_check_eeprom_safety_watermark(struct amdgpu_device *adev) +{ + struct amdgpu_ras_mgr *ras_mgr = 
amdgpu_ras_mgr_get_context(adev); + + if (!amdgpu_ras_mgr_is_ready(adev)) + return false; + + return ras_eeprom_check_safety_watermark(ras_mgr->ras_core); +} + +int amdgpu_ras_mgr_get_curr_nps_mode(struct amdgpu_device *adev, + uint32_t *nps_mode) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + uint32_t mode; + + if (!amdgpu_ras_mgr_is_ready(adev)) + return -EINVAL; + + mode = ras_core_get_curr_nps_mode(ras_mgr->ras_core); + if (!mode || mode > AMDGPU_NPS8_PARTITION_MODE) + return -EINVAL; + + *nps_mode = mode; + + return 0; +} + +bool amdgpu_ras_mgr_check_retired_addr(struct amdgpu_device *adev, + uint64_t addr) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + + if (!amdgpu_ras_mgr_is_ready(adev)) + return false; + + return ras_umc_check_retired_addr(ras_mgr->ras_core, addr); +} + +bool amdgpu_ras_mgr_is_rma(struct amdgpu_device *adev) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + + if (!ras_mgr || !ras_mgr->ras_core || !ras_mgr->ras_is_ready) + return false; + + return ras_core_gpu_is_rma(ras_mgr->ras_core); +} diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h new file mode 100644 index 000000000000..fa761de381c1 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (c) 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef __AMDGPU_RAS_MGR_H__ +#define __AMDGPU_RAS_MGR_H__ +#include "ras.h" +#include "amdgpu_ras_process.h" + +enum ras_ih_type { + RAS_IH_NONE, + RAS_IH_FROM_BLOCK_CONTROLLER, + RAS_IH_FROM_CONSUMER_CLIENT, + RAS_IH_FROM_FATAL_ERROR, +}; + +struct ras_ih_info { + uint32_t block; + union { + struct amdgpu_iv_entry iv_entry; + struct { + uint16_t pasid; + uint32_t reset; + pasid_notify pasid_fn; + void *data; + }; + }; +}; + +struct amdgpu_ras_mgr { + struct amdgpu_device *adev; + struct ras_core_context *ras_core; + struct delayed_work retire_page_dwork; + struct ras_event_manager ras_event_mgr; + uint64_t last_poison_consumption_seqno; + bool ras_is_ready; +}; + +struct amdgpu_ras_mgr *amdgpu_ras_mgr_get_context( + struct amdgpu_device *adev); +int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable); +bool amdgpu_unified_ras_enabled(struct amdgpu_device *adev); +int amdgpu_ras_mgr_handle_fatal_interrupt(struct amdgpu_device *adev, void *data); +int amdgpu_ras_mgr_handle_controller_interrupt(struct amdgpu_device *adev, void *data); +int amdgpu_ras_mgr_handle_consumer_interrupt(struct amdgpu_device *adev, void *data); +int amdgpu_ras_mgr_update_ras_ecc(struct amdgpu_device *adev); +int amdgpu_ras_mgr_reset_gpu(struct amdgpu_device *adev, uint32_t flags); +uint64_t amdgpu_ras_mgr_gen_ras_event_seqno(struct amdgpu_device *adev, + enum ras_seqno_type seqno_type); +bool amdgpu_ras_mgr_check_eeprom_safety_watermark(struct amdgpu_device *adev); +int amdgpu_ras_mgr_get_curr_nps_mode(struct amdgpu_device *adev, uint32_t *nps_mode); +bool amdgpu_ras_mgr_check_retired_addr(struct amdgpu_device *adev, + uint64_t addr); +bool amdgpu_ras_mgr_is_rma(struct amdgpu_device *adev); +#endif -- cgit v1.2.3 From 7e1252105e6a5414406faf484e4561b12cee781c Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 17 Mar 2025 18:04:10 +0800 Subject: drm/amd/ras: Add files to amdgpu ras manager makefile Add files to amdgpu ras manager makefile. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/ras_mgr/Makefile | 33 ++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/Makefile b/drivers/gpu/drm/amd/ras/ras_mgr/Makefile index e69de29bb2d1..5e5a2cfa4068 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/Makefile +++ b/drivers/gpu/drm/amd/ras/ras_mgr/Makefile @@ -0,0 +1,33 @@ +# Copyright 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +RAS_MGR_FILES = amdgpu_ras_sys.o \ + amdgpu_ras_mgr.o \ + amdgpu_ras_eeprom_i2c.o \ + amdgpu_ras_mp1_v13_0.o \ + amdgpu_ras_cmd.o \ + amdgpu_ras_process.o \ + amdgpu_ras_nbio_v7_9.o + + +RAS_MGR = $(addprefix $(AMD_GPU_RAS_PATH)/ras_mgr/, $(RAS_MGR_FILES)) + +AMD_GPU_RAS_FILES += $(RAS_MGR) + -- cgit v1.2.3 From 8397f38d7c0e5e7d9c166eba4d5bc0212eec7254 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 24 Mar 2025 15:13:20 +0800 Subject: drm/amd/ras: Add unified ras module top-level makefile Add unified ras module top-level makefile. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/Makefile | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 drivers/gpu/drm/amd/ras/Makefile diff --git a/drivers/gpu/drm/amd/ras/Makefile b/drivers/gpu/drm/amd/ras/Makefile new file mode 100644 index 000000000000..bbdaba811d34 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/Makefile @@ -0,0 +1,34 @@ +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
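# Worked expansion of the list built below (assuming the parent amdgpu kbuild
# files provide AMD_GPU_RAS_FULL_PATH and no AMD_GPU_RAS_MGR override is set):
#
#   RAS_LIBS = ras_mgr rascore
#   AMD_RAS  = $(AMD_GPU_RAS_FULL_PATH)/ras_mgr/Makefile
#              $(AMD_GPU_RAS_FULL_PATH)/rascore/Makefile
#
# so "include $(AMD_RAS)" pulls in each RAS library's own Makefile.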
+ +ifeq ($(AMD_GPU_RAS_MGR),) + AMD_GPU_RAS_MGR := ras_mgr +endif + +subdir-ccflags-y += -I$(AMD_GPU_RAS_FULL_PATH)/rascore +subdir-ccflags-y += -I$(AMD_GPU_RAS_FULL_PATH)/$(AMD_GPU_RAS_MGR) + +RAS_LIBS = $(AMD_GPU_RAS_MGR) rascore + +AMD_RAS = $(addsuffix /Makefile, $(addprefix $(AMD_GPU_RAS_FULL_PATH)/,$(RAS_LIBS))) + +include $(AMD_RAS) + -- cgit v1.2.3 From 0975c4deb6c2a5abc1865f0ba5502b10c87b7d27 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Thu, 11 Sep 2025 15:37:32 -0400 Subject: drm/amd/display: DML2.1 Reintegration [Summary of changes] - Updated structs - Renaming of variables for clarity Reviewed-by: Dillon Varone Signed-off-by: Austin Zheng Signed-off-by: Roman Li Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/inc/dml_top_display_cfg_types.h | 11 + .../dml2/dml21/inc/dml_top_soc_parameter_types.h | 7 +- .../amd/display/dc/dml2/dml21/inc/dml_top_types.h | 13 + .../dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 55 +-- .../dml2/dml21/src/dml2_core/dml2_core_factory.c | 2 + .../dml21/src/dml2_core/dml2_core_shared_types.h | 10 +- .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 459 +++++++++++---------- .../dml21/src/inc/dml2_internal_shared_types.h | 36 +- 8 files changed, 336 insertions(+), 257 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index e8dc6471c0be..13749c9fcf18 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -49,6 +49,11 @@ enum dml2_source_format_class { dml2_422_packed_12 = 18 }; +enum dml2_sample_positioning { + dml2_interstitial = 0, + dml2_cosited = 1 +}; + enum dml2_rotation_angle { dml2_rotation_0 = 0, dml2_rotation_90 = 1, @@ -222,7 +227,11 @@ struct dml2_composition_cfg { struct { bool enabled; + bool easf_enabled; + bool isharp_enabled; bool upsp_enabled; + enum dml2_sample_positioning upsp_sample_positioning; + unsigned int upsp_vtaps; struct { double h_ratio; double v_ratio; @@ -385,6 +394,7 @@ struct dml2_plane_parameters { // The actual reserved vblank time used for the corresponding stream in mode_programming would be at least as much as this per-plane override. 
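/* Put differently: the override below is a floor, not an exact value. With
 * illustrative names (neither exists in this header):
 *
 *   effective_reserved_vblank_ns =
 *       max(derived_reserved_vblank_ns, overrides.reserved_vblank_time_ns);
 *
 * where derived_reserved_vblank_ns stands in for whatever mode_programming
 * computes on its own.
 */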
long reserved_vblank_time_ns; unsigned int max_vactive_det_fill_delay_us; // 0 = no reserved time, +ve = explicit max delay + unsigned int vactive_latency_to_hide_for_pstate_admissibility_us; unsigned int gpuvm_min_page_size_kbytes; unsigned int hostvm_min_page_size_kbytes; @@ -456,6 +466,7 @@ struct dml2_display_cfg { bool enable; bool value; } force_nom_det_size_kbytes; + bool mode_support_check_disable; bool mcache_admissibility_check_disable; bool surface_viewport_size_check_disable; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index 176f55947664..4a9a0d5a09b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -145,6 +145,8 @@ struct dml2_soc_bb { struct dml2_soc_vmin_clock_limits vmin_limit; double lower_bound_bandwidth_dchub; + double fraction_of_urgent_bandwidth_nominal_target; + double fraction_of_urgent_bandwidth_flip_target; unsigned int dprefclk_mhz; unsigned int xtalclk_mhz; unsigned int pcie_refclk_mhz; @@ -170,6 +172,7 @@ struct dml2_soc_bb { struct dml2_ip_capabilities { unsigned int pipe_count; unsigned int otg_count; + unsigned int TDLUT_33cube_count; unsigned int num_dsc; unsigned int max_num_dp2p0_streams; unsigned int max_num_hdmi_frl_outputs; @@ -188,7 +191,9 @@ struct dml2_ip_capabilities { unsigned int subvp_prefetch_end_to_mall_start_us; unsigned int subvp_fw_processing_delay; unsigned int max_vactive_det_fill_delay_us; - + unsigned int ppt_max_allow_delay_ns; + unsigned int temp_read_max_allow_delay_us; + unsigned int dummy_pstate_max_allow_delay_us; /* FAMS2 delays */ struct { unsigned int max_allow_delay_us; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index 41adb1104d0f..8646ce5f1c01 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -70,6 +70,8 @@ struct dml2_pmo_options { bool disable_dyn_odm; bool disable_dyn_odm_for_multi_stream; bool disable_dyn_odm_for_stream_with_svp; + struct dml2_pmo_pstate_strategy *override_strategy_lists[DML2_MAX_PLANES]; + unsigned int num_override_strategies_per_list[DML2_MAX_PLANES]; }; struct dml2_options { @@ -193,6 +195,14 @@ struct dml2_mcache_surface_allocation { } informative; }; +enum dml2_pstate_type { + dml2_pstate_type_uclk, + dml2_pstate_type_ppt, + dml2_pstate_type_temp_read, + dml2_pstate_type_dummy_pstate, + dml2_pstate_type_count +}; + enum dml2_pstate_method { dml2_pstate_method_na = 0, /* hw exclusive modes */ @@ -310,6 +320,7 @@ struct dml2_mode_support_info { bool NumberOfOTGSupport; bool NumberOfHDMIFRLSupport; bool NumberOfDP2p0Support; + bool NumberOfTDLUT33cubeSupport; bool WritebackScaleRatioAndTapsSupport; bool CursorSupport; bool PitchSupport; @@ -357,6 +368,8 @@ struct dml2_mode_support_info { unsigned int AlignedCPitch[DML2_MAX_PLANES]; bool g6_temp_read_support; bool temp_read_or_ppt_support; + bool qos_bandwidth_support; + bool dcfclk_support; }; // dml2_mode_support_info struct dml2_display_cfg_programming { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index bf62d42b3f78..4ccdb179b001 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -12756,7 +12756,7 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_interna { const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index]; const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index]; - const struct dml2_fams2_meta *stream_fams2_meta = &display_cfg->stage3.stream_fams2_meta[plane_descriptor->stream_index]; + const struct dml2_pstate_meta *stream_pstate_meta = &display_cfg->stage3.stream_pstate_meta[plane_descriptor->stream_index]; struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base; union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state; @@ -12771,24 +12771,24 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_interna /* from display configuration */ base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; - base_programming->vblank_start = (uint16_t)(stream_fams2_meta->nom_vtotal - + base_programming->vblank_start = (uint16_t)(stream_pstate_meta->nom_vtotal - stream_descriptor->timing.v_front_porch); - base_programming->vblank_end = (uint16_t)(stream_fams2_meta->nom_vtotal - + base_programming->vblank_end = (uint16_t)(stream_pstate_meta->nom_vtotal - stream_descriptor->timing.v_front_porch - stream_descriptor->timing.v_active); base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled; /* from meta */ base_programming->otg_vline_time_ns = - (unsigned int)(stream_fams2_meta->otg_vline_time_us * 1000.0); - base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_fams2_meta->scheduling_delay_otg_vlines; - base_programming->contention_delay_otg_vlines = (uint8_t)stream_fams2_meta->contention_delay_otg_vlines; - base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines; - base_programming->drr_keepout_otg_vline = (uint16_t)(stream_fams2_meta->nom_vtotal - + (unsigned int)(stream_pstate_meta->otg_vline_time_us * 1000.0); + base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_pstate_meta->scheduling_delay_otg_vlines; + base_programming->contention_delay_otg_vlines = (uint8_t)stream_pstate_meta->contention_delay_otg_vlines; + base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_pstate_meta->vertical_interrupt_ack_delay_otg_vlines; + base_programming->drr_keepout_otg_vline = (uint16_t)(stream_pstate_meta->nom_vtotal - stream_descriptor->timing.v_front_porch - - stream_fams2_meta->method_drr.programming_delay_otg_vlines); - base_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_fams2_meta->allow_to_target_delay_otg_vlines; - base_programming->max_vtotal = (uint16_t)stream_fams2_meta->max_vtotal; + stream_pstate_meta->method_drr.programming_delay_otg_vlines); + base_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_pstate_meta->allow_to_target_delay_otg_vlines; + base_programming->max_vtotal = (uint16_t)stream_pstate_meta->max_vtotal; /* from core */ base_programming->config.bits.min_ttu_vblank_usable = true; @@ -12807,11 +12807,11 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct 
dml2_core_interna /* legacy vactive */ base_programming->type = FAMS2_STREAM_TYPE_VACTIVE; sub_programming->legacy.vactive_det_fill_delay_otg_vlines = - (uint8_t)stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; + (uint8_t)stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; base_programming->allow_start_otg_vline = - (uint16_t)stream_fams2_meta->method_vactive.common.allow_start_otg_vline; + (uint16_t)stream_pstate_meta->method_vactive.common.allow_start_otg_vline; base_programming->allow_end_otg_vline = - (uint16_t)stream_fams2_meta->method_vactive.common.allow_end_otg_vline; + (uint16_t)stream_pstate_meta->method_vactive.common.allow_end_otg_vline; base_programming->config.bits.clamp_vtotal_min = true; break; case dml2_pstate_method_vblank: @@ -12819,22 +12819,22 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_interna /* legacy vblank */ base_programming->type = FAMS2_STREAM_TYPE_VBLANK; base_programming->allow_start_otg_vline = - (uint16_t)stream_fams2_meta->method_vblank.common.allow_start_otg_vline; + (uint16_t)stream_pstate_meta->method_vblank.common.allow_start_otg_vline; base_programming->allow_end_otg_vline = - (uint16_t)stream_fams2_meta->method_vblank.common.allow_end_otg_vline; + (uint16_t)stream_pstate_meta->method_vblank.common.allow_end_otg_vline; base_programming->config.bits.clamp_vtotal_min = true; break; case dml2_pstate_method_fw_drr: /* drr */ base_programming->type = FAMS2_STREAM_TYPE_DRR; sub_programming->drr.programming_delay_otg_vlines = - (uint8_t)stream_fams2_meta->method_drr.programming_delay_otg_vlines; + (uint8_t)stream_pstate_meta->method_drr.programming_delay_otg_vlines; sub_programming->drr.nom_stretched_vtotal = - (uint16_t)stream_fams2_meta->method_drr.stretched_vtotal; + (uint16_t)stream_pstate_meta->method_drr.stretched_vtotal; base_programming->allow_start_otg_vline = - (uint16_t)stream_fams2_meta->method_drr.common.allow_start_otg_vline; + (uint16_t)stream_pstate_meta->method_drr.common.allow_start_otg_vline; base_programming->allow_end_otg_vline = - (uint16_t)stream_fams2_meta->method_drr.common.allow_end_otg_vline; + (uint16_t)stream_pstate_meta->method_drr.common.allow_end_otg_vline; /* drr only clamps to vtotal min for single display */ base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1; sub_programming->drr.only_stretch_if_required = true; @@ -12847,13 +12847,13 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_interna (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0); sub_programming->subvp.vratio_denominator = 1000; sub_programming->subvp.programming_delay_otg_vlines = - (uint8_t)stream_fams2_meta->method_subvp.programming_delay_otg_vlines; + (uint8_t)stream_pstate_meta->method_subvp.programming_delay_otg_vlines; sub_programming->subvp.prefetch_to_mall_otg_vlines = - (uint8_t)stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines; + (uint8_t)stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines; sub_programming->subvp.phantom_vtotal = - (uint16_t)stream_fams2_meta->method_subvp.phantom_vtotal; + (uint16_t)stream_pstate_meta->method_subvp.phantom_vtotal; sub_programming->subvp.phantom_vactive = - (uint16_t)stream_fams2_meta->method_subvp.phantom_vactive; + (uint16_t)stream_pstate_meta->method_subvp.phantom_vactive; sub_programming->subvp.config.bits.is_multi_planar = plane_descriptor->surface.plane1.height > 0; sub_programming->subvp.config.bits.is_yuv420 
= @@ -12862,9 +12862,9 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_interna plane_descriptor->pixel_format == dml2_420_12; base_programming->allow_start_otg_vline = - (uint16_t)stream_fams2_meta->method_subvp.common.allow_start_otg_vline; + (uint16_t)stream_pstate_meta->method_subvp.common.allow_start_otg_vline; base_programming->allow_end_otg_vline = - (uint16_t)stream_fams2_meta->method_subvp.common.allow_end_otg_vline; + (uint16_t)stream_pstate_meta->method_subvp.common.allow_end_otg_vline; base_programming->config.bits.clamp_vtotal_min = true; break; case dml2_pstate_method_reserved_hw: @@ -13027,7 +13027,10 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported; out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support; out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport; + out->informative.mode_support_info.NumberOfTDLUT33cubeSupport = mode_lib->ms.support.NumberOfTDLUT33cubeSupport; out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport; + out->informative.mode_support_info.qos_bandwidth_support = mode_lib->ms.support.qos_bandwidth_support; + out->informative.mode_support_info.dcfclk_support = mode_lib->ms.support.dcfclk_support; for (k = 0; k < out->display_config.num_planes; k++) { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c index 640087e862f8..cc4f0663c6d6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c @@ -15,6 +15,8 @@ bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance memset(out, 0, sizeof(struct dml2_core_instance)); + out->project_id = project_id; + switch (project_id) { case dml2_project_dcn4x_stage1: result = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index ffb8c09f37a5..ff1c47347610 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -37,6 +37,7 @@ struct dml2_core_ip_params { unsigned int writeback_interface_buffer_size_kbytes; unsigned int max_num_dpp; unsigned int max_num_otg; + unsigned int TDLUT_33cube_count; unsigned int max_num_wb; unsigned int max_dchub_pscl_bw_pix_per_clk; unsigned int max_pscl_lb_bw_pix_per_clk; @@ -46,6 +47,7 @@ struct dml2_core_ip_params { double max_vscl_ratio; unsigned int max_hscl_taps; unsigned int max_vscl_taps; + unsigned int odm_combine_support_mask; unsigned int num_dsc; unsigned int maximum_dsc_bits_per_component; unsigned int maximum_pixels_per_line_per_dsc_unit; @@ -82,7 +84,6 @@ struct dml2_core_ip_params { unsigned int subvp_swath_height_margin_lines; unsigned int subvp_fw_processing_delay_us; unsigned int subvp_pstate_allow_width_us; - // MRQ bool dcn_mrq_present; unsigned int zero_size_buffer_entries; @@ -103,6 +104,8 @@ struct dml2_core_internal_DmlPipe { unsigned int DPPPerSurface; bool ScalerEnabled; bool UPSPEnabled; + unsigned int UPSPVTaps; + enum dml2_sample_positioning 
UPSPSamplePositioning; enum dml2_rotation_angle RotationAngle; bool mirrored; unsigned int ViewportHeight; @@ -230,6 +233,7 @@ struct dml2_core_internal_mode_support_info { bool MSOOrODMSplitWithNonDPLink; bool NotEnoughLanesForMSO; bool NumberOfOTGSupport; + bool NumberOfTDLUT33cubeSupport; bool NumberOfHDMIFRLSupport; bool NumberOfDP2p0Support; bool WritebackScaleRatioAndTapsSupport; @@ -1306,7 +1310,7 @@ struct dml2_core_calcs_CalculateVMRowAndSwath_params { unsigned int HostVMMinPageSize; unsigned int DCCMetaBufferSizeBytes; bool mrq_present; - enum dml2_pstate_method pstate_switch_modes[DML2_MAX_PLANES]; + enum dml2_pstate_method *pstate_switch_modes; // Output bool *PTEBufferSizeNotExceeded; @@ -2308,6 +2312,7 @@ struct dml2_core_calcs_mode_support_ex { const struct dml2_display_cfg *in_display_cfg; const struct dml2_mcg_min_clock_table *min_clk_table; int min_clk_index; + enum dml2_project_id project_id; //unsigned int in_state_index; struct dml2_core_internal_mode_support_info *out_evaluation_info; }; @@ -2320,6 +2325,7 @@ struct dml2_core_calcs_mode_programming_ex { const struct dml2_mcg_min_clock_table *min_clk_table; const struct core_display_cfg_support_info *cfg_support_info; int min_clk_index; + enum dml2_project_id project_id; struct dml2_display_cfg_programming *programming; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index d88b3e0082dd..5769c2638f9a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -642,6 +642,11 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) int i = 0; struct dml2_pmo_instance *pmo = in_out->instance; + unsigned int base_list_size = 0; + const struct dml2_pmo_pstate_strategy *base_list = NULL; + unsigned int *expanded_list_size = NULL; + struct dml2_pmo_pstate_strategy *expanded_list = NULL; + pmo->soc_bb = in_out->soc_bb; pmo->ip_caps = in_out->ip_caps; pmo->mpc_combine_limit = 2; @@ -656,53 +661,71 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) pmo->options = in_out->options; /* generate permutations of p-state configs from base strategy list */ - for (i = 1; i <= PMO_DCN4_MAX_DISPLAYS; i++) { - switch (i) { + for (i = 0; i < PMO_DCN4_MAX_DISPLAYS; i++) { + switch (i+1) { case 1: - DML_ASSERT(base_strategy_list_1_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); - - /* populate list */ - pmo_dcn4_fams2_expand_base_pstate_strategies( - base_strategy_list_1_display, - base_strategy_list_1_display_size, - i, - pmo->init_data.pmo_dcn4.expanded_strategy_list_1_display, - &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = base_strategy_list_1_display; + base_list_size = base_strategy_list_1_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_1_display; + break; case 2: - DML_ASSERT(base_strategy_list_2_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); - - /* populate list */ - pmo_dcn4_fams2_expand_base_pstate_strategies( - base_strategy_list_2_display, - 
base_strategy_list_2_display_size, - i, - pmo->init_data.pmo_dcn4.expanded_strategy_list_2_display, - &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = base_strategy_list_2_display; + base_list_size = base_strategy_list_2_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_2_display; + break; case 3: - DML_ASSERT(base_strategy_list_3_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); - - /* populate list */ - pmo_dcn4_fams2_expand_base_pstate_strategies( - base_strategy_list_3_display, - base_strategy_list_3_display_size, - i, - pmo->init_data.pmo_dcn4.expanded_strategy_list_3_display, - &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = base_strategy_list_3_display; + base_list_size = base_strategy_list_3_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_3_display; + break; case 4: - DML_ASSERT(base_strategy_list_4_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); - - /* populate list */ - pmo_dcn4_fams2_expand_base_pstate_strategies( - base_strategy_list_4_display, - base_strategy_list_4_display_size, - i, - pmo->init_data.pmo_dcn4.expanded_strategy_list_4_display, - &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = base_strategy_list_4_display; + base_list_size = base_strategy_list_4_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_4_display; + break; } + + DML_ASSERT(base_list_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + + /* populate list */ + pmo_dcn4_fams2_expand_base_pstate_strategies( + base_list, + base_list_size, + i + 1, + expanded_list, + expanded_list_size); } return true; @@ -1026,13 +1049,13 @@ static bool all_timings_support_vblank(const struct dml2_pmo_instance *pmo, return synchronizable; } -static unsigned int calc_svp_microschedule(const struct dml2_fams2_meta *fams2_meta) +static unsigned int calc_svp_microschedule(const struct dml2_pstate_meta *pstate_meta) { - return fams2_meta->contention_delay_otg_vlines + - fams2_meta->method_subvp.programming_delay_otg_vlines + - fams2_meta->method_subvp.phantom_vtotal + - fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + - fams2_meta->dram_clk_change_blackout_otg_vlines; + return pstate_meta->contention_delay_otg_vlines + + pstate_meta->method_subvp.programming_delay_otg_vlines + + pstate_meta->method_subvp.phantom_vtotal + + pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + + pstate_meta->blackout_otg_vlines; } static bool all_timings_support_drr(const struct 
dml2_pmo_instance *pmo, @@ -1042,29 +1065,29 @@ static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo, unsigned int i; for (i = 0; i < DML2_MAX_PLANES; i++) { const struct dml2_stream_parameters *stream_descriptor; - const struct dml2_fams2_meta *stream_fams2_meta; + const struct dml2_pstate_meta *stream_pstate_meta; if (is_bit_set_in_bitfield(mask, i)) { stream_descriptor = &display_config->display_config.stream_descriptors[i]; - stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[i]; + stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[i]; if (!stream_descriptor->timing.drr_config.enabled) return false; /* cannot support required vtotal */ - if (stream_fams2_meta->method_drr.stretched_vtotal > stream_fams2_meta->max_vtotal) { + if (stream_pstate_meta->method_drr.stretched_vtotal > stream_pstate_meta->max_vtotal) { return false; } /* check rr is within bounds */ - if (stream_fams2_meta->nom_refresh_rate_hz < pmo->fams_params.v2.drr.refresh_rate_limit_min || - stream_fams2_meta->nom_refresh_rate_hz > pmo->fams_params.v2.drr.refresh_rate_limit_max) { + if (stream_pstate_meta->nom_refresh_rate_hz < pmo->fams_params.v2.drr.refresh_rate_limit_min || + stream_pstate_meta->nom_refresh_rate_hz > pmo->fams_params.v2.drr.refresh_rate_limit_max) { return false; } /* check required stretch is allowed */ if (stream_descriptor->timing.drr_config.max_instant_vtotal_delta > 0 && - stream_fams2_meta->method_drr.stretched_vtotal - stream_fams2_meta->nom_vtotal > stream_descriptor->timing.drr_config.max_instant_vtotal_delta) { + stream_pstate_meta->method_drr.stretched_vtotal - stream_pstate_meta->nom_vtotal > stream_descriptor->timing.drr_config.max_instant_vtotal_delta) { return false; } } @@ -1079,7 +1102,7 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, { const struct dml2_stream_parameters *stream_descriptor; const struct dml2_plane_parameters *plane_descriptor; - const struct dml2_fams2_meta *stream_fams2_meta; + const struct dml2_pstate_meta *stream_pstate_meta; unsigned int microschedule_vlines; unsigned int i; unsigned int mcaches_per_plane; @@ -1124,13 +1147,13 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, for (i = 0; i < DML2_MAX_PLANES; i++) { if (is_bit_set_in_bitfield(mask, i)) { stream_descriptor = &display_config->display_config.stream_descriptors[i]; - stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[i]; + stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[i]; if (stream_descriptor->overrides.disable_subvp) { return false; } - microschedule_vlines = calc_svp_microschedule(&pmo->scratch.pmo_dcn4.stream_fams2_meta[i]); + microschedule_vlines = calc_svp_microschedule(&pmo->scratch.pmo_dcn4.stream_pstate_meta[i]); /* block if using an interlaced timing */ if (stream_descriptor->timing.interlaced) { @@ -1141,8 +1164,8 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, * 2) refresh rate must be within the allowed bounds */ if (microschedule_vlines >= stream_descriptor->timing.v_active || - (stream_fams2_meta->nom_refresh_rate_hz < pmo->fams_params.v2.subvp.refresh_rate_limit_min || - stream_fams2_meta->nom_refresh_rate_hz > pmo->fams_params.v2.subvp.refresh_rate_limit_max)) { + (stream_pstate_meta->nom_refresh_rate_hz < pmo->fams_params.v2.subvp.refresh_rate_limit_min || + stream_pstate_meta->nom_refresh_rate_hz > pmo->fams_params.v2.subvp.refresh_rate_limit_max)) { return false; } } @@ -1232,43 +1255,43 @@ static bool all_planes_match_method(const 
struct display_configuation_with_meta } static void build_method_scheduling_params( - struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta, - struct dml2_fams2_meta *stream_fams2_meta) + struct dml2_pstate_per_method_common_meta *stream_method_pstate_meta, + struct dml2_pstate_meta *stream_pstate_meta) { - stream_method_fams2_meta->allow_time_us = - (double)((int)stream_method_fams2_meta->allow_end_otg_vline - (int)stream_method_fams2_meta->allow_start_otg_vline) * - stream_fams2_meta->otg_vline_time_us; - if (stream_method_fams2_meta->allow_time_us >= stream_method_fams2_meta->period_us) { + stream_method_pstate_meta->allow_time_us = + (double)((int)stream_method_pstate_meta->allow_end_otg_vline - (int)stream_method_pstate_meta->allow_start_otg_vline) * + stream_pstate_meta->otg_vline_time_us; + if (stream_method_pstate_meta->allow_time_us >= stream_method_pstate_meta->period_us) { /* when allow wave overlaps an entire frame, it is always schedulable (DRR can do this)*/ - stream_method_fams2_meta->disallow_time_us = 0.0; + stream_method_pstate_meta->disallow_time_us = 0.0; } else { - stream_method_fams2_meta->disallow_time_us = - stream_method_fams2_meta->period_us - stream_method_fams2_meta->allow_time_us; + stream_method_pstate_meta->disallow_time_us = + stream_method_pstate_meta->period_us - stream_method_pstate_meta->allow_time_us; } } -static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( +static struct dml2_pstate_per_method_common_meta *get_per_method_common_meta( struct dml2_pmo_instance *pmo, enum dml2_pstate_method stream_pstate_method, int stream_idx) { - struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta = NULL; + struct dml2_pstate_per_method_common_meta *stream_method_pstate_meta = NULL; switch (stream_pstate_method) { case dml2_pstate_method_vactive: case dml2_pstate_method_fw_vactive_drr: - stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vactive.common; + stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_vactive.common; break; case dml2_pstate_method_vblank: case dml2_pstate_method_fw_vblank_drr: - stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vblank.common; + stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_vblank.common; break; case dml2_pstate_method_fw_svp: case dml2_pstate_method_fw_svp_drr: - stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_subvp.common; + stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_subvp.common; break; case dml2_pstate_method_fw_drr: - stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_drr.common; + stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_drr.common; break; case dml2_pstate_method_reserved_hw: case dml2_pstate_method_reserved_fw: @@ -1277,10 +1300,10 @@ static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( case dml2_pstate_method_count: case dml2_pstate_method_na: default: - stream_method_fams2_meta = NULL; + stream_method_pstate_meta = NULL; } - return stream_method_fams2_meta; + return stream_method_pstate_meta; } static bool is_timing_group_schedulable( @@ -1288,10 +1311,10 @@ static bool is_timing_group_schedulable( const struct display_configuation_with_meta *display_cfg, const struct dml2_pmo_pstate_strategy *pstate_strategy, const unsigned int 
timing_group_idx, - struct dml2_fams2_per_method_common_meta *group_fams2_meta) + struct dml2_pstate_per_method_common_meta *group_pstate_meta) { unsigned int i; - struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta; + struct dml2_pstate_per_method_common_meta *stream_method_pstate_meta; unsigned int base_stream_idx = 0; struct dml2_pmo_scratch *s = &pmo->scratch; @@ -1305,31 +1328,31 @@ static bool is_timing_group_schedulable( } /* init allow start and end lines for timing group */ - stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[base_stream_idx], base_stream_idx); - if (!stream_method_fams2_meta) + stream_method_pstate_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[base_stream_idx], base_stream_idx); + if (!stream_method_pstate_meta) return false; - group_fams2_meta->allow_start_otg_vline = stream_method_fams2_meta->allow_start_otg_vline; - group_fams2_meta->allow_end_otg_vline = stream_method_fams2_meta->allow_end_otg_vline; - group_fams2_meta->period_us = stream_method_fams2_meta->period_us; + group_pstate_meta->allow_start_otg_vline = stream_method_pstate_meta->allow_start_otg_vline; + group_pstate_meta->allow_end_otg_vline = stream_method_pstate_meta->allow_end_otg_vline; + group_pstate_meta->period_us = stream_method_pstate_meta->period_us; for (i = base_stream_idx + 1; i < display_cfg->display_config.num_streams; i++) { if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { - stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); - if (!stream_method_fams2_meta) + stream_method_pstate_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); + if (!stream_method_pstate_meta) continue; - if (group_fams2_meta->allow_start_otg_vline < stream_method_fams2_meta->allow_start_otg_vline) { + if (group_pstate_meta->allow_start_otg_vline < stream_method_pstate_meta->allow_start_otg_vline) { /* set group allow start to larger otg vline */ - group_fams2_meta->allow_start_otg_vline = stream_method_fams2_meta->allow_start_otg_vline; + group_pstate_meta->allow_start_otg_vline = stream_method_pstate_meta->allow_start_otg_vline; } - if (group_fams2_meta->allow_end_otg_vline > stream_method_fams2_meta->allow_end_otg_vline) { + if (group_pstate_meta->allow_end_otg_vline > stream_method_pstate_meta->allow_end_otg_vline) { /* set group allow end to smaller otg vline */ - group_fams2_meta->allow_end_otg_vline = stream_method_fams2_meta->allow_end_otg_vline; + group_pstate_meta->allow_end_otg_vline = stream_method_pstate_meta->allow_end_otg_vline; } /* check waveform still has positive width */ - if (group_fams2_meta->allow_start_otg_vline >= group_fams2_meta->allow_end_otg_vline) { + if (group_pstate_meta->allow_start_otg_vline >= group_pstate_meta->allow_end_otg_vline) { /* timing group is not schedulable */ return false; } @@ -1337,10 +1360,10 @@ static bool is_timing_group_schedulable( } /* calculate the rest of the meta */ - build_method_scheduling_params(group_fams2_meta, &pmo->scratch.pmo_dcn4.stream_fams2_meta[base_stream_idx]); + build_method_scheduling_params(group_pstate_meta, &pmo->scratch.pmo_dcn4.stream_pstate_meta[base_stream_idx]); - return group_fams2_meta->allow_time_us > 0.0 && - group_fams2_meta->disallow_time_us < pmo->ip_caps->fams2.max_allow_delay_us; + return group_pstate_meta->allow_time_us > 0.0 && + group_pstate_meta->disallow_time_us < 
pmo->ip_caps->fams2.max_allow_delay_us; } static bool is_config_schedulable( @@ -1354,7 +1377,7 @@ static bool is_config_schedulable( double max_allow_delay_us = 0.0; - memset(s->pmo_dcn4.group_common_fams2_meta, 0, sizeof(s->pmo_dcn4.group_common_fams2_meta)); + memset(s->pmo_dcn4.group_common_pstate_meta, 0, sizeof(s->pmo_dcn4.group_common_pstate_meta)); memset(s->pmo_dcn4.sorted_group_gtl_disallow_index, 0, sizeof(unsigned int) * DML2_MAX_PLANES); /* search for a general solution to the schedule */ @@ -1369,12 +1392,12 @@ static bool is_config_schedulable( for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { s->pmo_dcn4.sorted_group_gtl_disallow_index[i] = i; s->pmo_dcn4.sorted_group_gtl_period_index[i] = i; - if (!is_timing_group_schedulable(pmo, display_cfg, pstate_strategy, i, &s->pmo_dcn4.group_common_fams2_meta[i])) { + if (!is_timing_group_schedulable(pmo, display_cfg, pstate_strategy, i, &s->pmo_dcn4.group_common_pstate_meta[i])) { /* synchronized timing group was not schedulable */ schedulable = false; break; } - max_allow_delay_us += s->pmo_dcn4.group_common_fams2_meta[i].disallow_time_us; + max_allow_delay_us += s->pmo_dcn4.group_common_pstate_meta[i].disallow_time_us; } if ((schedulable && s->pmo_dcn4.num_timing_groups <= 1) || !schedulable) { @@ -1391,8 +1414,8 @@ static bool is_config_schedulable( bool swapped = false; for (j = 0; j < s->pmo_dcn4.num_timing_groups - 1; j++) { - double j_disallow_us = s->pmo_dcn4.group_common_fams2_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j]].disallow_time_us; - double jp1_disallow_us = s->pmo_dcn4.group_common_fams2_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]].disallow_time_us; + double j_disallow_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j]].disallow_time_us; + double jp1_disallow_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]].disallow_time_us; if (j_disallow_us < jp1_disallow_us) { /* swap as A < B */ swap(s->pmo_dcn4.sorted_group_gtl_disallow_index[j], @@ -1410,19 +1433,19 @@ static bool is_config_schedulable( * other display, or when >2 streams continue to halve the remaining allow time. 
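* (Worked example with made-up numbers: if group A has 60us allow / 40us
* disallow and peer group B has 50us allow / 30us disallow, A's usable
* allow time reduces to min(50, (60 - 30) / 2) = 15us, which is exactly
* the math_min2(allow_B, (remaining_allow_A - disallow_B) / 2) step below.)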
*/ for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { - if (s->pmo_dcn4.group_common_fams2_meta[i].disallow_time_us <= 0.0) { + if (s->pmo_dcn4.group_common_pstate_meta[i].disallow_time_us <= 0.0) { /* this timing group always allows */ continue; } - double max_allow_time_us = s->pmo_dcn4.group_common_fams2_meta[i].allow_time_us; + double max_allow_time_us = s->pmo_dcn4.group_common_pstate_meta[i].allow_time_us; for (j = 0; j < s->pmo_dcn4.num_timing_groups; j++) { unsigned int sorted_j = s->pmo_dcn4.sorted_group_gtl_disallow_index[j]; /* stream can't overlap itself */ - if (i != sorted_j && s->pmo_dcn4.group_common_fams2_meta[sorted_j].disallow_time_us > 0.0) { + if (i != sorted_j && s->pmo_dcn4.group_common_pstate_meta[sorted_j].disallow_time_us > 0.0) { max_allow_time_us = math_min2( - s->pmo_dcn4.group_common_fams2_meta[sorted_j].allow_time_us, - (max_allow_time_us - s->pmo_dcn4.group_common_fams2_meta[sorted_j].disallow_time_us) / 2); + s->pmo_dcn4.group_common_pstate_meta[sorted_j].allow_time_us, + (max_allow_time_us - s->pmo_dcn4.group_common_pstate_meta[sorted_j].disallow_time_us) / 2); if (max_allow_time_us < 0.0) { /* failed exit early */ @@ -1450,8 +1473,8 @@ static bool is_config_schedulable( bool swapped = false; for (j = 0; j < s->pmo_dcn4.num_timing_groups - 1; j++) { - double j_period_us = s->pmo_dcn4.group_common_fams2_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j]].period_us; - double jp1_period_us = s->pmo_dcn4.group_common_fams2_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]].period_us; + double j_period_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j]].period_us; + double jp1_period_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]].period_us; if (j_period_us < jp1_period_us) { /* swap as A < B */ swap(s->pmo_dcn4.sorted_group_gtl_period_index[j], @@ -1470,7 +1493,7 @@ static bool is_config_schedulable( unsigned int sorted_i = s->pmo_dcn4.sorted_group_gtl_period_index[i]; unsigned int sorted_ip1 = s->pmo_dcn4.sorted_group_gtl_period_index[i + 1]; - if (s->pmo_dcn4.group_common_fams2_meta[sorted_i].allow_time_us < s->pmo_dcn4.group_common_fams2_meta[sorted_ip1].period_us || + if (s->pmo_dcn4.group_common_pstate_meta[sorted_i].allow_time_us < s->pmo_dcn4.group_common_pstate_meta[sorted_ip1].period_us || (s->pmo_dcn4.group_is_drr_enabled[sorted_ip1] && s->pmo_dcn4.group_is_drr_active[sorted_ip1])) { schedulable = false; break; @@ -1492,18 +1515,18 @@ static bool is_config_schedulable( /* default period_0 > period_1 */ unsigned int lrg_idx = 0; unsigned int sml_idx = 1; - if (s->pmo_dcn4.group_common_fams2_meta[0].period_us < s->pmo_dcn4.group_common_fams2_meta[1].period_us) { + if (s->pmo_dcn4.group_common_pstate_meta[0].period_us < s->pmo_dcn4.group_common_pstate_meta[1].period_us) { /* period_0 < period_1 */ lrg_idx = 1; sml_idx = 0; } - period_ratio = s->pmo_dcn4.group_common_fams2_meta[lrg_idx].period_us / s->pmo_dcn4.group_common_fams2_meta[sml_idx].period_us; - shift_per_period = s->pmo_dcn4.group_common_fams2_meta[sml_idx].period_us * (period_ratio - math_floor(period_ratio)); - max_shift_us = s->pmo_dcn4.group_common_fams2_meta[lrg_idx].disallow_time_us - s->pmo_dcn4.group_common_fams2_meta[sml_idx].allow_time_us; - max_allow_delay_us = max_shift_us / shift_per_period * s->pmo_dcn4.group_common_fams2_meta[lrg_idx].period_us; + period_ratio = s->pmo_dcn4.group_common_pstate_meta[lrg_idx].period_us / s->pmo_dcn4.group_common_pstate_meta[sml_idx].period_us; + shift_per_period 
= s->pmo_dcn4.group_common_pstate_meta[sml_idx].period_us * (period_ratio - math_floor(period_ratio)); + max_shift_us = s->pmo_dcn4.group_common_pstate_meta[lrg_idx].disallow_time_us - s->pmo_dcn4.group_common_pstate_meta[sml_idx].allow_time_us; + max_allow_delay_us = max_shift_us / shift_per_period * s->pmo_dcn4.group_common_pstate_meta[lrg_idx].period_us; if (shift_per_period > 0.0 && - shift_per_period < s->pmo_dcn4.group_common_fams2_meta[lrg_idx].allow_time_us + s->pmo_dcn4.group_common_fams2_meta[sml_idx].allow_time_us && + shift_per_period < s->pmo_dcn4.group_common_pstate_meta[lrg_idx].allow_time_us + s->pmo_dcn4.group_common_pstate_meta[sml_idx].allow_time_us && max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) { schedulable = true; } @@ -1661,7 +1684,7 @@ static unsigned int get_vactive_det_fill_latency_delay_us(const struct display_c return max_vactive_fill_us; } -static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, +static void build_pstate_meta_per_stream(struct dml2_pmo_instance *pmo, struct display_configuation_with_meta *display_config, int stream_index) { @@ -1669,7 +1692,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, const struct dml2_stream_parameters *stream_descriptor = &display_config->display_config.stream_descriptors[stream_index]; const struct core_stream_support_info *stream_info = &display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index]; const struct dml2_timing_cfg *timing = &stream_descriptor->timing; - struct dml2_fams2_meta *stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_index]; + struct dml2_pstate_meta *stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index]; /* worst case all other streams require some programming at the same time, 0 if only 1 stream */ unsigned int contention_delay_us = (ip_caps->fams2.vertical_interrupt_ack_delay_us + @@ -1677,142 +1700,142 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, (display_config->display_config.num_streams - 1); /* common */ - stream_fams2_meta->valid = true; - stream_fams2_meta->otg_vline_time_us = (double)timing->h_total / timing->pixel_clock_khz * 1000.0; - stream_fams2_meta->nom_vtotal = stream_descriptor->timing.vblank_nom + stream_descriptor->timing.v_active; - stream_fams2_meta->nom_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 / - (stream_fams2_meta->nom_vtotal * timing->h_total); - stream_fams2_meta->nom_frame_time_us = - (double)stream_fams2_meta->nom_vtotal * stream_fams2_meta->otg_vline_time_us; - stream_fams2_meta->vblank_start = timing->v_blank_end + timing->v_active; + stream_pstate_meta->valid = true; + stream_pstate_meta->otg_vline_time_us = (double)timing->h_total / timing->pixel_clock_khz * 1000.0; + stream_pstate_meta->nom_vtotal = stream_descriptor->timing.vblank_nom + stream_descriptor->timing.v_active; + stream_pstate_meta->nom_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 / + (stream_pstate_meta->nom_vtotal * timing->h_total); + stream_pstate_meta->nom_frame_time_us = + (double)stream_pstate_meta->nom_vtotal * stream_pstate_meta->otg_vline_time_us; + stream_pstate_meta->vblank_start = timing->v_blank_end + timing->v_active; if (stream_descriptor->timing.drr_config.enabled == true) { if (stream_descriptor->timing.drr_config.min_refresh_uhz != 0.0) { - stream_fams2_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz / + stream_pstate_meta->max_vtotal = (unsigned 
int)math_floor((double)stream_descriptor->timing.pixel_clock_khz / ((double)stream_descriptor->timing.drr_config.min_refresh_uhz * stream_descriptor->timing.h_total) * 1e9); } else { /* assume min of 48Hz */ - stream_fams2_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz / + stream_pstate_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz / (48000000.0 * stream_descriptor->timing.h_total) * 1e9); } } else { - stream_fams2_meta->max_vtotal = stream_fams2_meta->nom_vtotal; - } - stream_fams2_meta->min_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 / - (stream_fams2_meta->max_vtotal * timing->h_total); - stream_fams2_meta->max_frame_time_us = - (double)stream_fams2_meta->max_vtotal * stream_fams2_meta->otg_vline_time_us; - - stream_fams2_meta->scheduling_delay_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.scheduling_delay_us / stream_fams2_meta->otg_vline_time_us); - stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.vertical_interrupt_ack_delay_us / stream_fams2_meta->otg_vline_time_us); - stream_fams2_meta->contention_delay_otg_vlines = - (unsigned int)math_ceil(contention_delay_us / stream_fams2_meta->otg_vline_time_us); + stream_pstate_meta->max_vtotal = stream_pstate_meta->nom_vtotal; + } + stream_pstate_meta->min_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 / + (stream_pstate_meta->max_vtotal * timing->h_total); + stream_pstate_meta->max_frame_time_us = + (double)stream_pstate_meta->max_vtotal * stream_pstate_meta->otg_vline_time_us; + + stream_pstate_meta->scheduling_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.scheduling_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->vertical_interrupt_ack_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.vertical_interrupt_ack_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->contention_delay_otg_vlines = + (unsigned int)math_ceil(contention_delay_us / stream_pstate_meta->otg_vline_time_us); /* worst case allow to target needs to account for all streams' allow events overlapping, and 1 line for error */ - stream_fams2_meta->allow_to_target_delay_otg_vlines = - (unsigned int)(math_ceil((ip_caps->fams2.vertical_interrupt_ack_delay_us + contention_delay_us + ip_caps->fams2.allow_programming_delay_us) / stream_fams2_meta->otg_vline_time_us)) + 1; - stream_fams2_meta->min_allow_width_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.min_allow_width_us / stream_fams2_meta->otg_vline_time_us); + stream_pstate_meta->allow_to_target_delay_otg_vlines = + (unsigned int)(math_ceil((ip_caps->fams2.vertical_interrupt_ack_delay_us + contention_delay_us + ip_caps->fams2.allow_programming_delay_us) / stream_pstate_meta->otg_vline_time_us)) + 1; + stream_pstate_meta->min_allow_width_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.min_allow_width_us / stream_pstate_meta->otg_vline_time_us); /* this value should account for urgent latency */ - stream_fams2_meta->dram_clk_change_blackout_otg_vlines = + stream_pstate_meta->blackout_otg_vlines = (unsigned int)math_ceil(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us / - stream_fams2_meta->otg_vline_time_us); + stream_pstate_meta->otg_vline_time_us); /* scheduling params should be built based on the worst case for allow_time:disallow_time */ /* vactive */ if (display_config->display_config.num_streams == 1) { /* for single stream, guarantee at least an instant of 
allow */ - stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = (unsigned int)math_floor( + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = (unsigned int)math_floor( math_max2(0.0, - timing->v_active - math_max2(1.0, stream_fams2_meta->min_allow_width_otg_vlines) - stream_fams2_meta->dram_clk_change_blackout_otg_vlines)); + timing->v_active - math_max2(1.0, stream_pstate_meta->min_allow_width_otg_vlines) - stream_pstate_meta->blackout_otg_vlines)); } else { /* for multi stream, bound to a max fill time defined by IP caps */ - stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = - (unsigned int)math_floor((double)ip_caps->max_vactive_det_fill_delay_us / stream_fams2_meta->otg_vline_time_us); + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = + (unsigned int)math_floor((double)ip_caps->max_vactive_det_fill_delay_us / stream_pstate_meta->otg_vline_time_us); } - stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_us = stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines * stream_fams2_meta->otg_vline_time_us; + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us = stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines * stream_pstate_meta->otg_vline_time_us; - if (stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_us > 0.0) { - stream_fams2_meta->method_vactive.common.allow_start_otg_vline = - timing->v_blank_end + stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; - stream_fams2_meta->method_vactive.common.allow_end_otg_vline = - stream_fams2_meta->vblank_start - - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; + if (stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us > 0.0) { + stream_pstate_meta->method_vactive.common.allow_start_otg_vline = + timing->v_blank_end + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; + stream_pstate_meta->method_vactive.common.allow_end_otg_vline = + stream_pstate_meta->vblank_start - + stream_pstate_meta->blackout_otg_vlines; } else { - stream_fams2_meta->method_vactive.common.allow_start_otg_vline = 0; - stream_fams2_meta->method_vactive.common.allow_end_otg_vline = 0; + stream_pstate_meta->method_vactive.common.allow_start_otg_vline = 0; + stream_pstate_meta->method_vactive.common.allow_end_otg_vline = 0; } - stream_fams2_meta->method_vactive.common.period_us = stream_fams2_meta->nom_frame_time_us; - build_method_scheduling_params(&stream_fams2_meta->method_vactive.common, stream_fams2_meta); + stream_pstate_meta->method_vactive.common.period_us = stream_pstate_meta->nom_frame_time_us; + build_method_scheduling_params(&stream_pstate_meta->method_vactive.common, stream_pstate_meta); /* vblank */ - stream_fams2_meta->method_vblank.common.allow_start_otg_vline = stream_fams2_meta->vblank_start; - stream_fams2_meta->method_vblank.common.allow_end_otg_vline = - stream_fams2_meta->method_vblank.common.allow_start_otg_vline + 1; - stream_fams2_meta->method_vblank.common.period_us = stream_fams2_meta->nom_frame_time_us; - build_method_scheduling_params(&stream_fams2_meta->method_vblank.common, stream_fams2_meta); + stream_pstate_meta->method_vblank.common.allow_start_otg_vline = stream_pstate_meta->vblank_start; + stream_pstate_meta->method_vblank.common.allow_end_otg_vline = + stream_pstate_meta->method_vblank.common.allow_start_otg_vline + 1; + stream_pstate_meta->method_vblank.common.period_us = stream_pstate_meta->nom_frame_time_us; 
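/* Note on the vblank method above: its allow window is a single otg vline
 * wide by construction (allow_end = allow_start + 1), so the call below
 * reduces to one vline of allow time per nominal frame period.
 */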
+ build_method_scheduling_params(&stream_pstate_meta->method_vblank.common, stream_pstate_meta); /* subvp */ - stream_fams2_meta->method_subvp.programming_delay_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.subvp_programming_delay_us / stream_fams2_meta->otg_vline_time_us); - stream_fams2_meta->method_subvp.df_throttle_delay_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.subvp_df_throttle_delay_us / stream_fams2_meta->otg_vline_time_us); - stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.subvp_prefetch_to_mall_delay_us / stream_fams2_meta->otg_vline_time_us); - stream_fams2_meta->method_subvp.phantom_vactive = - stream_fams2_meta->allow_to_target_delay_otg_vlines + - stream_fams2_meta->min_allow_width_otg_vlines + + stream_pstate_meta->method_subvp.programming_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.subvp_programming_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->method_subvp.df_throttle_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.subvp_df_throttle_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.subvp_prefetch_to_mall_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->method_subvp.phantom_vactive = + stream_pstate_meta->allow_to_target_delay_otg_vlines + + stream_pstate_meta->min_allow_width_otg_vlines + stream_info->phantom_min_v_active; - stream_fams2_meta->method_subvp.phantom_vfp = - stream_fams2_meta->method_subvp.df_throttle_delay_otg_vlines; + stream_pstate_meta->method_subvp.phantom_vfp = + stream_pstate_meta->method_subvp.df_throttle_delay_otg_vlines; /* phantom vtotal = v_bp(vstartup) + v_sync(1) + v_fp(throttle_delay) + v_active(allow_to_target + min_allow + min_vactive)*/ - stream_fams2_meta->method_subvp.phantom_vtotal = + stream_pstate_meta->method_subvp.phantom_vtotal = stream_info->phantom_v_startup + - stream_fams2_meta->method_subvp.phantom_vfp + + stream_pstate_meta->method_subvp.phantom_vfp + 1 + - stream_fams2_meta->method_subvp.df_throttle_delay_otg_vlines + - stream_fams2_meta->method_subvp.phantom_vactive; - stream_fams2_meta->method_subvp.common.allow_start_otg_vline = + stream_pstate_meta->method_subvp.df_throttle_delay_otg_vlines + + stream_pstate_meta->method_subvp.phantom_vactive; + stream_pstate_meta->method_subvp.common.allow_start_otg_vline = stream_descriptor->timing.v_blank_end + - stream_fams2_meta->contention_delay_otg_vlines + - stream_fams2_meta->method_subvp.programming_delay_otg_vlines + - stream_fams2_meta->method_subvp.phantom_vtotal + - stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + - stream_fams2_meta->allow_to_target_delay_otg_vlines; - stream_fams2_meta->method_subvp.common.allow_end_otg_vline = - stream_fams2_meta->vblank_start - - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; - stream_fams2_meta->method_subvp.common.period_us = stream_fams2_meta->nom_frame_time_us; - build_method_scheduling_params(&stream_fams2_meta->method_subvp.common, stream_fams2_meta); + stream_pstate_meta->contention_delay_otg_vlines + + stream_pstate_meta->method_subvp.programming_delay_otg_vlines + + stream_pstate_meta->method_subvp.phantom_vtotal + + stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + + stream_pstate_meta->allow_to_target_delay_otg_vlines; + stream_pstate_meta->method_subvp.common.allow_end_otg_vline = + 
stream_pstate_meta->vblank_start - + stream_pstate_meta->blackout_otg_vlines; + stream_pstate_meta->method_subvp.common.period_us = stream_pstate_meta->nom_frame_time_us; + build_method_scheduling_params(&stream_pstate_meta->method_subvp.common, stream_pstate_meta); /* drr */ - stream_fams2_meta->method_drr.programming_delay_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.drr_programming_delay_us / stream_fams2_meta->otg_vline_time_us); - stream_fams2_meta->method_drr.common.allow_start_otg_vline = - stream_fams2_meta->vblank_start + - stream_fams2_meta->allow_to_target_delay_otg_vlines; - stream_fams2_meta->method_drr.common.period_us = stream_fams2_meta->nom_frame_time_us; + stream_pstate_meta->method_drr.programming_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.drr_programming_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->method_drr.common.allow_start_otg_vline = + stream_pstate_meta->vblank_start + + stream_pstate_meta->allow_to_target_delay_otg_vlines; + stream_pstate_meta->method_drr.common.period_us = stream_pstate_meta->nom_frame_time_us; if (display_config->display_config.num_streams <= 1) { /* only need to stretch vblank for blackout time */ - stream_fams2_meta->method_drr.stretched_vtotal = - stream_fams2_meta->nom_vtotal + - stream_fams2_meta->allow_to_target_delay_otg_vlines + - stream_fams2_meta->min_allow_width_otg_vlines + - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; + stream_pstate_meta->method_drr.stretched_vtotal = + stream_pstate_meta->nom_vtotal + + stream_pstate_meta->allow_to_target_delay_otg_vlines + + stream_pstate_meta->min_allow_width_otg_vlines + + stream_pstate_meta->blackout_otg_vlines; } else { /* multi display needs to always be schedulable */ - stream_fams2_meta->method_drr.stretched_vtotal = - stream_fams2_meta->nom_vtotal * 2 + - stream_fams2_meta->allow_to_target_delay_otg_vlines + - stream_fams2_meta->min_allow_width_otg_vlines + - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; - } - stream_fams2_meta->method_drr.common.allow_end_otg_vline = - stream_fams2_meta->method_drr.stretched_vtotal - - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; - build_method_scheduling_params(&stream_fams2_meta->method_drr.common, stream_fams2_meta); + stream_pstate_meta->method_drr.stretched_vtotal = + stream_pstate_meta->nom_vtotal * 2 + + stream_pstate_meta->allow_to_target_delay_otg_vlines + + stream_pstate_meta->min_allow_width_otg_vlines + + stream_pstate_meta->blackout_otg_vlines; + } + stream_pstate_meta->method_drr.common.allow_end_otg_vline = + stream_pstate_meta->method_drr.stretched_vtotal - + stream_pstate_meta->blackout_otg_vlines; + build_method_scheduling_params(&stream_pstate_meta->method_drr.common, stream_pstate_meta); } static void build_subvp_meta_per_stream(struct dml2_pmo_instance *pmo, @@ -1820,14 +1843,14 @@ static void build_subvp_meta_per_stream(struct dml2_pmo_instance *pmo, int stream_index) { struct dml2_implicit_svp_meta *stream_svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; - struct dml2_fams2_meta *stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_index]; + struct dml2_pstate_meta *stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index]; stream_svp_meta->valid = true; /* PMO FAMS2 precaulcates these values */ - stream_svp_meta->v_active = stream_fams2_meta->method_subvp.phantom_vactive; - stream_svp_meta->v_front_porch = stream_fams2_meta->method_subvp.phantom_vfp; - stream_svp_meta->v_total = 
stream_fams2_meta->method_subvp.phantom_vtotal; + stream_svp_meta->v_active = stream_pstate_meta->method_subvp.phantom_vactive; + stream_svp_meta->v_front_porch = stream_pstate_meta->method_subvp.phantom_vfp; + stream_svp_meta->v_total = stream_pstate_meta->method_subvp.phantom_vtotal; } bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) @@ -1879,7 +1902,7 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp set_bit_in_bitfield(&s->pmo_dcn4.stream_vactive_capability_mask, stream_index); /* FAMS2 meta */ - build_fams2_meta_per_stream(pmo, display_config, stream_index); + build_pstate_meta_per_stream(pmo, display_config, stream_index); /* SVP meta */ build_subvp_meta_per_stream(pmo, display_config, stream_index); @@ -2077,7 +2100,7 @@ static void setup_planes_for_vactive_by_mask(struct display_configuation_with_me if (!pmo->options->disable_vactive_det_fill_bw_pad) { display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us = - (unsigned int)math_floor(pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_index].method_vactive.max_vactive_det_fill_delay_us); + (unsigned int)math_floor(pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index].method_vactive.max_vactive_det_fill_delay_us); } } } @@ -2098,7 +2121,7 @@ static void setup_planes_for_vactive_drr_by_mask(struct display_configuation_wit if (!pmo->options->disable_vactive_det_fill_bw_pad) { display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us = - (unsigned int)math_floor(pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_index].method_vactive.max_vactive_det_fill_delay_us); + (unsigned int)math_floor(pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index].method_vactive.max_vactive_det_fill_delay_us); } } } @@ -2144,9 +2167,9 @@ static bool setup_display_config(struct display_configuation_with_meta *display_ /* copy FAMS2 meta */ if (success) { display_config->stage3.fams2_required = fams2_required; - memcpy(&display_config->stage3.stream_fams2_meta, - &scratch->pmo_dcn4.stream_fams2_meta, - sizeof(struct dml2_fams2_meta) * DML2_MAX_PLANES); + memcpy(&display_config->stage3.stream_pstate_meta, + &scratch->pmo_dcn4.stream_pstate_meta, + sizeof(struct dml2_pstate_meta) * DML2_MAX_PLANES); } return success; @@ -2188,12 +2211,12 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp return false; for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { - struct dml2_fams2_meta *stream_fams2_meta = &s->pmo_dcn4.stream_fams2_meta[stream_index]; + struct dml2_pstate_meta *stream_pstate_meta = &s->pmo_dcn4.stream_pstate_meta[stream_index]; if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive || s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) || - get_vactive_det_fill_latency_delay_us(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_us) { + get_vactive_det_fill_latency_delay_us(in_out->base_display_config, 
s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us) { p_state_supported = false; break; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h index d52aa82283b3..9f562f0c4797 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h @@ -255,7 +255,7 @@ struct dml2_implicit_svp_meta { unsigned long v_front_porch; }; -struct dml2_fams2_per_method_common_meta { +struct dml2_pstate_per_method_common_meta { /* generic params */ unsigned int allow_start_otg_vline; unsigned int allow_end_otg_vline; @@ -265,7 +265,7 @@ struct dml2_fams2_per_method_common_meta { double period_us; }; -struct dml2_fams2_meta { +struct dml2_pstate_meta { bool valid; double otg_vline_time_us; unsigned int scheduling_delay_otg_vlines; @@ -280,14 +280,14 @@ struct dml2_fams2_meta { unsigned int max_vtotal; double min_refresh_rate_hz; double max_frame_time_us; - unsigned int dram_clk_change_blackout_otg_vlines; + unsigned int blackout_otg_vlines; struct { double max_vactive_det_fill_delay_us; unsigned int max_vactive_det_fill_delay_otg_vlines; - struct dml2_fams2_per_method_common_meta common; + struct dml2_pstate_per_method_common_meta common; } method_vactive; struct { - struct dml2_fams2_per_method_common_meta common; + struct dml2_pstate_per_method_common_meta common; } method_vblank; struct { unsigned int programming_delay_otg_vlines; @@ -296,15 +296,24 @@ struct dml2_fams2_meta { unsigned long phantom_vactive; unsigned long phantom_vfp; unsigned long phantom_vtotal; - struct dml2_fams2_per_method_common_meta common; + struct dml2_pstate_per_method_common_meta common; } method_subvp; struct { unsigned int programming_delay_otg_vlines; unsigned int stretched_vtotal; - struct dml2_fams2_per_method_common_meta common; + struct dml2_pstate_per_method_common_meta common; } method_drr; }; +/* mask of synchronized timings by stream index */ +struct dml2_pmo_synchronized_timing_groups { + unsigned int num_timing_groups; + unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES]; + bool group_is_drr_enabled[DML2_MAX_PLANES]; + bool group_is_drr_active[DML2_MAX_PLANES]; + double group_line_time_us[DML2_MAX_PLANES]; +}; + struct dml2_optimization_stage3_state { bool performed; bool success; @@ -319,7 +328,7 @@ struct dml2_optimization_stage3_state { // Meta-data for FAMS2 bool fams2_required; - struct dml2_fams2_meta stream_fams2_meta[DML2_MAX_PLANES]; + struct dml2_pstate_meta stream_pstate_meta[DML2_MAX_PLANES]; int min_clk_index_for_latency; }; @@ -472,6 +481,7 @@ struct dml2_core_scratch { }; struct dml2_core_instance { + enum dml2_project_id project_id; struct dml2_mcg_min_clock_table *minimum_clock_table; struct dml2_core_internal_state_inputs inputs; struct dml2_core_internal_state_intermediates intermediates; @@ -619,6 +629,12 @@ struct dml2_pmo_optimize_for_stutter_in_out { #define PMO_DCN4_MAX_NUM_VARIANTS 2 #define PMO_DCN4_MAX_BASE_STRATEGIES 10 +struct dml2_scheduling_check_locals { + struct dml2_pstate_per_method_common_meta group_common_pstate_meta[DML2_MAX_PLANES]; + unsigned int sorted_group_gtl_disallow_index[DML2_MAX_PLANES]; + unsigned int sorted_group_gtl_period_index[DML2_MAX_PLANES]; +}; + struct dml2_pmo_scratch { union { struct { @@ -648,7 +664,7 @@ struct dml2_pmo_scratch { // Stores all the implicit 
SVP meta information indexed by stream index of the display // configuration under inspection, built at optimization stage init struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES]; - struct dml2_fams2_meta stream_fams2_meta[DML2_MAX_PLANES]; + struct dml2_pstate_meta stream_pstate_meta[DML2_MAX_PLANES]; unsigned int optimal_vblank_reserved_time_for_stutter_us[DML2_PMO_STUTTER_CANDIDATE_LIST_SIZE]; unsigned int num_stutter_candidates; @@ -663,7 +679,7 @@ struct dml2_pmo_scratch { double group_line_time_us[DML2_MAX_PLANES]; /* scheduling check locals */ - struct dml2_fams2_per_method_common_meta group_common_fams2_meta[DML2_MAX_PLANES]; + struct dml2_pstate_per_method_common_meta group_common_pstate_meta[DML2_MAX_PLANES]; unsigned int sorted_group_gtl_disallow_index[DML2_MAX_PLANES]; unsigned int sorted_group_gtl_period_index[DML2_MAX_PLANES]; double group_phase_offset[DML2_MAX_PLANES]; -- cgit v1.2.3 From 81557c96c8a171b5d2500662d4d62f27ab6bad23 Mon Sep 17 00:00:00 2001 From: Relja Vojvodic Date: Wed, 17 Sep 2025 12:30:51 -0400 Subject: drm/amd/display: Correct slice width calculation for YCbCr420 [Why] -OVT compliance testing for 5120x2880p300Hz YCbCr420 was failing due to an incorrect slice width being calculated [How] -Ensure slice width is divisible by 2 for 420 to comply with the spec Reviewed-by: Wenjing Liu Signed-off-by: Relja Vojvodic Signed-off-by: Roman Li Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c | 2 +- drivers/gpu/drm/amd/display/dc/dsc/dsc.h | 1 + drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c | 1 + drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 1 + drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 1 + drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 2 ++ drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c | 1 + 7 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c index 89f0d999bf35..ee8c11e085cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c @@ -407,7 +407,7 @@ bool dsc_prepare_config(const struct dsc_config *dsc_cfg, struct dsc_reg_values dsc_reg_vals->ich_reset_at_eol = (dsc_cfg->is_odm || dsc_reg_vals->num_slices_h > 1) ? 0xF : 0; // Need to find the ceiling value for the slice width - dsc_reg_vals->pps.slice_width = (dsc_cfg->pic_width + dsc_cfg->dc_dsc_cfg.num_slices_h - 1) / dsc_cfg->dc_dsc_cfg.num_slices_h; + dsc_reg_vals->pps.slice_width = (dsc_cfg->pic_width + dsc_cfg->dsc_padding + dsc_cfg->dc_dsc_cfg.num_slices_h - 1) / dsc_cfg->dc_dsc_cfg.num_slices_h; // TODO: in addition to validating slice height (pic height must be divisible by slice height), // see what happens when the same condition doesn't apply for slice_width/pic_width.
dsc_reg_vals->pps.slice_height = dsc_cfg->pic_height / dsc_cfg->dc_dsc_cfg.num_slices_v; diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h index b0bd1f9425b5..b433e16842bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h @@ -41,6 +41,7 @@ struct dsc_config { enum dc_color_depth color_depth; /* Bits per component */ bool is_odm; struct dc_dsc_config dc_dsc_cfg; + uint32_t dsc_padding; }; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index f925f669f2a4..4ee6ed610de0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -108,6 +108,7 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0); dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + dsc_cfg.dsc_padding = pipe_ctx->dsc_padding_params.dsc_hactive_padding; dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index f39292952702..bf19ba65d09a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1061,6 +1061,7 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0); dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + dsc_cfg.dsc_padding = pipe_ctx->dsc_padding_params.dsc_hactive_padding; if (should_use_dto_dscclk) dccg->funcs->set_dto_dscclk(dccg, dsc->inst, dsc_cfg.dc_dsc_cfg.num_slices_h); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 05011061822c..7ea3fe48b329 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -364,6 +364,7 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0); dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + dsc_cfg.dsc_padding = pipe_ctx->dsc_padding_params.dsc_hactive_padding; dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 83419e1a9036..dba8ec0988a1 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -841,6 +841,7 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0); dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + dsc_cfg.dsc_padding = pipe_ctx->dsc_padding_params.dsc_hactive_padding; if (should_use_dto_dscclk) dccg->funcs->set_dto_dscclk(dccg, dsc->inst, dsc_cfg.dc_dsc_cfg.num_slices_h); @@ -970,6 +971,7 @@ bool link_set_dsc_pps_packet(struct pipe_ctx *pipe_ctx, bool enable, bool immedi dsc_cfg.color_depth = stream->timing.display_color_depth; dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? 
true : false; dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; + dsc_cfg.dsc_padding = pipe_ctx->dsc_padding_params.dsc_hactive_padding; dsc->funcs->dsc_get_packed_pps(dsc, &dsc_cfg, &dsc_packed_pps[0]); memcpy(&stream->dsc_packed_pps[0], &dsc_packed_pps[0], sizeof(stream->dsc_packed_pps)); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 84b38d2d6967..f4d3ff79717f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -1668,6 +1668,7 @@ bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx) dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? true : false; dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + dsc_cfg.dsc_padding = pipe_ctx->dsc_padding_params.dsc_hactive_padding; if (!pipe_ctx->stream_res.dsc->funcs->dsc_validate_stream(pipe_ctx->stream_res.dsc, &dsc_cfg)) return false; -- cgit v1.2.3 From eeab74ee6917d6e9ddd43cc5799b23393f031092 Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Wed, 17 Sep 2025 11:21:30 -0400 Subject: drm/amd/display: Consolidate two DML2 FP guards [Why&How] Consolidate two FP guards into one in dml2 since they are separated by one line of code, independent of the guard. Reviewed-by: Dillon Varone Signed-off-by: Ivan Lipski Signed-off-by: Roman Li Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 08f7f03b1023..43d333d6608e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -283,9 +283,7 @@ static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *co mode_support->dml2_instance = dml_init->dml2_instance; DC_FP_START(); dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); - DC_FP_END(); dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming; - DC_FP_START(); is_supported = dml2_check_mode_supported(mode_support); DC_FP_END(); if (!is_supported) -- cgit v1.2.3 From 7f74931c9c6ba900739658a83fce3ae71c78366f Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 16 Sep 2025 13:57:17 -0400 Subject: drm/amd/display: Support possibly NULL link for should_use_dmub_lock [Why] It's possible to have a stream enabled without a link or link encoder. There are cases where we'd still like to interlock driver programming with firmware programming to ensure that we don't put the hardware in an undefined (or error) state if two programming sequences are simultaneously executed on the same hardware blocks. [How] Add an explicit DC parameter to should_use_dmub_lock(). Make pointers to should_use_dmub_lock() const since it's a checker function that shouldn't modify state. Update the callsites to pass in DC explicitly. Check that the link is non-NULL before dereferencing and performing link-based checks.
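For illustration, a minimal standalone sketch of the NULL-guard pattern this change introduces (the struct fields and should_use_lock() below are stand-ins, not the real DC structures): the DMUB capability test depends only on dc, and the link-feature checks run only when a link actually exists.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct dc { bool has_dmub_srv; };                      /* stand-in for struct dc */
struct dc_link { bool psr_su; bool replay_enabled; };  /* stand-in for struct dc_link */

/* Checker takes dc explicitly; link may be NULL for link-less streams. */
static bool should_use_lock(const struct dc *dc, const struct dc_link *link)
{
	if (!dc->has_dmub_srv)
		return false;                   /* ASIC doesn't support DMUB */

	if (link) {                             /* only dereference a valid link */
		if (link->psr_su || link->replay_enabled)
			return true;
	}

	return false;
}

int main(void)
{
	struct dc dc = { .has_dmub_srv = true };
	struct dc_link link = { .replay_enabled = true };

	printf("%d %d\n", should_use_lock(&dc, NULL), should_use_lock(&dc, &link)); /* 0 1 */
	return 0;
}

With a link-less stream the checker now degrades to the DMUB-only test instead of dereferencing a NULL pointer.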
Reviewed-by: Alvin Lee Signed-off-by: Nicholas Kazlauskas Signed-off-by: Roman Li Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 +-- .../gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c | 29 ++++++++++++---------- .../gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h | 2 +- .../drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- .../drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 2 +- .../amd/display/dc/link/accessories/link_dp_cts.c | 4 +-- 6 files changed, 23 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 5f2d5638c819..3f5ec076da1c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4149,7 +4149,7 @@ static void commit_planes_for_stream(struct dc *dc, if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) if (top_pipe_to_program && top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { - if (should_use_dmub_lock(stream->link)) { + if (should_use_dmub_lock(dc, stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; @@ -4419,7 +4419,7 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg, CRTC_STATE_VACTIVE); - if (should_use_dmub_lock(stream->link)) { + if (should_use_dmub_lock(dc, stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index d37ecfdde4f1..17c30c5b1679 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -61,27 +61,30 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, dc_dmub_srv_wait_for_inbox0_ack(dmub_srv); } -bool should_use_dmub_lock(struct dc_link *link) +bool should_use_dmub_lock(const struct dc *dc, const struct dc_link *link) { /* ASIC doesn't support DMUB */ - if (!link->ctx->dmub_srv) + if (!dc->ctx->dmub_srv) return false; - if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) - return true; + if (link) { - if (link->replay_settings.replay_feature_enabled) - return true; + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) + return true; + + if (link->replay_settings.replay_feature_enabled) + return true; - /* only use HW lock for PSR1 on single eDP */ - if (link->psr_settings.psr_version == DC_PSR_VERSION_1) { - struct dc_link *edp_links[MAX_NUM_EDP]; - int edp_num; + /* only use HW lock for PSR1 on single eDP */ + if (link->psr_settings.psr_version == DC_PSR_VERSION_1) { + struct dc_link *edp_links[MAX_NUM_EDP]; + int edp_num; - dc_get_edp_links(link->dc, edp_links, &edp_num); + dc_get_edp_links(dc, edp_links, &edp_num); - if (edp_num == 1) - return true; + if (edp_num == 1) + return true; + } } return false; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h index 5a72b168fb4a..6e8863877686 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h @@ -37,6 +37,6 @@ void dmub_hw_lock_mgr_cmd(struct dc_dmub_srv *dmub_srv, void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union dmub_inbox0_cmd_lock_hw hw_lock_cmd); -bool should_use_dmub_lock(struct dc_link *link); +bool should_use_dmub_lock(const struct dc *dc, const 
struct dc_link *link); #endif /*_DMUB_HW_LOCK_MGR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index e9fe97f0c4ea..cb915abac15a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -2245,7 +2245,7 @@ void dcn10_cursor_lock(struct dc *dc, struct pipe_ctx *pipe, bool lock) if (lock) delay_cursor_until_vupdate(dc, pipe); - if (pipe->stream && should_use_dmub_lock(pipe->stream->link)) { + if (pipe->stream && should_use_dmub_lock(dc, pipe->stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 9477c9f9e196..650a1697557b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1449,7 +1449,7 @@ void dcn20_pipe_control_lock( !flip_immediate) dcn20_setup_gsl_group_as_lock(dc, pipe, false); - if (pipe->stream && should_use_dmub_lock(pipe->stream->link)) { + if (pipe->stream && should_use_dmub_lock(dc, pipe->stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 9e33bf937a69..3c8eb1510226 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -876,7 +876,7 @@ bool dp_set_test_pattern( return false; if (pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_enable) { - if (should_use_dmub_lock(pipe_ctx->stream->link)) { + if (should_use_dmub_lock(pipe_ctx->stream->link->dc, pipe_ctx->stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; @@ -924,7 +924,7 @@ bool dp_set_test_pattern( CRTC_STATE_VACTIVE); if (pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_disable) { - if (should_use_dmub_lock(pipe_ctx->stream->link)) { + if (should_use_dmub_lock(pipe_ctx->stream->link->dc, pipe_ctx->stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; -- cgit v1.2.3 From 4d5f626244bd10125876d29dd1146d7538a0dfbe Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 17 Sep 2025 11:46:56 -0400 Subject: drm/amd/display: Rename should_use_dmub_lock to reflect inbox1 usage [Why] Newer DCN versions use the DMCUB HW lock via inbox0 for performance reasons, while older ones use inbox1. The should_use_dmub_lock() function does not describe whether the lock in general should be used, but whether it should be used via inbox1. [How] Rename the function to should_use_dmub_inbox1_lock() to reflect this.
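To make the renamed semantics concrete, here is a rough sketch under the assumption of two lock transports (the enum and pick_lock_path() are hypothetical, not DC API): whether a DMUB lock is needed at all is one question, and which mailbox carries it is another; the renamed predicate answers only the latter.

#include <stdbool.h>
#include <stdio.h>

enum dmub_lock_path {
	DMUB_LOCK_NONE,    /* no DMUB lock required at all */
	DMUB_LOCK_INBOX0,  /* fast register-based HW lock (newer DCN) */
	DMUB_LOCK_INBOX1,  /* ring-buffer command lock (older DCN) */
};

/* Two separate questions: "lock needed?" and "which mailbox?". The renamed
 * should_use_dmub_inbox1_lock() only answers the second one. */
static enum dmub_lock_path pick_lock_path(bool lock_needed, bool use_inbox1)
{
	if (!lock_needed)
		return DMUB_LOCK_NONE;
	return use_inbox1 ? DMUB_LOCK_INBOX1 : DMUB_LOCK_INBOX0;
}

int main(void)
{
	printf("%d\n", pick_lock_path(true, false)); /* 1: inbox0 on newer DCN */
	printf("%d\n", pick_lock_path(true, true));  /* 2: inbox1 on older DCN */
	return 0;
}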
Reviewed-by: Alvin Lee Signed-off-by: Nicholas Kazlauskas Signed-off-by: Roman Li Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c | 2 +- drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h | 10 +++++++++- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 2 +- drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c | 4 ++-- 6 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 3f5ec076da1c..be885801a9ed 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4149,7 +4149,7 @@ static void commit_planes_for_stream(struct dc *dc, if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) if (top_pipe_to_program && top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { - if (should_use_dmub_lock(dc, stream->link)) { + if (should_use_dmub_inbox1_lock(dc, stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; @@ -4419,7 +4419,7 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg, CRTC_STATE_VACTIVE); - if (should_use_dmub_lock(dc, stream->link)) { + if (should_use_dmub_inbox1_lock(dc, stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index 17c30c5b1679..39f5fa73c43e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -61,7 +61,7 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, dc_dmub_srv_wait_for_inbox0_ack(dmub_srv); } -bool should_use_dmub_lock(const struct dc *dc, const struct dc_link *link) +bool should_use_dmub_inbox1_lock(const struct dc *dc, const struct dc_link *link) { /* ASIC doesn't support DMUB */ if (!dc->ctx->dmub_srv) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h index 6e8863877686..9f53d2ea5fa5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h @@ -37,6 +37,14 @@ void dmub_hw_lock_mgr_cmd(struct dc_dmub_srv *dmub_srv, void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union dmub_inbox0_cmd_lock_hw hw_lock_cmd); -bool should_use_dmub_lock(const struct dc *dc, const struct dc_link *link); +/** + * should_use_dmub_inbox1_lock() - Checks if the DMCUB hardware lock via inbox1 should be used. 
+ * + * @dc: pointer to DC object + * @link: optional pointer to the link object to check for enabled link features + * + * Return: true if the inbox1 lock should be used, false otherwise + */ +bool should_use_dmub_inbox1_lock(const struct dc *dc, const struct dc_link *link); #endif /*_DMUB_HW_LOCK_MGR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index cb915abac15a..c88781de6d18 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -2245,7 +2245,7 @@ void dcn10_cursor_lock(struct dc *dc, struct pipe_ctx *pipe, bool lock) if (lock) delay_cursor_until_vupdate(dc, pipe); - if (pipe->stream && should_use_dmub_lock(dc, pipe->stream->link)) { + if (pipe->stream && should_use_dmub_inbox1_lock(dc, pipe->stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 650a1697557b..cf9e8ce784ce 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1449,7 +1449,7 @@ void dcn20_pipe_control_lock( !flip_immediate) dcn20_setup_gsl_group_as_lock(dc, pipe, false); - if (pipe->stream && should_use_dmub_lock(dc, pipe->stream->link)) { + if (pipe->stream && should_use_dmub_inbox1_lock(dc, pipe->stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 3c8eb1510226..1593412354cf 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -876,7 +876,7 @@ bool dp_set_test_pattern( return false; if (pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_enable) { - if (should_use_dmub_lock(pipe_ctx->stream->link->dc, pipe_ctx->stream->link)) { + if (should_use_dmub_inbox1_lock(pipe_ctx->stream->link->dc, pipe_ctx->stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; @@ -924,7 +924,7 @@ bool dp_set_test_pattern( CRTC_STATE_VACTIVE); if (pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_disable) { - if (should_use_dmub_lock(pipe_ctx->stream->link->dc, pipe_ctx->stream->link)) { + if (should_use_dmub_inbox1_lock(pipe_ctx->stream->link->dc, pipe_ctx->stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; -- cgit v1.2.3 From 6d92c4d03063480a60bbc3ff61d22197c931f411 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 17 Sep 2025 13:00:58 -0400 Subject: drm/amd/display: Rename FAMS2 global control lock to DMUB HW control lock [Why] FAMS2 dictates whether the inbox0 HW lock is required, but it is not the only feature that may determine this. In order to leverage the faster inbox0 HW lock in place of the inbox1 ringbuffer-based control lock it's desirable to utilize the HWSS-based locking protocol FAMS2 has already implemented. [How] Rename the FAMS2 global control lock to DMUB HW control lock. This is purely a refactor with no functional change; the logic that will determine which features need to enable this HW lock will be added in a future commit.
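The renamed entry points remain optional members of the sequencer function table, and call sites keep the usual NULL-checked dispatch, so ASICs that never wire the hook get a no-op. A minimal sketch of that pattern with trimmed stand-in types (not the real struct hw_sequencer_funcs):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct dc;
struct dc_state;

/* Trimmed-down hook table mirroring the hw_sequencer_funcs pattern. */
struct hwss_funcs {
	void (*dmub_hw_control_lock)(struct dc *dc, struct dc_state *ctx, bool lock);
};

struct dc {
	struct hwss_funcs hwss;
};

/* Call-site pattern: dispatch only when the hook is wired. */
static void lock_hw(struct dc *dc, struct dc_state *ctx, bool lock)
{
	if (dc->hwss.dmub_hw_control_lock)
		dc->hwss.dmub_hw_control_lock(dc, ctx, lock);
}

static void dcn401_lock(struct dc *dc, struct dc_state *ctx, bool lock)
{
	(void)dc;
	(void)ctx;
	printf("inbox0 HW lock: %s\n", lock ? "acquire" : "release");
}

int main(void)
{
	struct dc older = { .hwss = { 0 } };  /* hook absent: calls are no-ops */
	struct dc newer = { .hwss = { .dmub_hw_control_lock = dcn401_lock } };

	lock_hw(&older, NULL, true);
	lock_hw(&newer, NULL, true);
	lock_hw(&newer, NULL, false);
	return 0;
}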
Reviewed-by: Alvin Lee Signed-off-by: Nicholas Kazlauskas Signed-off-by: Roman Li Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 29 +++++++++++----------- .../gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 24 +++++++++--------- .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 18 +++++++------- .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 4 +-- .../drm/amd/display/dc/hwss/dcn401/dcn401_init.c | 4 +-- drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h | 10 ++++---- 6 files changed, 44 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index be885801a9ed..eae2dabd5182 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2158,8 +2158,8 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c */ if (dc->hwss.subvp_pipe_control_lock) dc->hwss.subvp_pipe_control_lock(dc, context, true, true, NULL, subvp_prev_use); - if (dc->hwss.fams2_global_control_lock) - dc->hwss.fams2_global_control_lock(dc, context, true); + if (dc->hwss.dmub_hw_control_lock) + dc->hwss.dmub_hw_control_lock(dc, context, true); if (dc->hwss.update_dsc_pg) dc->hwss.update_dsc_pg(dc, context, false); @@ -2229,8 +2229,8 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc->hwss.commit_subvp_config(dc, context); if (dc->hwss.subvp_pipe_control_lock) dc->hwss.subvp_pipe_control_lock(dc, context, false, true, NULL, subvp_prev_use); - if (dc->hwss.fams2_global_control_lock) - dc->hwss.fams2_global_control_lock(dc, context, false); + if (dc->hwss.dmub_hw_control_lock) + dc->hwss.dmub_hw_control_lock(dc, context, false); for (i = 0; i < context->stream_count; i++) { const struct dc_link *link = context->streams[i]->link; @@ -4176,16 +4176,16 @@ static void commit_planes_for_stream(struct dc *dc, if (dc->hwss.subvp_pipe_control_lock) dc->hwss.subvp_pipe_control_lock(dc, context, true, should_lock_all_pipes, NULL, subvp_prev_use); - if (dc->hwss.fams2_global_control_lock) - dc->hwss.fams2_global_control_lock(dc, context, true); + if (dc->hwss.dmub_hw_control_lock) + dc->hwss.dmub_hw_control_lock(dc, context, true); dc->hwss.interdependent_update_lock(dc, context, true); } else { if (dc->hwss.subvp_pipe_control_lock) dc->hwss.subvp_pipe_control_lock(dc, context, true, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use); - if (dc->hwss.fams2_global_control_lock) - dc->hwss.fams2_global_control_lock(dc, context, true); + if (dc->hwss.dmub_hw_control_lock) + dc->hwss.dmub_hw_control_lock(dc, context, true); /* Lock the top pipe while updating plane addrs, since freesync requires * plane addr update event triggers to be synchronized. 
@@ -4228,9 +4228,8 @@ static void commit_planes_for_stream(struct dc *dc, dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use); - if (dc->hwss.fams2_global_control_lock) - dc->hwss.fams2_global_control_lock(dc, context, false); - + if (dc->hwss.dmub_hw_control_lock) + dc->hwss.dmub_hw_control_lock(dc, context, false); return; } @@ -4467,13 +4466,13 @@ static void commit_planes_for_stream(struct dc *dc, if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { if (dc->hwss.subvp_pipe_control_lock) dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use); - if (dc->hwss.fams2_global_control_lock) - dc->hwss.fams2_global_control_lock(dc, context, false); + if (dc->hwss.dmub_hw_control_lock) + dc->hwss.dmub_hw_control_lock(dc, context, false); } else { if (dc->hwss.subvp_pipe_control_lock) dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use); - if (dc->hwss.fams2_global_control_lock) - dc->hwss.fams2_global_control_lock(dc, context, false); + if (dc->hwss.dmub_hw_control_lock) + dc->hwss.dmub_hw_control_lock(dc, context, false); } // Fire manual trigger only when bottom plane is flipped diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index d82b1cb467f4..25a07e5f4ed7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -755,11 +755,11 @@ void hwss_build_fast_sequence(struct dc *dc, block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST; (*num_steps)++; } - if (dc->hwss.fams2_global_control_lock_fast) { - block_sequence[*num_steps].params.fams2_global_control_lock_fast_params.dc = dc; - block_sequence[*num_steps].params.fams2_global_control_lock_fast_params.lock = true; - block_sequence[*num_steps].params.fams2_global_control_lock_fast_params.is_required = dc_state_is_fams2_in_use(dc, context); - block_sequence[*num_steps].func = DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST; + if (dc->hwss.dmub_hw_control_lock_fast) { + block_sequence[*num_steps].params.dmub_hw_control_lock_fast_params.dc = dc; + block_sequence[*num_steps].params.dmub_hw_control_lock_fast_params.lock = true; + block_sequence[*num_steps].params.dmub_hw_control_lock_fast_params.is_required = dc_state_is_fams2_in_use(dc, context); + block_sequence[*num_steps].func = DMUB_HW_CONTROL_LOCK_FAST; (*num_steps)++; } if (dc->hwss.pipe_control_lock) { @@ -894,11 +894,11 @@ void hwss_build_fast_sequence(struct dc *dc, block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST; (*num_steps)++; } - if (dc->hwss.fams2_global_control_lock_fast) { - block_sequence[*num_steps].params.fams2_global_control_lock_fast_params.dc = dc; - block_sequence[*num_steps].params.fams2_global_control_lock_fast_params.lock = false; - block_sequence[*num_steps].params.fams2_global_control_lock_fast_params.is_required = dc_state_is_fams2_in_use(dc, context); - block_sequence[*num_steps].func = DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST; + if (dc->hwss.dmub_hw_control_lock_fast) { + block_sequence[*num_steps].params.dmub_hw_control_lock_fast_params.dc = dc; + block_sequence[*num_steps].params.dmub_hw_control_lock_fast_params.lock = false; + block_sequence[*num_steps].params.dmub_hw_control_lock_fast_params.is_required = dc_state_is_fams2_in_use(dc, context); + block_sequence[*num_steps].func = DMUB_HW_CONTROL_LOCK_FAST; (*num_steps)++; } @@ -1001,8 +1001,8 
@@ void hwss_execute_sequence(struct dc *dc, params->wait_for_dcc_meta_propagation_params.dc, params->wait_for_dcc_meta_propagation_params.top_pipe_to_program); break; - case DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST: - dc->hwss.fams2_global_control_lock_fast(params); + case DMUB_HW_CONTROL_LOCK_FAST: + dc->hwss.dmub_hw_control_lock_fast(params); break; default: ASSERT(false); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 7c276c319086..ab21da435a36 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1404,9 +1404,9 @@ void dcn401_prepare_bandwidth(struct dc *dc, } if (dc->debug.fams2_config.bits.enable) { - dcn401_fams2_global_control_lock(dc, context, true); + dcn401_dmub_hw_control_lock(dc, context, true); dcn401_fams2_update_config(dc, context, false); - dcn401_fams2_global_control_lock(dc, context, false); + dcn401_dmub_hw_control_lock(dc, context, false); } if (p_state_change_support != context->bw_ctx.bw.dcn.clk.p_state_change_support) { @@ -1425,9 +1425,9 @@ void dcn401_optimize_bandwidth( /* enable fams2 if needed */ if (dc->debug.fams2_config.bits.enable) { - dcn401_fams2_global_control_lock(dc, context, true); + dcn401_dmub_hw_control_lock(dc, context, true); dcn401_fams2_update_config(dc, context, true); - dcn401_fams2_global_control_lock(dc, context, false); + dcn401_dmub_hw_control_lock(dc, context, false); } /* program dchubbub watermarks */ @@ -1466,7 +1466,7 @@ void dcn401_optimize_bandwidth( } } -void dcn401_fams2_global_control_lock(struct dc *dc, +void dcn401_dmub_hw_control_lock(struct dc *dc, struct dc_state *context, bool lock) { @@ -1483,12 +1483,12 @@ void dcn401_fams2_global_control_lock(struct dc *dc, dmub_hw_lock_mgr_inbox0_cmd(dc->ctx->dmub_srv, hw_lock_cmd); } -void dcn401_fams2_global_control_lock_fast(union block_sequence_params *params) +void dcn401_dmub_hw_control_lock_fast(union block_sequence_params *params) { - struct dc *dc = params->fams2_global_control_lock_fast_params.dc; - bool lock = params->fams2_global_control_lock_fast_params.lock; + struct dc *dc = params->dmub_hw_control_lock_fast_params.dc; + bool lock = params->dmub_hw_control_lock_fast_params.lock; - if (params->fams2_global_control_lock_fast_params.is_required) { + if (params->dmub_hw_control_lock_fast_params.is_required) { union dmub_inbox0_cmd_lock_hw hw_lock_cmd = { 0 }; hw_lock_cmd.bits.command_code = DMUB_INBOX0_CMD__HW_LOCK; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 2621b7725267..9591657d8eee 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -73,11 +73,11 @@ void dcn401_optimize_bandwidth( struct dc *dc, struct dc_state *context); -void dcn401_fams2_global_control_lock(struct dc *dc, +void dcn401_dmub_hw_control_lock(struct dc *dc, struct dc_state *context, bool lock); void dcn401_fams2_update_config(struct dc *dc, struct dc_state *context, bool enable); -void dcn401_fams2_global_control_lock_fast(union block_sequence_params *params); +void dcn401_dmub_hw_control_lock_fast(union block_sequence_params *params); void dcn401_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings); void dcn401_hardware_release(struct dc *dc); void dcn401_update_odm(struct dc *dc, struct dc_state *context, diff --git 
a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index d6e11b7e4fce..a221b8cb6d4d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -95,9 +95,9 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom, .wait_for_dcc_meta_propagation = dcn401_wait_for_dcc_meta_propagation, .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless, - .fams2_global_control_lock = dcn401_fams2_global_control_lock, + .dmub_hw_control_lock = dcn401_dmub_hw_control_lock, .fams2_update_config = dcn401_fams2_update_config, - .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast, + .dmub_hw_control_lock_fast = dcn401_dmub_hw_control_lock_fast, .program_outstanding_updates = dcn401_program_outstanding_updates, .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, .detect_pipe_changes = dcn401_detect_pipe_changes, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 1723bbcf2c46..619ac4dfff07 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -148,7 +148,7 @@ struct wait_for_dcc_meta_propagation_params { const struct pipe_ctx *top_pipe_to_program; }; -struct fams2_global_control_lock_fast_params { +struct dmub_hw_control_lock_fast_params { struct dc *dc; bool is_required; bool lock; @@ -173,7 +173,7 @@ union block_sequence_params { struct set_ocsc_default_params set_ocsc_default_params; struct subvp_save_surf_addr subvp_save_surf_addr; struct wait_for_dcc_meta_propagation_params wait_for_dcc_meta_propagation_params; - struct fams2_global_control_lock_fast_params fams2_global_control_lock_fast_params; + struct dmub_hw_control_lock_fast_params dmub_hw_control_lock_fast_params; }; enum block_sequence_func { @@ -195,7 +195,7 @@ enum block_sequence_func { MPC_SET_OCSC_DEFAULT, DMUB_SUBVP_SAVE_SURF_ADDR, HUBP_WAIT_FOR_DCC_META_PROP, - DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST, + DMUB_HW_CONTROL_LOCK_FAST, /* This must be the last value in this enum, add new ones above */ HWSS_BLOCK_SEQUENCE_FUNC_COUNT }; @@ -452,13 +452,13 @@ struct hw_sequencer_funcs { const struct dc_state *new_ctx); void (*wait_for_dcc_meta_propagation)(const struct dc *dc, const struct pipe_ctx *top_pipe_to_program); - void (*fams2_global_control_lock)(struct dc *dc, + void (*dmub_hw_control_lock)(struct dc *dc, struct dc_state *context, bool lock); void (*fams2_update_config)(struct dc *dc, struct dc_state *context, bool enable); - void (*fams2_global_control_lock_fast)(union block_sequence_params *params); + void (*dmub_hw_control_lock_fast)(union block_sequence_params *params); void (*set_long_vtotal)(struct pipe_ctx **pipe_ctx, int num_pipes, uint32_t v_total_min, uint32_t v_total_max); void (*program_outstanding_updates)(struct dc *dc, struct dc_state *context); -- cgit v1.2.3 From 13ab3a8f9ae8c300f2c40bc12e399ac700e0f861 Mon Sep 17 00:00:00 2001 From: Peichen Huang Date: Thu, 11 Sep 2025 13:41:16 +0800 Subject: drm/amd/display: lttpr cap should be nrd cap in bw_alloc mode [WHY] When bw allocation mode is enabled, dpia may report the lttpr cap with a reduced common cap. This would cause the driver not to start pre-training with the maximum available bandwidth. [How] When bw allocation mode is enabled, use the NRD cap as the lttpr cap.
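The override pattern reduces to preferring the non-reduced (NRD) cap over the possibly reduced LTTPR-reported cap whenever BW allocation mode is active. A compact standalone sketch, with illustrative plain-int values in place of the real dc_lane_count/dc_link_rate enums:

#include <stdio.h>

/* Illustrative stand-in for the relevant dpia_bw_alloc_config fields. */
struct bw_alloc_cfg {
	int enabled;
	int nrd_max_lane_count;   /* non-reduced lane count, 0 if unknown */
};

/* Pattern from the patch: when BW allocation mode is active and an NRD cap
 * is available, it takes precedence over the reported (reduced) cap. */
static int lttpr_max_lanes(int reported, const struct bw_alloc_cfg *bw)
{
	int lanes = reported;

	if (bw->enabled && bw->nrd_max_lane_count > 0)
		lanes = bw->nrd_max_lane_count;
	return lanes;
}

int main(void)
{
	struct bw_alloc_cfg bw = { .enabled = 1, .nrd_max_lane_count = 4 };

	/* DPIA reported a reduced common cap of 2 lanes; NRD restores 4. */
	printf("max lanes = %d\n", lttpr_max_lanes(2, &bw));
	return 0;
}

The same shape applies to the link rate: the NRD value is substituted first, and only then is the result clamped against max_link_cap as before.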
Reviewed-by: Cruise Hung Reviewed-by: Wenjing Liu Signed-off-by: Peichen Huang Signed-off-by: Roman Li Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../display/dc/link/protocols/link_dp_capability.c | 38 ++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index b12c11bd6a14..c33a8c17c38a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -424,6 +424,21 @@ static enum dc_link_rate get_link_rate_from_max_link_bw( return link_rate; } +static enum dc_lane_count get_lttpr_max_lane_count(struct dc_link *link) +{ + enum dc_lane_count lttpr_max_lane_count = LANE_COUNT_UNKNOWN; + + if (link->dpcd_caps.lttpr_caps.max_lane_count <= LANE_COUNT_DP_MAX) + lttpr_max_lane_count = link->dpcd_caps.lttpr_caps.max_lane_count; + + /* if bw_allocation is enabled and nrd_max_lane_count is set, use it */ + if (link->dpia_bw_alloc_config.bw_alloc_enabled && + link->dpia_bw_alloc_config.nrd_max_lane_count > 0) + lttpr_max_lane_count = link->dpia_bw_alloc_config.nrd_max_lane_count; + + return lttpr_max_lane_count; +} + static enum dc_link_rate get_lttpr_max_link_rate(struct dc_link *link) { @@ -438,6 +453,11 @@ static enum dc_link_rate get_lttpr_max_link_rate(struct dc_link *link) break; } + /* if bw_allocation is enabled and nrd_max_link_rate is set, use it */ + if (link->dpia_bw_alloc_config.bw_alloc_enabled && + link->dpia_bw_alloc_config.nrd_max_link_rate > 0) + lttpr_max_link_rate = link->dpia_bw_alloc_config.nrd_max_link_rate; + if (link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.bits.UHBR20) lttpr_max_link_rate = LINK_RATE_UHBR20; else if (link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.bits.UHBR13_5) @@ -2241,6 +2261,7 @@ const struct dc_link_settings *dp_get_verified_link_cap( struct dc_link_settings dp_get_max_link_cap(struct dc_link *link) { struct dc_link_settings max_link_cap = {0}; + enum dc_lane_count lttpr_max_lane_count; enum dc_link_rate lttpr_max_link_rate; enum dc_link_rate cable_max_link_rate; struct resource_context *res_ctx = &link->dc->current_state->res_ctx; @@ -2305,8 +2326,11 @@ struct dc_link_settings dp_get_max_link_cap(struct dc_link *link) /* Some LTTPR devices do not report valid DPCD revisions, if so, do not take it's link cap into consideration. 
*/ if (link->dpcd_caps.lttpr_caps.revision.raw >= DPCD_REV_14) { - if (link->dpcd_caps.lttpr_caps.max_lane_count < max_link_cap.lane_count) - max_link_cap.lane_count = link->dpcd_caps.lttpr_caps.max_lane_count; + lttpr_max_lane_count = get_lttpr_max_lane_count(link); + + if (lttpr_max_lane_count < max_link_cap.lane_count) + max_link_cap.lane_count = lttpr_max_lane_count; + lttpr_max_link_rate = get_lttpr_max_link_rate(link); if (lttpr_max_link_rate < max_link_cap.link_rate) @@ -2412,6 +2436,11 @@ bool dp_verify_link_cap_with_retries( dp_trace_detect_lt_init(link); + DC_LOG_HW_LINK_TRAINING("%s: Link[%d] LinkRate=0x%x LaneCount=%d", + __func__, link->link_index, + known_limit_link_setting->link_rate, + known_limit_link_setting->lane_count); + if (link->link_enc && link->link_enc->features.flags.bits.DP_IS_USB_C && link->dc->debug.usbc_combo_phy_reset_wa) apply_usbc_combo_phy_reset_wa(link, known_limit_link_setting); @@ -2448,6 +2477,11 @@ bool dp_verify_link_cap_with_retries( dp_trace_lt_fail_count_update(link, fail_count, true); dp_trace_set_lt_end_timestamp(link, true); + DC_LOG_HW_LINK_TRAINING("%s: Link[%d] Exit. is_success=%d fail_count=%d", + __func__, link->link_index, + success, + fail_count); + return success; } -- cgit v1.2.3 From d04eb0c402780ca037b62a6aecf23b863545ebca Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Thu, 18 Sep 2025 16:25:45 -0400 Subject: drm/amd/display: Prevent Gating DTBCLK before It Is Properly Latched [why] 1. With allow_0_dtb_clk enabled, the time required to latch DTBCLK to 600 MHz depends on the SMU. If DTBCLK is not latched to 600 MHz before set_mode completes, gating DTBCLK causes the DP2 sink to lose its clock source. 2. The existing DTBCLK gating sequence ungates DTBCLK based on both pix_clk and ref_dtbclk, but gates DTBCLK when either pix_clk or ref_dtbclk is zero. pix_clk can be zero outside the set_mode sequence before DTBCLK is properly latched, which can lead to DTBCLK being gated by mistake. [how] Consider both pixel_clk and ref_dtbclk when determining when it is safe to gate DTBCLK; this is more accurate. 
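A compact standalone sketch of the two tightened decisions (the 590000 kHz threshold mirrors the patch, but the helper functions themselves are illustrative): gating requires both the reference and the requested DTBCLK to be zero, and the clock only counts as latched once the measured value is above the threshold.

#include <stdbool.h>
#include <stdio.h>

/* Threshold from the patch: DTBCLK counts as latched near 600 MHz (kHz units). */
#define DTBCLK_LATCHED_KHZ 590000

/* Gate only when both the reference and the requested DTBCLK are zero,
 * so a transient pix_clk of 0 outside set_mode cannot gate the clock. */
static bool safe_to_gate_dtbclk(int ref_dtbclk_khz, int req_dtbclk_khz)
{
	return ref_dtbclk_khz == 0 && req_dtbclk_khz == 0;
}

/* Only commit the new DTBCLK state once the SMU has actually latched it. */
static bool dtbclk_latched(int actual_khz)
{
	return actual_khz > DTBCLK_LATCHED_KHZ;
}

int main(void)
{
	printf("gate? %d\n", safe_to_gate_dtbclk(600000, 0));  /* 0: still referenced */
	printf("gate? %d\n", safe_to_gate_dtbclk(0, 0));       /* 1: truly idle */
	printf("latched? %d\n", dtbclk_latched(600000));       /* 1 */
	return 0;
}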
Reviewed-by: Charlene Liu Reviewed-by: Aurabindo Pillai Signed-off-by: Fangzhi Zuo Signed-off-by: Roman Li Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 4 +++- drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index b11383fba35f..1eb04772f5da 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -394,6 +394,8 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, display_count = dcn35_get_active_display_cnt_wa(dc, context, &all_active_disps); if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz) new_clocks->ref_dtbclk_khz = 600000; + else if (!new_clocks->dtbclk_en && new_clocks->ref_dtbclk_khz > 590000) + new_clocks->ref_dtbclk_khz = 0; /* * if it is safe to lower, but we are already in the lower state, we don't have to do anything @@ -435,7 +437,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, actual_dtbclk = REG_READ(CLK1_CLK4_CURRENT_CNT); - if (actual_dtbclk) { + if (actual_dtbclk > 590000) { clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; } diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index de6d62401362..c899c09ea31b 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -1411,7 +1411,7 @@ static void dccg35_set_dtbclk_dto( __func__, params->otg_inst, params->pixclk_khz, params->ref_dtbclk_khz, req_dtbclk_khz, phase, modulo); - } else { + } else if (!params->ref_dtbclk_khz && !req_dtbclk_khz) { switch (params->otg_inst) { case 0: REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 0); -- cgit v1.2.3 From a93b77655be56e4ef3383942b45ae6eb05f27ac7 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 19 Sep 2025 16:22:48 -0400 Subject: drm/amd/display: [FW Promotion] Release 0.1.29.0 Add new interface for offloading cursor programming to DMUB. Reviewed-by: Sun peng (Leo) Li Signed-off-by: Taimur Hassan Signed-off-by: Roman Li Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 204 +++++++++++++++++++++++- 1 file changed, 203 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 92248224b713..0a250699999c 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -629,6 +629,112 @@ struct dmub_visual_confirm_color { uint16_t panel_inst; }; +/** + * struct dmub_cursor_offload_pipe_data_dcn30_v1 - DCN30+ per pipe data. 
+ */
+struct dmub_cursor_offload_pipe_data_dcn30_v1 {
+	uint32_t CURSOR0_0_CURSOR_SURFACE_ADDRESS;
+	uint32_t CURSOR0_0_CURSOR_SURFACE_ADDRESS_HIGH;
+	uint32_t CURSOR0_0_CURSOR_SIZE__CURSOR_WIDTH : 16;
+	uint32_t CURSOR0_0_CURSOR_SIZE__CURSOR_HEIGHT : 16;
+	uint32_t CURSOR0_0_CURSOR_POSITION__CURSOR_X_POSITION : 16;
+	uint32_t CURSOR0_0_CURSOR_POSITION__CURSOR_Y_POSITION : 16;
+	uint32_t CURSOR0_0_CURSOR_HOT_SPOT__CURSOR_HOT_SPOT_X : 16;
+	uint32_t CURSOR0_0_CURSOR_HOT_SPOT__CURSOR_HOT_SPOT_Y : 16;
+	uint32_t CURSOR0_0_CURSOR_DST_OFFSET__CURSOR_DST_X_OFFSET : 13;
+	uint32_t CURSOR0_0_CURSOR_CONTROL__CURSOR_ENABLE : 1;
+	uint32_t CURSOR0_0_CURSOR_CONTROL__CURSOR_MODE : 3;
+	uint32_t CURSOR0_0_CURSOR_CONTROL__CURSOR_2X_MAGNIFY : 1;
+	uint32_t CURSOR0_0_CURSOR_CONTROL__CURSOR_PITCH : 2;
+	uint32_t CURSOR0_0_CURSOR_CONTROL__CURSOR_LINES_PER_CHUNK : 5;
+	uint32_t reserved0[4];
+	uint32_t CNVC_CUR0_CURSOR0_CONTROL__CUR0_ENABLE : 1;
+	uint32_t CNVC_CUR0_CURSOR0_CONTROL__CUR0_MODE : 3;
+	uint32_t CNVC_CUR0_CURSOR0_CONTROL__CUR0_EXPANSION_MODE : 1;
+	uint32_t CNVC_CUR0_CURSOR0_CONTROL__CUR0_ROM_EN : 1;
+	uint32_t CNVC_CUR0_CURSOR0_COLOR0__CUR0_COLOR0 : 24;
+	uint32_t CNVC_CUR0_CURSOR0_COLOR1__CUR0_COLOR1 : 24;
+	uint32_t CNVC_CUR0_CURSOR0_FP_SCALE_BIAS__CUR0_FP_BIAS : 16;
+	uint32_t CNVC_CUR0_CURSOR0_FP_SCALE_BIAS__CUR0_FP_SCALE : 16;
+	uint32_t reserved1[5];
+	uint32_t HUBPREQ0_CURSOR_SETTINGS__CURSOR0_DST_Y_OFFSET : 8;
+	uint32_t HUBPREQ0_CURSOR_SETTINGS__CURSOR0_CHUNK_HDL_ADJUST : 8;
+	uint32_t reserved2[3];
+};
+
+/**
+ * struct dmub_cursor_offload_pipe_data_dcn401_v1 - DCN401 per pipe data.
+ */
+struct dmub_cursor_offload_pipe_data_dcn401_v1 {
+	uint32_t CURSOR0_0_CURSOR_SURFACE_ADDRESS;
+	uint32_t CURSOR0_0_CURSOR_SURFACE_ADDRESS_HIGH;
+	uint32_t CURSOR0_0_CURSOR_SIZE__CURSOR_WIDTH : 16;
+	uint32_t CURSOR0_0_CURSOR_SIZE__CURSOR_HEIGHT : 16;
+	uint32_t CURSOR0_0_CURSOR_POSITION__CURSOR_X_POSITION : 16;
+	uint32_t CURSOR0_0_CURSOR_POSITION__CURSOR_Y_POSITION : 16;
+	uint32_t CURSOR0_0_CURSOR_HOT_SPOT__CURSOR_HOT_SPOT_X : 16;
+	uint32_t CURSOR0_0_CURSOR_HOT_SPOT__CURSOR_HOT_SPOT_Y : 16;
+	uint32_t CURSOR0_0_CURSOR_DST_OFFSET__CURSOR_DST_X_OFFSET : 13;
+	uint32_t CURSOR0_0_CURSOR_CONTROL__CURSOR_ENABLE : 1;
+	uint32_t CURSOR0_0_CURSOR_CONTROL__CURSOR_MODE : 3;
+	uint32_t CURSOR0_0_CURSOR_CONTROL__CURSOR_2X_MAGNIFY : 1;
+	uint32_t CURSOR0_0_CURSOR_CONTROL__CURSOR_PITCH : 2;
+	uint32_t CURSOR0_0_CURSOR_CONTROL__CURSOR_LINES_PER_CHUNK : 5;
+	uint32_t reserved0[4];
+	uint32_t CM_CUR0_CURSOR0_CONTROL__CUR0_ENABLE : 1;
+	uint32_t CM_CUR0_CURSOR0_CONTROL__CUR0_MODE : 3;
+	uint32_t CM_CUR0_CURSOR0_CONTROL__CUR0_EXPANSION_MODE : 1;
+	uint32_t CM_CUR0_CURSOR0_CONTROL__CUR0_ROM_EN : 1;
+	uint32_t CM_CUR0_CURSOR0_COLOR0__CUR0_COLOR0 : 24;
+	uint32_t CM_CUR0_CURSOR0_COLOR1__CUR0_COLOR1 : 24;
+	uint32_t CM_CUR0_CURSOR0_FP_SCALE_BIAS_G_Y__CUR0_FP_BIAS_G_Y : 16;
+	uint32_t CM_CUR0_CURSOR0_FP_SCALE_BIAS_G_Y__CUR0_FP_SCALE_G_Y : 16;
+	uint32_t CM_CUR0_CURSOR0_FP_SCALE_BIAS_RB_CRCB__CUR0_FP_BIAS_RB_CRCB : 16;
+	uint32_t CM_CUR0_CURSOR0_FP_SCALE_BIAS_RB_CRCB__CUR0_FP_SCALE_RB_CRCB : 16;
+	uint32_t reserved1[4];
+	uint32_t HUBPREQ0_CURSOR_SETTINGS__CURSOR0_DST_Y_OFFSET : 8;
+	uint32_t HUBPREQ0_CURSOR_SETTINGS__CURSOR0_CHUNK_HDL_ADJUST : 8;
+	uint32_t HUBP0_DCHUBP_MALL_CONFIG__USE_MALL_FOR_CURSOR : 1;
+	uint32_t reserved2[3];
+};
+
+/**
+ * struct dmub_cursor_offload_pipe_data_v1 - Per pipe data for cursor offload.
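+ *
+ * Wraps the per-ASIC layouts in a union padded to 96 bytes so the
+ * per-pipe entry size stays stable across hardware generations.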
+ */
+struct dmub_cursor_offload_pipe_data_v1 {
+	union {
+		struct dmub_cursor_offload_pipe_data_dcn30_v1 dcn30; /**< DCN30 cursor data. */
+		struct dmub_cursor_offload_pipe_data_dcn401_v1 dcn401; /**< DCN401 cursor data. */
+		uint8_t payload[96]; /**< Guarantees the cursor pipe data size per-pipe. */
+	};
+};
+
+/**
+ * struct dmub_cursor_offload_payload_data_v1 - A payload of stream data.
+ */
+struct dmub_cursor_offload_payload_data_v1 {
+	uint32_t write_idx_start; /**< Write index, updated before pipe_data is written. */
+	uint32_t write_idx_finish; /**< Write index, updated after pipe_data is written. */
+	uint32_t pipe_mask; /**< Mask of pipes to update. */
+	uint32_t reserved; /**< Reserved for future use. */
+	struct dmub_cursor_offload_pipe_data_v1 pipe_data[6]; /**< Per-pipe cursor data. */
+};
+
+/**
+ * struct dmub_cursor_offload_stream_v1 - Per-stream data for cursor offload.
+ */
+struct dmub_cursor_offload_stream_v1 {
+	struct dmub_cursor_offload_payload_data_v1 payloads[4]; /**< A small buffer of cursor payloads. */
+	uint32_t write_idx; /**< The index of the last written payload. */
+};
+
+/**
+ * struct dmub_cursor_offload_v1 - Cursor offload feature state.
+ */
+struct dmub_cursor_offload_v1 {
+	struct dmub_cursor_offload_stream_v1 offload_streams[6]; /**< Per-stream cursor offload data */
+};
+
 //==============================================================================
 //=================================================================
 //==============================================================================
@@ -648,7 +754,8 @@ struct dmub_visual_confirm_color {
 union dmub_fw_meta_feature_bits {
 	struct {
 		uint32_t shared_state_link_detection : 1; /**< 1 supports link detection via shared state */
-		uint32_t reserved : 31;
+		uint32_t cursor_offload_v1_support : 1; /**< 1 supports cursor offload */
+		uint32_t reserved : 30;
 	} bits; /**< status bits */
 	uint32_t all; /**< 32-bit access to status bits */
 };
@@ -813,6 +920,28 @@ enum dmub_ips_comand_type {
 	DMUB_CMD__IPS_QUERY_RESIDENCY_INFO = 1,
 };
 
+/**
+ * enum dmub_cursor_offload_comand_type - Cursor offload subcommands.
+ */
+enum dmub_cursor_offload_comand_type {
+	/**
+	 * Initializes the cursor offload feature.
+	 */
+	DMUB_CMD__CURSOR_OFFLOAD_INIT = 0,
+	/**
+	 * Enables cursor offloading for a stream and updates the timing parameters.
+	 */
+	DMUB_CMD__CURSOR_OFFLOAD_STREAM_ENABLE = 1,
+	/**
+	 * Disables cursor offloading for a given stream.
+	 */
+	DMUB_CMD__CURSOR_OFFLOAD_STREAM_DISABLE = 2,
+	/**
+	 * Programs the latest data for a given stream.
+	 */
+	DMUB_CMD__CURSOR_OFFLOAD_STREAM_PROGRAM = 3,
+};
+
 /**
  * union dmub_fw_boot_options - Boot option definitions for SCRATCH14
  */
@@ -877,6 +1006,7 @@ enum dmub_shared_state_feature_id {
 	DMUB_SHARED_SHARE_FEATURE__IPS_FW = 1,
 	DMUB_SHARED_SHARE_FEATURE__IPS_DRIVER = 2,
 	DMUB_SHARED_SHARE_FEATURE__DEBUG_SETUP = 3,
+	DMUB_SHARED_STATE_FEATURE__CURSOR_OFFLOAD_V1 = 4,
 	DMUB_SHARED_STATE_FEATURE__LAST, /* Total number of features. */
 };
 
@@ -957,6 +1087,22 @@ struct dmub_shared_state_ips_driver {
 	uint32_t reserved[61]; /**< Reversed, to be updated when adding new fields. */
 }; /* 248-bytes, fixed */
 
+/**
+ * struct dmub_shared_state_cursor_offload_stream_v1 - Per-stream header metadata for cursor offload.
+ */
+struct dmub_shared_state_cursor_offload_stream_v1 {
+	uint32_t last_write_idx; /**< Last write index */
+	uint8_t reserved[28]; /**< Reserved bytes. */
+}; /* 32-bytes, fixed */
+
+/**
+ * struct dmub_shared_state_cursor_offload_v1 - Header metadata for cursor offload.
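+ *
+ * Publishes the per-stream write indices so firmware can tell when a
+ * newer cursor payload is available.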
+ */
+struct dmub_shared_state_cursor_offload_v1 {
+	struct dmub_shared_state_cursor_offload_stream_v1 offload_streams[6]; /**< stream state, 32-bytes each */
+	uint8_t reserved[56]; /**< reserved for future use */
+}; /* 248-bytes, fixed */
+
 /**
  * enum dmub_shared_state_feature_common - Generic payload.
  */
@@ -983,6 +1129,7 @@ struct dmub_shared_state_feature_block {
 		struct dmub_shared_state_ips_fw ips_fw; /**< IPS firmware state */
 		struct dmub_shared_state_ips_driver ips_driver; /**< IPS driver state */
 		struct dmub_shared_state_debug_setup debug_setup; /**< Debug setup */
+		struct dmub_shared_state_cursor_offload_v1 cursor_offload_v1; /**< Cursor offload */
 	} data; /**< Shared state data. */
 }; /* 256-bytes, fixed */
 
@@ -1572,6 +1719,14 @@ enum dmub_cmd_type {
 	 */
 	DMUB_CMD__IPS = 91,
 
+	/**
+	 * Command type used for cursor offload.
+	 */
+	DMUB_CMD__CURSOR_OFFLOAD = 92,
+
+	/**
+	 * Command type used for VBIOS shared commands.
+	 */
 	DMUB_CMD__VBIOS = 128,
 };
 
@@ -4652,6 +4807,7 @@ enum hw_lock_client {
 	 */
 	HW_LOCK_CLIENT_REPLAY = 4,
 	HW_LOCK_CLIENT_FAMS2 = 5,
+	HW_LOCK_CLIENT_CURSOR_OFFLOAD = 6,
 	/**
 	 * Invalid client.
 	 */
@@ -6063,6 +6219,40 @@ struct dmub_rb_cmd_ips_query_residency_info {
 	struct dmub_cmd_ips_query_residency_info_data info_data;
 };
 
+/**
+ * struct dmub_cmd_cursor_offload_init_data - Payload for cursor offload init command.
+ */
+struct dmub_cmd_cursor_offload_init_data {
+	union dmub_addr state_addr; /**< State address for dmub_cursor_offload */
+	uint32_t state_size; /**< State size for dmub_cursor_offload */
+};
+
+/**
+ * struct dmub_rb_cmd_cursor_offload_init - Data for initializing cursor offload.
+ */
+struct dmub_rb_cmd_cursor_offload_init {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_cursor_offload_init_data init_data;
+};
+
+/**
+ * struct dmub_cmd_cursor_offload_stream_data - Payload for cursor offload stream command.
+ */
+struct dmub_cmd_cursor_offload_stream_data {
+	uint32_t otg_inst: 4; /**< OTG instance to control */
+	uint32_t reserved: 28; /**< Reserved for future use */
+	uint32_t line_time_in_ns; /**< Line time in ns for the OTG */
+	uint32_t v_total_max; /**< OTG v_total_max */
+};
+
+/**
+ * struct dmub_rb_cmd_cursor_offload_stream_cntl - Controls a stream for cursor offload.
+ */
+struct dmub_rb_cmd_cursor_offload_stream_cntl {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_cursor_offload_stream_data data;
+};
+
 /**
  * union dmub_rb_cmd - DMUB inbox command.
  */
@@ -6392,6 +6582,18 @@ union dmub_rb_cmd {
 	struct dmub_rb_cmd_ips_residency_cntl ips_residency_cntl;
 	struct dmub_rb_cmd_ips_query_residency_info ips_query_residency_info;
 
+	/**
+	 * Definition of a DMUB_CMD__CURSOR_OFFLOAD_INIT command.
+	 */
+	struct dmub_rb_cmd_cursor_offload_init cursor_offload_init;
+	/**
+	 * Definition of the DMUB_CMD__CURSOR_OFFLOAD stream control commands,
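+	 * all of which carry the same dmub_cmd_cursor_offload_stream_data payload: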
+	 * - DMUB_CMD__CURSOR_OFFLOAD_STREAM_ENABLE
+	 * - DMUB_CMD__CURSOR_OFFLOAD_STREAM_DISABLE
+	 * - DMUB_CMD__CURSOR_OFFLOAD_STREAM_PROGRAM
+	 */
+	struct dmub_rb_cmd_cursor_offload_stream_cntl cursor_offload_stream_ctnl;
 };
 
 /**
-- cgit v1.2.3

From 565c41e64b67d34ddf5288bdda7e14bc61d8f0d1 Mon Sep 17 00:00:00 2001
From: Taimur Hassan
Date: Fri, 19 Sep 2025 17:20:17 -0500
Subject: drm/amd/display: Promote DC to 3.2.352

- Fix slice width calculation for YCbCr420
- Fix DTBCLK gating
- Use NRD cap as lttpr cap
- Consolidate DML2 FP guards
- DML2.1 Update
- Firmware Release 0.1.29.0 changes

Reviewed-by: Sun peng (Leo) Li
Signed-off-by: Taimur Hassan
Signed-off-by: Roman Li
Tested-by: Dan Wheeler
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 98f0b6b3c213..35140b3e1332 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -55,7 +55,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.351"
+#define DC_VER "3.2.352"
 
 /**
  * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC
-- cgit v1.2.3

From 7b747d38124dfeeb2da90a911364019fe2acaae7 Mon Sep 17 00:00:00 2001
From: Clay King
Date: Mon, 22 Sep 2025 10:33:21 -0400
Subject: drm/amd/display: Remove inaccessible URL

[WHAT]
Remove inaccessible link.

Reviewed-by: Joshua Aberback
Signed-off-by: Clay King
Signed-off-by: Alex Hung
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c
index e2740482e1cf..08ea0a1b9e7f 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c
@@ -73,8 +73,6 @@
  * On any mode switch, if the new reg values are smaller than the current values,
  * then update the regs with the new values.
  *
- * Link to the ticket: http://ontrack-internal.amd.com/browse/DEDCN21-142
- *
  */
 void apply_DEDCN21_142_wa_for_hostvm_deadline(
 	struct hubp *hubp,
-- cgit v1.2.3

From c58d6b1d98db30a523b3b4d052ef764e503a4c34 Mon Sep 17 00:00:00 2001
From: Alex Hung
Date: Wed, 24 Sep 2025 09:27:35 -0600
Subject: drm/amd/display: Remove comparing uint32_t to zero

[WHAT]
These *_bypass fields are uint32_t, so they can never be less than zero.
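For illustration (a sketch only; reg_val and bypass are hypothetical
names, not from the patch), the removed checks could never fire because
an unsigned value is never negative:

	uint32_t bypass = reg_val & 0x7;

	if (bypass < 0)		/* always false: bypass is unsigned */
		bypass = 0;
	if (bypass > 4)		/* the only bound that can actually trip */
		bypass = 0;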
Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 8 ++++---- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 9e63fa72101c..db687a13174d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -509,16 +509,16 @@ void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_b regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10; regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007; - if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4) + if (regs_and_bypass->dppclk_bypass > 4) regs_and_bypass->dppclk_bypass = 0; regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007; - if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4) + if (regs_and_bypass->dcfclk_bypass > 4) regs_and_bypass->dcfclk_bypass = 0; regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007; - if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4) + if (regs_and_bypass->dispclk_bypass > 4) regs_and_bypass->dispclk_bypass = 0; regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007; - if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4) + if (regs_and_bypass->dprefclk_bypass > 4) regs_and_bypass->dprefclk_bypass = 0; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 1eb04772f5da..35d20a663d67 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -635,16 +635,16 @@ static void dcn35_save_clk_registers(struct clk_state_registers_and_bypass *regs regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10; regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007; - if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4) + if (regs_and_bypass->dppclk_bypass > 4) regs_and_bypass->dppclk_bypass = 0; regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007; - if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4) + if (regs_and_bypass->dcfclk_bypass > 4) regs_and_bypass->dcfclk_bypass = 0; regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007; - if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4) + if (regs_and_bypass->dispclk_bypass > 4) regs_and_bypass->dispclk_bypass = 0; regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007; - if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4) + if (regs_and_bypass->dprefclk_bypass > 4) regs_and_bypass->dprefclk_bypass = 0; if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { -- cgit v1.2.3 From 8e8691ecee8239634dd9a5f87655fba9bb1ee874 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 26 Aug 2025 17:12:44 -0400 Subject: drm/amd/display: Driver implementation for cursor offloading to DMU [Why] We require an interlock between driver and firmware for upcoming features and given that this could possibly happen on any single 
cursor programming call (and we cannot asynchronously wait for the
firmware to respond), we would regress cursor performance by at least
an extra 40us per call. With high-frequency gaming mice we may receive
a cursor update every 20-100us, so we would be stuttering or dropping
updates and hurting overall cursor performance.

We want a solution that can:

1. Interlock between other firmware features
2. Not stall out or require the DMCUB lock for every single update

[How]
When cursor offloading is enabled and supported by an ASIC, the driver
routes cursor programming through to DMU as part of the regular DC
stream cursor programming interfaces for attributes and position.

The atomic pipe programming path is not changed: it still follows the
existing programming sequence, tracking a field that specifies when the
register writes should be deferred to DMU.

Cursor locking is not required while cursor offload is in progress,
since the updates are consolidated and processed by DMU once at the end
of the frame in a periodic manner.

The shared buffer the firmware queries is allocated along with the rest
of the scratch state region, in an area accessible by both firmware and
driver. The size of the cursor offload (v1) state will not change, but
it does have a unique union per ASIC version with room for expansion if
needed.

When the firmware features that notify DMU of DRR updates are not
enabled, the driver now sends an explicit vtotal min/max update to DMU
firmware whenever vtotal max changes. This allows the cursor
programming to determine the appropriate latch update point offset from
vupdate.

Reviewed-by: Dillon Varone
Signed-off-by: Nicholas Kazlauskas
Signed-off-by: Alex Hung
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/core/dc.c           |  12 ++
 .../gpu/drm/amd/display/dc/core/dc_hw_sequencer.c  |  12 ++
 drivers/gpu/drm/amd/display/dc/core/dc_stream.c    |  42 +++++--
 drivers/gpu/drm/amd/display/dc/dc.h                |   1 +
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c       |  99 +++++++++++++++
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h       |  36 ++++++
 .../gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c   |  19 ++-
 .../gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c   |  20 +--
 .../drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c  |  42 ++++---
 .../gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c |   1 +
 .../gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c |  69 ++++++-----
 .../drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c    |   3 +
 .../drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c    |   3 +
 .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c    | 138 +++++++++++++++++++++
 .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h    |   8 ++
 .../gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c |   6 +
 .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c  |  60 ++++++++-
 .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h  |   2 +
 .../drm/amd/display/dc/hwss/dcn401/dcn401_init.c   |   7 ++
 drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h |  14 +++
 .../drm/amd/display/dc/inc/hw/cursor_reg_cache.h   |  22 ++++
 drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h        |   1 +
 drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h       |   1 +
 drivers/gpu/drm/amd/display/dmub/dmub_srv.h        |   6 +-
 drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c    |  22 ++--
 25 files changed, 567 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index eae2dabd5182..2c2635b6cb0f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -496,6 +496,10 @@ bool
dc_stream_adjust_vmin_vmax(struct dc *dc, return true; } } + + if (dc->hwss.notify_cursor_offload_drr_update) + dc->hwss.notify_cursor_offload_drr_update(dc, dc->current_state, stream); + return false; } @@ -2188,8 +2192,14 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe); } + for (i = 0; i < dc->current_state->stream_count; i++) + dc_dmub_srv_control_cursor_offload(dc, dc->current_state, dc->current_state->streams[i], false); + result = dc->hwss.apply_ctx_to_hw(dc, context); + for (i = 0; i < context->stream_count; i++) + dc_dmub_srv_control_cursor_offload(dc, context, context->streams[i], true); + if (result != DC_OK) { /* Application of dc_state to hardware stopped. */ dc->current_state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_STEADY; @@ -4488,6 +4498,8 @@ static void commit_planes_for_stream(struct dc *dc, pipe_ctx->plane_state->skip_manual_trigger) continue; + if (dc->hwss.program_cursor_offload_now) + dc->hwss.program_cursor_offload_now(dc, pipe_ctx); if (pipe_ctx->stream_res.tg->funcs->program_manual_trigger) pipe_ctx->stream_res.tg->funcs->program_manual_trigger(pipe_ctx->stream_res.tg); } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 25a07e5f4ed7..1bed3b14a287 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -911,6 +911,13 @@ void hwss_build_fast_sequence(struct dc *dc, current_mpc_pipe->stream && current_mpc_pipe->plane_state && current_mpc_pipe->plane_state->update_flags.bits.addr_update && !current_mpc_pipe->plane_state->skip_manual_trigger) { + if (dc->hwss.program_cursor_offload_now) { + block_sequence[*num_steps].params.program_cursor_update_now_params.dc = dc; + block_sequence[*num_steps].params.program_cursor_update_now_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = PROGRAM_CURSOR_UPDATE_NOW; + (*num_steps)++; + } + block_sequence[*num_steps].params.program_manual_trigger_params.pipe_ctx = current_mpc_pipe; block_sequence[*num_steps].func = OPTC_PROGRAM_MANUAL_TRIGGER; (*num_steps)++; @@ -1004,6 +1011,11 @@ void hwss_execute_sequence(struct dc *dc, case DMUB_HW_CONTROL_LOCK_FAST: dc->hwss.dmub_hw_control_lock_fast(params); break; + case PROGRAM_CURSOR_UPDATE_NOW: + dc->hwss.program_cursor_offload_now( + params->program_cursor_update_now_params.dc, + params->program_cursor_update_now_params.pipe_ctx); + break; default: ASSERT(false); break; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 9ac2d41f8fca..ccaf37d3e7e4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -231,6 +231,7 @@ void program_cursor_attributes( int i; struct resource_context *res_ctx; struct pipe_ctx *pipe_to_program = NULL; + bool enable_cursor_offload = dc_dmub_srv_is_cursor_offload_enabled(dc); if (!stream) return; @@ -245,9 +246,14 @@ void program_cursor_attributes( if (!pipe_to_program) { pipe_to_program = pipe_ctx; - dc->hwss.cursor_lock(dc, pipe_to_program, true); - if (pipe_to_program->next_odm_pipe) - dc->hwss.cursor_lock(dc, pipe_to_program->next_odm_pipe, true); + + if (enable_cursor_offload && dc->hwss.begin_cursor_offload_update) { + dc->hwss.begin_cursor_offload_update(dc, pipe_ctx); + } else { + dc->hwss.cursor_lock(dc, pipe_to_program, true); + if 
(pipe_to_program->next_odm_pipe) + dc->hwss.cursor_lock(dc, pipe_to_program->next_odm_pipe, true); + } } dc->hwss.set_cursor_attribute(pipe_ctx); @@ -255,12 +261,18 @@ void program_cursor_attributes( dc_send_update_cursor_info_to_dmu(pipe_ctx, i); if (dc->hwss.set_cursor_sdr_white_level) dc->hwss.set_cursor_sdr_white_level(pipe_ctx); + if (enable_cursor_offload && dc->hwss.update_cursor_offload_pipe) + dc->hwss.update_cursor_offload_pipe(dc, pipe_ctx); } if (pipe_to_program) { - dc->hwss.cursor_lock(dc, pipe_to_program, false); - if (pipe_to_program->next_odm_pipe) - dc->hwss.cursor_lock(dc, pipe_to_program->next_odm_pipe, false); + if (enable_cursor_offload && dc->hwss.commit_cursor_offload_update) { + dc->hwss.commit_cursor_offload_update(dc, pipe_to_program); + } else { + dc->hwss.cursor_lock(dc, pipe_to_program, false); + if (pipe_to_program->next_odm_pipe) + dc->hwss.cursor_lock(dc, pipe_to_program->next_odm_pipe, false); + } } } @@ -366,6 +378,7 @@ void program_cursor_position( int i; struct resource_context *res_ctx; struct pipe_ctx *pipe_to_program = NULL; + bool enable_cursor_offload = dc_dmub_srv_is_cursor_offload_enabled(dc); if (!stream) return; @@ -384,16 +397,27 @@ void program_cursor_position( if (!pipe_to_program) { pipe_to_program = pipe_ctx; - dc->hwss.cursor_lock(dc, pipe_to_program, true); + + if (enable_cursor_offload && dc->hwss.begin_cursor_offload_update) + dc->hwss.begin_cursor_offload_update(dc, pipe_ctx); + else + dc->hwss.cursor_lock(dc, pipe_to_program, true); } dc->hwss.set_cursor_position(pipe_ctx); + if (enable_cursor_offload && dc->hwss.update_cursor_offload_pipe) + dc->hwss.update_cursor_offload_pipe(dc, pipe_ctx); + if (dc->ctx->dmub_srv) dc_send_update_cursor_info_to_dmu(pipe_ctx, i); } - if (pipe_to_program) - dc->hwss.cursor_lock(dc, pipe_to_program, false); + if (pipe_to_program) { + if (enable_cursor_offload && dc->hwss.commit_cursor_offload_update) + dc->hwss.commit_cursor_offload_update(dc, pipe_to_program); + else + dc->hwss.cursor_lock(dc, pipe_to_program, false); + } } bool dc_stream_set_cursor_position( diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 35140b3e1332..0494e6cd3b33 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -530,6 +530,7 @@ struct dc_config { bool set_pipe_unlock_order; bool enable_dpia_pre_training; bool unify_link_enc_assignment; + bool enable_cursor_offload; struct spl_sharpness_range dcn_sharpness_range; struct spl_sharpness_range dcn_override_sharpness_range; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 53a088ebddef..c75663aefcf3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1174,6 +1174,100 @@ void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, con dmub_srv_subvp_save_surf_addr(dc_dmub_srv->dmub, addr, subvp_index); } +void dc_dmub_srv_cursor_offload_init(struct dc *dc) +{ + struct dmub_rb_cmd_cursor_offload_init *init; + struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv; + union dmub_rb_cmd cmd; + + if (!dc->config.enable_cursor_offload) + return; + + if (!dc_dmub_srv->dmub->meta_info.feature_bits.bits.cursor_offload_v1_support) + return; + + if (!dc_dmub_srv->dmub->cursor_offload_fb.gpu_addr || !dc_dmub_srv->dmub->cursor_offload_fb.cpu_addr) + return; + + if (!dc_dmub_srv->dmub->cursor_offload_v1) + return; + + if (!dc_dmub_srv->dmub->shared_state) + return; + + 
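+	/*
+	 * Hand firmware the GPU address and size of the driver-allocated
+	 * cursor offload state so it can consume consolidated cursor
+	 * updates from this buffer.
+	 */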
memset(&cmd, 0, sizeof(cmd)); + + init = &cmd.cursor_offload_init; + init->header.type = DMUB_CMD__CURSOR_OFFLOAD; + init->header.sub_type = DMUB_CMD__CURSOR_OFFLOAD_INIT; + init->header.payload_bytes = sizeof(init->init_data); + init->init_data.state_addr.quad_part = dc_dmub_srv->dmub->cursor_offload_fb.gpu_addr; + init->init_data.state_size = dc_dmub_srv->dmub->cursor_offload_fb.size; + + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + + dc_dmub_srv->cursor_offload_enabled = true; +} + +void dc_dmub_srv_control_cursor_offload(struct dc *dc, struct dc_state *context, + const struct dc_stream_state *stream, bool enable) +{ + struct pipe_ctx const *pipe_ctx; + struct dmub_rb_cmd_cursor_offload_stream_cntl *cntl; + union dmub_rb_cmd cmd; + + if (!dc_dmub_srv_is_cursor_offload_enabled(dc)) + return; + + if (!stream) + return; + + pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); + if (!pipe_ctx || !pipe_ctx->stream_res.tg || pipe_ctx->stream != stream) + return; + + memset(&cmd, 0, sizeof(cmd)); + + cntl = &cmd.cursor_offload_stream_ctnl; + cntl->header.type = DMUB_CMD__CURSOR_OFFLOAD; + cntl->header.sub_type = + enable ? DMUB_CMD__CURSOR_OFFLOAD_STREAM_ENABLE : DMUB_CMD__CURSOR_OFFLOAD_STREAM_DISABLE; + cntl->header.payload_bytes = sizeof(cntl->data); + + cntl->data.otg_inst = pipe_ctx->stream_res.tg->inst; + cntl->data.line_time_in_ns = 1u + (uint32_t)(div64_u64(stream->timing.h_total * 1000000ull, + stream->timing.pix_clk_100hz / 10)); + + cntl->data.v_total_max = stream->adjust.v_total_max > stream->timing.v_total ? + stream->adjust.v_total_max : + stream->timing.v_total; + + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, + enable ? DM_DMUB_WAIT_TYPE_NO_WAIT : DM_DMUB_WAIT_TYPE_WAIT); +} + +void dc_dmub_srv_program_cursor_now(struct dc *dc, const struct pipe_ctx *pipe) +{ + struct dmub_rb_cmd_cursor_offload_stream_cntl *cntl; + union dmub_rb_cmd cmd; + + if (!dc_dmub_srv_is_cursor_offload_enabled(dc)) + return; + + if (!pipe || !pipe->stream || !pipe->stream_res.tg) + return; + + memset(&cmd, 0, sizeof(cmd)); + + cntl = &cmd.cursor_offload_stream_ctnl; + cntl->header.type = DMUB_CMD__CURSOR_OFFLOAD; + cntl->header.sub_type = DMUB_CMD__CURSOR_OFFLOAD_STREAM_PROGRAM; + cntl->header.payload_bytes = sizeof(cntl->data); + cntl->data.otg_inst = pipe->stream_res.tg->inst; + + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); +} + bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait) { struct dc_context *dc_ctx; @@ -2231,6 +2325,11 @@ bool dmub_lsdma_send_poll_reg_write_command(struct dc_dmub_srv *dc_dmub_srv, uin return result; } +bool dc_dmub_srv_is_cursor_offload_enabled(const struct dc *dc) +{ + return dc->ctx->dmub_srv && dc->ctx->dmub_srv->cursor_offload_enabled; +} + void dc_dmub_srv_release_hw(const struct dc *dc) { struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 7ef93444ef3c..9bb00d48fd5e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -56,6 +56,7 @@ struct dc_dmub_srv { union dmub_shared_state_ips_driver_signals driver_signals; bool idle_allowed; bool needs_idle_wake; + bool cursor_offload_enabled; }; bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv); @@ -325,6 +326,41 @@ bool dc_dmub_srv_ips_query_residency_info(const struct dc_context *ctx, uint8_t struct dmub_ips_residency_info *driver_info, enum 
ips_residency_mode ips_mode);
 
+/**
+ * dc_dmub_srv_cursor_offload_init() - Initializes the cursor offload feature.
+ *
+ * @dc: pointer to DC object
+ */
+void dc_dmub_srv_cursor_offload_init(struct dc *dc);
+
+/**
+ * dc_dmub_srv_control_cursor_offload() - Enables or disables cursor offloading for a stream.
+ *
+ * @dc: pointer to DC object
+ * @context: the DC context to reference for pipe allocations
+ * @stream: the stream to control
+ * @enable: true to enable cursor offload, false to disable
+ */
+void dc_dmub_srv_control_cursor_offload(struct dc *dc, struct dc_state *context,
+					const struct dc_stream_state *stream, bool enable);
+
+/**
+ * dc_dmub_srv_program_cursor_now() - Requests immediate cursor programming for a given pipe.
+ *
+ * @dc: pointer to DC object
+ * @pipe: top-most pipe for a stream.
+ */
+void dc_dmub_srv_program_cursor_now(struct dc *dc, const struct pipe_ctx *pipe);
+
+/**
+ * dc_dmub_srv_is_cursor_offload_enabled() - Checks if cursor offload is enabled.
+ *
+ * @dc: pointer to DC object
+ *
+ * Return: true if cursor offload is enabled, false otherwise
+ */
+bool dc_dmub_srv_is_cursor_offload_enabled(const struct dc *dc);
+
 /**
  * dc_dmub_srv_release_hw() - Notifies DMUB service that HW access is no longer required.
  *
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c
index 01480a04f85e..ce91e5d28956 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c
@@ -199,6 +199,8 @@ void dpp_reset(struct dpp *dpp_base)
 
 	memset(&dpp->scl_data, 0, sizeof(dpp->scl_data));
 	memset(&dpp->pwl_data, 0, sizeof(dpp->pwl_data));
+
+	dpp_base->cursor_offload = false;
 }
 
 
@@ -484,10 +486,12 @@ void dpp1_set_cursor_position(
 		cur_en = 0;  /* not visible beyond top edge*/
 
 	if (dpp_base->pos.cur0_ctl.bits.cur0_enable != cur_en) {
-		REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en);
-
-		dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en;
+		if (!dpp_base->cursor_offload)
+			REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en);
 	}
+
+	dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en;
+
+	dpp_base->att.cur0_ctl.bits.cur0_enable = cur_en;
 }
 
 void dpp1_cnv_set_optional_cursor_attributes(
@@ -497,8 +501,13 @@ void dpp1_cnv_set_optional_cursor_attributes(
 	struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
 
 	if (attr) {
-		REG_UPDATE(CURSOR0_FP_SCALE_BIAS, CUR0_FP_BIAS, attr->bias);
-		REG_UPDATE(CURSOR0_FP_SCALE_BIAS, CUR0_FP_SCALE, attr->scale);
+		if (!dpp_base->cursor_offload) {
+			REG_UPDATE(CURSOR0_FP_SCALE_BIAS, CUR0_FP_BIAS, attr->bias);
+			REG_UPDATE(CURSOR0_FP_SCALE_BIAS, CUR0_FP_SCALE, attr->scale);
+		}
+
+		dpp_base->att.fp_scale_bias.bits.fp_bias = attr->bias;
+		dpp_base->att.fp_scale_bias.bits.fp_scale = attr->scale;
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c
index 09be2a90cc79..94d0dc3461d2 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c
@@ -396,17 +396,21 @@ void dpp3_set_cursor_attributes(
 		}
 	}
 
-	REG_UPDATE_3(CURSOR0_CONTROL,
-			CUR0_MODE, color_format,
-			CUR0_EXPANSION_MODE, 0,
-			CUR0_ROM_EN, cur_rom_en);
+	if (!dpp_base->cursor_offload)
+		REG_UPDATE_3(CURSOR0_CONTROL,
+				CUR0_MODE, color_format,
+				CUR0_EXPANSION_MODE, 0,
+				CUR0_ROM_EN, cur_rom_en);
 
 	if (color_format == CURSOR_MODE_MONO) {
 		/* todo: clarify what to program these to */
-		REG_UPDATE(CURSOR0_COLOR0,
-				CUR0_COLOR0, 0x00000000);
-
REG_UPDATE(CURSOR0_COLOR1, - CUR0_COLOR1, 0xFFFFFFFF); + + if (!dpp_base->cursor_offload) { + REG_UPDATE(CURSOR0_COLOR0, + CUR0_COLOR0, 0x00000000); + REG_UPDATE(CURSOR0_COLOR1, + CUR0_COLOR1, 0xFFFFFFFF); + } } dpp_base->att.cur0_ctl.bits.expansion_mode = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 7aab77b58869..3adc17f2fc35 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -103,17 +103,21 @@ void dpp401_set_cursor_attributes( } } - REG_UPDATE_3(CURSOR0_CONTROL, - CUR0_MODE, color_format, - CUR0_EXPANSION_MODE, 0, - CUR0_ROM_EN, cur_rom_en); + if (!dpp_base->cursor_offload) + REG_UPDATE_3(CURSOR0_CONTROL, + CUR0_MODE, color_format, + CUR0_EXPANSION_MODE, 0, + CUR0_ROM_EN, cur_rom_en); if (color_format == CURSOR_MODE_MONO) { /* todo: clarify what to program these to */ - REG_UPDATE(CURSOR0_COLOR0, - CUR0_COLOR0, 0x00000000); - REG_UPDATE(CURSOR0_COLOR1, - CUR0_COLOR1, 0xFFFFFFFF); + + if (!dpp_base->cursor_offload) { + REG_UPDATE(CURSOR0_COLOR0, + CUR0_COLOR0, 0x00000000); + REG_UPDATE(CURSOR0_COLOR1, + CUR0_COLOR1, 0xFFFFFFFF); + } } dpp_base->att.cur0_ctl.bits.expansion_mode = 0; @@ -132,10 +136,11 @@ void dpp401_set_cursor_position( uint32_t cur_en = pos->enable ? 1 : 0; if (dpp_base->pos.cur0_ctl.bits.cur0_enable != cur_en) { - REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); - - dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + if (!dpp_base->cursor_offload) + REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); } + + dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; } void dpp401_set_optional_cursor_attributes( @@ -145,10 +150,17 @@ void dpp401_set_optional_cursor_attributes( struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); if (attr) { - REG_UPDATE(CURSOR0_FP_SCALE_BIAS_G_Y, CUR0_FP_BIAS_G_Y, attr->bias); - REG_UPDATE(CURSOR0_FP_SCALE_BIAS_G_Y, CUR0_FP_SCALE_G_Y, attr->scale); - REG_UPDATE(CURSOR0_FP_SCALE_BIAS_RB_CRCB, CUR0_FP_BIAS_RB_CRCB, attr->bias); - REG_UPDATE(CURSOR0_FP_SCALE_BIAS_RB_CRCB, CUR0_FP_SCALE_RB_CRCB, attr->scale); + if (!dpp_base->cursor_offload) { + REG_UPDATE(CURSOR0_FP_SCALE_BIAS_G_Y, CUR0_FP_BIAS_G_Y, attr->bias); + REG_UPDATE(CURSOR0_FP_SCALE_BIAS_G_Y, CUR0_FP_SCALE_G_Y, attr->scale); + REG_UPDATE(CURSOR0_FP_SCALE_BIAS_RB_CRCB, CUR0_FP_BIAS_RB_CRCB, attr->bias); + REG_UPDATE(CURSOR0_FP_SCALE_BIAS_RB_CRCB, CUR0_FP_SCALE_RB_CRCB, attr->scale); + } + + dpp_base->att.fp_scale_bias_g_y.bits.fp_bias_g_y = attr->bias; + dpp_base->att.fp_scale_bias_g_y.bits.fp_scale_g_y = attr->scale; + dpp_base->att.fp_scale_bias_rb_crcb.bits.fp_bias_rb_crcb = attr->bias; + dpp_base->att.fp_scale_bias_rb_crcb.bits.fp_scale_rb_crcb = attr->scale; } } diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c index 9b026600b90e..6378e3fd7249 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c @@ -550,6 +550,7 @@ void hubp_reset(struct hubp *hubp) { memset(&hubp->pos, 0, sizeof(hubp->pos)); memset(&hubp->att, 0, sizeof(hubp->att)); + hubp->cursor_offload = false; } void hubp1_program_surface_config( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c index 91259b896e03..92288de4cc10 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c +++ 
b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c @@ -613,26 +613,28 @@ void hubp2_cursor_set_attributes( hubp->curs_attr = *attr; - REG_UPDATE(CURSOR_SURFACE_ADDRESS_HIGH, - CURSOR_SURFACE_ADDRESS_HIGH, attr->address.high_part); - REG_UPDATE(CURSOR_SURFACE_ADDRESS, - CURSOR_SURFACE_ADDRESS, attr->address.low_part); - - REG_UPDATE_2(CURSOR_SIZE, - CURSOR_WIDTH, attr->width, - CURSOR_HEIGHT, attr->height); - - REG_UPDATE_4(CURSOR_CONTROL, - CURSOR_MODE, attr->color_format, - CURSOR_2X_MAGNIFY, attr->attribute_flags.bits.ENABLE_MAGNIFICATION, - CURSOR_PITCH, hw_pitch, - CURSOR_LINES_PER_CHUNK, lpc); - - REG_SET_2(CURSOR_SETTINGS, 0, - /* no shift of the cursor HDL schedule */ - CURSOR0_DST_Y_OFFSET, 0, - /* used to shift the cursor chunk request deadline */ - CURSOR0_CHUNK_HDL_ADJUST, 3); + if (!hubp->cursor_offload) { + REG_UPDATE(CURSOR_SURFACE_ADDRESS_HIGH, + CURSOR_SURFACE_ADDRESS_HIGH, attr->address.high_part); + REG_UPDATE(CURSOR_SURFACE_ADDRESS, + CURSOR_SURFACE_ADDRESS, attr->address.low_part); + + REG_UPDATE_2(CURSOR_SIZE, + CURSOR_WIDTH, attr->width, + CURSOR_HEIGHT, attr->height); + + REG_UPDATE_4(CURSOR_CONTROL, + CURSOR_MODE, attr->color_format, + CURSOR_2X_MAGNIFY, attr->attribute_flags.bits.ENABLE_MAGNIFICATION, + CURSOR_PITCH, hw_pitch, + CURSOR_LINES_PER_CHUNK, lpc); + + REG_SET_2(CURSOR_SETTINGS, 0, + /* no shift of the cursor HDL schedule */ + CURSOR0_DST_Y_OFFSET, 0, + /* used to shift the cursor chunk request deadline */ + CURSOR0_CHUNK_HDL_ADJUST, 3); + } hubp->att.SURFACE_ADDR_HIGH = attr->address.high_part; hubp->att.SURFACE_ADDR = attr->address.low_part; @@ -1059,23 +1061,28 @@ void hubp2_cursor_set_position( cur_en = 0; /* not visible beyond top edge*/ if (hubp->pos.cur_ctl.bits.cur_enable != cur_en) { - if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) + bool cursor_not_programmed = hubp->att.SURFACE_ADDR == 0 && hubp->att.SURFACE_ADDR_HIGH == 0; + + if (cur_en && cursor_not_programmed) hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); - REG_UPDATE(CURSOR_CONTROL, - CURSOR_ENABLE, cur_en); + if (!hubp->cursor_offload) + REG_UPDATE(CURSOR_CONTROL, CURSOR_ENABLE, cur_en); } - REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, pos->x, - CURSOR_Y_POSITION, pos->y); + if (!hubp->cursor_offload) { + REG_SET_2(CURSOR_POSITION, 0, + CURSOR_X_POSITION, pos->x, + CURSOR_Y_POSITION, pos->y); - REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, pos->x_hotspot, - CURSOR_HOT_SPOT_Y, pos->y_hotspot); + REG_SET_2(CURSOR_HOT_SPOT, 0, + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); + + REG_SET(CURSOR_DST_OFFSET, 0, + CURSOR_DST_X_OFFSET, dst_x_offset); + } - REG_SET(CURSOR_DST_OFFSET, 0, - CURSOR_DST_X_OFFSET, dst_x_offset); /* TODO Handle surface pixel formats other than 4:4:4 */ /* Cursor Position Register Config */ hubp->pos.cur_ctl.bits.cur_enable = cur_en; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index c88781de6d18..fa62e40a9858 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3090,6 +3090,9 @@ static void dcn10_update_dchubp_dpp( } if (pipe_ctx->stream->cursor_attributes.address.quad_part != 0) { + if (dc->hwss.abort_cursor_offload_update) + dc->hwss.abort_cursor_offload_update(dc, pipe_ctx); + dc->hwss.set_cursor_attribute(pipe_ctx); dc->hwss.set_cursor_position(pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index cf9e8ce784ce..6bd905905984 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1793,6 +1793,9 @@ void dcn20_update_dchubp_dpp( if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed || pipe_ctx->update_flags.bits.scaler || viewport_changed == true) && pipe_ctx->stream->cursor_attributes.address.quad_part != 0) { + if (dc->hwss.abort_cursor_offload_update) + dc->hwss.abort_cursor_offload_update(dc, pipe_ctx); + dc->hwss.set_cursor_attribute(pipe_ctx); dc->hwss.set_cursor_position(pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 7ea3fe48b329..404ff00c7130 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1593,3 +1593,141 @@ void dcn35_hardware_release(struct dc *dc) if (dc->hwss.hw_block_power_up) dc->hwss.hw_block_power_up(dc, &pg_update_state); } + +void dcn35_abort_cursor_offload_update(struct dc *dc, const struct pipe_ctx *pipe) +{ + if (!dc_dmub_srv_is_cursor_offload_enabled(dc)) + return; + + /* + * Insert a blank update to modify the write index and set pipe_mask to 0. + * + * While the DMU is interlocked with driver full pipe programming via + * the DMU HW lock, if the cursor update begins to execute after a full + * pipe programming occurs there are two possible issues: + * + * 1. Outdated cursor information is programmed, replacing the current update + * 2. The cursor update in firmware holds the cursor lock, preventing + * the current update from being latched atomically in the same frame + * as the rest of the update. + * + * This blank update, treated as a no-op, will allow the firmware to skip + * the programming. 
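+	 *
+	 * In effect, the abort is a begin/commit pair that publishes a
+	 * payload containing no pipe updates.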
+ */ + + if (dc->hwss.begin_cursor_offload_update) + dc->hwss.begin_cursor_offload_update(dc, pipe); + + if (dc->hwss.commit_cursor_offload_update) + dc->hwss.commit_cursor_offload_update(dc, pipe); +} + +void dcn35_begin_cursor_offload_update(struct dc *dc, const struct pipe_ctx *pipe) +{ + volatile struct dmub_cursor_offload_v1 *cs = dc->ctx->dmub_srv->dmub->cursor_offload_v1; + const struct pipe_ctx *top_pipe = resource_get_otg_master(pipe); + uint32_t stream_idx, write_idx, payload_idx; + + if (!top_pipe) + return; + + stream_idx = top_pipe->pipe_idx; + write_idx = cs->offload_streams[stream_idx].write_idx + 1; /* new payload (+1) */ + payload_idx = write_idx % ARRAY_SIZE(cs->offload_streams[stream_idx].payloads); + + cs->offload_streams[stream_idx].payloads[payload_idx].write_idx_start = write_idx; + + if (pipe->plane_res.hubp) + pipe->plane_res.hubp->cursor_offload = true; + + if (pipe->plane_res.dpp) + pipe->plane_res.dpp->cursor_offload = true; +} + +void dcn35_commit_cursor_offload_update(struct dc *dc, const struct pipe_ctx *pipe) +{ + volatile struct dmub_cursor_offload_v1 *cs = dc->ctx->dmub_srv->dmub->cursor_offload_v1; + volatile struct dmub_shared_state_cursor_offload_stream_v1 *shared_stream; + const struct pipe_ctx *top_pipe = resource_get_otg_master(pipe); + uint32_t stream_idx, write_idx, payload_idx; + + if (pipe->plane_res.hubp) + pipe->plane_res.hubp->cursor_offload = false; + + if (pipe->plane_res.dpp) + pipe->plane_res.dpp->cursor_offload = false; + + if (!top_pipe) + return; + + stream_idx = top_pipe->pipe_idx; + write_idx = cs->offload_streams[stream_idx].write_idx + 1; /* new payload (+1) */ + payload_idx = write_idx % ARRAY_SIZE(cs->offload_streams[stream_idx].payloads); + + shared_stream = &dc->ctx->dmub_srv->dmub->shared_state[DMUB_SHARED_STATE_FEATURE__CURSOR_OFFLOAD_V1] + .data.cursor_offload_v1.offload_streams[stream_idx]; + + shared_stream->last_write_idx = write_idx; + + cs->offload_streams[stream_idx].write_idx = write_idx; + cs->offload_streams[stream_idx].payloads[payload_idx].write_idx_finish = write_idx; +} + +void dcn35_update_cursor_offload_pipe(struct dc *dc, const struct pipe_ctx *pipe) +{ + volatile struct dmub_cursor_offload_v1 *cs = dc->ctx->dmub_srv->dmub->cursor_offload_v1; + const struct pipe_ctx *top_pipe = resource_get_otg_master(pipe); + const struct hubp *hubp = pipe->plane_res.hubp; + const struct dpp *dpp = pipe->plane_res.dpp; + volatile struct dmub_cursor_offload_pipe_data_dcn30_v1 *p; + uint32_t stream_idx, write_idx, payload_idx; + + if (!top_pipe || !hubp || !dpp) + return; + + stream_idx = top_pipe->pipe_idx; + write_idx = cs->offload_streams[stream_idx].write_idx + 1; /* new payload (+1) */ + payload_idx = write_idx % ARRAY_SIZE(cs->offload_streams[stream_idx].payloads); + + p = &cs->offload_streams[stream_idx].payloads[payload_idx].pipe_data[pipe->pipe_idx].dcn30; + + p->CURSOR0_0_CURSOR_SURFACE_ADDRESS = hubp->att.SURFACE_ADDR; + p->CURSOR0_0_CURSOR_SURFACE_ADDRESS_HIGH = hubp->att.SURFACE_ADDR_HIGH; + p->CURSOR0_0_CURSOR_SIZE__CURSOR_WIDTH = hubp->att.size.bits.width; + p->CURSOR0_0_CURSOR_SIZE__CURSOR_HEIGHT = hubp->att.size.bits.height; + p->CURSOR0_0_CURSOR_POSITION__CURSOR_X_POSITION = hubp->pos.position.bits.x_pos; + p->CURSOR0_0_CURSOR_POSITION__CURSOR_Y_POSITION = hubp->pos.position.bits.y_pos; + p->CURSOR0_0_CURSOR_HOT_SPOT__CURSOR_HOT_SPOT_X = hubp->pos.hot_spot.bits.x_hot; + p->CURSOR0_0_CURSOR_HOT_SPOT__CURSOR_HOT_SPOT_Y = hubp->pos.hot_spot.bits.y_hot; + p->CURSOR0_0_CURSOR_DST_OFFSET__CURSOR_DST_X_OFFSET = 
hubp->pos.dst_offset.bits.dst_x_offset; + p->CURSOR0_0_CURSOR_CONTROL__CURSOR_ENABLE = hubp->pos.cur_ctl.bits.cur_enable; + p->CURSOR0_0_CURSOR_CONTROL__CURSOR_MODE = hubp->att.cur_ctl.bits.mode; + p->CURSOR0_0_CURSOR_CONTROL__CURSOR_2X_MAGNIFY = hubp->pos.cur_ctl.bits.cur_2x_magnify; + p->CURSOR0_0_CURSOR_CONTROL__CURSOR_PITCH = hubp->att.cur_ctl.bits.pitch; + p->CURSOR0_0_CURSOR_CONTROL__CURSOR_LINES_PER_CHUNK = hubp->pos.cur_ctl.bits.line_per_chunk; + + p->CNVC_CUR0_CURSOR0_CONTROL__CUR0_ENABLE = dpp->att.cur0_ctl.bits.cur0_enable; + p->CNVC_CUR0_CURSOR0_CONTROL__CUR0_MODE = dpp->att.cur0_ctl.bits.mode; + p->CNVC_CUR0_CURSOR0_CONTROL__CUR0_EXPANSION_MODE = dpp->att.cur0_ctl.bits.expansion_mode; + p->CNVC_CUR0_CURSOR0_CONTROL__CUR0_ROM_EN = dpp->att.cur0_ctl.bits.cur0_rom_en; + p->CNVC_CUR0_CURSOR0_COLOR0__CUR0_COLOR0 = 0x000000; + p->CNVC_CUR0_CURSOR0_COLOR1__CUR0_COLOR1 = 0xFFFFFF; + p->CNVC_CUR0_CURSOR0_FP_SCALE_BIAS__CUR0_FP_BIAS = dpp->att.fp_scale_bias.bits.fp_bias; + p->CNVC_CUR0_CURSOR0_FP_SCALE_BIAS__CUR0_FP_SCALE = dpp->att.fp_scale_bias.bits.fp_scale; + + p->HUBPREQ0_CURSOR_SETTINGS__CURSOR0_DST_Y_OFFSET = hubp->att.settings.bits.dst_y_offset; + p->HUBPREQ0_CURSOR_SETTINGS__CURSOR0_CHUNK_HDL_ADJUST = hubp->att.settings.bits.chunk_hdl_adjust; + + cs->offload_streams[stream_idx].payloads[payload_idx].pipe_mask |= (1u << pipe->pipe_idx); +} + +void dcn35_notify_cursor_offload_drr_update(struct dc *dc, struct dc_state *context, + const struct dc_stream_state *stream) +{ + dc_dmub_srv_control_cursor_offload(dc, context, stream, true); +} + +void dcn35_program_cursor_offload_now(struct dc *dc, const struct pipe_ctx *pipe) +{ + dc_dmub_srv_program_cursor_now(dc, pipe); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h index 0b1d6f608edd..1ff41dba556c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h @@ -101,4 +101,12 @@ bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx); void dcn35_hardware_release(struct dc *dc); +void dcn35_abort_cursor_offload_update(struct dc *dc, const struct pipe_ctx *pipe); +void dcn35_begin_cursor_offload_update(struct dc *dc, const struct pipe_ctx *pipe); +void dcn35_commit_cursor_offload_update(struct dc *dc, const struct pipe_ctx *pipe); +void dcn35_update_cursor_offload_pipe(struct dc *dc, const struct pipe_ctx *pipe); +void dcn35_notify_cursor_offload_drr_update(struct dc *dc, struct dc_state *context, + const struct dc_stream_state *stream); +void dcn35_program_cursor_offload_now(struct dc *dc, const struct pipe_ctx *pipe); + #endif /* __DC_HWSS_DCN35_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index f2f16a0bdb4f..5a66c9db2670 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -86,6 +86,12 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .set_cursor_position = dcn10_set_cursor_position, .set_cursor_attribute = dcn10_set_cursor_attribute, .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .abort_cursor_offload_update = dcn35_abort_cursor_offload_update, + .begin_cursor_offload_update = dcn35_begin_cursor_offload_update, + .commit_cursor_offload_update = dcn35_commit_cursor_offload_update, + .update_cursor_offload_pipe = dcn35_update_cursor_offload_pipe, + .notify_cursor_offload_drr_update 
= dcn35_notify_cursor_offload_drr_update, + .program_cursor_offload_now = dcn35_program_cursor_offload_now, .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, .set_clock = dcn10_set_clock, .get_clock = dcn10_get_clock, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index ab21da435a36..71c2cfa4df75 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1473,7 +1473,10 @@ void dcn401_dmub_hw_control_lock(struct dc *dc, /* use always for now */ union dmub_inbox0_cmd_lock_hw hw_lock_cmd = { 0 }; - if (!dc->ctx || !dc->ctx->dmub_srv || !dc->debug.fams2_config.bits.enable) + if (!dc->ctx || !dc->ctx->dmub_srv) + return; + + if (!dc->debug.fams2_config.bits.enable && !dc_dmub_srv_is_cursor_offload_enabled(dc)) return; hw_lock_cmd.bits.command_code = DMUB_INBOX0_CMD__HW_LOCK; @@ -2669,3 +2672,58 @@ void dcn401_plane_atomic_power_down(struct dc *dc, if (hws->funcs.dpp_root_clock_control) hws->funcs.dpp_root_clock_control(hws, dpp->inst, false); } + +void dcn401_update_cursor_offload_pipe(struct dc *dc, const struct pipe_ctx *pipe) +{ + volatile struct dmub_cursor_offload_v1 *cs = dc->ctx->dmub_srv->dmub->cursor_offload_v1; + const struct pipe_ctx *top_pipe = resource_get_otg_master(pipe); + const struct hubp *hubp = pipe->plane_res.hubp; + const struct dpp *dpp = pipe->plane_res.dpp; + volatile struct dmub_cursor_offload_pipe_data_dcn401_v1 *p; + uint32_t stream_idx, write_idx, payload_idx; + + if (!top_pipe || !hubp || !dpp) + return; + + stream_idx = top_pipe->pipe_idx; + write_idx = cs->offload_streams[stream_idx].write_idx; + payload_idx = write_idx % ARRAY_SIZE(cs->offload_streams[stream_idx].payloads); + + p = &cs->offload_streams[stream_idx].payloads[payload_idx].pipe_data[pipe->pipe_idx].dcn401; + + p->CURSOR0_0_CURSOR_SURFACE_ADDRESS = hubp->att.SURFACE_ADDR; + p->CURSOR0_0_CURSOR_SURFACE_ADDRESS_HIGH = hubp->att.SURFACE_ADDR_HIGH; + p->CURSOR0_0_CURSOR_SIZE__CURSOR_WIDTH = hubp->att.size.bits.width; + p->CURSOR0_0_CURSOR_SIZE__CURSOR_HEIGHT = hubp->att.size.bits.height; + p->CURSOR0_0_CURSOR_POSITION__CURSOR_X_POSITION = hubp->pos.position.bits.x_pos; + p->CURSOR0_0_CURSOR_POSITION__CURSOR_Y_POSITION = hubp->pos.position.bits.y_pos; + p->CURSOR0_0_CURSOR_HOT_SPOT__CURSOR_HOT_SPOT_X = hubp->pos.hot_spot.bits.x_hot; + p->CURSOR0_0_CURSOR_HOT_SPOT__CURSOR_HOT_SPOT_Y = hubp->pos.hot_spot.bits.y_hot; + p->CURSOR0_0_CURSOR_DST_OFFSET__CURSOR_DST_X_OFFSET = hubp->pos.dst_offset.bits.dst_x_offset; + p->CURSOR0_0_CURSOR_CONTROL__CURSOR_ENABLE = hubp->pos.cur_ctl.bits.cur_enable; + p->CURSOR0_0_CURSOR_CONTROL__CURSOR_MODE = hubp->att.cur_ctl.bits.mode; + p->CURSOR0_0_CURSOR_CONTROL__CURSOR_2X_MAGNIFY = hubp->pos.cur_ctl.bits.cur_2x_magnify; + p->CURSOR0_0_CURSOR_CONTROL__CURSOR_PITCH = hubp->att.cur_ctl.bits.pitch; + p->CURSOR0_0_CURSOR_CONTROL__CURSOR_LINES_PER_CHUNK = hubp->pos.cur_ctl.bits.line_per_chunk; + + p->CM_CUR0_CURSOR0_CONTROL__CUR0_ENABLE = dpp->att.cur0_ctl.bits.cur0_enable; + p->CM_CUR0_CURSOR0_CONTROL__CUR0_MODE = dpp->att.cur0_ctl.bits.mode; + p->CM_CUR0_CURSOR0_CONTROL__CUR0_EXPANSION_MODE = dpp->att.cur0_ctl.bits.expansion_mode; + p->CM_CUR0_CURSOR0_CONTROL__CUR0_ROM_EN = dpp->att.cur0_ctl.bits.cur0_rom_en; + p->CM_CUR0_CURSOR0_COLOR0__CUR0_COLOR0 = 0x000000; + p->CM_CUR0_CURSOR0_COLOR1__CUR0_COLOR1 = 0xFFFFFF; + + p->CM_CUR0_CURSOR0_FP_SCALE_BIAS_G_Y__CUR0_FP_BIAS_G_Y = + 
dpp->att.fp_scale_bias_g_y.bits.fp_bias_g_y; + p->CM_CUR0_CURSOR0_FP_SCALE_BIAS_G_Y__CUR0_FP_SCALE_G_Y = + dpp->att.fp_scale_bias_g_y.bits.fp_scale_g_y; + p->CM_CUR0_CURSOR0_FP_SCALE_BIAS_RB_CRCB__CUR0_FP_BIAS_RB_CRCB = + dpp->att.fp_scale_bias_rb_crcb.bits.fp_bias_rb_crcb; + p->CM_CUR0_CURSOR0_FP_SCALE_BIAS_RB_CRCB__CUR0_FP_SCALE_RB_CRCB = + dpp->att.fp_scale_bias_rb_crcb.bits.fp_scale_rb_crcb; + + p->HUBPREQ0_CURSOR_SETTINGS__CURSOR0_DST_Y_OFFSET = hubp->att.settings.bits.dst_y_offset; + p->HUBPREQ0_CURSOR_SETTINGS__CURSOR0_CHUNK_HDL_ADJUST = hubp->att.settings.bits.chunk_hdl_adjust; + + cs->offload_streams[stream_idx].payloads[payload_idx].pipe_mask |= (1u << pipe->pipe_idx); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 9591657d8eee..f489bb7a4c26 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -110,4 +110,6 @@ void dcn401_plane_atomic_power_down(struct dc *dc, struct dpp *dpp, struct hubp *hubp); void dcn401_initialize_min_clocks(struct dc *dc); +void dcn401_update_cursor_offload_pipe(struct dc *dc, const struct pipe_ctx *pipe); + #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index a221b8cb6d4d..1c736b7e3300 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -9,6 +9,7 @@ #include "dcn30/dcn30_hwseq.h" #include "dcn31/dcn31_hwseq.h" #include "dcn32/dcn32_hwseq.h" +#include "dcn35/dcn35_hwseq.h" #include "dcn401/dcn401_hwseq.h" #include "dcn401_init.h" @@ -60,6 +61,12 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .set_cursor_position = dcn401_set_cursor_position, .set_cursor_attribute = dcn10_set_cursor_attribute, .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .abort_cursor_offload_update = dcn35_abort_cursor_offload_update, + .begin_cursor_offload_update = dcn35_begin_cursor_offload_update, + .commit_cursor_offload_update = dcn35_commit_cursor_offload_update, + .update_cursor_offload_pipe = dcn401_update_cursor_offload_pipe, + .notify_cursor_offload_drr_update = dcn35_notify_cursor_offload_drr_update, + .program_cursor_offload_now = dcn35_program_cursor_offload_now, .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, .set_clock = dcn10_set_clock, .get_clock = dcn10_get_clock, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 619ac4dfff07..a937a2b2135e 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -154,6 +154,11 @@ struct dmub_hw_control_lock_fast_params { bool lock; }; +struct program_cursor_update_now_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; +}; + union block_sequence_params { struct update_plane_addr_params update_plane_addr_params; struct subvp_pipe_control_lock_fast_params subvp_pipe_control_lock_fast_params; @@ -174,6 +179,7 @@ union block_sequence_params { struct subvp_save_surf_addr subvp_save_surf_addr; struct wait_for_dcc_meta_propagation_params wait_for_dcc_meta_propagation_params; struct dmub_hw_control_lock_fast_params dmub_hw_control_lock_fast_params; + struct program_cursor_update_now_params program_cursor_update_now_params; }; enum block_sequence_func { @@ -196,6 +202,7 @@ enum 
block_sequence_func { DMUB_SUBVP_SAVE_SURF_ADDR, HUBP_WAIT_FOR_DCC_META_PROP, DMUB_HW_CONTROL_LOCK_FAST, + PROGRAM_CURSOR_UPDATE_NOW, /* This must be the last value in this enum, add new ones above */ HWSS_BLOCK_SEQUENCE_FUNC_COUNT }; @@ -310,6 +317,13 @@ struct hw_sequencer_funcs { void (*set_cursor_position)(struct pipe_ctx *pipe); void (*set_cursor_attribute)(struct pipe_ctx *pipe); void (*set_cursor_sdr_white_level)(struct pipe_ctx *pipe); + void (*abort_cursor_offload_update)(struct dc *dc, const struct pipe_ctx *pipe); + void (*begin_cursor_offload_update)(struct dc *dc, const struct pipe_ctx *pipe); + void (*commit_cursor_offload_update)(struct dc *dc, const struct pipe_ctx *pipe); + void (*update_cursor_offload_pipe)(struct dc *dc, const struct pipe_ctx *pipe); + void (*notify_cursor_offload_drr_update)(struct dc *dc, struct dc_state *context, + const struct dc_stream_state *stream); + void (*program_cursor_offload_now)(struct dc *dc, const struct pipe_ctx *pipe); /* Colour Related */ void (*program_gamut_remap)(struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/cursor_reg_cache.h b/drivers/gpu/drm/amd/display/dc/inc/hw/cursor_reg_cache.h index 45645f9fd86c..081831230821 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/cursor_reg_cache.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/cursor_reg_cache.h @@ -83,12 +83,34 @@ union reg_cur0_control_cfg { } bits; uint32_t raw; }; + struct cursor_position_cache_dpp { union reg_cur0_control_cfg cur0_ctl; }; struct cursor_attribute_cache_dpp { union reg_cur0_control_cfg cur0_ctl; + union reg_cur0_fp_scale_bias { + struct { + uint32_t fp_bias: 16; + uint32_t fp_scale: 16; + } bits; + uint32_t raw; + } fp_scale_bias; + union reg_cur0_fp_scale_bias_g_y { + struct { + uint32_t fp_bias_g_y: 16; + uint32_t fp_scale_g_y: 16; + } bits; + uint32_t raw; + } fp_scale_bias_g_y; + union reg_cur0_fp_scale_bias_rb_crcb { + struct { + uint32_t fp_bias_rb_crcb: 16; + uint32_t fp_scale_rb_crcb: 16; + } bits; + uint32_t raw; + } fp_scale_bias_rb_crcb; }; struct cursor_attributes_cfg { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 1b7c085dc2cc..09c224691618 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -84,6 +84,7 @@ struct dpp { struct pwl_params shaper_params; bool cm_bypass_mode; + bool cursor_offload; struct cursor_position_cache_dpp pos; struct cursor_attribute_cache_dpp att; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 2b874d2cc61c..5998a20a18c4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -126,6 +126,7 @@ struct hubp { int mpcc_id; struct dc_cursor_attributes curs_attr; struct dc_cursor_position curs_pos; + bool cursor_offload; bool power_gated; struct cursor_position_cache_hubp pos; diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 338fdc651f2c..9012a7ba1602 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -132,6 +132,7 @@ enum dmub_window_id { DMUB_WINDOW_IB_MEM, DMUB_WINDOW_SHARED_STATE, DMUB_WINDOW_LSDMA_BUFFER, + DMUB_WINDOW_CURSOR_OFFLOAD, DMUB_WINDOW_TOTAL, }; @@ -535,7 +536,8 @@ struct dmub_srv_create_params { * @fw_version: the current firmware version, if any * @is_virtual: false if hardware support only * @shared_state: dmub shared state 
between firmware and driver - * @fw_state: dmub firmware state pointer + * @cursor_offload_v1: Cursor offload state + * @fw_state: dmub firmware state pointer (debug purpose only) */ struct dmub_srv { enum dmub_asic asic; @@ -544,7 +546,9 @@ struct dmub_srv { bool is_virtual; struct dmub_fb scratch_mem_fb; struct dmub_fb ib_mem_gart; + struct dmub_fb cursor_offload_fb; volatile struct dmub_shared_state_feature_block *shared_state; + volatile struct dmub_cursor_offload_v1 *cursor_offload_v1; volatile const struct dmub_fw_state *fw_state; /* private: internal use only */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index b17a19400c06..0244c9b44ecc 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -564,10 +564,11 @@ enum dmub_status window_sizes[DMUB_WINDOW_4_MAILBOX] = DMUB_MAILBOX_SIZE; window_sizes[DMUB_WINDOW_5_TRACEBUFF] = trace_buffer_size; window_sizes[DMUB_WINDOW_6_FW_STATE] = fw_state_size; - window_sizes[DMUB_WINDOW_7_SCRATCH_MEM] = DMUB_SCRATCH_MEM_SIZE; + window_sizes[DMUB_WINDOW_7_SCRATCH_MEM] = dmub_align(DMUB_SCRATCH_MEM_SIZE, 64); window_sizes[DMUB_WINDOW_IB_MEM] = DMUB_IB_MEM_SIZE; window_sizes[DMUB_WINDOW_SHARED_STATE] = max(DMUB_FW_HEADER_SHARED_STATE_SIZE, shared_state_size); window_sizes[DMUB_WINDOW_LSDMA_BUFFER] = DMUB_LSDMA_RB_SIZE; + window_sizes[DMUB_WINDOW_CURSOR_OFFLOAD] = dmub_align(sizeof(struct dmub_cursor_offload_v1), 64); out->fb_size = dmub_srv_calc_regions_for_memory_type(params, out, window_sizes, DMUB_WINDOW_MEMORY_TYPE_FB); @@ -652,21 +653,22 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, struct dmub_fb *mail_fb = params->fb[DMUB_WINDOW_4_MAILBOX]; struct dmub_fb *tracebuff_fb = params->fb[DMUB_WINDOW_5_TRACEBUFF]; struct dmub_fb *fw_state_fb = params->fb[DMUB_WINDOW_6_FW_STATE]; - struct dmub_fb *scratch_mem_fb = params->fb[DMUB_WINDOW_7_SCRATCH_MEM]; - struct dmub_fb *ib_mem_gart = params->fb[DMUB_WINDOW_IB_MEM]; struct dmub_fb *shared_state_fb = params->fb[DMUB_WINDOW_SHARED_STATE]; struct dmub_rb_init_params rb_params, outbox0_rb_params; struct dmub_window cw0, cw1, cw2, cw3, cw4, cw5, cw6, region6; struct dmub_region inbox1, outbox1, outbox0; + uint32_t i; + if (!dmub->sw_init) return DMUB_STATUS_INVALID; - if (!inst_fb || !stack_fb || !data_fb || !bios_fb || !mail_fb || - !tracebuff_fb || !fw_state_fb || !scratch_mem_fb || !ib_mem_gart) { - ASSERT(0); - return DMUB_STATUS_INVALID; + for (i = 0; i < DMUB_WINDOW_TOTAL; ++i) { + if (!params->fb[i]) { + ASSERT(0); + return DMUB_STATUS_INVALID; + } } dmub->fb_base = params->fb_base; @@ -748,9 +750,11 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, dmub->shared_state = shared_state_fb->cpu_addr; - dmub->scratch_mem_fb = *scratch_mem_fb; + dmub->scratch_mem_fb = *params->fb[DMUB_WINDOW_7_SCRATCH_MEM]; + dmub->ib_mem_gart = *params->fb[DMUB_WINDOW_IB_MEM]; - dmub->ib_mem_gart = *ib_mem_gart; + dmub->cursor_offload_fb = *params->fb[DMUB_WINDOW_CURSOR_OFFLOAD]; + dmub->cursor_offload_v1 = (struct dmub_cursor_offload_v1 *)dmub->cursor_offload_fb.cpu_addr; if (dmub->hw_funcs.setup_windows) dmub->hw_funcs.setup_windows(dmub, &cw2, &cw3, &cw4, &cw5, &cw6, ®ion6); -- cgit v1.2.3 From 6d40b93194a0fdeb07fa63e92227940fde04e527 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 26 Sep 2025 16:15:01 -0400 Subject: drm/amd/display: [FW Promotion] Release 0.1.30.0 Add new SMART_POWER_HDR commands to optimize power consumption on certain OLED LED panels by 
sending MaxCLL per frame to TCON. Reviewed-by: Aurabindo Pillai Signed-off-by: Taimur Hassan Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 104 ++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 0a250699999c..be6151d529d8 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -1724,6 +1724,11 @@ enum dmub_cmd_type { */ DMUB_CMD__CURSOR_OFFLOAD = 92, + /** + * Command type used for all SMART_POWER_HDR commands. + */ + DMUB_CMD__SMART_POWER_HDR = 93, + /** * Command type use for VBIOS shared commands. */ @@ -4392,6 +4397,45 @@ enum replay_enable { REPLAY_ENABLE = 1, }; +/** + * Data passed from driver to FW in a DMUB_CMD__SMART_POWER_HDR_ENABLE command. + */ +struct dmub_rb_cmd_smart_power_hdr_enable_data { + /** + * SMART_POWER_HDR enable or disable. + */ + uint8_t enable; + /** + * Panel Instance. + * Panel isntance to identify which replay_state to use + * Currently the support is only for 0 or 1 + */ + uint8_t panel_inst; + + uint16_t peak_nits; + /** + * OTG HW instance. + */ + uint8_t otg_inst; + /** + * DIG FE HW instance. + */ + uint8_t digfe_inst; + /** + * DIG BE HW instance. + */ + uint8_t digbe_inst; + uint8_t debugcontrol; + /* + * vertical interrupt trigger line + */ + uint32_t triggerline; + + uint16_t fixed_max_cll; + + uint8_t pad[2]; +}; + /** * Data passed from driver to FW in a DMUB_CMD__REPLAY_ENABLE command. */ @@ -4725,6 +4769,58 @@ union dmub_replay_cmd_set { struct dmub_cmd_replay_set_general_cmd_data set_general_cmd_data; }; +/** + * SMART POWER HDR command sub-types. + */ +enum dmub_cmd_smart_power_hdr_type { + + /** + * Enable/Disable SMART_POWER_HDR. + */ + DMUB_CMD__SMART_POWER_HDR_ENABLE = 1, + /** + * Get current MaxCLL value if SMART POWER HDR is enabled. + */ + DMUB_CMD__SMART_POWER_HDR_GETMAXCLL = 2, +}; + +/** + * Definition of a DMUB_CMD__SMART_POWER_HDR command. + */ +struct dmub_rb_cmd_smart_power_hdr_enable { + /** + * Command header. + */ + struct dmub_cmd_header header; + + struct dmub_rb_cmd_smart_power_hdr_enable_data data; +}; + +struct dmub_cmd_smart_power_hdr_getmaxcll_input { + uint8_t panel_inst; + uint8_t pad[3]; +}; + +struct dmub_cmd_smart_power_hdr_getmaxcll_output { + uint16_t current_max_cll; + uint8_t pad[2]; +}; + +/** + * Definition of a DMUB_CMD__SMART_POWER_HDR command. + */ +struct dmub_rb_cmd_smart_power_hdr_getmaxcll { + struct dmub_cmd_header header; /**< Command header */ + /** + * Data passed from driver to FW in a DMUB_CMD__SMART_POWER_HDR_GETMAXCLL command. + */ + union dmub_cmd_smart_power_hdr_getmaxcll_data { + struct dmub_cmd_smart_power_hdr_getmaxcll_input input; /**< Input */ + struct dmub_cmd_smart_power_hdr_getmaxcll_output output; /**< Output */ + uint32_t output_raw; /**< Raw data output */ + } data; +}; + /** * Set of HW components that can be locked. * @@ -6594,6 +6690,14 @@ union dmub_rb_cmd { * - DMUB_CMD__CURSOR_OFFLOAD_STREAM_UPDATE_DRR */ struct dmub_rb_cmd_cursor_offload_stream_cntl cursor_offload_stream_ctnl; + /** + * Definition of a DMUB_CMD__SMART_POWER_HDR_ENABLE command. + */ + struct dmub_rb_cmd_smart_power_hdr_enable smart_power_hdr_enable; + /** + * Definition of a DMUB_CMD__DMUB_CMD__SMART_POWER_HDR_GETMAXCLL command. 
+ */ + struct dmub_rb_cmd_smart_power_hdr_getmaxcll smart_power_hdr_getmaxcll; }; /** -- cgit v1.2.3 From 0031274f5c1806b7a6c75edb2e356c3b83d12225 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 26 Sep 2025 18:58:54 -0500 Subject: drm/amd/display: Promote DC to 3.2.353 - [FW Promotion] Release 0.1.30.0 - Driver implementation for cursor offloading to DMU - Incorrect Mirror Cositing - Enable Dynamic DTBCLK Switch - Remove comparing uint32_t to zero - Remove inaccessible URL Reviewed-by: Aurabindo Pillai Signed-off-by: Taimur Hassan Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 0494e6cd3b33..770e36048b5f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.352" +#define DC_VER "3.2.353" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC -- cgit v1.2.3 From 4944e470a22d467268ace0b2cc5ed90aeb334d13 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 22 Jul 2025 11:14:28 +0800 Subject: drm/amdgpu/userq: extend userq state Extend the userq state for identifying the userq invalid cases. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index c027dd916672..ded33fe76e1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -37,6 +37,7 @@ enum amdgpu_userq_state { AMDGPU_USERQ_STATE_MAPPED, AMDGPU_USERQ_STATE_PREEMPTED, AMDGPU_USERQ_STATE_HUNG, + AMDGPU_USERQ_STATE_INVALID_VA, }; struct amdgpu_mqd_prop; -- cgit v1.2.3 From 17f5bd95511f0ee1a10d2adf301602be1d0a8bf6 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 16 Sep 2025 11:20:42 +0530 Subject: drm/amdgpu: Remove redundant return value gfx_v9_4_3_xcc_kcq_init_queue doesn't have a fail condition. 
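In other words, the function can only ever return 0, so the int return type is dead weight. A minimal sketch of the refactor pattern this patch applies (generic names, not the driver's actual code):

    /* Before: callers must check a value that is always 0. */
    static int init_queue(struct ring *ring)
    {
            setup_mqd(ring);        /* cannot fail */
            return 0;
    }

    /* After: a void function, so the dead error paths in the callers go away. */
    static void init_queue(struct ring *ring)
    {
            setup_mqd(ring);
    }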
Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 77f9d5b9a556..fb5585ab52be 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2152,7 +2152,8 @@ static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) return 0; } -static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, bool restore) +static void gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, + bool restore) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -2186,8 +2187,6 @@ static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, b atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); amdgpu_ring_clear_ring(ring); } - - return 0; } static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_id) @@ -2220,7 +2219,7 @@ static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id) static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_ring *ring; - int i, r; + int i; gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id); @@ -2228,9 +2227,7 @@ static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id) ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; - r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false); - if (r) - return r; + gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false); } return amdgpu_gfx_enable_kcq(adev, xcc_id); @@ -3605,11 +3602,8 @@ pipe_reset: return r; } - r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true); - if (r) { - dev_err(adev->dev, "fail to init kcq\n"); - return r; - } + gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true); + spin_lock_irqsave(&kiq->ring_lock, flags); r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); if (r) { -- cgit v1.2.3 From 0c1beb7cbf6659ff9153b918be683b3f56295716 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Fri, 5 Sep 2025 10:19:29 +0200 Subject: drm/amdgpu: make non-NULL out fence mandatory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_ttm_copy_mem_to_mem has a single caller; make sure the out fence is non-NULL to simplify the code. Since none of the pointers should be NULL, we can enable __attribute__((nonnull)). While at it, make the function static since it's only used from amdgpu_ttm.c. Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 ++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 6 ------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 2fb8cd79583e..86fc1809681c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -286,12 +286,13 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, * move and different for a BO to BO copy.
* */ -int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, - const struct amdgpu_copy_mem *src, - const struct amdgpu_copy_mem *dst, - uint64_t size, bool tmz, - struct dma_resv *resv, - struct dma_fence **f) +__attribute__((nonnull)) +static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, + const struct amdgpu_copy_mem *src, + const struct amdgpu_copy_mem *dst, + uint64_t size, bool tmz, + struct dma_resv *resv, + struct dma_fence **f) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_res_cursor src_mm, dst_mm; @@ -365,9 +366,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, } error: mutex_unlock(&adev->mman.gtt_window_lock); - if (f) - *f = dma_fence_get(fence); - dma_fence_put(fence); + *f = fence; return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index bbd6e524de9e..d75866a389f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -170,12 +170,6 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush, uint32_t copy_flags); -int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, - const struct amdgpu_copy_mem *src, - const struct amdgpu_copy_mem *dst, - uint64_t size, bool tmz, - struct dma_resv *resv, - struct dma_fence **f); int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, struct dma_resv *resv, struct dma_fence **fence); -- cgit v1.2.3 From c79cf5a7d903a16459846b4296fc3cd988837c54 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Mon, 15 Sep 2025 14:25:44 +0200 Subject: drm/amdgpu: remove gart_window_lock usage from gmc v12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This lock was part of the SDMA workaround originally implemented in gmc_v10_0_flush_gpu_tlb (a70cb2176f7ef6f moved it to amdgpu_gmc_flush_gpu_tlb). This means this lock is useless and can be safely dropped. Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index f4a19357ccbc..cad2d19105c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -312,9 +312,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, return; } - mutex_lock(&adev->mman.gtt_window_lock); gmc_v12_0_flush_vm_hub(adev, vmid, vmhub, 0); - mutex_unlock(&adev->mman.gtt_window_lock); return; } -- cgit v1.2.3 From 4d0858d4eb28763fdbcf2e8e17dde211db83ac43 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 8 Oct 2025 10:26:47 +0530 Subject: drm/amdgpu: Skip SDMA suspend during mode-2 reset For SDMA IP versions >= v4.4.2, firmware will take care of quiescing SDMA before mode-2 reset. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aldebaran.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 9569dc16dd3d..daa7b23bc775 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -88,6 +88,10 @@ static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev) uint32_t ip_block; int r, i; + /* Skip suspend of SDMA IP versions >= 4.4.2.
They are multi-AID */ + if (adev->aid_mask) + ip_block_mask &= ~BIT(AMD_IP_BLOCK_TYPE_SDMA); + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); -- cgit v1.2.3 From fb3c34ef49947631a4434c7157612d3c8114dfc2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 6 Oct 2025 14:23:59 -0400 Subject: drm/amdgpu/mes: adjust the VMID masks The firmware limits the max vmid, but align the settings with the hw limits as well just to be safe. Reviewed-by: Shaoyun liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 4883adcfbb4b..45bf8309cb37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -105,8 +105,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev) spin_lock_init(&adev->mes.ring_lock[i]); adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; - adev->mes.vmid_mask_mmhub = 0xffffff00; - adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xfffffffe : 0xffffff00; + adev->mes.vmid_mask_mmhub = 0xFF00; + adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xFFFE : 0xFF00; num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me; if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES) -- cgit v1.2.3 From 000902683ffca0be3eb49a7305ee5c96fa6728a2 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 6 Oct 2025 11:16:20 -0500 Subject: drm/amd: Adjust whitespace for vangogh_ppt A few changes have more whitespace than needed. Clean them up. Reviewed-by: Lijo Lazar Tested-by: Robert Beckett Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 2c9869feba61..9fadfa820f5d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -2308,8 +2308,7 @@ static int vangogh_get_power_limit(struct smu_context *smu, uint32_t *max_power_limit, uint32_t *min_power_limit) { - struct smu_11_5_power_context *power_context = - smu->smu_power.power_context; + struct smu_11_5_power_context *power_context = smu->smu_power.power_context; uint32_t ppt_limit; int ret = 0; @@ -2345,12 +2344,11 @@ static int vangogh_get_power_limit(struct smu_context *smu, } static int vangogh_get_ppt_limit(struct smu_context *smu, - uint32_t *ppt_limit, - enum smu_ppt_limit_type type, - enum smu_ppt_limit_level level) + uint32_t *ppt_limit, + enum smu_ppt_limit_type type, + enum smu_ppt_limit_level level) { - struct smu_11_5_power_context *power_context = - smu->smu_power.power_context; + struct smu_11_5_power_context *power_context = smu->smu_power.power_context; if (!power_context) return -EOPNOTSUPP; -- cgit v1.2.3 From 861fc60b172d8edc7ac867bbce628488a69abd0d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 1 Oct 2025 13:03:33 -0500 Subject: drm/amd: Remove some unnecessary header includes Unnecessary headers can slow down the build, drop em. No intended functional changes.
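A header usually becomes droppable like this when only a type's name is needed, not its layout. A generic sketch of the forward-declaration idiom (hypothetical names, not from this patch):

    /* foo.h: holding a pointer does not require including bar.h. */
    struct bar;                      /* forward declaration is enough */

    struct foo {
            struct bar *bar;         /* complete type only needed where dereferenced */
    };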
Reviewed-by: Lijo Lazar Tested-by: Robert Beckett Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 554492dfa3c0..fb595a70bbd1 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -20,7 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "pp_debug.h" #include #include #include @@ -28,12 +27,10 @@ #include #include #include "amd_shared.h" -#include "amd_powerplay.h" #include "power_state.h" #include "amdgpu.h" #include "hwmgr.h" #include "amdgpu_dpm_internal.h" -#include "amdgpu_display.h" static const struct amd_pm_funcs pp_dpm_funcs; -- cgit v1.2.3 From 1bea57ea7544a8058b32d8605075e24aa2814fcc Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 25 Sep 2025 12:09:56 +0200 Subject: drm/amdgpu: reduce queue timeout to 2 seconds v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There have been multiple complaints that 10 seconds is usually too long. The original requirement for the longer timeout came from compute tests on AMDVLK; since that is no longer a concern, reduce the timeout back to 2 seconds for all queues. While at it, also remove any special handling for compute queues under SRIOV or pass-through. v2: fix checkpatch warning. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 85 ++++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 20 +++---- 2 files changed, 47 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3d032c4e2dce..6d228012187c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4285,58 +4285,53 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) long timeout; int ret = 0; - /* - * By default timeout for jobs is 10 sec - */ - adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000); - adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + /* By default timeout for all queues is 2 sec */ + adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout = + adev->video_timeout = msecs_to_jiffies(2000); - if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { - while ((timeout_setting = strsep(&input, ",")) && - strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { - ret = kstrtol(timeout_setting, 0, &timeout); - if (ret) - return ret; + if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) + return 0; - if (timeout == 0) { - index++; - continue; - } else if (timeout < 0) { - timeout = MAX_SCHEDULE_TIMEOUT; - dev_warn(adev->dev, "lockup timeout disabled"); - add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); - } else { - timeout = msecs_to_jiffies(timeout); - } + while ((timeout_setting = strsep(&input, ",")) && + strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { + ret = kstrtol(timeout_setting, 0, &timeout); + if (ret) + return ret; - switch (index++) { - case 0: - adev->gfx_timeout = timeout; - break; - case 1: - adev->compute_timeout = timeout; - break; - case 2: - adev->sdma_timeout = timeout; - break; - case 3: - adev->video_timeout = timeout; - break; - default: - break; - } + if (timeout == 0) { + index++; + continue; + } else if
(timeout < 0) { + timeout = MAX_SCHEDULE_TIMEOUT; + dev_warn(adev->dev, "lockup timeout disabled"); + add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); + } else { + timeout = msecs_to_jiffies(timeout); } - /* - * There is only one value specified and - * it should apply to all non-compute jobs. - */ - if (index == 1) { - adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; - if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) - adev->compute_timeout = adev->gfx_timeout; + + switch (index++) { + case 0: + adev->gfx_timeout = timeout; + break; + case 1: + adev->compute_timeout = timeout; + break; + case 2: + adev->sdma_timeout = timeout; + break; + case 3: + adev->video_timeout = timeout; + break; + default: + break; } } + /* When only one value specified apply it to all queues. */ + if (index == 1) + adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout = + adev->video_timeout = timeout; + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 61268aa82df4..7eac510c2d6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -354,22 +354,16 @@ module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint * DOC: lockup_timeout (string) * Set GPU scheduler timeout value in ms. * - * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or - * multiple values specified. 0 and negative values are invalidated. They will be adjusted - * to the default timeout. + * The format can be [single value] for setting all timeouts at once or + * [GFX,Compute,SDMA,Video] to set individual timeouts. + * Negative values mean infinity. * - * - With one value specified, the setting will apply to all non-compute jobs. - * - With multiple values specified, the first one will be for GFX. - * The second one is for Compute. The third and fourth ones are - * for SDMA and Video. - * - * By default(with no lockup_timeout settings), the timeout for all jobs is 10000. + * By default(with no lockup_timeout settings), the timeout for all queues is 2000. */ MODULE_PARM_DESC(lockup_timeout, - "GPU lockup timeout in ms (default: 10000 for all jobs. " - "0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " - "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video]."); -module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444); + "GPU lockup timeout in ms (default: 2000. 0: keep default value. negative: infinity timeout), format: [single value for all] or [GFX,Compute,SDMA,Video]."); +module_param_string(lockup_timeout, amdgpu_lockup_timeout, + sizeof(amdgpu_lockup_timeout), 0444); /** * DOC: dpm (int) -- cgit v1.2.3 From 5cfa33fabf01f2cc0af6b1feed6e65cb81806a37 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 29 Sep 2025 13:52:13 +0800 Subject: drm/amdgpu: add userq object va track helpers Add the userq object virtual address list_add() helpers for tracking the userq obj va address usage. 
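The idea, in outline, is a small cursor node per validated VA kept on a per-queue list so the mapping can be revisited at unmap and resume time. A simplified sketch (condensed from the diff below; error handling trimmed):

    struct amdgpu_userq_va_cursor {
            u64 gpu_addr;
            struct list_head list;
    };

    /* Remember a validated VA on the queue's tracking list. */
    static int track_va(struct amdgpu_usermode_queue *queue, u64 addr)
    {
            struct amdgpu_userq_va_cursor *c = kzalloc(sizeof(*c), GFP_KERNEL);

            if (!c)
                    return -ENOMEM;
            c->gpu_addr = addr;
            list_add(&c->list, &queue->userq_va_list);
            return 0;
    }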
Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 44 ++++++++++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 12 ++++++-- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 16 ++++++----- 4 files changed, 52 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 656b8a931dae..52c2d1731aab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -96,6 +96,7 @@ struct amdgpu_bo_va { * if non-zero, cannot unmap from GPU because user queues may still access it */ unsigned int queue_refcount; + atomic_t userq_va_mapped; }; struct amdgpu_bo { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 1add21160d21..79c7fa0a9ff7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -44,10 +44,29 @@ u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev) return userq_ip_mask; } -int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr, - u64 expected_size) +static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue, + struct amdgpu_bo_va_mapping *va_map, u64 addr) +{ + struct amdgpu_userq_va_cursor *va_cursor; + struct userq_va_list; + + va_cursor = kzalloc(sizeof(*va_cursor), GFP_KERNEL); + if (!va_cursor) + return -ENOMEM; + + INIT_LIST_HEAD(&va_cursor->list); + va_cursor->gpu_addr = addr; + atomic_set(&va_map->bo_va->userq_va_mapped, 1); + list_add(&va_cursor->list, &queue->userq_va_list); + + return 0; +} + +int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue, + u64 addr, u64 expected_size) { struct amdgpu_bo_va_mapping *va_map; + struct amdgpu_vm *vm = queue->vm; u64 user_addr; u64 size; int r = 0; @@ -67,6 +86,7 @@ int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr, /* Only validate the userq whether resident in the VM mapping range */ if (user_addr >= va_map->start && va_map->last - user_addr + 1 >= size) { + amdgpu_userq_buffer_va_list_add(queue, va_map, user_addr); amdgpu_bo_unreserve(vm->root.bo); return 0; } @@ -185,6 +205,7 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, uq_funcs->mqd_destroy(uq_mgr, queue); amdgpu_userq_fence_driver_free(queue); idr_remove(&uq_mgr->userq_idr, queue_id); + list_del(&queue->userq_va_list); kfree(queue); } @@ -505,14 +526,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } - /* Validate the userq virtual address.*/ - if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) || - amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || - amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) { - r = -EINVAL; - kfree(queue); - goto unlock; - } + INIT_LIST_HEAD(&queue->userq_va_list); queue->doorbell_handle = args->in.doorbell_handle; queue->queue_type = args->in.ip_type; queue->vm = &fpriv->vm; @@ -523,6 +537,15 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) db_info.db_obj = &queue->db_obj; db_info.doorbell_offset = args->in.doorbell_offset; + /* Validate the userq virtual address.*/ + if (amdgpu_userq_input_va_validate(queue, args->in.queue_va, args->in.queue_size) || + amdgpu_userq_input_va_validate(queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || + 
amdgpu_userq_input_va_validate(queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE))) { + r = -EINVAL; + kfree(queue); + goto unlock; + } + /* Convert relative doorbell offset into absolute doorbell index */ index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp); if (index == (uint64_t)-EINVAL) { @@ -548,7 +571,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } - qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL); if (qid < 0) { drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index ded33fe76e1c..ef6ea01a8053 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -48,6 +48,11 @@ struct amdgpu_userq_obj { struct amdgpu_bo *obj; }; +struct amdgpu_userq_va_cursor { + u64 gpu_addr; + struct list_head list; +}; + struct amdgpu_usermode_queue { int queue_type; enum amdgpu_userq_state state; @@ -67,6 +72,8 @@ struct amdgpu_usermode_queue { u32 xcp_id; int priority; struct dentry *debugfs_queue; + + struct list_head userq_va_list; }; struct amdgpu_userq_funcs { @@ -137,7 +144,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, u32 idx); int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, u32 idx); - -int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr, - u64 expected_size); +int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue, + u64 addr, u64 expected_size); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 1cd9eaeef38f..5c63480dda9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -298,8 +298,9 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } - if (amdgpu_userq_input_va_validate(queue->vm, compute_mqd->eop_va, - max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE))) + r = amdgpu_userq_input_va_validate(queue, compute_mqd->eop_va, + max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE)); + if (r) goto free_mqd; userq_props->eop_gpu_addr = compute_mqd->eop_va; @@ -330,8 +331,9 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->tmz_queue = mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE; - if (amdgpu_userq_input_va_validate(queue->vm, mqd_gfx_v11->shadow_va, - shadow_info.shadow_size)) + r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->shadow_va, + shadow_info.shadow_size); + if (r) goto free_mqd; kfree(mqd_gfx_v11); @@ -350,9 +352,9 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, r = -ENOMEM; goto free_mqd; } - - if (amdgpu_userq_input_va_validate(queue->vm, mqd_sdma_v11->csa_va, - shadow_info.csa_size)) + r = amdgpu_userq_input_va_validate(queue, mqd_sdma_v11->csa_va, + shadow_info.csa_size); + if (r) goto free_mqd; userq_props->csa_addr = mqd_sdma_v11->csa_va; -- cgit v1.2.3 From 2a28f9665dca1a142a758477409e6c08fc764c5d Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 9 Oct 2025 16:44:31 +0800 Subject: drm/amdgpu: track the userq bo va for its obj management Track the userq object over its lifetime: set the buffer's mapped flag when the queue is created and clear it when the queue is destroyed.
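Schematically, creation and destruction mirror each other (condensed from this diff and the one below):

    /* create: mark the mapping as queue-owned and remember it */
    atomic_set(&va_map->bo_va->userq_va_mapped, 1);
    list_add(&va_cursor->list, &queue->userq_va_list);

    /* destroy: walk the list, clear the flag, free the cursors */
    list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) {
            atomic_set(&mapping->bo_va->userq_va_mapped, 0);
            list_del(&va_cursor->list);
            kfree(va_cursor);
    }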
Suggested-by: Alex Deucher Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 79c7fa0a9ff7..5c66f472d055 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -97,6 +97,40 @@ out_err: return r; } +static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_userq_va_cursor *va_cursor) +{ + atomic_set(&mapping->bo_va->userq_va_mapped, 0); + list_del(&va_cursor->list); + kfree(va_cursor); +} + +static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_userq_va_cursor *va_cursor, *tmp; + struct amdgpu_bo_va_mapping *mapping; + int r; + + r = amdgpu_bo_reserve(queue->vm->root.bo, false); + if (r) + return r; + + list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) { + mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr); + if (!mapping) { + r = -EINVAL; + goto err; + } + amdgpu_userq_buffer_va_list_del(mapping, va_cursor); + dev_dbg(adev->dev, "delete the userq:%p va:%llx\n", + queue, va_cursor->gpu_addr); + } +err: + amdgpu_bo_unreserve(queue->vm->root.bo); + return r; +} + static int amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) @@ -202,6 +236,8 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; + /* Drop the userq reference. */ + amdgpu_userq_buffer_vas_list_cleanup(adev, queue); uq_funcs->mqd_destroy(uq_mgr, queue); amdgpu_userq_fence_driver_free(queue); idr_remove(&uq_mgr->userq_idr, queue_id); -- cgit v1.2.3 From 873f44c32797c2f4715a6632eac0807fd1084747 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 22 Jul 2025 13:43:51 +0800 Subject: drm/amdgpu: keeping waiting userq fence infinitely Keep waiting on the userq fence indefinitely until a hang is detected, then suspend the hung queue and set the fence error.
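For reference, dma_fence_wait_timeout() treats MAX_SCHEDULE_TIMEOUT as an unbounded wait, so after this change ret <= 0 can only mean an interrupted or failed wait, never an ordinary timeout; the resulting pattern (condensed from the diff below):

    ret = dma_fence_wait_timeout(f, true, MAX_SCHEDULE_TIMEOUT);
    if (ret <= 0) {
            /* wait was interrupted or failed: flag the queue and bail out */
            queue->state = AMDGPU_USERQ_STATE_HUNG;
            return -ETIME;
    }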
Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 5c66f472d055..4cd662ad7be9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -213,19 +213,24 @@ amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr, return r; } -static void +static int amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { struct dma_fence *f = queue->last_fence; - int ret; + int ret = 0; if (f && !dma_fence_is_signaled(f)) { - ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); - if (ret <= 0) + ret = dma_fence_wait_timeout(f, true, MAX_SCHEDULE_TIMEOUT); + if (ret <= 0) { drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", f->context, f->seqno); + queue->state = AMDGPU_USERQ_STATE_HUNG; + return -ETIME; + } } + + return ret; } static void -- cgit v1.2.3 From 89926812d3b41d172b812afdafac710504824139 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 9 Oct 2025 16:45:27 +0800 Subject: drm/amdgpu: validate the queue va for resuming the queue The driver needs to validate whether the userq VA is mapped before trying to resume the queue. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 4cd662ad7be9..c1fee1ac2c0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -97,6 +97,42 @@ out_err: return r; } +static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) +{ + struct amdgpu_bo_va_mapping *mapping; + bool r; + + if (amdgpu_bo_reserve(vm->root.bo, false)) + return false; + + mapping = amdgpu_vm_bo_lookup_mapping(vm, addr); + if (!IS_ERR_OR_NULL(mapping) && atomic_read(&mapping->bo_va->userq_va_mapped)) + r = true; + else + r = false; + amdgpu_bo_unreserve(vm->root.bo); + + return r; +} + +static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_userq_va_cursor *va_cursor, *tmp; + int r = 0; + + list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) { + r += amdgpu_userq_buffer_va_mapped(queue->vm, va_cursor->gpu_addr); + dev_dbg(queue->userq_mgr->adev->dev, + "validate the userq mapping:%p va:%llx r:%d\n", + queue, va_cursor->gpu_addr, r); + } + + if (r != 0) + return true; + + return false; +} + static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping, struct amdgpu_userq_va_cursor *va_cursor) { @@ -761,6 +797,14 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) /* Resume all the queues for this process */ idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + + if (!amdgpu_userq_buffer_vas_mapped(queue)) { + drm_file_err(uq_mgr->file, + "trying restore queue without va mapping\n"); + queue->state = AMDGPU_USERQ_STATE_INVALID_VA; + continue; + } + r = amdgpu_userq_restore_helper(uq_mgr, queue); if (r) ret = r; -- cgit v1.2.3 From 2e7ceac0ea4192e25936c2186e1c44a550f6d96d Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 19 Sep 2025 15:14:41 +0800 Subject: drm/amdgpu: validate userq va for GEM unmap MIME-Version: 1.0 Content-Type:
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user unmaps a userq VA, the driver must ensure the queue has no in-flight jobs. If there is pending work, the kernel should wait for the attached eviction (bookkeeping) fence to signal before deleting the mapping. Suggested-by: Christian König Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 31 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++++++ 3 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index c1fee1ac2c0f..9ad366f29776 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -1195,3 +1195,34 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, mutex_unlock(&adev->userq_mutex); return ret; } + +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t saddr) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_bo_va *bo_va = mapping->bo_va; + struct dma_resv *resv = bo_va->base.bo->tbo.base.resv; + int ret = 0; + + if (!ip_mask) + return 0; + + dev_warn_once(adev->dev, "now unmapping a vital queue va:%llx\n", saddr); + /** + * The userq VA mapping reservation should include the eviction fence, + * if the eviction fence can't signal successfully during unmapping, + * then driver will warn to flag this improper unmap of the userq VA. + * Note: The eviction fence may be attached to different BOs, and this + * unmap is only for one kind of userq VAs, so at this point suppose + * the eviction fence is always unsignaled. + */ + if (!dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) { + ret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, true, + MAX_SCHEDULE_TIMEOUT); + if (ret <= 0) + return -EBUSY; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index ef6ea01a8053..036d8dd585cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -146,4 +146,7 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, u32 idx); int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue, u64 addr, u64 expected_size); +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t saddr); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index c1a801203949..3a660ce75913 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1952,6 +1952,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping; struct amdgpu_vm *vm = bo_va->base.vm; bool valid = true; + int r; saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -1972,6 +1973,17 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, return -ENOENT; } + /* It's unlikely to happen that the mapping userq hasn't been idled + * during user requests GEM unmap IOCTL except for forcing the unmap + * from user space. 
+ */ + if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) { + r = amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr); + if (unlikely(r == -EBUSY)) + dev_warn_once(adev->dev, + "Attempt to unmap an active userq buffer\n"); + } + list_del(&mapping->list); amdgpu_vm_it_remove(mapping, &vm->va); mapping->bo_va = NULL; -- cgit v1.2.3 From e6af507de8594a3965cbfba9f70b312d4216d5fb Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 2 Oct 2025 12:42:39 -0500 Subject: drm/amd: Unify shutdown() callback behavior [Why] The shutdown() callback uses amdgpu_ip_suspend() which doesn't notify drm clients during shutdown. This could lead to hangs. [How] Change amdgpu_pci_shutdown() to call the same sequence as suspend/resume. Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7eac510c2d6f..a36e15beafeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2558,7 +2558,8 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) */ if (!amdgpu_passthrough(adev)) adev->mp1_state = PP_MP1_STATE_UNLOAD; - amdgpu_device_ip_suspend(adev); + amdgpu_device_prepare(dev); + amdgpu_device_suspend(dev, true); adev->mp1_state = PP_MP1_STATE_NONE; } -- cgit v1.2.3 From 6062ede68097d21aec896820071257362c965cc2 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 2 Oct 2025 12:42:40 -0500 Subject: drm/amd: Stop exporting amdgpu_device_ip_suspend() outside amdgpu_device amdgpu_device_ip_suspend() doesn't have a caller outside of amdgpu_device.c. Make it static. No intended functional changes. Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6f5b4a0e0a34..d8bb339d4ebd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1638,7 +1638,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, struct drm_file *file_priv); void amdgpu_driver_release_kms(struct drm_device *dev); -int amdgpu_device_ip_suspend(struct amdgpu_device *adev); int amdgpu_device_prepare(struct drm_device *dev); void amdgpu_device_complete(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6d228012187c..edd086213049 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3895,7 +3895,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) * in each IP into a state suitable for suspend. * Returns 0 on success, negative error code on failure. 
*/ -int amdgpu_device_ip_suspend(struct amdgpu_device *adev) +static int amdgpu_device_ip_suspend(struct amdgpu_device *adev) { int r; -- cgit v1.2.3 From f35f2541780b05d0ebc4cdf5a6e9d8aea6573369 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 2 Oct 2025 12:42:41 -0500 Subject: drm/amd: Remove comment about handling errors in amdgpu_device_ip_suspend_phase1() Error handling was introduced in commit e095026f0066 ("drm/amdgpu: validate suspend before function call") so the comment about TODO is no longer needed. Fixes: e095026f0066 ("drm/amdgpu: validate suspend before function call") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index edd086213049..3c1d784d6f3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3780,7 +3780,6 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE) continue; - /* XXX handle errors */ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]); if (r) return r; -- cgit v1.2.3 From b7ff2e79240a51e8639ae0c304423bb1cb460bb5 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 2 Oct 2025 12:42:42 -0500 Subject: drm/amd: Don't always set IP block HW status to false amdgpu_device_ip_suspend_phase2() calls amdgpu_ip_block_suspend() which already sets HW block status to false when succeeding with IP suspend. Remove the explicit call in amdgpu_device_ip_suspend_phase2() so that the status is accurate. Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3c1d784d6f3b..70f5ff4c63e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3864,7 +3864,6 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) /* XXX handle errors */ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]); - adev->ip_blocks[i].status.hw = false; /* handle putting the SMC in the appropriate state */ if (!amdgpu_sriov_vf(adev)) { -- cgit v1.2.3 From 173360fe49c2cd5ec581056985a7a3d4fbd9f1ab Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 2 Oct 2025 12:42:43 -0500 Subject: drm/amd: Pass IP suspend errors up to callers If IP suspend fails the callers should be notified so that they can potentially react. 
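The change itself is the standard early-return propagation idiom, e.g.:

    r = amdgpu_device_ip_suspend_phase1(adev);
    if (r)
            return r;       /* let the caller decide how to react */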
Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 70f5ff4c63e7..a734c05cc00e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3862,8 +3862,9 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) continue; - /* XXX handle errors */ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]); + if (r) + return r; /* handle putting the SMC in the appropriate state */ if (!amdgpu_sriov_vf(adev)) { @@ -5218,7 +5219,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) amdgpu_ras_suspend(adev); - amdgpu_device_ip_suspend_phase1(adev); + r = amdgpu_device_ip_suspend_phase1(adev); + if (r) + return r; amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); amdgpu_userq_suspend(adev); @@ -5231,7 +5234,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) amdgpu_fence_driver_hw_fini(adev); - amdgpu_device_ip_suspend_phase2(adev); + r = amdgpu_device_ip_suspend_phase2(adev); + if (r) + return r; if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, false); -- cgit v1.2.3 From 7877934019d729d1e97ca0160f470e4821bfe553 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 2 Oct 2025 12:42:44 -0500 Subject: drm/amd: Fix error handling with multiple userq IDRs If multiple userq IDRs are in use and there is an error handling one at suspend or resume, it will be silently discarded. Switch the suspend/resume() code to use guards and return immediately.
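guard() from <linux/cleanup.h> ties the unlock to scope exit, which is what makes the immediate returns safe. Schematically (do_one_queue() is a hypothetical stand-in for the map/unmap helpers):

    guard(mutex)(&adev->userq_mutex);        /* dropped automatically on any return */
    list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
            guard(mutex)(&uqm->userq_mutex); /* released at the end of each iteration */
            r = do_one_queue(uqm);
            if (r)
                    return r;                /* both locks unwound here */
    }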
Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 9ad366f29776..1400114dc934 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -1076,27 +1076,25 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev) struct amdgpu_usermode_queue *queue; struct amdgpu_userq_mgr *uqm, *tmp; int queue_id; - int ret = 0, r; + int r; if (!ip_mask) return 0; - mutex_lock(&adev->userq_mutex); + guard(mutex)(&adev->userq_mutex); list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { cancel_delayed_work_sync(&uqm->resume_work); - mutex_lock(&uqm->userq_mutex); + guard(mutex)(&uqm->userq_mutex); idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { if (adev->in_s0ix) r = amdgpu_userq_preempt_helper(uqm, queue); else r = amdgpu_userq_unmap_helper(uqm, queue); if (r) - ret = r; + return r; } - mutex_unlock(&uqm->userq_mutex); } - mutex_unlock(&adev->userq_mutex); - return ret; + return 0; } int amdgpu_userq_resume(struct amdgpu_device *adev) @@ -1105,26 +1103,25 @@ int amdgpu_userq_resume(struct amdgpu_device *adev) { u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); struct amdgpu_usermode_queue *queue; struct amdgpu_userq_mgr *uqm, *tmp; int queue_id; - int ret = 0, r; + int r; if (!ip_mask) return 0; - mutex_lock(&adev->userq_mutex); + guard(mutex)(&adev->userq_mutex); list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { - mutex_lock(&uqm->userq_mutex); + guard(mutex)(&uqm->userq_mutex); idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { if (adev->in_s0ix) r = amdgpu_userq_restore_helper(uqm, queue); else r = amdgpu_userq_map_helper(uqm, queue); if (r) - ret = r; + return r; } - mutex_unlock(&uqm->userq_mutex); } - mutex_unlock(&adev->userq_mutex); - return ret; + + return 0; } int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, -- cgit v1.2.3 From 1f3cca77943b6c8bfc7342722e2ce7b348484ee3 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 2 Oct 2025 12:42:45 -0500 Subject: drm/amd: Pass userq suspend failures up to caller If a userq fails to suspend, the rest of the suspend sequence may have problems. Pass the error code up to the caller for a decision on what to do. Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a734c05cc00e..c7c999ae2a28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5224,7 +5224,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) return r; amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); - amdgpu_userq_suspend(adev); + r = amdgpu_userq_suspend(adev); + if (r) + return r; r = amdgpu_device_evict_resources(adev); if (r) -- cgit v1.2.3 From 16dc933a4fcfcbe453d8e0fa80b18370b7fca228 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Oct 2025 15:07:53 -0400 Subject: drm/amdgpu/userq: drop VCN and VPE doorbell handling VCN and VPE userqs are not yet supported and this code is not correct. Userspace should provide the correct doorbell offset within their doorbell page for the IP.
Adjusting it here will not work as expected as userspace and the queue itself will have different offsets. We need to add a INFO IOCTL query to get the offset and range for each IP within the doorbell page to handle this properly. Cc: Saleemkhan Jamadar Reviewed-by: Saleemkhan Jamadar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 1400114dc934..33c54ecd46d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -417,17 +417,6 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, case AMDGPU_HW_IP_DMA: db_size = sizeof(u64); break; - - case AMDGPU_HW_IP_VCN_ENC: - db_size = sizeof(u32); - db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1; - break; - - case AMDGPU_HW_IP_VPE: - db_size = sizeof(u32); - db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1; - break; - default: drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n", db_info->queue_type); -- cgit v1.2.3 From 5f4f49a41c14890c05faa99881e98cc156ba7e03 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 9 Oct 2025 15:59:04 -0500 Subject: drm/amd: Stop overloading power limit with limit type When passed around internally the upper 8 bits of power limit include the limit type. This is non-obvious without digging into the nuances of each function. Instead pass the limit type as an argument to all applicable layers. Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 2 +- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 3 ++- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 +-- drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 2 +- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 ++++------ drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 1 - 7 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 2b0cdb2a2775..87814c2b526e 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -454,7 +454,7 @@ struct amd_pm_funcs { bool gate, int inst); int (*set_clockgating_by_smu)(void *handle, uint32_t msg_id); - int (*set_power_limit)(void *handle, uint32_t n); + int (*set_power_limit)(void *handle, uint32_t limit_type, uint32_t n); int (*get_power_limit)(void *handle, uint32_t *limit, enum pp_power_limit_level pp_limit_level, enum pp_power_type power_type); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 518d07afc7df..5d08dc3b7110 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -1616,6 +1616,7 @@ int amdgpu_dpm_get_power_limit(struct amdgpu_device *adev, } int amdgpu_dpm_set_power_limit(struct amdgpu_device *adev, + uint32_t limit_type, uint32_t limit) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; @@ -1626,7 +1627,7 @@ int amdgpu_dpm_set_power_limit(struct amdgpu_device *adev, mutex_lock(&adev->pm.mutex); ret = pp_funcs->set_power_limit(adev->powerplay.pp_handle, - limit); + limit_type, limit); mutex_unlock(&adev->pm.mutex); return ret; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index b5fbb0fd1dc0..6bdf185c6b60 100644 --- 
a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3390,13 +3390,12 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, return err; value = value / 1000000; /* convert to Watt */ - value |= limit_type << 24; err = amdgpu_pm_get_access(adev); if (err < 0) return err; - err = amdgpu_dpm_set_power_limit(adev, value); + err = amdgpu_dpm_set_power_limit(adev, limit_type, value); amdgpu_pm_put_access(adev); diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 65c1d98af26c..3bce74f8bb0a 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -553,7 +553,7 @@ int amdgpu_dpm_get_power_limit(struct amdgpu_device *adev, enum pp_power_limit_level pp_limit_level, enum pp_power_type power_type); int amdgpu_dpm_set_power_limit(struct amdgpu_device *adev, - uint32_t limit); + uint32_t limit_type, uint32_t limit); int amdgpu_dpm_is_cclk_dpm_supported(struct amdgpu_device *adev); int amdgpu_dpm_debugfs_print_current_performance_level(struct amdgpu_device *adev, struct seq_file *m); diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index fb595a70bbd1..76a5353d7f4a 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -952,7 +952,7 @@ static int pp_dpm_switch_power_profile(void *handle, return 0; } -static int pp_set_power_limit(void *handle, uint32_t limit) +static int pp_set_power_limit(void *handle, uint32_t limit_type, uint32_t limit) { struct pp_hwmgr *hwmgr = handle; uint32_t max_power_limit; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index fb8086859857..1c5f37cd5b75 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -68,7 +68,7 @@ static int smu_handle_task(struct smu_context *smu, static int smu_reset(struct smu_context *smu); static int smu_set_fan_speed_pwm(void *handle, u32 speed); static int smu_set_fan_control_mode(void *handle, u32 value); -static int smu_set_power_limit(void *handle, uint32_t limit); +static int smu_set_power_limit(void *handle, uint32_t limit_type, uint32_t limit); static int smu_set_fan_speed_rpm(void *handle, uint32_t speed); static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled); static int smu_set_mp1_state(void *handle, enum pp_mp1_state mp1_state); @@ -510,7 +510,7 @@ static void smu_restore_dpm_user_profile(struct smu_context *smu) /* set the user dpm power limit */ if (smu->user_dpm_profile.power_limit) { - ret = smu_set_power_limit(smu, smu->user_dpm_profile.power_limit); + ret = smu_set_power_limit(smu, SMU_DEFAULT_PPT_LIMIT, smu->current_power_limit); if (ret) dev_err(smu->adev->dev, "Failed to set power limit value\n"); } @@ -2258,7 +2258,7 @@ static int smu_resume(struct amdgpu_ip_block *ip_block) adev->pm.dpm_enabled = true; if (smu->current_power_limit) { - ret = smu_set_power_limit(smu, smu->current_power_limit); + ret = smu_set_power_limit(smu, SMU_DEFAULT_PPT_LIMIT, smu->current_power_limit); if (ret && ret != -EOPNOTSUPP) return ret; } @@ -2958,16 +2958,14 @@ int smu_get_power_limit(void *handle, return ret; } -static int smu_set_power_limit(void *handle, uint32_t limit) +static int smu_set_power_limit(void *handle, uint32_t limit_type, uint32_t limit) { struct smu_context *smu = handle; - uint32_t limit_type = limit >> 24; int ret = 0; if (!smu->pm_enabled || 
!smu->adev->pm.dpm_enabled) return -EOPNOTSUPP; - limit &= (1<<24)-1; if (limit_type != SMU_DEFAULT_PPT_LIMIT) if (smu->ppt_funcs->set_power_limit) return smu->ppt_funcs->set_power_limit(smu, limit_type, limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 9fadfa820f5d..1dcbf250f486 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -2397,7 +2397,6 @@ static int vangogh_set_power_limit(struct smu_context *smu, smu->current_power_limit = ppt_limit; break; case SMU_FAST_PPT_LIMIT: - ppt_limit &= ~(SMU_FAST_PPT_LIMIT << 24); if (ppt_limit > power_context->max_fast_ppt_limit) { dev_err(smu->adev->dev, "New power limit (%d) is over the max allowed %d\n", -- cgit v1.2.3 From 56a207c39d342b3a3632918ac672b58f8ecee023 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 9 Oct 2025 15:59:05 -0500 Subject: drm/amd: Remove second call to set_power_limit() The min/max limits only make sense for default PPT. Restructure smu_set_power_limit() to only use them in that case. Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 1c5f37cd5b75..a55f94b91bc9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2966,20 +2966,17 @@ static int smu_set_power_limit(void *handle, uint32_t limit_type, uint32_t limit if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) return -EOPNOTSUPP; - if (limit_type != SMU_DEFAULT_PPT_LIMIT) - if (smu->ppt_funcs->set_power_limit) - return smu->ppt_funcs->set_power_limit(smu, limit_type, limit); - - if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { - dev_err(smu->adev->dev, - "New power limit (%d) is out of range [%d,%d]\n", - limit, smu->min_power_limit, smu->max_power_limit); - return -EINVAL; + if (limit_type == SMU_DEFAULT_PPT_LIMIT) { + if (!limit) + limit = smu->current_power_limit; + if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { + dev_err(smu->adev->dev, + "New power limit (%d) is out of range [%d,%d]\n", + limit, smu->min_power_limit, smu->max_power_limit); + return -EINVAL; + } } - if (!limit) - limit = smu->current_power_limit; - if (smu->ppt_funcs->set_power_limit) { ret = smu->ppt_funcs->set_power_limit(smu, limit_type, limit); if (!ret && !(smu->user_dpm_profile.flags & SMU_DPM_USER_PROFILE_RESTORE)) -- cgit v1.2.3 From 3cd7ceee9a17a0db7f8e3bf260a3762bd4586ac1 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 9 Oct 2025 15:59:06 -0500 Subject: drm/amd: Save and restore all limit types Vangogh has separate limits for default PPT and fast PPT. Add infrastructure to save both of these limits and restore both of them. 
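As a rough illustration of the bookkeeping this enables (a standalone userspace sketch with made-up names, not the driver code; the real change is in the diff below), the single saved limit becomes an array indexed by limit type, and restore walks every slot, skipping types the user never set:

    #include <stdint.h>
    #include <stdio.h>

    enum ppt_limit_type { DEFAULT_PPT, FAST_PPT, LIMIT_TYPE_COUNT };

    /* mirrors user_dpm_profile.power_limits[] from the patch below */
    static uint32_t saved_limits[LIMIT_TYPE_COUNT];

    static void save_limit(enum ppt_limit_type type, uint32_t watts)
    {
            saved_limits[type] = watts;
    }

    static void restore_all_limits(void)
    {
            for (int type = DEFAULT_PPT; type < LIMIT_TYPE_COUNT; type++) {
                    if (!saved_limits[type])
                            continue; /* user never set this type; nothing to restore */
                    printf("restoring limit type %d -> %u W\n",
                           type, (unsigned)saved_limits[type]);
            }
    }

    int main(void)
    {
            save_limit(FAST_PPT, 25);
            restore_all_limits();
            return 0;
    }
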
Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 19 ++++++++++++------- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 3 ++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index a55f94b91bc9..ea27c087ecf1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -508,11 +508,14 @@ static void smu_restore_dpm_user_profile(struct smu_context *smu) /* Enable restore flag */ smu->user_dpm_profile.flags |= SMU_DPM_USER_PROFILE_RESTORE; - /* set the user dpm power limit */ - if (smu->user_dpm_profile.power_limit) { - ret = smu_set_power_limit(smu, SMU_DEFAULT_PPT_LIMIT, smu->current_power_limit); + /* set the user dpm power limits */ + for (int i = SMU_DEFAULT_PPT_LIMIT; i < SMU_LIMIT_TYPE_COUNT; i++) { + if (!smu->user_dpm_profile.power_limits[i]) + continue; + ret = smu_set_power_limit(smu, i, + smu->user_dpm_profile.power_limits[i]); if (ret) - dev_err(smu->adev->dev, "Failed to set power limit value\n"); + dev_err(smu->adev->dev, "Failed to set %d power limit value\n", i); } /* set the user dpm clock configurations */ @@ -2979,11 +2982,13 @@ static int smu_set_power_limit(void *handle, uint32_t limit_type, uint32_t limit if (smu->ppt_funcs->set_power_limit) { ret = smu->ppt_funcs->set_power_limit(smu, limit_type, limit); - if (!ret && !(smu->user_dpm_profile.flags & SMU_DPM_USER_PROFILE_RESTORE)) - smu->user_dpm_profile.power_limit = limit; + if (ret) + return ret; + if (!(smu->user_dpm_profile.flags & SMU_DPM_USER_PROFILE_RESTORE)) + smu->user_dpm_profile.power_limits[limit_type] = limit; } - return ret; + return 0; } static int smu_print_smuclk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 582c186d8b62..32387e8c138b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -212,6 +212,7 @@ enum smu_power_src_type { enum smu_ppt_limit_type { SMU_DEFAULT_PPT_LIMIT = 0, SMU_FAST_PPT_LIMIT, + SMU_LIMIT_TYPE_COUNT, }; enum smu_ppt_limit_level { @@ -231,7 +232,7 @@ enum smu_memory_pool_size { struct smu_user_dpm_profile { uint32_t fan_mode; - uint32_t power_limit; + uint32_t power_limits[SMU_LIMIT_TYPE_COUNT]; uint32_t fan_speed_pwm; uint32_t fan_speed_rpm; uint32_t flags; -- cgit v1.2.3 From db36632ea51e8b02dd637d291c755899b20a5a31 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 27 Aug 2025 11:34:14 -0400 Subject: drm/amdgpu: clean up and unify hw fence handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decouple the amdgpu fence from the amdgpu_job structure. This lets us clean up the separate fence ops for the embedded fence and other fences. This also allows us to allocate the vm fence up front when we allocate the job. 
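A minimal standalone model of the new ownership (hypothetical types; see the amdgpu_job_alloc() hunk in the diff below for the real code): both hw fences are allocated up front together with the job, with a goto-style unwind on failure, so the fence lifetime no longer pins the job allocation.

    #include <stdlib.h>

    struct fence { int dummy; };
    struct job { struct fence *hw_fence, *hw_vm_fence; };

    static struct job *job_alloc(void)
    {
            struct job *job = calloc(1, sizeof(*job));

            if (!job)
                    return NULL;
            job->hw_fence = calloc(1, sizeof(*job->hw_fence)); /* was embedded before */
            if (!job->hw_fence)
                    goto err_job;
            job->hw_vm_fence = calloc(1, sizeof(*job->hw_vm_fence)); /* VM flush fence, pre-allocated */
            if (!job->hw_vm_fence)
                    goto err_fence;
            return job;

    err_fence:
            free(job->hw_fence);
    err_job:
            free(job);
            return NULL;
    }

    int main(void)
    {
            struct job *job = job_alloc();

            if (job) {
                    free(job->hw_vm_fence);
                    free(job->hw_fence);
                    free(job);
            }
            return 0;
    }
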
v2: Additional cleanup suggested by Christian v3: Additional cleanups suggested by Christian v4: Additional cleanups suggested by David and vm fence fix v5: cast seqno (David) Cc: David.Wu3@amd.com Cc: christian.koenig@amd.com Tested-by: David (Ming Qiang) Wu Reviewed-by: David (Ming Qiang) Wu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 143 +++------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 17 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 43 ++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +- 8 files changed, 63 insertions(+), 167 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a70651050acf..0a62727e74f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1902,7 +1902,7 @@ no_preempt: continue; } job = to_amdgpu_job(s_job); - if (preempted && (&job->hw_fence.base) == fence) + if (preempted && (&job->hw_fence->base) == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c7c999ae2a28..b6d2b9c0bac0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5809,11 +5809,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if (!amdgpu_ring_sched_ready(ring)) continue; - /* Clear job fence from fence drv to avoid force_completion - * leave NULL and vm flush fence in fence drv - */ - amdgpu_fence_driver_clear_job_fences(ring); - /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); } @@ -6542,7 +6537,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && dma_fence_is_signaled(&job->hw_fence.base)) { + if (job && dma_fence_is_signaled(&job->hw_fence->base)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 18a7829122d2..1fe31d2f2706 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -45,16 +45,11 @@ * Cast helper */ static const struct dma_fence_ops amdgpu_fence_ops; -static const struct dma_fence_ops amdgpu_job_fence_ops; static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f) { struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base); - if (__f->base.ops == &amdgpu_fence_ops || - __f->base.ops == &amdgpu_job_fence_ops) - return __f; - - return NULL; + return __f; } /** @@ -98,51 +93,32 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * amdgpu_fence_emit - emit a fence on the requested ring * * @ring: ring the fence is associated with - * @f: resulting fence object * @af: amdgpu fence input * @flags: flags to pass into the subordinate .emit_fence() call * * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. 
*/ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, - struct amdgpu_fence *af, unsigned int flags) +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, + unsigned int flags) { struct amdgpu_device *adev = ring->adev; struct dma_fence *fence; - struct amdgpu_fence *am_fence; struct dma_fence __rcu **ptr; uint32_t seq; int r; - if (!af) { - /* create a separate hw fence */ - am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL); - if (!am_fence) - return -ENOMEM; - } else { - am_fence = af; - } - fence = &am_fence->base; - am_fence->ring = ring; + fence = &af->base; + af->ring = ring; seq = ++ring->fence_drv.sync_seq; - am_fence->seq = seq; - if (af) { - dma_fence_init(fence, &amdgpu_job_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, seq); - /* Against remove in amdgpu_job_{free, free_cb} */ - dma_fence_get(fence); - } else { - dma_fence_init(fence, &amdgpu_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, seq); - } + dma_fence_init(fence, &amdgpu_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); - amdgpu_fence_save_wptr(fence); + amdgpu_fence_save_wptr(af); pm_runtime_get_noresume(adev_to_drm(adev)->dev); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { @@ -167,8 +143,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, */ rcu_assign_pointer(*ptr, dma_fence_get(fence)); - *f = fence; - return 0; } @@ -669,36 +643,6 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) } } -/** - * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring - * - * @ring: fence of the ring to be cleared - * - */ -void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) -{ - int i; - struct dma_fence *old, **ptr; - - for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) { - ptr = &ring->fence_drv.fences[i]; - old = rcu_dereference_protected(*ptr, 1); - if (old && old->ops == &amdgpu_job_fence_ops) { - struct amdgpu_job *job; - - /* For non-scheduler bad job, i.e. failed ib test, we need to signal - * it right here or we won't be able to track them in fence_drv - * and they will remain unsignaled during sa_bo free. 
- */ - job = container_of(old, struct amdgpu_job, hw_fence.base); - if (!job->base.s_fence && !dma_fence_is_signaled(old)) - dma_fence_signal(old); - RCU_INIT_POINTER(*ptr, NULL); - dma_fence_put(old); - } - } -} - /** * amdgpu_fence_driver_set_error - set error code on fences * @ring: the ring which contains the fences @@ -755,7 +699,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) /** * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence * - * @fence: fence of the ring to signal + * @af: fence of the ring to signal * */ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) @@ -792,15 +736,13 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) } while (last_seq != seq); spin_unlock_irqrestore(&ring->fence_drv.lock, flags); /* signal the guilty fence */ - amdgpu_fence_write(ring, af->seq); + amdgpu_fence_write(ring, (u32)af->base.seqno); amdgpu_fence_process(ring); } -void amdgpu_fence_save_wptr(struct dma_fence *fence) +void amdgpu_fence_save_wptr(struct amdgpu_fence *af) { - struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base); - - am_fence->wptr = am_fence->ring->wptr; + af->wptr = af->ring->wptr; } static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring, @@ -866,13 +808,6 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) return (const char *)to_amdgpu_fence(f)->ring->name; } -static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) -{ - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); - - return (const char *)to_amdgpu_ring(job->base.sched)->name; -} - /** * amdgpu_fence_enable_signaling - enable signalling on fence * @f: fence @@ -889,23 +824,6 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) return true; } -/** - * amdgpu_job_fence_enable_signaling - enable signalling on job fence - * @f: fence - * - * This is the simliar function with amdgpu_fence_enable_signaling above, it - * only handles the job embedded fence. - */ -static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) -{ - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); - - if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) - amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); - - return true; -} - /** * amdgpu_fence_free - free up the fence memory * @@ -921,21 +839,6 @@ static void amdgpu_fence_free(struct rcu_head *rcu) kfree(to_amdgpu_fence(f)); } -/** - * amdgpu_job_fence_free - free up the job with embedded fence - * - * @rcu: RCU callback head - * - * Free up the job with embedded fence after the RCU grace period. - */ -static void amdgpu_job_fence_free(struct rcu_head *rcu) -{ - struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); - - /* free job if fence has a parent job */ - kfree(container_of(f, struct amdgpu_job, hw_fence.base)); -} - /** * amdgpu_fence_release - callback that fence can be freed * @@ -949,19 +852,6 @@ static void amdgpu_fence_release(struct dma_fence *f) call_rcu(&f->rcu, amdgpu_fence_free); } -/** - * amdgpu_job_fence_release - callback that job embedded fence can be freed - * - * @f: fence - * - * This is the simliar function with amdgpu_fence_release above, it - * only handles the job embedded fence. 
- */ -static void amdgpu_job_fence_release(struct dma_fence *f) -{ - call_rcu(&f->rcu, amdgpu_job_fence_free); -} - static const struct dma_fence_ops amdgpu_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_fence_get_timeline_name, @@ -969,13 +859,6 @@ static const struct dma_fence_ops amdgpu_fence_ops = { .release = amdgpu_fence_release, }; -static const struct dma_fence_ops amdgpu_job_fence_ops = { - .get_driver_name = amdgpu_fence_get_driver_name, - .get_timeline_name = amdgpu_job_fence_get_timeline_name, - .enable_signaling = amdgpu_job_fence_enable_signaling, - .release = amdgpu_job_fence_release, -}; - /* * Fence debugfs */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 7d9bcb72e8dd..39229ece83f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -149,17 +149,19 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, if (job) { vm = job->vm; fence_ctx = job->base.s_fence ? - job->base.s_fence->scheduled.context : 0; + job->base.s_fence->finished.context : 0; shadow_va = job->shadow_va; csa_va = job->csa_va; gds_va = job->gds_va; init_shadow = job->init_shadow; - af = &job->hw_fence; + af = job->hw_fence; /* Save the context of the job for reset handling. * The driver needs this so it can skip the ring * contents for guilty contexts. */ - af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0; + af->context = fence_ctx; + /* the vm fence is also part of the job's context */ + job->hw_vm_fence->context = fence_ctx; } else { vm = NULL; fence_ctx = 0; @@ -167,7 +169,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, csa_va = 0; gds_va = 0; init_shadow = false; - af = NULL; + af = kzalloc(sizeof(*af), GFP_ATOMIC); + if (!af) + return -ENOMEM; } if (!ring->sched.ready) { @@ -289,7 +293,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); } - r = amdgpu_fence_emit(ring, f, af, fence_flags); + r = amdgpu_fence_emit(ring, af, fence_flags); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vmid) @@ -297,6 +301,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, amdgpu_ring_undo(ring); return r; } + *f = &af->base; if (ring->funcs->insert_end) ring->funcs->insert_end(ring); @@ -317,7 +322,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, * fence so we know what rings contents to backup * after we reset the queue. 
*/ - amdgpu_fence_save_wptr(*f); + amdgpu_fence_save_wptr(af); amdgpu_ring_ib_end(ring); amdgpu_ring_commit(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index d020a890a0ea..e08d837668f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -137,7 +137,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) ring->funcs->reset) { dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name); - r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence); + r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence); if (!r) { atomic_inc(&ring->adev->gpu_reset_counter); dev_err(adev->dev, "Ring %s reset succeeded\n", @@ -186,6 +186,9 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int num_ibs, struct amdgpu_job **job, u64 drm_client_id) { + struct amdgpu_fence *af; + int r; + if (num_ibs == 0) return -EINVAL; @@ -193,6 +196,20 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (!*job) return -ENOMEM; + af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); + if (!af) { + r = -ENOMEM; + goto err_job; + } + (*job)->hw_fence = af; + + af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); + if (!af) { + r = -ENOMEM; + goto err_fence; + } + (*job)->hw_vm_fence = af; + (*job)->vm = vm; amdgpu_sync_create(&(*job)->explicit_sync); @@ -204,6 +221,13 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, return drm_sched_job_init(&(*job)->base, entity, 1, owner, drm_client_id); + +err_fence: + kfree((*job)->hw_fence); +err_job: + kfree(*job); + + return r; } int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, @@ -251,11 +275,11 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) struct dma_fence *f; unsigned i; - /* Check if any fences where initialized */ + /* Check if any fences were initialized */ if (job->base.s_fence && job->base.s_fence->finished.ops) f = &job->base.s_fence->finished; - else if (job->hw_fence.base.ops) - f = &job->hw_fence.base; + else if (job->hw_fence && job->hw_fence->base.ops) + f = &job->hw_fence->base; else f = NULL; @@ -271,11 +295,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->explicit_sync); - /* only put the hw fence if has embedded fence */ - if (!job->hw_fence.base.ops) - kfree(job); - else - dma_fence_put(&job->hw_fence.base); + kfree(job); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, @@ -304,10 +324,7 @@ void amdgpu_job_free(struct amdgpu_job *job) if (job->gang_submit != &job->base.s_fence->scheduled) dma_fence_put(job->gang_submit); - if (!job->hw_fence.base.ops) - kfree(job); - else - dma_fence_put(&job->hw_fence.base); + kfree(job); } struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 4a6487eb6cb5..7abf069d17d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -64,7 +64,8 @@ struct amdgpu_job { struct drm_sched_job base; struct amdgpu_vm *vm; struct amdgpu_sync explicit_sync; - struct amdgpu_fence hw_fence; + struct amdgpu_fence *hw_fence; + struct amdgpu_fence *hw_vm_fence; struct dma_fence *gang_submit; uint32_t preamble_status; uint32_t preemption_status; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 4b46e3c26ff3..87b962df5460 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -147,16 +147,14 @@ struct amdgpu_fence { u64 wptr; /* fence context for resets */ u64 context; - uint32_t seq; }; extern const struct drm_sched_backend_ops amdgpu_sched_ops; -void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af); -void amdgpu_fence_save_wptr(struct dma_fence *fence); +void amdgpu_fence_save_wptr(struct amdgpu_fence *af); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, @@ -166,8 +164,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev); void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev); int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev); void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, - struct amdgpu_fence *af, unsigned int flags); +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, + unsigned int flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, uint32_t timeout); bool amdgpu_fence_process(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3a660ce75913..9309830821b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -779,7 +779,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool cleaner_shader_needed = false; bool pasid_mapping_needed = false; struct dma_fence *fence = NULL; - struct amdgpu_fence *af; unsigned int patch; int r; @@ -842,12 +841,10 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, } if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) { - r = amdgpu_fence_emit(ring, &fence, NULL, 0); + r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0); if (r) return r; - /* this is part of the job's context */ - af = container_of(fence, struct amdgpu_fence, base); - af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0; + fence = &job->hw_vm_fence->base; } if (vm_flush_needed) { -- cgit v1.2.3 From aa6674f2da055da0629a62eb8debe774515645df Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 7 Oct 2025 18:12:04 +0530 Subject: drm/amdgpu: Reorganize sysfs init/fini calls Aggregate sysfs init/fini calls into separate functions. No functional change.
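The shape of the refactor, reduced to a toy (stand-in names; the real wrappers are amdgpu_device_sys_interface_init/_fini in the diff below): the paired registration calls move behind one init and one fini function so they stay mirror images, and as in the driver most sysfs failures are logged rather than treated as fatal.

    #include <stdio.h>

    struct dev { const char *name; };

    static int pm_sysfs_init(struct dev *d)     { (void)d; return 0; }  /* stand-ins */
    static int ucode_sysfs_init(struct dev *d)  { (void)d; return -1; }
    static void pm_sysfs_fini(struct dev *d)    { (void)d; }
    static void ucode_sysfs_fini(struct dev *d) { (void)d; }

    static int sys_interface_init(struct dev *d)
    {
            int r;

            r = pm_sysfs_init(d);
            if (r)
                    fprintf(stderr, "%s: pm sysfs failed (%d)\n", d->name, r);
            r = ucode_sysfs_init(d);
            if (r)
                    fprintf(stderr, "%s: ucode sysfs failed (%d)\n", d->name, r);
            return r; /* like the driver wrapper, reports only the last failure */
    }

    static void sys_interface_fini(struct dev *d)
    {
            pm_sysfs_fini(d);
            ucode_sysfs_fini(d);
    }

    int main(void)
    {
            struct dev d = { "gpu0" };

            sys_interface_init(&d);
            sys_interface_fini(&d);
            return 0;
    }
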
Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 88 +++++++++++++++++------------- 1 file changed, 51 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b6d2b9c0bac0..86818aa9ba13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4385,6 +4385,55 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) dev_info(adev->dev, "MCBP is enabled\n"); } +static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_atombios_sysfs_init(adev); + if (r) + drm_err(&adev->ddev, + "registering atombios sysfs failed (%d).\n", r); + + r = amdgpu_pm_sysfs_init(adev); + if (r) + dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r); + + r = amdgpu_ucode_sysfs_init(adev); + if (r) { + adev->ucode_sysfs_en = false; + dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r); + } else + adev->ucode_sysfs_en = true; + + r = amdgpu_device_attr_sysfs_init(adev); + if (r) + dev_err(adev->dev, "Could not create amdgpu device attr\n"); + + r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group); + if (r) + dev_err(adev->dev, + "Could not create amdgpu board attributes\n"); + + amdgpu_fru_sysfs_init(adev); + amdgpu_reg_state_sysfs_init(adev); + amdgpu_xcp_sysfs_init(adev); + + return r; +} + +static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev) +{ + if (adev->pm.sysfs_initialized) + amdgpu_pm_sysfs_fini(adev); + if (adev->ucode_sysfs_en) + amdgpu_ucode_sysfs_fini(adev); + amdgpu_device_attr_sysfs_fini(adev); + amdgpu_fru_sysfs_fini(adev); + + amdgpu_reg_state_sysfs_fini(adev); + amdgpu_xcp_sysfs_fini(adev); +} + /** * amdgpu_device_init - initialize the driver * @@ -4813,34 +4862,7 @@ fence_driver_init: * operations performed in `late_init` might affect the sysfs * interfaces creating. 
*/ - r = amdgpu_atombios_sysfs_init(adev); - if (r) - drm_err(&adev->ddev, - "registering atombios sysfs failed (%d).\n", r); - - r = amdgpu_pm_sysfs_init(adev); - if (r) - dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r); - - r = amdgpu_ucode_sysfs_init(adev); - if (r) { - adev->ucode_sysfs_en = false; - dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r); - } else - adev->ucode_sysfs_en = true; - - r = amdgpu_device_attr_sysfs_init(adev); - if (r) - dev_err(adev->dev, "Could not create amdgpu device attr\n"); - - r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group); - if (r) - dev_err(adev->dev, - "Could not create amdgpu board attributes\n"); - - amdgpu_fru_sysfs_init(adev); - amdgpu_reg_state_sysfs_init(adev); - amdgpu_xcp_sysfs_init(adev); + r = amdgpu_device_sys_interface_init(adev); if (IS_ENABLED(CONFIG_PERF_EVENTS)) r = amdgpu_pmu_init(adev); @@ -4962,15 +4984,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) } amdgpu_fence_driver_hw_fini(adev); - if (adev->pm.sysfs_initialized) - amdgpu_pm_sysfs_fini(adev); - if (adev->ucode_sysfs_en) - amdgpu_ucode_sysfs_fini(adev); - amdgpu_device_attr_sysfs_fini(adev); - amdgpu_fru_sysfs_fini(adev); - - amdgpu_reg_state_sysfs_fini(adev); - amdgpu_xcp_sysfs_fini(adev); + amdgpu_device_sys_interface_fini(adev); /* disable ras feature must before hw fini */ amdgpu_ras_pre_fini(adev); -- cgit v1.2.3 From 9e2096baab9addedde324f406d3ec9166e70d15b Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 10 Oct 2025 17:12:52 +0530 Subject: drm/amdgpu: Add amdgpu_discovery_info Add amdgpu_discovery_info structure to keep all discovery related information. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 192 +++++++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 11 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 6 files changed, 129 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d8bb339d4ebd..d9bd1d71552e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -839,8 +839,6 @@ struct amd_powerplay { const struct amd_pm_funcs *pp_funcs; }; -struct ip_discovery_top; - /* polaris10 kickers */ #define ASICID_IS_P20(did, rid) (((did == 0x67DF) && \ ((rid == 0xE3) || \ @@ -972,8 +970,7 @@ struct amdgpu_device { struct notifier_block acpi_nb; struct notifier_block pm_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; - struct debugfs_blob_wrapper debugfs_vbios_blob; - struct debugfs_blob_wrapper debugfs_discovery_blob; + struct debugfs_blob_wrapper debugfs_vbios_blob; struct mutex srbm_mutex; /* GRBM index mutex. 
Protects concurrent access to GRBM index */ struct mutex grbm_idx_mutex; @@ -1063,6 +1060,9 @@ struct amdgpu_device { u32 log2_max_MBps; } mm_stats; + /* discovery*/ + struct amdgpu_discovery_info discovery; + /* display */ bool enable_virtual_display; struct amdgpu_vkms_output *amdgpu_vkms_output; @@ -1265,8 +1265,6 @@ struct amdgpu_device { struct list_head ras_list; - struct ip_discovery_top *ip_top; - struct amdgpu_reset_domain *reset_domain; struct mutex benchmark_mutex; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 0a62727e74f0..d3a5189bd512 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2123,10 +2123,9 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) debugfs_create_blob("amdgpu_vbios", 0444, root, &adev->debugfs_vbios_blob); - adev->debugfs_discovery_blob.data = adev->mman.discovery_bin; - adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size; - debugfs_create_blob("amdgpu_discovery", 0444, root, - &adev->debugfs_discovery_blob); + if (adev->discovery.debugfs_blob.size) + debugfs_create_blob("amdgpu_discovery", 0444, root, + &adev->discovery.debugfs_blob); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 86818aa9ba13..50acf2a94492 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2633,7 +2633,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) chip_name = "arcturus"; break; case CHIP_NAVI12: - if (adev->mman.discovery_bin) + if (adev->discovery.bin) return 0; chip_name = "navi12"; break; @@ -5059,7 +5059,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) if (IS_ENABLED(CONFIG_PERF_EVENTS)) amdgpu_pmu_fini(adev); - if (adev->mman.discovery_bin) + if (adev->discovery.bin) amdgpu_discovery_fini(adev); amdgpu_reset_put_reset_domain(adev->reset_domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index dd7b2b796427..13de62829f9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -254,9 +254,9 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET; /* This region is read-only and reserved from system use */ - discv_regn = memremap(pos, adev->mman.discovery_tmr_size, MEMREMAP_WC); + discv_regn = memremap(pos, adev->discovery.size, MEMREMAP_WC); if (discv_regn) { - memcpy(binary, discv_regn, adev->mman.discovery_tmr_size); + memcpy(binary, discv_regn, adev->discovery.size); memunmap(discv_regn); return 0; } @@ -301,7 +301,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, if (sz_valid) { uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, - adev->mman.discovery_tmr_size, false); + adev->discovery.size, false); } else { ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary); } @@ -389,6 +389,7 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev) static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev, struct binary_header *bhdr) { + uint8_t *discovery_bin = adev->discovery.bin; struct table_info *info; uint16_t checksum; uint16_t offset; @@ -398,14 +399,14 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev, checksum = 
le16_to_cpu(info->checksum); struct nps_info_header *nhdr = - (struct nps_info_header *)(adev->mman.discovery_bin + offset); + (struct nps_info_header *)(discovery_bin + offset); if (le32_to_cpu(nhdr->table_id) != NPS_INFO_TABLE_ID) { dev_dbg(adev->dev, "invalid ip discovery nps info table id\n"); return -EINVAL; } - if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, + if (!amdgpu_discovery_verify_checksum(discovery_bin + offset, le32_to_cpu(nhdr->size_bytes), checksum)) { dev_dbg(adev->dev, "invalid nps info data table checksum\n"); @@ -447,49 +448,53 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) { struct table_info *info; struct binary_header *bhdr; + uint8_t *discovery_bin; const char *fw_name; uint16_t offset; uint16_t size; uint16_t checksum; int r; - adev->mman.discovery_tmr_size = DISCOVERY_TMR_SIZE; - adev->mman.discovery_bin = kzalloc(adev->mman.discovery_tmr_size, GFP_KERNEL); - if (!adev->mman.discovery_bin) + adev->discovery.bin = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL); + if (!adev->discovery.bin) return -ENOMEM; + adev->discovery.size = DISCOVERY_TMR_SIZE; + adev->discovery.debugfs_blob.data = adev->discovery.bin; + adev->discovery.debugfs_blob.size = adev->discovery.size; + discovery_bin = adev->discovery.bin; /* Read from file if it is the preferred option */ fw_name = amdgpu_discovery_get_fw_name(adev); if (fw_name != NULL) { drm_dbg(&adev->ddev, "use ip discovery information from file"); - r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name); + r = amdgpu_discovery_read_binary_from_file(adev, discovery_bin, + fw_name); if (r) goto out; } else { drm_dbg(&adev->ddev, "use ip discovery information from memory"); - r = amdgpu_discovery_read_binary_from_mem( - adev, adev->mman.discovery_bin); + r = amdgpu_discovery_read_binary_from_mem(adev, discovery_bin); if (r) goto out; } /* check the ip discovery binary signature */ - if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) { + if (!amdgpu_discovery_verify_binary_signature(discovery_bin)) { dev_err(adev->dev, "get invalid ip discovery binary signature\n"); r = -EINVAL; goto out; } - bhdr = (struct binary_header *)adev->mman.discovery_bin; + bhdr = (struct binary_header *)discovery_bin; offset = offsetof(struct binary_header, binary_checksum) + sizeof(bhdr->binary_checksum); size = le16_to_cpu(bhdr->binary_size) - offset; checksum = le16_to_cpu(bhdr->binary_checksum); - if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, - size, checksum)) { + if (!amdgpu_discovery_verify_checksum(discovery_bin + offset, size, + checksum)) { dev_err(adev->dev, "invalid ip discovery binary checksum\n"); r = -EINVAL; goto out; @@ -501,15 +506,16 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) if (offset) { struct ip_discovery_header *ihdr = - (struct ip_discovery_header *)(adev->mman.discovery_bin + offset); + (struct ip_discovery_header *)(discovery_bin + offset); if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) { dev_err(adev->dev, "invalid ip discovery data table signature\n"); r = -EINVAL; goto out; } - if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, - le16_to_cpu(ihdr->size), checksum)) { + if (!amdgpu_discovery_verify_checksum(discovery_bin + offset, + le16_to_cpu(ihdr->size), + checksum)) { dev_err(adev->dev, "invalid ip discovery data table checksum\n"); r = -EINVAL; goto out; @@ -522,7 +528,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) if (offset) { 
struct gpu_info_header *ghdr = - (struct gpu_info_header *)(adev->mman.discovery_bin + offset); + (struct gpu_info_header *)(discovery_bin + offset); if (le32_to_cpu(ghdr->table_id) != GC_TABLE_ID) { dev_err(adev->dev, "invalid ip discovery gc table id\n"); @@ -530,8 +536,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) goto out; } - if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, - le32_to_cpu(ghdr->size), checksum)) { + if (!amdgpu_discovery_verify_checksum(discovery_bin + offset, + le32_to_cpu(ghdr->size), + checksum)) { dev_err(adev->dev, "invalid gc data table checksum\n"); r = -EINVAL; goto out; @@ -544,7 +551,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) if (offset) { struct harvest_info_header *hhdr = - (struct harvest_info_header *)(adev->mman.discovery_bin + offset); + (struct harvest_info_header *)(discovery_bin + offset); if (le32_to_cpu(hhdr->signature) != HARVEST_TABLE_SIGNATURE) { dev_err(adev->dev, "invalid ip discovery harvest table signature\n"); @@ -552,8 +559,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) goto out; } - if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, - sizeof(struct harvest_table), checksum)) { + if (!amdgpu_discovery_verify_checksum( + discovery_bin + offset, + sizeof(struct harvest_table), checksum)) { dev_err(adev->dev, "invalid harvest data table checksum\n"); r = -EINVAL; goto out; @@ -566,7 +574,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) if (offset) { struct vcn_info_header *vhdr = - (struct vcn_info_header *)(adev->mman.discovery_bin + offset); + (struct vcn_info_header *)(discovery_bin + offset); if (le32_to_cpu(vhdr->table_id) != VCN_INFO_TABLE_ID) { dev_err(adev->dev, "invalid ip discovery vcn table id\n"); @@ -574,8 +582,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) goto out; } - if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, - le32_to_cpu(vhdr->size_bytes), checksum)) { + if (!amdgpu_discovery_verify_checksum( + discovery_bin + offset, + le32_to_cpu(vhdr->size_bytes), checksum)) { dev_err(adev->dev, "invalid vcn data table checksum\n"); r = -EINVAL; goto out; @@ -588,7 +597,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) if (0 && offset) { struct mall_info_header *mhdr = - (struct mall_info_header *)(adev->mman.discovery_bin + offset); + (struct mall_info_header *)(discovery_bin + offset); if (le32_to_cpu(mhdr->table_id) != MALL_INFO_TABLE_ID) { dev_err(adev->dev, "invalid ip discovery mall table id\n"); @@ -596,8 +605,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) goto out; } - if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, - le32_to_cpu(mhdr->size_bytes), checksum)) { + if (!amdgpu_discovery_verify_checksum( + discovery_bin + offset, + le32_to_cpu(mhdr->size_bytes), checksum)) { dev_err(adev->dev, "invalid mall data table checksum\n"); r = -EINVAL; goto out; @@ -607,8 +617,8 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) return 0; out: - kfree(adev->mman.discovery_bin); - adev->mman.discovery_bin = NULL; + kfree(adev->discovery.bin); + adev->discovery.bin = NULL; if ((amdgpu_discovery != 2) && (RREG32(mmIP_DISCOVERY_VERSION) == 4)) amdgpu_ras_query_boot_status(adev, 4); @@ -620,8 +630,8 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev); void amdgpu_discovery_fini(struct amdgpu_device *adev) { amdgpu_discovery_sysfs_fini(adev); - kfree(adev->mman.discovery_bin); - 
adev->mman.discovery_bin = NULL; + kfree(adev->discovery.bin); + adev->discovery.bin = NULL; } static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev, @@ -646,6 +656,7 @@ static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev, static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, uint32_t *vcn_harvest_count) { + uint8_t *discovery_bin = adev->discovery.bin; struct binary_header *bhdr; struct ip_discovery_header *ihdr; struct die_header *dhdr; @@ -655,21 +666,21 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, uint8_t inst; int i, j; - bhdr = (struct binary_header *)adev->mman.discovery_bin; - ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + - le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); + bhdr = (struct binary_header *)discovery_bin; + ihdr = (struct ip_discovery_header + *)(discovery_bin + + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); num_dies = le16_to_cpu(ihdr->num_dies); /* scan harvest bit of all IP data structures */ for (i = 0; i < num_dies; i++) { die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); - dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset); + dhdr = (struct die_header *)(discovery_bin + die_offset); num_ips = le16_to_cpu(dhdr->num_ips); ip_offset = die_offset + sizeof(*dhdr); for (j = 0; j < num_ips; j++) { - ip = (struct ip *)(adev->mman.discovery_bin + - ip_offset); + ip = (struct ip *)(discovery_bin + ip_offset); inst = ip->number_instance; hw_id = le16_to_cpu(ip->hw_id); if (amdgpu_discovery_validate_ip(adev, inst, hw_id)) @@ -711,13 +722,14 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev, uint32_t *vcn_harvest_count, uint32_t *umc_harvest_count) { + uint8_t *discovery_bin = adev->discovery.bin; struct binary_header *bhdr; struct harvest_table *harvest_info; u16 offset; int i; uint32_t umc_harvest_config = 0; - bhdr = (struct binary_header *)adev->mman.discovery_bin; + bhdr = (struct binary_header *)discovery_bin; offset = le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset); if (!offset) { @@ -725,7 +737,7 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev, return; } - harvest_info = (struct harvest_table *)(adev->mman.discovery_bin + offset); + harvest_info = (struct harvest_table *)(discovery_bin + offset); for (i = 0; i < 32; i++) { if (le16_to_cpu(harvest_info->list[i].hw_id) == 0) @@ -1021,8 +1033,8 @@ static void ip_disc_release(struct kobject *kobj) kobj); struct amdgpu_device *adev = ip_top->adev; - adev->ip_top = NULL; kfree(ip_top); + adev->discovery.ip_top = NULL; } static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev, @@ -1062,6 +1074,7 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, const size_t _ip_offset, const int num_ips, bool reg_base_64) { + uint8_t *discovery_bin = adev->discovery.bin; int ii, jj, kk, res; uint16_t hw_id; uint8_t inst; @@ -1079,7 +1092,7 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, struct ip_v4 *ip; struct ip_hw_instance *ip_hw_instance; - ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset); + ip = (struct ip_v4 *)(discovery_bin + ip_offset); inst = ip->instance_number; hw_id = le16_to_cpu(ip->hw_id); if (amdgpu_discovery_validate_ip(adev, inst, hw_id) || @@ -1166,17 +1179,20 @@ next_ip: static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev) { + struct ip_discovery_top *ip_top = adev->discovery.ip_top; + uint8_t *discovery_bin = 
adev->discovery.bin; struct binary_header *bhdr; struct ip_discovery_header *ihdr; struct die_header *dhdr; - struct kset *die_kset = &adev->ip_top->die_kset; + struct kset *die_kset = &ip_top->die_kset; u16 num_dies, die_offset, num_ips; size_t ip_offset; int ii, res; - bhdr = (struct binary_header *)adev->mman.discovery_bin; - ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + - le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); + bhdr = (struct binary_header *)discovery_bin; + ihdr = (struct ip_discovery_header + *)(discovery_bin + + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); num_dies = le16_to_cpu(ihdr->num_dies); DRM_DEBUG("number of dies: %d\n", num_dies); @@ -1185,7 +1201,7 @@ static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev) struct ip_die_entry *ip_die_entry; die_offset = le16_to_cpu(ihdr->die_info[ii].die_offset); - dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset); + dhdr = (struct die_header *)(discovery_bin + die_offset); num_ips = le16_to_cpu(dhdr->num_ips); ip_offset = die_offset + sizeof(*dhdr); @@ -1219,30 +1235,32 @@ static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev) static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev) { + uint8_t *discovery_bin = adev->discovery.bin; + struct ip_discovery_top *ip_top; struct kset *die_kset; int res, ii; - if (!adev->mman.discovery_bin) + if (!discovery_bin) return -EINVAL; - adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL); - if (!adev->ip_top) + ip_top = kzalloc(sizeof(*ip_top), GFP_KERNEL); + if (!ip_top) return -ENOMEM; - adev->ip_top->adev = adev; - - res = kobject_init_and_add(&adev->ip_top->kobj, &ip_discovery_ktype, + ip_top->adev = adev; + adev->discovery.ip_top = ip_top; + res = kobject_init_and_add(&ip_top->kobj, &ip_discovery_ktype, &adev->dev->kobj, "ip_discovery"); if (res) { DRM_ERROR("Couldn't init and add ip_discovery/"); goto Err; } - die_kset = &adev->ip_top->die_kset; + die_kset = &ip_top->die_kset; kobject_set_name(&die_kset->kobj, "%s", "die"); - die_kset->kobj.parent = &adev->ip_top->kobj; + die_kset->kobj.parent = &ip_top->kobj; die_kset->kobj.ktype = &die_kobj_ktype; - res = kset_register(&adev->ip_top->die_kset); + res = kset_register(&ip_top->die_kset); if (res) { DRM_ERROR("Couldn't register die_kset"); goto Err; @@ -1256,7 +1274,7 @@ static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev) return res; Err: - kobject_put(&adev->ip_top->kobj); + kobject_put(&ip_top->kobj); return res; } @@ -1301,10 +1319,11 @@ static void amdgpu_discovery_sysfs_die_free(struct ip_die_entry *ip_die_entry) static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev) { + struct ip_discovery_top *ip_top = adev->discovery.ip_top; struct list_head *el, *tmp; struct kset *die_kset; - die_kset = &adev->ip_top->die_kset; + die_kset = &ip_top->die_kset; spin_lock(&die_kset->list_lock); list_for_each_prev_safe(el, tmp, &die_kset->list) { list_del_init(el); @@ -1313,8 +1332,8 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev) spin_lock(&die_kset->list_lock); } spin_unlock(&die_kset->list_lock); - kobject_put(&adev->ip_top->die_kset.kobj); - kobject_put(&adev->ip_top->kobj); + kobject_put(&ip_top->die_kset.kobj); + kobject_put(&ip_top->kobj); } /* ================================================== */ @@ -1325,6 +1344,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) struct binary_header *bhdr; struct ip_discovery_header *ihdr; struct die_header *dhdr; + uint8_t *discovery_bin; 
struct ip_v4 *ip; uint16_t die_offset; uint16_t ip_offset; @@ -1340,22 +1360,23 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) r = amdgpu_discovery_init(adev); if (r) return r; - + discovery_bin = adev->discovery.bin; wafl_ver = 0; adev->gfx.xcc_mask = 0; adev->sdma.sdma_mask = 0; adev->vcn.inst_mask = 0; adev->jpeg.inst_mask = 0; - bhdr = (struct binary_header *)adev->mman.discovery_bin; - ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + - le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); + bhdr = (struct binary_header *)discovery_bin; + ihdr = (struct ip_discovery_header + *)(discovery_bin + + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); num_dies = le16_to_cpu(ihdr->num_dies); DRM_DEBUG("number of dies: %d\n", num_dies); for (i = 0; i < num_dies; i++) { die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); - dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset); + dhdr = (struct die_header *)(discovery_bin + die_offset); num_ips = le16_to_cpu(dhdr->num_ips); ip_offset = die_offset + sizeof(*dhdr); @@ -1369,7 +1390,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) le16_to_cpu(dhdr->die_id), num_ips); for (j = 0; j < num_ips; j++) { - ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset); + ip = (struct ip_v4 *)(discovery_bin + ip_offset); inst = ip->instance_number; hw_id = le16_to_cpu(ip->hw_id); @@ -1519,16 +1540,16 @@ next_ip: static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) { + uint8_t *discovery_bin = adev->discovery.bin; struct ip_discovery_header *ihdr; struct binary_header *bhdr; int vcn_harvest_count = 0; int umc_harvest_count = 0; uint16_t offset, ihdr_ver; - bhdr = (struct binary_header *)adev->mman.discovery_bin; + bhdr = (struct binary_header *)discovery_bin; offset = le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset); - ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + - offset); + ihdr = (struct ip_discovery_header *)(discovery_bin + offset); ihdr_ver = le16_to_cpu(ihdr->version); /* * Harvest table does not fit Navi1x and legacy GPUs, @@ -1575,22 +1596,23 @@ union gc_info { static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) { + uint8_t *discovery_bin = adev->discovery.bin; struct binary_header *bhdr; union gc_info *gc_info; u16 offset; - if (!adev->mman.discovery_bin) { + if (!discovery_bin) { DRM_ERROR("ip discovery uninitialized\n"); return -EINVAL; } - bhdr = (struct binary_header *)adev->mman.discovery_bin; + bhdr = (struct binary_header *)discovery_bin; offset = le16_to_cpu(bhdr->table_list[GC].offset); if (!offset) return 0; - gc_info = (union gc_info *)(adev->mman.discovery_bin + offset); + gc_info = (union gc_info *)(discovery_bin + offset); switch (le16_to_cpu(gc_info->v1.header.version_major)) { case 1: @@ -1683,24 +1705,25 @@ union mall_info { static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) { + uint8_t *discovery_bin = adev->discovery.bin; struct binary_header *bhdr; union mall_info *mall_info; u32 u, mall_size_per_umc, m_s_present, half_use; u64 mall_size; u16 offset; - if (!adev->mman.discovery_bin) { + if (!discovery_bin) { DRM_ERROR("ip discovery uninitialized\n"); return -EINVAL; } - bhdr = (struct binary_header *)adev->mman.discovery_bin; + bhdr = (struct binary_header *)discovery_bin; offset = le16_to_cpu(bhdr->table_list[MALL_INFO].offset); if (!offset) return 0; - mall_info = (union mall_info *)(adev->mman.discovery_bin + offset); + mall_info = (union mall_info *)(discovery_bin + 
offset); switch (le16_to_cpu(mall_info->v1.header.version_major)) { case 1: @@ -1739,12 +1762,13 @@ union vcn_info { static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) { + uint8_t *discovery_bin = adev->discovery.bin; struct binary_header *bhdr; union vcn_info *vcn_info; u16 offset; int v; - if (!adev->mman.discovery_bin) { + if (!discovery_bin) { DRM_ERROR("ip discovery uninitialized\n"); return -EINVAL; } @@ -1759,13 +1783,13 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) return -EINVAL; } - bhdr = (struct binary_header *)adev->mman.discovery_bin; + bhdr = (struct binary_header *)discovery_bin; offset = le16_to_cpu(bhdr->table_list[VCN_INFO].offset); if (!offset) return 0; - vcn_info = (union vcn_info *)(adev->mman.discovery_bin + offset); + vcn_info = (union vcn_info *)(discovery_bin + offset); switch (le16_to_cpu(vcn_info->v1.header.version_major)) { case 1: @@ -1825,6 +1849,7 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, struct amdgpu_gmc_memrange **ranges, int *range_cnt, bool refresh) { + uint8_t *discovery_bin = adev->discovery.bin; struct amdgpu_gmc_memrange *mem_ranges; struct binary_header *bhdr; union nps_info *nps_info; @@ -1841,13 +1866,13 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, return r; nps_info = &nps_data; } else { - if (!adev->mman.discovery_bin) { + if (!discovery_bin) { dev_err(adev->dev, "fetch mem range failed, ip discovery uninitialized\n"); return -EINVAL; } - bhdr = (struct binary_header *)adev->mman.discovery_bin; + bhdr = (struct binary_header *)discovery_bin; offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset); if (!offset) @@ -1857,8 +1882,7 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, if (amdgpu_discovery_verify_npsinfo(adev, bhdr)) return -ENOENT; - nps_info = - (union nps_info *)(adev->mman.discovery_bin + offset); + nps_info = (union nps_info *)(discovery_bin + offset); } switch (le16_to_cpu(nps_info->v1.header.version_major)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index b44d56465c5b..b1eec3af3c4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -24,9 +24,20 @@ #ifndef __AMDGPU_DISCOVERY__ #define __AMDGPU_DISCOVERY__ +#include + #define DISCOVERY_TMR_SIZE (10 << 10) #define DISCOVERY_TMR_OFFSET (64 << 10) +struct ip_discovery_top; + +struct amdgpu_discovery_info { + struct debugfs_blob_wrapper debugfs_blob; + struct ip_discovery_top *ip_top; + uint32_t size; + uint8_t *bin; +}; + void amdgpu_discovery_fini(struct amdgpu_device *adev); int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 86fc1809681c..a10b2982ca7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1957,7 +1957,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) * If IP discovery enabled, a block of memory should be * reserved for IP discovey. */ - if (adev->mman.discovery_bin) { + if (adev->discovery.bin) { r = amdgpu_ttm_reserve_tmr(adev); if (r) return r; -- cgit v1.2.3 From 1cbac73d1a15db0f149cb0c6a932301de5f15b05 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 7 Oct 2025 18:30:24 +0530 Subject: drm/amdgpu: Move reset-on-init sequence earlier Complete reset-on-init sequence before sysfs interfaces are created. 
Devices get properly initialized only after reset, and only then should sysfs interfaces be made available. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 50acf2a94492..0d5585bc3b04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4857,6 +4857,8 @@ fence_driver_init: flush_delayed_work(&adev->delayed_init_work); } + if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI) + amdgpu_xgmi_reset_on_init(adev); /* * Place those sysfs registering after `late_init`. As some of those * operations performed in `late_init` might affect the sysfs @@ -4890,9 +4892,6 @@ fence_driver_init: if (px) vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); - if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI) - amdgpu_xgmi_reset_on_init(adev); - amdgpu_device_check_iommu_direct_map(adev); adev->pm_nb.notifier_call = amdgpu_device_pm_notifier; -- cgit v1.2.3 From 2b5b3f9b698f8e5d6492763947231e7d9420d8da Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 8 Oct 2025 13:07:13 +0530 Subject: drm/amd/pm: Grant interface access after full init Allow access to user interfaces like sysfs/hwmon only after full initialization of the device. When the device is part of an XGMI hive and a reset is required during initialization, the interface files will be created as part of minimal device initialization. Full initialization of the device will be done only after all devices in the XGMI hive are probed and a reset is done on all of them together. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 6bdf185c6b60..c83d69994380 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -108,8 +108,9 @@ const char * const amdgpu_pp_profile_name[] = { static int amdgpu_pm_dev_state_check(struct amdgpu_device *adev, bool runpm) { bool runpm_check = runpm ? adev->in_runpm : false; + bool full_init = (adev->init_lvl->level == AMDGPU_INIT_LEVEL_DEFAULT); - if (amdgpu_in_reset(adev)) + if (amdgpu_in_reset(adev) || !full_init) return -EBUSY; if (adev->in_suspend && !runpm_check) -- cgit v1.2.3 From 80e462c5b1963059dd78ee4650a3dfd53de4a10f Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 30 Sep 2025 10:46:05 +0800 Subject: drm/amd/pm: export a function amdgpu_smu_ras_send_msg to allow sending messages directly Provide an interface that allows RAS clients to send messages to the SMU/PMFW directly.
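The export follows a simple allow-list dispatch pattern, sketched standalone below (message names are stand-ins, not the real SMU enum; see smu_v13_0_6_ras_send_msg() in the diff for the actual list): only RAS-related messages are forwarded to the firmware mailbox, anything else is refused with -EPERM.

    #include <errno.h>
    #include <stdio.h>

    enum msg { MSG_QUERY_CE_COUNT, MSG_DUMP_MCA_BANK, MSG_SET_FAN_SPEED }; /* stand-ins */

    static int fw_send(enum msg m, unsigned int param, unsigned int *read_arg)
    {
            (void)m; (void)param;
            if (read_arg)
                    *read_arg = 42; /* pretend the firmware responded */
            return 0;
    }

    /* Forward only the RAS allow-list; refuse everything else. */
    static int ras_send_msg(enum msg m, unsigned int param, unsigned int *read_arg)
    {
            switch (m) {
            case MSG_QUERY_CE_COUNT:
            case MSG_DUMP_MCA_BANK:
                    return fw_send(m, param, read_arg);
            default:
                    return -EPERM;
            }
    }

    int main(void)
    {
            unsigned int out = 0;

            printf("ras msg: %d (out=%u)\n", ras_send_msg(MSG_QUERY_CE_COUNT, 0, &out), out);
            printf("fan msg: %d\n", ras_send_msg(MSG_SET_FAN_SPEED, 0, NULL));
            return 0;
    }
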
Signed-off-by: Yang Wang Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 11 +++++++++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 11 +++++++++++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 19 +++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index ea27c087ecf1..3f14fdaed332 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -612,6 +612,17 @@ bool is_support_cclk_dpm(struct amdgpu_device *adev) return true; } +int amdgpu_smu_ras_send_msg(struct amdgpu_device *adev, enum smu_message_type msg, + uint32_t param, uint32_t *read_arg) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = -EOPNOTSUPP; + + if (smu->ppt_funcs && smu->ppt_funcs->ras_send_msg) + ret = smu->ppt_funcs->ras_send_msg(smu, msg, param, read_arg); + + return ret; +} static int smu_sys_get_pp_table(void *handle, char **table) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 32387e8c138b..c48028abc8c4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1522,6 +1522,15 @@ struct pptable_funcs { */ ssize_t (*get_xcp_metrics)(struct smu_context *smu, int xcp_id, void *table); + /** + * @ras_send_msg: Send a message with a parameter from Ras + * &msg: Type of message. + * &param: Message parameter. + * &read_arg: SMU response (optional). + */ + int (*ras_send_msg)(struct smu_context *smu, + enum smu_message_type msg, uint32_t param, uint32_t *read_arg); + }; typedef enum { @@ -1787,6 +1796,8 @@ int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, ssize_t smu_get_pm_policy_info(struct smu_context *smu, enum pp_pm_policy p_type, char *sysbuf); +int amdgpu_smu_ras_send_msg(struct amdgpu_device *adev, enum smu_message_type msg, + uint32_t param, uint32_t *readarg); #endif void smu_feature_cap_set(struct smu_context *smu, enum smu_feature_cap_id fea_id); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 285cf7979693..5d4315c4aa27 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -3226,6 +3226,24 @@ static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask) return ret; } +static int smu_v13_0_6_ras_send_msg(struct smu_context *smu, enum smu_message_type msg, uint32_t param, uint32_t *read_arg) +{ + int ret; + + switch (msg) { + case SMU_MSG_QueryValidMcaCount: + case SMU_MSG_QueryValidMcaCeCount: + case SMU_MSG_McaBankDumpDW: + case SMU_MSG_McaBankCeDumpDW: + case SMU_MSG_ClearMcaOnRead: + ret = smu_cmn_send_smc_msg_with_param(smu, msg, param, read_arg); + break; + default: + ret = -EPERM; + } + + return ret; +} static int smu_v13_0_6_post_init(struct smu_context *smu) { @@ -3921,6 +3939,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .reset_sdma = smu_v13_0_6_reset_sdma, .dpm_reset_vcn = smu_v13_0_6_reset_vcn, .post_init = smu_v13_0_6_post_init, + .ras_send_msg = smu_v13_0_6_ras_send_msg, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) -- cgit v1.2.3 From 071bba962456a46b261fcefe26bd533cbc059ea2 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 10 Oct 2025 17:23:33 +0530 Subject: drm/amdgpu: Reserve
discovery TMR only if needed For legacy SOCs, discovery binary is sideloaded. Instead of checking for binary blob, use a flag to determine if discovery region needs to be reserved. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 10 +++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 22 +++++++++------------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 13de62829f9c..4e75334f3b3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -298,10 +298,15 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, else vram_size <<= 20; + /* + * If in VRAM, discovery TMR is marked for reservation. If it is in system mem, + * then it is not required to be reserved. + */ if (sz_valid) { uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, adev->discovery.size, false); + adev->discovery.reserve_tmr = true; } else { ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary); } @@ -418,8 +423,11 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev, static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev) { - if (amdgpu_discovery == 2) + if (amdgpu_discovery == 2) { + /* Assume there is valid discovery TMR in VRAM even if binary is sideloaded */ + adev->discovery.reserve_tmr = true; return "amdgpu/ip_discovery.bin"; + } switch (adev->asic_type) { case CHIP_VEGA10: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index b1eec3af3c4a..4ce04486cc31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -36,6 +36,7 @@ struct amdgpu_discovery_info { struct ip_discovery_top *ip_top; uint32_t size; uint8_t *bin; + bool reserve_tmr; }; void amdgpu_discovery_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index a10b2982ca7a..0bd02234eca0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1769,18 +1769,14 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; } - if (!adev->gmc.is_app_apu) { - ret = amdgpu_bo_create_kernel_at( - adev, adev->gmc.real_vram_size - reserve_size, - reserve_size, &adev->mman.fw_reserved_memory, NULL); - if (ret) { - dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret); - amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, - NULL, NULL); - return ret; - } - } else { - DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n"); + ret = amdgpu_bo_create_kernel_at( + adev, adev->gmc.real_vram_size - reserve_size, reserve_size, + &adev->mman.fw_reserved_memory, NULL); + if (ret) { + dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, + NULL); + return ret; } return 0; @@ -1957,7 +1953,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) * If IP discovery enabled, a block of memory should be * reserved for IP discovey. 
*/ - if (adev->discovery.bin) { + if (adev->discovery.reserve_tmr) { r = amdgpu_ttm_reserve_tmr(adev); if (r) return r; -- cgit v1.2.3 From 737da5363cc07c96d59f2ebaf9f9f87235becf1d Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 10 Oct 2025 18:09:57 +0530 Subject: drm/amdgpu: update the functions to use amdgpu version of hmm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At times we need a bo reference for hmm. For that, add a new struct amdgpu_hmm_range, which holds an optional bo member alongside the hmm_range. Use amdgpu_hmm_range instead of hmm_range and make the bo an optional argument, so callers can choose whether the bo reference is taken or handle that explicitly themselves. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 31 +++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h | 19 +++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 7 +++--- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 1 - drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 1 - drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 14 +++++------ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 1 - 13 files changed, 56 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 9e120c934cc1..8bdfcde2029b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -71,7 +71,7 @@ struct kgd_mem { struct mutex lock; struct amdgpu_bo *bo; struct dma_buf *dmabuf; - struct hmm_range *range; + struct amdgpu_hmm_range *range; struct list_head attachments; /* protected by amdkfd_process_info.lock */ struct list_head validate_list; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d0b1468a2ae1..dbdf33bc03b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1057,7 +1057,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; struct ttm_operation_ctx ctx = { true, false }; - struct hmm_range *range; + struct amdgpu_hmm_range *range; int ret = 0; mutex_lock(&process_info->lock); @@ -1089,7 +1089,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, return 0; } - range = amdgpu_hmm_range_alloc(); + range = amdgpu_hmm_range_alloc(NULL); if (unlikely(!range)) { ret = -ENOMEM; goto unregister_out; } @@ -2573,7 +2573,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, } } - mem->range = amdgpu_hmm_range_alloc(); + mem->range = amdgpu_hmm_range_alloc(NULL); if (unlikely(!mem->range)) return -ENOMEM; /* Get updated user pages */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index a716c9886c74..2b5e7c46a39d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -38,7 +38,7 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo *bo; struct amdgpu_bo_va *bo_va; uint32_t priority; - struct hmm_range
*range; + struct amdgpu_hmm_range *range; bool user_invalidated; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c319d216421e..ecdfe6cb36cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -892,7 +891,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, bool userpage_invalidated = false; struct amdgpu_bo *bo = e->bo; - e->range = amdgpu_hmm_range_alloc(); + e->range = amdgpu_hmm_range_alloc(NULL); if (unlikely(!e->range)) return -ENOMEM; @@ -901,7 +900,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto out_free_user_pages; for (i = 0; i < bo->tbo.ttm->num_pages; i++) { - if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) { + if (bo->tbo.ttm->pages[i] != + hmm_pfn_to_page(e->range->hmm_range.hmm_pfns[i])) { userpage_invalidated = true; break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 4bc506f4924a..f2505ae5fd65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -531,7 +531,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_amdgpu_gem_userptr *args = data; struct amdgpu_fpriv *fpriv = filp->driver_priv; struct drm_gem_object *gobj; - struct hmm_range *range; + struct amdgpu_hmm_range *range; struct amdgpu_bo *bo; uint32_t handle; int r; @@ -572,7 +572,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, goto release_object; if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - range = amdgpu_hmm_range_alloc(); + range = amdgpu_hmm_range_alloc(NULL); if (unlikely(!range)) return -ENOMEM; r = amdgpu_ttm_tt_get_user_pages(bo, range); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index b582fd217bd0..04f02e0c8bb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -168,12 +168,13 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo) int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, void *owner, - struct hmm_range *hmm_range) + struct amdgpu_hmm_range *range) { unsigned long end; unsigned long timeout; unsigned long *pfns; int r = 0; + struct hmm_range *hmm_range = &range->hmm_range; pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); if (unlikely(!pfns)) { @@ -226,25 +227,33 @@ out_free_range: return r; } -bool amdgpu_hmm_range_valid(struct hmm_range *hmm_range) +bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range) { - if (!hmm_range) + if (!range) return false; - return !mmu_interval_read_retry(hmm_range->notifier, - hmm_range->notifier_seq); + return !mmu_interval_read_retry(range->hmm_range.notifier, + range->hmm_range.notifier_seq); } -struct hmm_range *amdgpu_hmm_range_alloc(void) +struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo) { - return kzalloc(sizeof(struct hmm_range), GFP_KERNEL); + struct amdgpu_hmm_range *range; + + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (!range) + return NULL; + + range->bo = amdgpu_bo_ref(bo); + return range; } -void amdgpu_hmm_range_free(struct hmm_range *hmm_range) +void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range) { - if (!hmm_range) + if (!range) return; - kvfree(hmm_range->hmm_pfns); - kfree(hmm_range); + kvfree(range->hmm_range.hmm_pfns); + amdgpu_bo_unref(&range->bo); 
+ kfree(range); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h index e85f912b8938..140bc9cd57b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h @@ -31,15 +31,20 @@ #include #include +struct amdgpu_hmm_range { + struct hmm_range hmm_range; + struct amdgpu_bo *bo; +}; + int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, void *owner, - struct hmm_range *hmm_range); + struct amdgpu_hmm_range *range); #if defined(CONFIG_HMM_MIRROR) -bool amdgpu_hmm_range_valid(struct hmm_range *hmm_range); -struct hmm_range *amdgpu_hmm_range_alloc(void); -void amdgpu_hmm_range_free(struct hmm_range *hmm_range); +bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range); +struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo); +void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range); int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_hmm_unregister(struct amdgpu_bo *bo); #else @@ -52,17 +57,17 @@ static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr) static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {} -static inline bool amdgpu_hmm_range_valid(struct hmm_range *hmm_range) +static inline bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range) { return false; } -static inline struct hmm_range *amdgpu_hmm_range_alloc(void) +static inline struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo) { return NULL; } -static inline void amdgpu_hmm_range_free(struct hmm_range *hmm_range) {} +static inline void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range) {} #endif #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0bd02234eca0..2f2a88979644 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -709,7 +709,7 @@ struct amdgpu_ttm_tt { * that range is a valid memory and it is freed too. */ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct hmm_range *range) + struct amdgpu_hmm_range *range) { struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); @@ -762,12 +762,12 @@ out_unlock: * that backs user memory and will ultimately be mapped into the device * address space. */ -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range) +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range) { unsigned long i; for (i = 0; i < ttm->num_pages; ++i) - ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_pfns[i]) : NULL; + ttm->pages[i] = range ? 
hmm_pfn_to_page(range->hmm_range.hmm_pfns[i]) : NULL; } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index d75866a389f1..68e225efec3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -28,6 +28,7 @@ #include #include #include "amdgpu_vram_mgr.h" +#include "amdgpu_hmm.h" #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) @@ -186,16 +187,16 @@ uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct hmm_range *range); + struct amdgpu_hmm_range *range); #else static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct hmm_range *range) + struct amdgpu_hmm_range *range) { return -EPERM; } #endif -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range); +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range); int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo, uint64_t *user_addr); int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 59a5a3fea65d..46c84fc60af1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -21,7 +21,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ #include -#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 2eebf67f9c2c..2b7fd442d29c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -31,7 +31,6 @@ #include #include #include -#include #include "kfd_priv.h" #include "kfd_svm.h" diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 91609dd5730f..f041643308ca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1698,7 +1698,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, start = map_start << PAGE_SHIFT; end = (map_last + 1) << PAGE_SHIFT; for (addr = start; !r && addr < end; ) { - struct hmm_range *hmm_range = NULL; + struct amdgpu_hmm_range *range = NULL; unsigned long map_start_vma; unsigned long map_last_vma; struct vm_area_struct *vma; @@ -1737,13 +1737,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } WRITE_ONCE(p->svms.faulting_task, current); - hmm_range = amdgpu_hmm_range_alloc(); + range = amdgpu_hmm_range_alloc(NULL); r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages, readonly, owner, - hmm_range); + range); WRITE_ONCE(p->svms.faulting_task, NULL); if (r) { - amdgpu_hmm_range_free(hmm_range); + amdgpu_hmm_range_free(range); pr_debug("failed %d to get svm range pages\n", r); } } else { @@ -1753,7 +1753,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, if (!r) { offset = (addr >> PAGE_SHIFT) - prange->start; r = svm_range_dma_map(prange, ctx->bitmap, offset, npages, - hmm_range->hmm_pfns); + range->hmm_range.hmm_pfns); if (r) pr_debug("failed %d to dma map range\n", r); } @@ -1764,12 +1764,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm, * Overrride return value to TRY AGAIN only if prior returns * were successful */ - if (hmm_range && !amdgpu_hmm_range_valid(hmm_range) && !r) { + if (range && !amdgpu_hmm_range_valid(range) && !r) { pr_debug("hmm update the range, need 
validate again\n"); r = -EAGAIN; } /* Free the hmm range */ - amdgpu_hmm_range_free(hmm_range); + amdgpu_hmm_range_free(range); if (!r && !list_empty(&prange->child_list)) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 01c7a4877904..a63dfc95b602 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -31,7 +31,6 @@ #include #include #include -#include #include "amdgpu.h" #include "kfd_priv.h" -- cgit v1.2.3 From 42f148788469792df207751e2339ef2bb8a1e33e Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 25 Sep 2025 14:31:54 +0530 Subject: drm/amdgpu/userqueue: validate userptrs for userqueues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit userptrs could be changed by the user at any time and hence while locking all the bos before GPU start processing validate all the userptr bos. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 79 +++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 33c54ecd46d5..9d4751a39c20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "amdgpu_vm.h" #include "amdgpu_userq.h" +#include "amdgpu_hmm.h" #include "amdgpu_userq_fence.h" u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev) @@ -856,12 +857,21 @@ static int amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr) { struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + bool invalidated = false, new_addition = false; + struct ttm_operation_ctx ctx = { true, false }; struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_hmm_range *range; struct amdgpu_vm *vm = &fpriv->vm; + unsigned long key, tmp_key; struct amdgpu_bo_va *bo_va; + struct amdgpu_bo *bo; struct drm_exec exec; + struct xarray xa; int ret; + xa_init(&xa); + +retry_lock: drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { ret = amdgpu_vm_lock_pd(vm, &exec, 1); @@ -888,10 +898,72 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr) goto unlock_all; } + if (invalidated) { + xa_for_each(&xa, tmp_key, range) { + bo = range->bo; + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + goto unlock_all; + + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range); + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + goto unlock_all; + } + invalidated = false; + } + ret = amdgpu_vm_handle_moved(adev, vm, NULL); if (ret) goto unlock_all; + key = 0; + /* Validate User Ptr BOs */ + list_for_each_entry(bo_va, &vm->done, base.vm_status) { + bo = bo_va->base.bo; + + if (!amdgpu_ttm_tt_is_userptr(bo->tbo.ttm)) + continue; + + range = xa_load(&xa, key); + if (range && range->bo != bo) { + xa_erase(&xa, key); + amdgpu_hmm_range_free(range); + range = NULL; + } + + if (!range) { + range = amdgpu_hmm_range_alloc(bo); + if (!range) { + ret = -ENOMEM; + goto unlock_all; + } + + xa_store(&xa, key, range, GFP_KERNEL); + new_addition = true; + } + key++; + } + + if (new_addition) { + drm_exec_fini(&exec); + xa_for_each(&xa, tmp_key, range) { + if (!range) + continue; + bo = range->bo; + ret = amdgpu_ttm_tt_get_user_pages(bo, range); + 
if (ret) + goto unlock_all; + } + + invalidated = true; + new_addition = false; + goto retry_lock; + } + ret = amdgpu_vm_update_pdes(adev, vm, false); if (ret) goto unlock_all; @@ -911,6 +983,13 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr) unlock_all: drm_exec_fini(&exec); + xa_for_each(&xa, tmp_key, range) { + if (!range) + continue; + bo = range->bo; + amdgpu_hmm_range_free(range); + } + xa_destroy(&xa); return ret; } -- cgit v1.2.3 From ace232eff50e8c898103c56b3b5303e776616274 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 30 Sep 2025 10:47:49 +0800 Subject: drm/amdgpu: Add ras module files into amdgpu Add ras module files into amdgpu. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 8 +++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c | 2 +- drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c | 2 +- drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h | 1 + drivers/gpu/drm/amd/ras/rascore/ras_aca.c | 2 +- drivers/gpu/drm/amd/ras/rascore/ras_cmd.c | 9 ++------- drivers/gpu/drm/amd/ras/rascore/ras_core.c | 3 +-- drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c | 4 ++-- drivers/gpu/drm/amd/ras/rascore/ras_mp1.c | 2 +- drivers/gpu/drm/amd/ras/rascore/ras_psp.c | 4 ++-- 11 files changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 64e7acff8f18..ebe08947c5a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -37,7 +37,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_DISPLAY_PATH)/modules/inc \ -I$(FULL_AMD_DISPLAY_PATH)/dc \ -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \ - -I$(FULL_AMD_PATH)/amdkfd + -I$(FULL_AMD_PATH)/amdkfd \ + -I$(FULL_AMD_PATH)/ras/ras_mgr # Locally disable W=1 warnings enabled in drm subsystem Makefile subdir-ccflags-y += -Wno-override-init @@ -324,4 +325,9 @@ amdgpu-y += \ isp_v4_1_1.o endif +AMD_GPU_RAS_PATH := ../ras +AMD_GPU_RAS_FULL_PATH := $(FULL_AMD_PATH)/ras +include $(AMD_GPU_RAS_FULL_PATH)/Makefile +amdgpu-y += $(AMD_GPU_RAS_FILES) + obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 6cf0dfd38be8..9f21b6cf8724 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -504,6 +504,7 @@ struct ras_critical_region { }; struct amdgpu_ras { + void *ras_mgr; /* ras infrastructure */ /* for ras itself. 
*/ uint32_t features; diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c index 195ca51a96d5..4706e737969a 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c @@ -68,7 +68,7 @@ static struct ras_core_context *ras_cmd_get_ras_core(uint64_t dev_handle) if (!dev_handle || (dev_handle == RAS_CMD_DEV_HANDLE_MAGIC)) return NULL; - ras_core = (struct ras_core_context *)(dev_handle ^ RAS_CMD_DEV_HANDLE_MAGIC); + ras_core = (struct ras_core_context *)(uintptr_t)(dev_handle ^ RAS_CMD_DEV_HANDLE_MAGIC); if (ras_cmd_get_dev_handle(ras_core) == dev_handle) return ras_core; diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c index a038c87c045d..e66d915831a9 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c @@ -373,7 +373,7 @@ struct amdgpu_ras_mgr *amdgpu_ras_mgr_get_context(struct amdgpu_device *adev) return (struct amdgpu_ras_mgr *)adev->psp.ras_context.ras->ras_mgr; } -static const struct amd_ip_funcs ras_v1_0_ip_funcs = { +static const struct amd_ip_funcs __maybe_unused ras_v1_0_ip_funcs = { .name = "ras_v1_0", .sw_init = amdgpu_ras_mgr_sw_init, .sw_fini = amdgpu_ras_mgr_sw_fini, diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h b/drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h index c48ff26525d6..8156531a7b63 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h +++ b/drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h @@ -27,6 +27,7 @@ #include #include #include +#include #include "amdgpu.h" #define RAS_DEV_ERR(device, fmt, ...) \ diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_aca.c b/drivers/gpu/drm/amd/ras/rascore/ras_aca.c index f9b8a1fa4f1f..e433c70d2989 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_aca.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_aca.c @@ -350,7 +350,7 @@ static int aca_banks_update(struct ras_core_context *ras_core, struct aca_bank_ecc bank_ecc; struct ras_log_batch_tag *batch_tag = NULL; u32 count = 0; - int ret; + int ret = 0; int i; mutex_lock(&ras_core->ras_aca.bank_op_lock); diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c index 697619b30ea2..6fe3b115986c 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c @@ -32,7 +32,7 @@ static int ras_cmd_add_device(struct ras_core_context *ras_core) { INIT_LIST_HEAD(&ras_core->ras_cmd.head); ras_core->ras_cmd.ras_core = ras_core; - ras_core->ras_cmd.dev_handle = (uint64_t)ras_core ^ RAS_CMD_DEV_HANDLE_MAGIC; + ras_core->ras_cmd.dev_handle = (uintptr_t)ras_core ^ RAS_CMD_DEV_HANDLE_MAGIC; return 0; } @@ -212,11 +212,6 @@ static int ras_cmd_get_cper_records(struct ras_core_context *ras_core, if (!req->buf_size || !req->buf_ptr || !req->cper_num) return RAS_CMD__ERROR_INVALID_INPUT_DATA; - if (!access_ok((void *)req->buf_ptr, req->buf_size)) { - RAS_DEV_ERR(ras_core->dev, "Invalid cper buffer memory!\n"); - return RAS_CMD__ERROR_INVALID_INPUT_DATA; - } - buffer = kzalloc(req->buf_size, GFP_KERNEL); if (!buffer) return RAS_CMD__ERROR_GENERIC; @@ -240,7 +235,7 @@ static int ras_cmd_get_cper_records(struct ras_core_context *ras_core, } if ((ret && (ret != -ENOMEM)) || - copy_to_user((void *)req->buf_ptr, buffer, offset)) { + copy_to_user(u64_to_user_ptr(req->buf_ptr), buffer, offset)) { kfree(buffer); return RAS_CMD__ERROR_GENERIC; } diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c 
b/drivers/gpu/drm/amd/ras/rascore/ras_core.c index 41fc9f0d84e4..45fc0608043f 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c @@ -68,8 +68,7 @@ int ras_core_convert_timestamp_to_time(struct ras_core_context *ras_core, int seconds_per_minute = 60; int days, remaining_seconds; - days = timestamp / seconds_per_day; - remaining_seconds = timestamp % seconds_per_day; + days = div64_u64_rem(timestamp, seconds_per_day, (uint64_t *)&remaining_seconds); /* utc_timestamp follows the Unix epoch */ year = 1970; diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c b/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c index bca094058f91..d0621464f1a7 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c @@ -67,7 +67,7 @@ static int ras_log_ring_add_data(struct ras_core_context *ras_core, { struct ras_log_ring *log_ring = &ras_core->ras_log_ring; unsigned long flags = 0; - int ret; + int ret = 0; if (batch_tag && (batch_tag->sub_seqno >= MAX_RECORD_PER_BATCH)) { RAS_DEV_ERR(ras_core->dev, @@ -200,7 +200,7 @@ struct ras_log_batch_tag *ras_log_ring_create_batch_tag(struct ras_core_context { struct ras_log_ring *log_ring = &ras_core->ras_log_ring; struct ras_log_batch_tag *batch_tag; - unsigned long flags; + unsigned long flags = 0; batch_tag = kzalloc(sizeof(*batch_tag), GFP_KERNEL); if (!batch_tag) diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_mp1.c b/drivers/gpu/drm/amd/ras/rascore/ras_mp1.c index 92f250e2466d..f3321df85021 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_mp1.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_mp1.c @@ -52,7 +52,7 @@ int ras_mp1_get_bank_count(struct ras_core_context *ras_core, } int ras_mp1_dump_bank(struct ras_core_context *ras_core, - enum ras_err_type type, u32 idx, u32 reg_idx, u64 *val) + u32 type, u32 idx, u32 reg_idx, u64 *val) { struct ras_mp1 *mp1 = &ras_core->ras_mp1; diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_psp.c b/drivers/gpu/drm/amd/ras/rascore/ras_psp.c index c94effd4b114..ccdb42d2dd60 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_psp.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_psp.c @@ -173,7 +173,7 @@ static uint32_t __get_ring_frame_slot(struct ras_core_context *ras_core) ras_ring_wptr_dw = psp->ip_func->psp_ras_ring_wptr_get(ras_core); - return (ras_ring_wptr_dw << 2) / sizeof(struct psp_gfx_rb_frame); + return div64_u64((ras_ring_wptr_dw << 2), sizeof(struct psp_gfx_rb_frame)); } static int __set_ring_frame_slot(struct ras_core_context *ras_core, @@ -200,7 +200,7 @@ static int write_frame_to_ras_psp_ring(struct ras_core_context *ras_core, return -ENOMEM; max_frame_slot = - ring_mem->mem_size / sizeof(struct psp_gfx_rb_frame); + div64_u64(ring_mem->mem_size, sizeof(struct psp_gfx_rb_frame)); rb_frame = (struct psp_gfx_rb_frame *)ring_mem->mem_cpu_addr; -- cgit v1.2.3 From 72ea12f6be3bb57c2118189f7a7cfeb1f7c2748c Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 18 Jun 2025 10:45:55 -0400 Subject: drm/amdgpu: update remove after reset flag for MES remove queue Remove queue after reset flag is required to remove a queue that has been successfully reset to clean up the MES' internal state. 
Signed-off-by: Jonathan Kim Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 + drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 4 ++++ drivers/gpu/drm/amd/include/mes_v11_api_def.h | 3 ++- drivers/gpu/drm/amd/include/mes_v12_api_def.h | 3 ++- 5 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 97c137c90f97..9c27a68cb82f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -239,6 +239,7 @@ struct mes_add_queue_input { struct mes_remove_queue_input { uint32_t doorbell_offset; uint64_t gang_context_addr; + bool remove_queue_after_reset; }; struct mes_map_legacy_queue_input { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index da575bb1377f..3a52754b5cad 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -369,6 +369,7 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, struct mes_remove_queue_input *input) { union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + uint32_t mes_rev = mes->sched_version & AMDGPU_MES_VERSION_MASK; memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); @@ -379,6 +380,9 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; + if (mes_rev >= 0x60) + mes_remove_queue_pkt.remove_queue_after_reset = input->remove_queue_after_reset; + return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 7f3512d9de07..744e95d3984a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -361,6 +361,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, struct mes_remove_queue_input *input) { union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + uint32_t mes_rev = mes->sched_version & AMDGPU_MES_VERSION_MASK; memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); @@ -371,6 +372,9 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; + if (mes_rev >= 0x5a) + mes_remove_queue_pkt.remove_queue_after_reset = input->remove_queue_after_reset; + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index ab1cfc92dbeb..f9629d42ada2 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -345,7 +345,8 @@ union MESAPI__REMOVE_QUEUE { uint32_t unmap_kiq_utility_queue : 1; uint32_t preempt_legacy_gfx_queue : 1; uint32_t unmap_legacy_queue : 1; - uint32_t reserved : 28; + uint32_t remove_queue_after_reset : 1; + uint32_t reserved : 27; }; struct MES_API_STATUS api_status; diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index 69611c7e30e3..2f12cba4eb66 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ 
b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -399,7 +399,8 @@ union MESAPI__REMOVE_QUEUE { uint32_t unmap_kiq_utility_queue : 1; uint32_t preempt_legacy_gfx_queue : 1; uint32_t unmap_legacy_queue : 1; - uint32_t reserved : 28; + uint32_t remove_queue_after_reset : 1; + uint32_t reserved : 27; }; struct MES_API_STATUS api_status; -- cgit v1.2.3 From 4b6ec94fdae2407d9b8e69e3ec5f879e72ad667e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 9 Oct 2025 15:59:07 -0500 Subject: drm/amd: Drop calls to restore power limit and clock from smu_resume() User requested power limits and clock settings are already restored as part of smu_restore_dpm_user_profile(). It's unnecessary to call the same restore as part of smu_resume(). Revert the following commits to drop that extra restore: commit ed4efe426a49 ("drm/amd: Restore cached power limit during resume") commit 796ff8a7e01b ("drm/amd: Restore cached manual clock settings during resume") commit f9b80514a722 ("drm/amd: Only restore cached manual clock settings in restore if OD enabled") Suggested-by: Lijo Lazar Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 3f14fdaed332..4317da6f7c38 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2239,7 +2239,6 @@ static int smu_resume(struct amdgpu_ip_block *ip_block) int ret; struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; - struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); if (amdgpu_sriov_multi_vf_mode(adev)) return 0; @@ -2271,18 +2270,6 @@ static int smu_resume(struct amdgpu_ip_block *ip_block) adev->pm.dpm_enabled = true; - if (smu->current_power_limit) { - ret = smu_set_power_limit(smu, SMU_DEFAULT_PPT_LIMIT, smu->current_power_limit); - if (ret && ret != -EOPNOTSUPP) - return ret; - } - - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL && smu->od_enabled) { - ret = smu_od_edit_dpm_table(smu, PP_OD_COMMIT_DPM_TABLE, NULL, 0); - if (ret) - return ret; - } - dev_info(adev->dev, "SMU is resumed successfully!\n"); return 0; -- cgit v1.2.3 From b03be3ef2d6aa46c14522a7792d515c61849066c Mon Sep 17 00:00:00 2001 From: Marlon Henrique Sanches Date: Mon, 13 Oct 2025 15:31:23 -0300 Subject: drm/i915/gem: fix typo in comment (I915_EXEC_NO_RELOC) The comment referenced the flag name incorrectly as 'I915_EXEC_NORELOC' (missing underscore). This patch corrects the spelling in the comment only; there is no functional change. Signed-off-by: Marlon Henrique Sanches Link: https://lore.kernel.org/r/20251013183123.438573-1-marlonsanches@estudante.ufscar.br Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index f243f8a5215d..4eafc167a299 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -142,7 +142,7 @@ enum { * we want to leave the object where it is and for all the existing relocations * to match. If the object is given a new address, or if userspace thinks the * object is elsewhere, we have to parse all the relocation entries and update - * the addresses. 
Userspace can set the I915_EXEC_NORELOC flag to hint that + * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that * all the target addresses in all of its objects match the value in the * relocation entries and that they all match the presumed offsets given by the * list of execbuffer objects. Using this knowledge, we know that if we haven't -- cgit v1.2.3 From dd1409b62e46bac2c46ee2fa50d2f6d2f9eb006a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 10 Oct 2025 14:07:51 +0300 Subject: drm/i915: include gen 2 in HAS_128_BYTE_Y_TILING() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gen 2 platforms actually have 128-byte Y-tile, it's just different from the 128-byte Y-tile on i945+. Make the HAS_128_BYTE_Y_TILING() feature check macro and its usage slightly less convoluted by including gen 2 in it. i915_tiling_ok() would strictly not need changing, but separate the if clauses to emphasize gen 2 X-tile also being 128 bytes. Suggested-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://lore.kernel.org/r/41bf9d67a11f38f4ab0f82740f38d5c8fe0bb58b.1760094361.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_fb.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 5 +++-- drivers/gpu/drm/i915/i915_drv.h | 3 +-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 99823ef42ef1..3bfd211d64ba 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -814,7 +814,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) return 64; fallthrough; case I915_FORMAT_MOD_Y_TILED: - if (DISPLAY_VER(display) == 2 || HAS_128_BYTE_Y_TILING(i915)) + if (HAS_128_BYTE_Y_TILING(i915)) return 128; else return 512; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index 5a296ba3758a..567b97d28d30 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -145,8 +145,9 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, return false; } - if (GRAPHICS_VER(i915) == 2 || - (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915))) + if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915)) + tile_width = 128; + else if (GRAPHICS_VER(i915) == 2) tile_width = 128; else tile_width = 512; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6e159bb8ad2f..4b66e5d017d9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -602,8 +602,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte * rows, which changed the alignment requirements and fence programming. 
*/ -#define HAS_128_BYTE_Y_TILING(i915) (GRAPHICS_VER(i915) != 2 && \ - !(IS_I915G(i915) || IS_I915GM(i915))) +#define HAS_128_BYTE_Y_TILING(i915) (!IS_I915G(i915) && !IS_I915GM(i915)) #define HAS_RC6(i915) (INTEL_INFO(i915)->has_rc6) #define HAS_RC6p(i915) (INTEL_INFO(i915)->has_rc6p) -- cgit v1.2.3 From 129c0aa5eb20df394d91764baa6a1c1b021bbead Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 10 Oct 2025 14:07:52 +0300 Subject: drm/i915/display: duplicate 128-byte Y-tiling feature check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should try to get rid of checks that depend on struct drm_i915_private (or struct xe_device) in display code. HAS_128_BYTE_Y_TILING() is one of them. In the interest of simplicity, just duplicate the check as HAS_128B_Y_TILING() in display. v2: gen2 also has 128-byte Y-tile Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://lore.kernel.org/r/2a7877f8f1d11114c1a17869bd24d83e13b1fac2.1760094361.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display_device.h | 1 + drivers/gpu/drm/i915/display/intel_fb.c | 3 +-- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 0e062753cf9b..9960ac13a6dd 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -142,6 +142,7 @@ struct intel_display_platforms { func(overlay_needs_physical); \ func(supports_tv); +#define HAS_128B_Y_TILING(__display) (!(__display)->platform.i915g && !(__display)->platform.i915gm) #define HAS_4TILE(__display) ((__display)->platform.dg2 || DISPLAY_VER(__display) >= 14) #define HAS_ASYNC_FLIPS(__display) (DISPLAY_VER(__display) >= 5) #define HAS_AS_SDP(__display) (DISPLAY_VER(__display) >= 13) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 3bfd211d64ba..7388791dfde0 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -777,7 +777,6 @@ unsigned int intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) { struct intel_display *display = to_intel_display(fb->dev); - struct drm_i915_private *i915 = to_i915(display->drm); unsigned int cpp = fb->format->cpp[color_plane]; switch (fb->modifier) { @@ -814,7 +813,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) return 64; fallthrough; case I915_FORMAT_MOD_Y_TILED: - if (HAS_128_BYTE_Y_TILING(i915)) + if (HAS_128B_Y_TILING(display)) return 128; else return 512; diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index b8269391bc69..be3edf20de22 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -36,6 +36,5 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) #define IS_MOBILE(xe) (xe && 0) #define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe)) -#define HAS_128_BYTE_Y_TILING(xe) (xe || 1) #endif -- cgit v1.2.3 From 3a5c5c472c0e94c7cb8a5173b839612f3b042487 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 13 Oct 2025 17:45:52 +0300 Subject: drm/i915/display: add HAS_AUX_CCS() feature check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should try to get rid of checks that depend on 
struct drm_i915_private (or struct xe_device) in display code. HAS_FLAT_CCS() is one of them. In the interest of simplicity, add a reversed HAS_AUX_CCS() feature check macro, as that's what we mostly use it for in display. v2: include adl-p (Ville) Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://lore.kernel.org/r/20251013144552.1710851-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display_device.h | 1 + drivers/gpu/drm/i915/display/intel_fb.c | 4 +--- drivers/gpu/drm/i915/display/skl_universal_plane.c | 4 ++-- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 2 -- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 9960ac13a6dd..8fdb8a0a4282 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -146,6 +146,7 @@ struct intel_display_platforms { #define HAS_4TILE(__display) ((__display)->platform.dg2 || DISPLAY_VER(__display) >= 14) #define HAS_ASYNC_FLIPS(__display) (DISPLAY_VER(__display) >= 5) #define HAS_AS_SDP(__display) (DISPLAY_VER(__display) >= 13) +#define HAS_AUX_CCS(__display) (IS_DISPLAY_VER(__display, 9, 12) || (__display)->platform.alderlake_p || (__display)->platform.meteorlake) #define HAS_BIGJOINER(__display) (DISPLAY_VER(__display) >= 11 && HAS_DSC(__display)) #define HAS_CDCLK_CRAWL(__display) (DISPLAY_INFO(__display)->has_cdclk_crawl) #define HAS_CDCLK_SQUASH(__display) (DISPLAY_INFO(__display)->has_cdclk_squash) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 7388791dfde0..9c256a2805e4 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -547,8 +547,6 @@ static bool plane_has_modifier(struct intel_display *display, u8 plane_caps, const struct intel_modifier_desc *md) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (!IS_DISPLAY_VER(display, md->display_ver.from, md->display_ver.until)) return false; @@ -560,7 +558,7 @@ static bool plane_has_modifier(struct intel_display *display, * where supported.
*/ if (intel_fb_is_ccs_modifier(md->modifier) && - HAS_FLAT_CCS(i915) != !md->ccs.packed_aux_planes) + HAS_AUX_CCS(display) != !!md->ccs.packed_aux_planes) return false; if (md->modifier == I915_FORMAT_MOD_4_TILED_BMG_CCS && diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index e13fb781e7b2..0319174adf95 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1572,7 +1572,7 @@ icl_plane_update_noarm(struct intel_dsb *dsb, } /* FLAT CCS doesn't need to program AUX_DIST */ - if (!HAS_FLAT_CCS(to_i915(display->drm)) && DISPLAY_VER(display) < 20) + if (HAS_AUX_CCS(display)) intel_de_write_dsb(display, dsb, PLANE_AUX_DIST(pipe, plane_id), skl_plane_aux_dist(plane_state, color_plane)); @@ -2930,7 +2930,7 @@ skl_universal_plane_create(struct intel_display *display, caps = skl_plane_caps(display, pipe, plane_id); /* FIXME: xe has problems with AUX */ - if (!IS_ENABLED(I915) && !HAS_FLAT_CCS(to_i915(display->drm))) + if (!IS_ENABLED(I915) && HAS_AUX_CCS(display)) caps &= ~(INTEL_PLANE_CAP_CCS_RC | INTEL_PLANE_CAP_CCS_RC_CC | INTEL_PLANE_CAP_CCS_MC); diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index be3edf20de22..7c657ea98a44 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -35,6 +35,4 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) #define IS_MOBILE(xe) (xe && 0) -#define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe)) - #endif -- cgit v1.2.3 From 56d9d03450f065fd31e56de5fafa7d498b318531 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 10 Oct 2025 11:00:21 +0300 Subject: drm/i915: drop unused non-i915 IS_ macros The IS_LUNARLAKE(), IS_BATTLEMAGE(), and IS_PANTHERLAKE() macros were added for compatibility with the xe driver, for display needs, even though i915 does not support the platforms in question. Display has since moved away from the macros, and they are no longer needed. Remove. Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20251010080021.1087315-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4b66e5d017d9..95f9ddf22ce4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -488,16 +488,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ALDERLAKE_P(i915) IS_PLATFORM(i915, INTEL_ALDERLAKE_P) #define IS_DG2(i915) IS_PLATFORM(i915, INTEL_DG2) #define IS_METEORLAKE(i915) IS_PLATFORM(i915, INTEL_METEORLAKE) -/* - * Display code shared by i915 and Xe relies on macros like IS_LUNARLAKE, - * so we need to define these even on platforms that the i915 base driver - * doesn't support. Ensure the parameter is used in the definition to - * avoid 'unused variable' warnings when compiling the shared display code - * for i915. 
- */ -#define IS_LUNARLAKE(i915) (0 && i915) -#define IS_BATTLEMAGE(i915) (0 && i915) -#define IS_PANTHERLAKE(i915) (0 && i915) #define IS_ARROWLAKE_H(i915) \ IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_ARL_H) -- cgit v1.2.3 From f65223ba4e832c0df5729670e6afd3968dc44389 Mon Sep 17 00:00:00 2001 From: Vinod Govindapillai Date: Wed, 8 Oct 2025 00:43:17 +0300 Subject: drm/i915/fbc: update the impacted platforms in wa_22014263786 wa_22014263786 is not applicable to the BMG and hence exclude it from the wa. v2: Limit this wa to display verion 11 to 14, drop DG2 from the exclusion list, use intel_display_wa (Lucas) v3: simplify the wa handling loop (Jani) Description of wa moved to place where wa is applied (Ville) v4: drop the platforms line from wa comments (Lucas) Bspec: 74212, 66624 Signed-off-by: Vinod Govindapillai Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251007214317.875781-1-vinod.govindapillai@intel.com --- drivers/gpu/drm/i915/display/intel_display_wa.c | 2 ++ drivers/gpu/drm/i915/display/intel_display_wa.h | 1 + drivers/gpu/drm/i915/display/intel_fbc.c | 9 ++++++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_wa.c b/drivers/gpu/drm/i915/display/intel_display_wa.c index 31cd2c9cd488..c528aaa679ca 100644 --- a/drivers/gpu/drm/i915/display/intel_display_wa.c +++ b/drivers/gpu/drm/i915/display/intel_display_wa.c @@ -67,6 +67,8 @@ bool __intel_display_wa(struct intel_display *display, enum intel_display_wa wa, return intel_display_needs_wa_16025573575(display); case INTEL_DISPLAY_WA_14011503117: return DISPLAY_VER(display) == 13; + case INTEL_DISPLAY_WA_22014263786: + return IS_DISPLAY_VERx100(display, 1100, 1400); default: drm_WARN(display->drm, 1, "Missing Wa number: %s\n", name); break; diff --git a/drivers/gpu/drm/i915/display/intel_display_wa.h b/drivers/gpu/drm/i915/display/intel_display_wa.h index abc1df83f066..3644e8e2b724 100644 --- a/drivers/gpu/drm/i915/display/intel_display_wa.h +++ b/drivers/gpu/drm/i915/display/intel_display_wa.h @@ -25,6 +25,7 @@ enum intel_display_wa { INTEL_DISPLAY_WA_16023588340, INTEL_DISPLAY_WA_16025573575, INTEL_DISPLAY_WA_14011503117, + INTEL_DISPLAY_WA_22014263786, }; bool __intel_display_wa(struct intel_display *display, enum intel_display_wa wa, const char *name); diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 4edb4342833e..83903bb696ff 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -928,9 +928,12 @@ static void intel_fbc_program_workarounds(struct intel_fbc *fbc) if (IS_DISPLAY_VER(display, 11, 12)) intel_de_rmw(display, ILK_DPFC_CHICKEN(fbc->id), 0, DPFC_CHICKEN_COMP_DUMMY_PIXEL); - - /* Wa_22014263786:icl,jsl,tgl,dg1,rkl,adls,adlp,mtl */ - if (DISPLAY_VER(display) >= 11 && !display->platform.dg2) + /* + * Wa_22014263786 + * Fixes: Screen flicker with FBC and Package C state enabled + * Workaround: Forced SLB invalidation before start of new frame. 
+ */ + if (intel_display_wa(display, 22014263786)) intel_de_rmw(display, ILK_DPFC_CHICKEN(fbc->id), 0, DPFC_CHICKEN_FORCE_SLB_INVALIDATION); -- cgit v1.2.3 From 88ad12c973a204eafecdf8da43a75000faf5f1d7 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 15:54:40 +0300 Subject: drm/i915/display: Sanitize PHY_C20_VDR_CUSTOM_SERDES_RATE/DP_RATE field macros Rename the PHY_C20_CUSTOM_SERDES / PHY_C20_CUSTOM_SERDES_MASK register field names to PHY_C20_DP_RATE / PHY_C20_DP_RATE_MASK, and move the definitions under the actual register containing the fields. Reviewed-by: Luca Coelho Signed-off-by: Imre Deak Signed-off-by: Mika Kahola Link: https://lore.kernel.org/r/20251015125446.3931198-2-mika.kahola@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 6 +++--- drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index a2d2cecf7121..0d83145eff41 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2700,12 +2700,12 @@ static void intel_c20_pll_program(struct intel_display *display, /* 5. For DP or 6. For HDMI */ if (is_dp) { intel_cx0_rmw(encoder, owned_lane_mask, PHY_C20_VDR_CUSTOM_SERDES_RATE, - BIT(6) | PHY_C20_CUSTOM_SERDES_MASK, - BIT(6) | PHY_C20_CUSTOM_SERDES(intel_c20_get_dp_rate(port_clock)), + BIT(6) | PHY_C20_DP_RATE_MASK, + BIT(6) | PHY_C20_DP_RATE(intel_c20_get_dp_rate(port_clock)), MB_WRITE_COMMITTED); } else { intel_cx0_rmw(encoder, owned_lane_mask, PHY_C20_VDR_CUSTOM_SERDES_RATE, - BIT(7) | PHY_C20_CUSTOM_SERDES_MASK, + BIT(7) | PHY_C20_DP_RATE_MASK, is_hdmi_frl(port_clock) ? BIT(7) : 0, MB_WRITE_COMMITTED); diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h index 77eae1d845f7..25ab8808e548 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h @@ -298,10 +298,10 @@ #define PHY_C20_RD_DATA_L 0xC08 #define PHY_C20_RD_DATA_H 0xC09 #define PHY_C20_VDR_CUSTOM_SERDES_RATE 0xD00 +#define PHY_C20_DP_RATE_MASK REG_GENMASK8(4, 1) +#define PHY_C20_DP_RATE(val) REG_FIELD_PREP8(PHY_C20_DP_RATE_MASK, val) #define PHY_C20_VDR_HDMI_RATE 0xD01 #define PHY_C20_CONTEXT_TOGGLE REG_BIT8(0) -#define PHY_C20_CUSTOM_SERDES_MASK REG_GENMASK8(4, 1) -#define PHY_C20_CUSTOM_SERDES(val) REG_FIELD_PREP8(PHY_C20_CUSTOM_SERDES_MASK, val) #define PHY_C20_VDR_CUSTOM_WIDTH 0xD02 #define PHY_C20_CUSTOM_WIDTH_MASK REG_GENMASK(1, 0) #define PHY_C20_CUSTOM_WIDTH(val) REG_FIELD_PREP8(PHY_C20_CUSTOM_WIDTH_MASK, val) -- cgit v1.2.3 From cb2f168b1533088e2a71f97e3c821c38fea61d97 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 15:54:41 +0300 Subject: drm/i915/display: Sanitize PHY_C20_VDR_CUSTOM_SERDES_RATE/IS_DP flag macro Define PHY_C20_IS_DP, so it can be used instead of the plain bit number. 
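For illustration only (not part of this patch), decoding the current serdes-rate value with the named flag might look as below; the sketch assumes the context of intel_cx0_phy.c, where intel_cx0_read() and REG_FIELD_GET8() are already used:

/* Hypothetical decode of PHY_C20_VDR_CUSTOM_SERDES_RATE using the
 * named flags instead of raw bit numbers.
 */
u8 val = intel_cx0_read(encoder, INTEL_CX0_LANE0,
			PHY_C20_VDR_CUSTOM_SERDES_RATE);
bool is_dp = val & PHY_C20_IS_DP;
u8 dp_rate = REG_FIELD_GET8(PHY_C20_DP_RATE_MASK, val);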
Reviewed-by: Luca Coelho Signed-off-by: Imre Deak Signed-off-by: Mika Kahola Link: https://lore.kernel.org/r/20251015125446.3931198-3-mika.kahola@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 4 ++-- drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 0d83145eff41..9492661f1645 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2700,8 +2700,8 @@ static void intel_c20_pll_program(struct intel_display *display, /* 5. For DP or 6. For HDMI */ if (is_dp) { intel_cx0_rmw(encoder, owned_lane_mask, PHY_C20_VDR_CUSTOM_SERDES_RATE, - BIT(6) | PHY_C20_DP_RATE_MASK, - BIT(6) | PHY_C20_DP_RATE(intel_c20_get_dp_rate(port_clock)), + PHY_C20_IS_DP | PHY_C20_DP_RATE_MASK, + PHY_C20_IS_DP | PHY_C20_DP_RATE(intel_c20_get_dp_rate(port_clock)), MB_WRITE_COMMITTED); } else { intel_cx0_rmw(encoder, owned_lane_mask, PHY_C20_VDR_CUSTOM_SERDES_RATE, diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h index 25ab8808e548..ad2f7fb3beae 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h @@ -298,6 +298,7 @@ #define PHY_C20_RD_DATA_L 0xC08 #define PHY_C20_RD_DATA_H 0xC09 #define PHY_C20_VDR_CUSTOM_SERDES_RATE 0xD00 +#define PHY_C20_IS_DP REG_BIT8(6) #define PHY_C20_DP_RATE_MASK REG_GENMASK8(4, 1) #define PHY_C20_DP_RATE(val) REG_FIELD_PREP8(PHY_C20_DP_RATE_MASK, val) #define PHY_C20_VDR_HDMI_RATE 0xD01 -- cgit v1.2.3 From a1792df698d4ef32fb39a259e37daab9646c4fe4 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 15:54:42 +0300 Subject: drm/i915/display: Sanitize PHY_C20_VDR_CUSTOM_SERDES_RATE/CONTEXT_TOGGLE flag macro Move the PHY_C20_CONTEXT_TOGGLE flag's definition under the register containing the flag and refer to the flag always by its name instead of a plain bit number. v2: Amend commit log to match what the patch does. (Jani) Cc: Jani Nikula Reviewed-by: Luca Coelho Signed-off-by: Imre Deak Signed-off-by: Mika Kahola Link: https://lore.kernel.org/r/20251015125446.3931198-4-mika.kahola@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 6 ++++-- drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 9492661f1645..a7aee098e7b9 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2628,7 +2628,8 @@ static void intel_c20_pll_program(struct intel_display *display, int i; /* 1. Read current context selection */ - cntx = intel_cx0_read(encoder, INTEL_CX0_LANE0, PHY_C20_VDR_CUSTOM_SERDES_RATE) & BIT(0); + cntx = intel_cx0_read(encoder, INTEL_CX0_LANE0, PHY_C20_VDR_CUSTOM_SERDES_RATE) & + PHY_C20_CONTEXT_TOGGLE; /* * 2. If there is a protocol switch from HDMI to DP or vice versa, clear @@ -2719,7 +2720,8 @@ static void intel_c20_pll_program(struct intel_display *display, * the updated programming toggle context bit */ intel_cx0_rmw(encoder, owned_lane_mask, PHY_C20_VDR_CUSTOM_SERDES_RATE, - BIT(0), cntx ? 0 : 1, MB_WRITE_COMMITTED); + PHY_C20_CONTEXT_TOGGLE, cntx ? 
0 : PHY_C20_CONTEXT_TOGGLE, + MB_WRITE_COMMITTED); } static int intel_c10pll_calc_port_clock(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h index ad2f7fb3beae..5bd1e02b5313 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h @@ -301,8 +301,8 @@ #define PHY_C20_IS_DP REG_BIT8(6) #define PHY_C20_DP_RATE_MASK REG_GENMASK8(4, 1) #define PHY_C20_DP_RATE(val) REG_FIELD_PREP8(PHY_C20_DP_RATE_MASK, val) -#define PHY_C20_VDR_HDMI_RATE 0xD01 #define PHY_C20_CONTEXT_TOGGLE REG_BIT8(0) +#define PHY_C20_VDR_HDMI_RATE 0xD01 #define PHY_C20_VDR_CUSTOM_WIDTH 0xD02 #define PHY_C20_CUSTOM_WIDTH_MASK REG_GENMASK(1, 0) #define PHY_C20_CUSTOM_WIDTH(val) REG_FIELD_PREP8(PHY_C20_CUSTOM_WIDTH_MASK, val) -- cgit v1.2.3 From b02c9b5e6f67a50eb530bdbf320f5f3eae51f90f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 15:54:43 +0300 Subject: drm/i915/display: Sanitize PHY_C20_VDR_CUSTOM_SERDES_RATE/IS_HDMI_FRL flag macro Define PHY_C20_IS_HDMI_FRL, so it can be used instead of the plain bit number. Reviewed-by: Luca Coelho Signed-off-by: Imre Deak Signed-off-by: Mika Kahola Link: https://lore.kernel.org/r/20251015125446.3931198-5-mika.kahola@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 4 ++-- drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index a7aee098e7b9..9be7e155a584 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2706,8 +2706,8 @@ static void intel_c20_pll_program(struct intel_display *display, MB_WRITE_COMMITTED); } else { intel_cx0_rmw(encoder, owned_lane_mask, PHY_C20_VDR_CUSTOM_SERDES_RATE, - BIT(7) | PHY_C20_DP_RATE_MASK, - is_hdmi_frl(port_clock) ? BIT(7) : 0, + PHY_C20_IS_HDMI_FRL | PHY_C20_DP_RATE_MASK, + is_hdmi_frl(port_clock) ? PHY_C20_IS_HDMI_FRL : 0, MB_WRITE_COMMITTED); intel_cx0_write(encoder, INTEL_CX0_BOTH_LANES, PHY_C20_VDR_HDMI_RATE, diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h index 5bd1e02b5313..0743a3e2d15f 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h @@ -298,6 +298,7 @@ #define PHY_C20_RD_DATA_L 0xC08 #define PHY_C20_RD_DATA_H 0xC09 #define PHY_C20_VDR_CUSTOM_SERDES_RATE 0xD00 +#define PHY_C20_IS_HDMI_FRL REG_BIT8(7) #define PHY_C20_IS_DP REG_BIT8(6) #define PHY_C20_DP_RATE_MASK REG_GENMASK8(4, 1) #define PHY_C20_DP_RATE(val) REG_FIELD_PREP8(PHY_C20_DP_RATE_MASK, val) -- cgit v1.2.3 From 4fd6053274d2981d2cd65a9370a700c9fa73384c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 15:54:44 +0300 Subject: drm/i915/display: Fix PHY_C20_VDR_CUSTOM_SERDES_RATE programming Make sure all the DP/HDMI/HDMI-FRL flags are programmed in all the modes the PLL is configured. Atm the DP mode flag is not programmed in case the PLL is configured for HDMI mode for instance. This is incorrect after HW reset, since the DP mode flag is asserted after reset, hence would need to be cleared for HDMI, but also incorrect because of the same reason after configuring the PLL to HDMI mode after it was used in DP mode (for instance on a DP++ connector). 
There is a similar issue with the HDMI-FRL flag, potentially remaining set when configuring the PLL in DP mode. Reviewed-by: Luca Coelho Signed-off-by: Imre Deak Signed-off-by: Mika Kahola Link: https://lore.kernel.org/r/20251015125446.3931198-6-mika.kahola@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 9be7e155a584..6e49659d2f17 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2624,6 +2624,7 @@ static void intel_c20_pll_program(struct intel_display *display, bool is_dp, int port_clock) { u8 owned_lane_mask = intel_cx0_get_owned_lane_mask(encoder); + u8 serdes; bool cntx; int i; @@ -2699,21 +2700,23 @@ static void intel_c20_pll_program(struct intel_display *display, MB_WRITE_COMMITTED); /* 5. For DP or 6. For HDMI */ - if (is_dp) { - intel_cx0_rmw(encoder, owned_lane_mask, PHY_C20_VDR_CUSTOM_SERDES_RATE, - PHY_C20_IS_DP | PHY_C20_DP_RATE_MASK, - PHY_C20_IS_DP | PHY_C20_DP_RATE(intel_c20_get_dp_rate(port_clock)), - MB_WRITE_COMMITTED); - } else { - intel_cx0_rmw(encoder, owned_lane_mask, PHY_C20_VDR_CUSTOM_SERDES_RATE, - PHY_C20_IS_HDMI_FRL | PHY_C20_DP_RATE_MASK, - is_hdmi_frl(port_clock) ? PHY_C20_IS_HDMI_FRL : 0, - MB_WRITE_COMMITTED); + serdes = 0; + + if (is_dp) + serdes = PHY_C20_IS_DP | + PHY_C20_DP_RATE(intel_c20_get_dp_rate(port_clock)); + else if (is_hdmi_frl(port_clock)) + serdes = PHY_C20_IS_HDMI_FRL; + intel_cx0_rmw(encoder, owned_lane_mask, PHY_C20_VDR_CUSTOM_SERDES_RATE, + PHY_C20_IS_DP | PHY_C20_DP_RATE_MASK | PHY_C20_IS_HDMI_FRL, + serdes, + MB_WRITE_COMMITTED); + + if (!is_dp) intel_cx0_write(encoder, INTEL_CX0_BOTH_LANES, PHY_C20_VDR_HDMI_RATE, intel_c20_get_hdmi_rate(port_clock), MB_WRITE_COMMITTED); - } /* * 7. Write Vendor specific registers to toggle context setting to load -- cgit v1.2.3 From 938a3b2252a2565f4b78dc9281b93ad71593fe5f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 15:54:45 +0300 Subject: drm/i915/display: Fix PHY_C20_VDR_HDMI_RATE programming The PHY_C20_VDR_HDMI_RATE register's bits 7:2 are reserved and they are not specified as a must-be-zero field. Accordingly, this reserved field shouldn't be zeroed; to ensure that, use an RMW to update the PHY_C20_HDMI_RATE field (which is bits 1:0 of the register). Reviewed-by: Luca Coelho Signed-off-by: Imre Deak Signed-off-by: Mika Kahola Link: https://lore.kernel.org/r/20251015125446.3931198-7-mika.kahola@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 7 ++++--- drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 6e49659d2f17..f8c1338f9053 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2714,9 +2714,10 @@ static void intel_c20_pll_program(struct intel_display *display, MB_WRITE_COMMITTED); if (!is_dp) - intel_cx0_write(encoder, INTEL_CX0_BOTH_LANES, PHY_C20_VDR_HDMI_RATE, - intel_c20_get_hdmi_rate(port_clock), - MB_WRITE_COMMITTED); + intel_cx0_rmw(encoder, INTEL_CX0_BOTH_LANES, PHY_C20_VDR_HDMI_RATE, + PHY_C20_HDMI_RATE_MASK, + intel_c20_get_hdmi_rate(port_clock), + MB_WRITE_COMMITTED); /* * 7.
Write Vendor specific registers to toggle context setting to load diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h index 0743a3e2d15f..86e2e1c7babf 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h @@ -304,6 +304,8 @@ #define PHY_C20_DP_RATE(val) REG_FIELD_PREP8(PHY_C20_DP_RATE_MASK, val) #define PHY_C20_CONTEXT_TOGGLE REG_BIT8(0) #define PHY_C20_VDR_HDMI_RATE 0xD01 +#define PHY_C20_HDMI_RATE_MASK REG_GENMASK8(1, 0) +#define PHY_C20_HDMI_RATE(val) REG_FIELD_PREP8(PHY_C20_HDMI_RATE_MASK, val) #define PHY_C20_VDR_CUSTOM_WIDTH 0xD02 #define PHY_C20_CUSTOM_WIDTH_MASK REG_GENMASK(1, 0) #define PHY_C20_CUSTOM_WIDTH(val) REG_FIELD_PREP8(PHY_C20_CUSTOM_WIDTH_MASK, val) -- cgit v1.2.3 From 682505a0fce602b1634cf0a2ea76d017b60b4c81 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 15:54:46 +0300 Subject: drm/i915/display: Add missing clock to C10 PHY state compute/HW readout Clock value is missing from C10 hw readout stage. Let's fix this. Reviewed-by: Luca Coelho Signed-off-by: Imre Deak Signed-off-by: Mika Kahola Link: https://lore.kernel.org/r/20251015125446.3931198-8-mika.kahola@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 5 +++++ drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c | 2 ++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index f8c1338f9053..a74c1be225ac 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2103,6 +2103,9 @@ static int intel_c10pll_calc_state(struct intel_crtc_state *crtc_state, return 0; } +static int intel_c10pll_calc_port_clock(struct intel_encoder *encoder, + const struct intel_c10pll_state *pll_state); + static void intel_c10pll_readout_hw_state(struct intel_encoder *encoder, struct intel_c10pll_state *pll_state) { @@ -2127,6 +2130,8 @@ static void intel_c10pll_readout_hw_state(struct intel_encoder *encoder, pll_state->tx = intel_cx0_read(encoder, lane, PHY_C10_VDR_TX(0)); intel_cx0_phy_transaction_end(encoder, wakeref); + + pll_state->clock = intel_c10pll_calc_port_clock(encoder, pll_state); } static void intel_c10_pll_program(struct intel_display *display, diff --git a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c index 7fe6b4a18213..a201edceee10 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c +++ b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c @@ -332,6 +332,8 @@ void intel_snps_hdmi_pll_compute_c10pll(struct intel_c10pll_state *pll_state, u6 c10_curve_1, c10_curve_2, prescaler_divider, &pll_params); + pll_state->clock = pixel_clock; + pll_state->tx = 0x10; pll_state->cmn = 0x1; pll_state->pll[0] = REG_FIELD_PREP(C10_PLL0_DIV5CLK_EN, pll_params.mpll_div5_en) | -- cgit v1.2.3 From 8f8ef09fcf6a3b00369bfc704e8f68d7474eca94 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 15 Oct 2025 12:51:35 +0300 Subject: drm/i915/panic: fix panic structure allocation memory leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separating the panic allocation from framebuffer allocation in commit 729c5f7ffa83 ("drm/{i915,xe}/panic: move framebuffer allocation where it belongs") failed to deallocate the panic structure anywhere. The fix is two-fold. First, free the panic structure in intel_user_framebuffer_destroy() in the general case. 
Second, move the panic allocation later to intel_framebuffer_init() to not leak the panic structure in error paths (if any, now or later) between intel_framebuffer_alloc() and intel_framebuffer_init(). v2: Rebase Fixes: 729c5f7ffa83 ("drm/{i915,xe}/panic: move framebuffer allocation where it belongs") Cc: Jocelyn Falempe Cc: Maarten Lankhorst Reported-by: Michał Grzelak Suggested-by: Ville Syrjälä Tested-by: Michał Grzelak # v1 Reviewed-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20251015095135.2183415-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fb.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 9c256a2805e4..3958628c73e9 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -2114,6 +2114,7 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) intel_frontbuffer_put(intel_fb->frontbuffer); + kfree(intel_fb->panic); kfree(intel_fb); } @@ -2212,16 +2213,22 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct intel_display *display = to_intel_display(obj->dev); struct drm_framebuffer *fb = &intel_fb->base; u32 max_stride; - int ret = -EINVAL; + int ret; int i; + intel_fb->panic = intel_panic_alloc(); + if (!intel_fb->panic) + return -ENOMEM; + /* * intel_frontbuffer_get() must be done before * intel_fb_bo_framebuffer_init() to avoid set_tiling vs. addfb race. */ intel_fb->frontbuffer = intel_frontbuffer_get(obj); - if (!intel_fb->frontbuffer) - return -ENOMEM; + if (!intel_fb->frontbuffer) { + ret = -ENOMEM; + goto err_free_panic; + } ret = intel_fb_bo_framebuffer_init(obj, mode_cmd); if (ret) @@ -2320,6 +2327,9 @@ err_bo_framebuffer_fini: intel_fb_bo_framebuffer_fini(obj); err_frontbuffer_put: intel_frontbuffer_put(intel_fb->frontbuffer); +err_free_panic: + kfree(intel_fb->panic); + return ret; } @@ -2346,20 +2356,11 @@ intel_user_framebuffer_create(struct drm_device *dev, struct intel_framebuffer *intel_framebuffer_alloc(void) { struct intel_framebuffer *intel_fb; - struct intel_panic *panic; intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); if (!intel_fb) return NULL; - panic = intel_panic_alloc(); - if (!panic) { - kfree(intel_fb); - return NULL; - } - - intel_fb->panic = panic; - return intel_fb; } -- cgit v1.2.3 From f71f86daa1b28def08bd855dd8ac3c1a9b19ae4c Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 16 Oct 2025 11:24:05 +0530 Subject: drm/i915/vrr: Use crtc_vsync_start/end for computing vrr.vsync_start/end Use adjusted_mode->crtc_vsync_start/end instead of adjusted_mode->vsync_start while computing vrr.vsync_start/end. For most modes, these are same but for 3D/stereo modes the crtc_vsync_start is different than vsync_start. 
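For illustration, with standard CEA 1080p timings (crtc_vtotal = 1125, crtc_vsync_start = 1084, crtc_vsync_end = 1089; numbers here only as an example): vrr.vsync_start = 1125 - 1084 = 41 and vrr.vsync_end = 1125 - 1089 = 36. A frame-packed stereo variant of the same mode recomputes the crtc_ timings (roughly doubling them) while vsync_start/end keep the base mode's values, so only the crtc_ variants yield the right offsets from crtc_vtotal.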
Signed-off-by: Ankit Nautiyal Reviewed-by: Uma Shankar Link: https://lore.kernel.org/r/20251016055415.2101347-2-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_vrr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 190c51be5cbc..4bc14b5e685f 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -394,10 +394,10 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, if (HAS_AS_SDP(display)) { crtc_state->vrr.vsync_start = (crtc_state->hw.adjusted_mode.crtc_vtotal - - crtc_state->hw.adjusted_mode.vsync_start); + crtc_state->hw.adjusted_mode.crtc_vsync_start); crtc_state->vrr.vsync_end = (crtc_state->hw.adjusted_mode.crtc_vtotal - - crtc_state->hw.adjusted_mode.vsync_end); + crtc_state->hw.adjusted_mode.crtc_vsync_end); } } -- cgit v1.2.3 From 22378988303aae9b70d4ae579ff719cc7063770e Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 16 Oct 2025 11:24:06 +0530 Subject: drm/i915/display: Move intel_dpll_crtc_compute_clock early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move intel_dpll_crtc_compute_clock to the beginning of the function so that clocks are set before other things. This will help in subsequent changes when the vrr guardband computation is moved to intel_crtc_compute_config(). Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20251016055415.2101347-3-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index d5b2612d4ec2..3f725553599e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2410,11 +2410,11 @@ static int intel_crtc_compute_config(struct intel_atomic_state *state, intel_atomic_get_new_crtc_state(state, crtc); int ret; - ret = intel_crtc_compute_set_context_latency(state, crtc); + ret = intel_dpll_crtc_compute_clock(state, crtc); if (ret) return ret; - ret = intel_dpll_crtc_compute_clock(state, crtc); + ret = intel_crtc_compute_set_context_latency(state, crtc); if (ret) return ret; -- cgit v1.2.3 From 2cdcab0d3a681f655757dab9d9719ac069b359ff Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 16 Oct 2025 11:24:07 +0530 Subject: drm/i915/vrr: s/intel_vrr_compute_config_late/intel_vrr_compute_guardband MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The helper intel_vrr_compute_config_late() practically just computes the guardband. Rename intel_vrr_compute_config_late() to intel_vrr_compute_guardband(). Since we are going to compute the guardband and then move the vblank_start to optimize the guardband, move it to intel_crtc_compute_config(), which handles such changes. v2: Move the function to the end, after clocks, pipe_mode etc. are all set.
(Ville) Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20251016055415.2101347-4-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 4 ++-- drivers/gpu/drm/i915/display/intel_vrr.c | 2 +- drivers/gpu/drm/i915/display/intel_vrr.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3f725553599e..ceee5ae99c2c 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2431,6 +2431,8 @@ static int intel_crtc_compute_config(struct intel_atomic_state *state, if (crtc_state->has_pch_encoder) return ilk_fdi_compute_config(crtc, crtc_state); + intel_vrr_compute_guardband(crtc_state); + return 0; } @@ -4722,8 +4724,6 @@ intel_modeset_pipe_config_late(struct intel_atomic_state *state, struct drm_connector *connector; int i; - intel_vrr_compute_config_late(crtc_state); - for_each_new_connector_in_state(&state->base, connector, conn_state, i) { struct intel_encoder *encoder = diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 4bc14b5e685f..8d71d7dc9d12 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -433,7 +433,7 @@ intel_vrr_max_guardband(struct intel_crtc_state *crtc_state) intel_vrr_max_vblank_guardband(crtc_state)); } -void intel_vrr_compute_config_late(struct intel_crtc_state *crtc_state) +void intel_vrr_compute_guardband(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; diff --git a/drivers/gpu/drm/i915/display/intel_vrr.h b/drivers/gpu/drm/i915/display/intel_vrr.h index 7317f8730089..bc9044621635 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.h +++ b/drivers/gpu/drm/i915/display/intel_vrr.h @@ -21,7 +21,7 @@ bool intel_vrr_possible(const struct intel_crtc_state *crtc_state); void intel_vrr_check_modeset(struct intel_atomic_state *state); void intel_vrr_compute_config(struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state); -void intel_vrr_compute_config_late(struct intel_crtc_state *crtc_state); +void intel_vrr_compute_guardband(struct intel_crtc_state *crtc_state); void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state); void intel_vrr_enable(const struct intel_crtc_state *crtc_state); void intel_vrr_send_push(struct intel_dsb *dsb, -- cgit v1.2.3 From ca4edafa149011010b1943edafc3993a99c8df7c Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 16 Oct 2025 11:24:08 +0530 Subject: drm/i915/vblank: Add helper to get correct vblank length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently crtc_vblank_start is assumed to be the vblank_start for the fixed refresh rate case. That value can be different from the variable refresh rate case whenever always_use_vrr_tg()==false. On icl/tgl it's always different due to the extra vblank delay, and also on adl+ it could be different if we were to use an optimized guardband. So places where crtc_vblank_start is used to compute the vblank length need to change to account for cases where vrr is enabled. Specifically, with vrr.enable the effective vblank length is actually the guardband. Add a helper to get the correct vblank length for both vrr and fixed refresh rate cases.
Use this helper where vblank_start is used to compute the vblank length. Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20251016055415.2101347-5-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_vblank.c | 10 ++++++++++ drivers/gpu/drm/i915/display/intel_vblank.h | 2 ++ drivers/gpu/drm/i915/display/skl_watermark.c | 3 ++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 0b7fcc05e64c..2fc0c1c0bb87 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -767,3 +767,13 @@ int intel_vblank_evade(struct intel_vblank_evade_ctx *evade) return scanline; } + +int intel_crtc_vblank_length(const struct intel_crtc_state *crtc_state) +{ + const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + + if (crtc_state->vrr.enable) + return crtc_state->vrr.guardband; + else + return adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vblank_start; +} diff --git a/drivers/gpu/drm/i915/display/intel_vblank.h b/drivers/gpu/drm/i915/display/intel_vblank.h index 21fbb08d61d5..98d04cacd65f 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.h +++ b/drivers/gpu/drm/i915/display/intel_vblank.h @@ -48,4 +48,6 @@ const struct intel_crtc_state * intel_pre_commit_crtc_state(struct intel_atomic_state *state, struct intel_crtc *crtc); +int intel_crtc_vblank_length(const struct intel_crtc_state *crtc_state); + #endif /* __INTEL_VBLANK_H__ */ diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 9df9ee137bf9..06e5e6c77d2e 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -28,6 +28,7 @@ #include "intel_flipq.h" #include "intel_pcode.h" #include "intel_plane.h" +#include "intel_vblank.h" #include "intel_wm.h" #include "skl_universal_plane_regs.h" #include "skl_watermark.h" @@ -2241,7 +2242,7 @@ skl_is_vblank_too_short(const struct intel_crtc_state *crtc_state, scaler_prefill_latency(crtc_state) + dsc_prefill_latency(crtc_state) + wm0_lines > - adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vblank_start; + intel_crtc_vblank_length(crtc_state); } static int skl_max_wm0_lines(const struct intel_crtc_state *crtc_state) -- cgit v1.2.3 From e00eb31a5ad62126e701129b3809ab124b001d39 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 16 Oct 2025 11:24:09 +0530 Subject: drm/i915/psr: Consider SCL lines when validating vblank for wake latency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Panel Replay and PSR2 selective update require sufficient vblank duration to accommodate wake latencies. However, the current wake_lines_fit_into_vblank() logic does not account for the minimum Set Context Latency (SCL) lines. Separate out _intel_psr_min_set_context_latency() to compute the minimum SCL requirement based on platform and feature usage. The alpm_config_valid() helper is updated to pass the necessary context for determining whether Panel Replay or PSR2 selective update is enabled. v2: While calling alpm_config_valid() for selective_update use false flag instead of has_panel_replay. (Jouni) v3: Correct ordering of the panel_replay, sel_update flags. 
(Jouni) Signed-off-by: Ankit Nautiyal Cc: Animesh Manna Cc: Jouni Högander Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251016055415.2101347-6-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 102 ++++++++++++++++++------------- 1 file changed, 61 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 2131473cead6..1d06011a97ce 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1361,14 +1361,64 @@ static int intel_psr_entry_setup_frames(struct intel_dp *intel_dp, return entry_setup_frames; } +static +int _intel_psr_min_set_context_latency(const struct intel_crtc_state *crtc_state, + bool needs_panel_replay, + bool needs_sel_update) +{ + struct intel_display *display = to_intel_display(crtc_state); + + if (!crtc_state->has_psr) + return 0; + + /* Wa_14015401596 */ + if (intel_vrr_possible(crtc_state) && IS_DISPLAY_VER(display, 13, 14)) + return 1; + + /* Rest is for SRD_STATUS needed on LunarLake and onwards */ + if (DISPLAY_VER(display) < 20) + return 0; + + /* + * Comment on SRD_STATUS register in Bspec for LunarLake and onwards: + * + * To deterministically capture the transition of the state machine + * going from SRDOFFACK to IDLE, the delayed V. Blank should be at least + * one line after the non-delayed V. Blank. + * + * Legacy TG: TRANS_SET_CONTEXT_LATENCY > 0 + * VRR TG: TRANS_VRR_CTL[ VRR Guardband ] < (TRANS_VRR_VMAX[ VRR Vmax ] + * - TRANS_VTOTAL[ Vertical Active ]) + * + * SRD_STATUS is used only by PSR1 on PantherLake. + * SRD_STATUS is used by PSR1 and Panel Replay DP on LunarLake. + */ + + if (DISPLAY_VER(display) >= 30 && (needs_panel_replay || + needs_sel_update)) + return 0; + else if (DISPLAY_VER(display) < 30 && (needs_sel_update || + intel_crtc_has_type(crtc_state, + INTEL_OUTPUT_EDP))) + return 0; + else + return 1; +} + static bool wake_lines_fit_into_vblank(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, - bool aux_less) + bool aux_less, + bool needs_panel_replay, + bool needs_sel_update) { struct intel_display *display = to_intel_display(intel_dp); int vblank = crtc_state->hw.adjusted_mode.crtc_vblank_end - crtc_state->hw.adjusted_mode.crtc_vblank_start; int wake_lines; + int scl = _intel_psr_min_set_context_latency(crtc_state, + needs_panel_replay, + needs_sel_update); + vblank -= scl; if (aux_less) wake_lines = crtc_state->alpm_state.aux_less_wake_lines; @@ -1390,7 +1440,9 @@ static bool wake_lines_fit_into_vblank(struct intel_dp *intel_dp, static bool alpm_config_valid(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, - bool aux_less) + bool aux_less, + bool needs_panel_replay, + bool needs_sel_update) { struct intel_display *display = to_intel_display(intel_dp); @@ -1400,7 +1452,8 @@ static bool alpm_config_valid(struct intel_dp *intel_dp, return false; } - if (!wake_lines_fit_into_vblank(intel_dp, crtc_state, aux_less)) { + if (!wake_lines_fit_into_vblank(intel_dp, crtc_state, aux_less, + needs_panel_replay, needs_sel_update)) { drm_dbg_kms(display->drm, "PSR2/Panel Replay not enabled, too short vblank time\n"); return false; @@ -1492,7 +1545,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } - if (!alpm_config_valid(intel_dp, crtc_state, false)) + if (!alpm_config_valid(intel_dp, crtc_state, false, false, true)) return false; if (!crtc_state->enable_psr2_sel_fetch && @@ -1643,7 +1696,7 @@ 
_panel_replay_compute_config(struct intel_dp *intel_dp, return false; } - if (!alpm_config_valid(intel_dp, crtc_state, true)) + if (!alpm_config_valid(intel_dp, crtc_state, true, true, false)) return false; return true; @@ -2371,43 +2424,10 @@ void intel_psr_trigger_frame_change_event(struct intel_dsb *dsb, */ int intel_psr_min_set_context_latency(const struct intel_crtc_state *crtc_state) { - struct intel_display *display = to_intel_display(crtc_state); - - if (!crtc_state->has_psr) - return 0; - - /* Wa_14015401596 */ - if (intel_vrr_possible(crtc_state) && IS_DISPLAY_VER(display, 13, 14)) - return 1; - - /* Rest is for SRD_STATUS needed on LunarLake and onwards */ - if (DISPLAY_VER(display) < 20) - return 0; - /* - * Comment on SRD_STATUS register in Bspec for LunarLake and onwards: - * - * To deterministically capture the transition of the state machine - * going from SRDOFFACK to IDLE, the delayed V. Blank should be at least - * one line after the non-delayed V. Blank. - * - * Legacy TG: TRANS_SET_CONTEXT_LATENCY > 0 - * VRR TG: TRANS_VRR_CTL[ VRR Guardband ] < (TRANS_VRR_VMAX[ VRR Vmax ] - * - TRANS_VTOTAL[ Vertical Active ]) - * - * SRD_STATUS is used only by PSR1 on PantherLake. - * SRD_STATUS is used by PSR1 and Panel Replay DP on LunarLake. - */ - - if (DISPLAY_VER(display) >= 30 && (crtc_state->has_panel_replay || - crtc_state->has_sel_update)) - return 0; - else if (DISPLAY_VER(display) < 30 && (crtc_state->has_sel_update || - intel_crtc_has_type(crtc_state, - INTEL_OUTPUT_EDP))) - return 0; - else - return 1; + return _intel_psr_min_set_context_latency(crtc_state, + crtc_state->has_panel_replay, + crtc_state->has_sel_update); } static u32 man_trk_ctl_enable_bit_get(struct intel_display *display) -- cgit v1.2.3 From 3fdae52c3dde0e00b910fded31bbfccd24a527ae Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 16 Oct 2025 11:24:10 +0530 Subject: drm/i915/psr: Introduce helper intel_psr_set_non_psr_pipes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a function to set non-psr pipes in crtc_state based on psr features. This will help to move this part later where we re-evaluate psr features and update the non-psr pipes. Signed-off-by: Ankit Nautiyal Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251016055415.2101347-7-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 49 +++++++++++++++++++------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 1d06011a97ce..e97dcfa7673c 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1711,15 +1711,40 @@ static bool intel_psr_needs_wa_18037818876(struct intel_dp *intel_dp, !crtc_state->has_sel_update); } +static +void intel_psr_set_non_psr_pipes(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(intel_dp); + struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); + struct intel_crtc *crtc; + u8 active_pipes = 0; + + /* Wa_16025596647 */ + if (DISPLAY_VER(display) != 20 && + !IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) + return; + + /* Not needed by Panel Replay */ + if (crtc_state->has_panel_replay) + return; + + /* We ignore possible secondary PSR/Panel Replay capable eDP */ + for_each_intel_crtc(display->drm, crtc) + active_pipes |= crtc->active ? 
BIT(crtc->pipe) : 0; + + active_pipes = intel_calc_active_pipes(state, active_pipes); + + crtc_state->active_non_psr_pipes = active_pipes & + ~BIT(to_intel_crtc(crtc_state->uapi.crtc)->pipe); +} + void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(intel_dp); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); - struct intel_crtc *crtc; - u8 active_pipes = 0; if (!psr_global_enabled(intel_dp)) { drm_dbg_kms(display->drm, "PSR disabled by flag\n"); @@ -1768,23 +1793,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, "PSR disabled to workaround PSR FSM hang issue\n"); } - /* Rest is for Wa_16025596647 */ - if (DISPLAY_VER(display) != 20 && - !IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) - return; - - /* Not needed by Panel Replay */ - if (crtc_state->has_panel_replay) - return; - - /* We ignore possible secondary PSR/Panel Replay capable eDP */ - for_each_intel_crtc(display->drm, crtc) - active_pipes |= crtc->active ? BIT(crtc->pipe) : 0; - - active_pipes = intel_calc_active_pipes(state, active_pipes); - - crtc_state->active_non_psr_pipes = active_pipes & - ~BIT(to_intel_crtc(crtc_state->uapi.crtc)->pipe); + intel_psr_set_non_psr_pipes(intel_dp, crtc_state); } void intel_psr_get_config(struct intel_encoder *encoder, -- cgit v1.2.3 From 0d7f4e99217f6f715c7064c67eae8d9d09313b14 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 16 Oct 2025 11:24:11 +0530 Subject: drm/i915/display: Introduce dp/psr_compute_config_late() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce intel_dp_compute_config_late() to handle late-stage configuration checks for DP/eDP features. For now, it paves the way for psr_compute_config_late() to handle psr parameters that need to be computed late. Move the handling of the psr flag for Wa_18037818876 and the setting of non-psr pipes to intel_psr_compute_config_late() as these are the last things to be configured for PSR features. v2: Update dp_compute_config_late() to return int.
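A sketch of the resulting call chain (all function names as in the patch): intel_ddi_compute_config_late() -> intel_dp_compute_config_late() /* DP/eDP encoders only */ -> intel_psr_compute_config_late() /* Wa_18037818876 psr flag handling + non-psr pipe setup */.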
Signed-off-by: Ankit Nautiyal Reviewed-by: Jouni Högander (#v1) Link: https://lore.kernel.org/r/20251016055415.2101347-8-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 7 +++++++ drivers/gpu/drm/i915/display/intel_dp.c | 11 +++++++++++ drivers/gpu/drm/i915/display/intel_dp.h | 3 +++ drivers/gpu/drm/i915/display/intel_psr.c | 24 +++++++++++++++--------- drivers/gpu/drm/i915/display/intel_psr.h | 2 ++ 5 files changed, 38 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index c09aa759f4d4..870140340342 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4559,6 +4559,13 @@ static int intel_ddi_compute_config_late(struct intel_encoder *encoder, struct intel_display *display = to_intel_display(encoder); struct drm_connector *connector = conn_state->connector; u8 port_sync_transcoders = 0; + int ret = 0; + + if (intel_crtc_has_dp_encoder(crtc_state)) + ret = intel_dp_compute_config_late(encoder, crtc_state, conn_state); + + if (ret) + return ret; drm_dbg_kms(display->drm, "[ENCODER:%d:%s] [CRTC:%d:%s]\n", encoder->base.base.id, encoder->base.name, diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index a723e846321f..7059d55687cf 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -6979,3 +6979,14 @@ void intel_dp_mst_resume(struct intel_display *display) } } } + +int intel_dp_compute_config_late(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + intel_psr_compute_config_late(intel_dp, crtc_state); + + return 0; +} diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index b379443e0211..281ced3a3b39 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -218,5 +218,8 @@ int intel_dp_compute_min_hblank(struct intel_crtc_state *crtc_state, int intel_dp_dsc_bpp_step_x16(const struct intel_connector *connector); void intel_dp_dpcd_set_probe(struct intel_dp *intel_dp, bool force_on_external); bool intel_dp_in_hdr_mode(const struct drm_connector_state *conn_state); +int intel_dp_compute_config_late(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct drm_connector_state *conn_state); #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index e97dcfa7673c..383e6dc1ed63 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1785,15 +1785,6 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, return; crtc_state->has_sel_update = intel_sel_update_config_valid(intel_dp, crtc_state); - - /* Wa_18037818876 */ - if (intel_psr_needs_wa_18037818876(intel_dp, crtc_state)) { - crtc_state->has_psr = false; - drm_dbg_kms(display->drm, - "PSR disabled to workaround PSR FSM hang issue\n"); - } - - intel_psr_set_non_psr_pipes(intel_dp, crtc_state); } void intel_psr_get_config(struct intel_encoder *encoder, @@ -4355,3 +4346,18 @@ bool intel_psr_needs_alpm_aux_less(struct intel_dp *intel_dp, { return intel_dp_is_edp(intel_dp) && crtc_state->has_panel_replay; } + +void intel_psr_compute_config_late(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = 
to_intel_display(intel_dp); + + /* Wa_18037818876 */ + if (intel_psr_needs_wa_18037818876(intel_dp, crtc_state)) { + crtc_state->has_psr = false; + drm_dbg_kms(display->drm, + "PSR disabled to workaround PSR FSM hang issue\n"); + } + + intel_psr_set_non_psr_pipes(intel_dp, crtc_state); +} diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index 9147996d6c9e..b17ce312dc37 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -83,5 +83,7 @@ void intel_psr_debugfs_register(struct intel_display *display); bool intel_psr_needs_alpm(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); bool intel_psr_needs_alpm_aux_less(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); +void intel_psr_compute_config_late(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state); #endif /* __INTEL_PSR_H__ */ -- cgit v1.2.3 From 8c0cf4fe9bb3b9d1b76a4dc6e9708a611a2e8f6c Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 16 Oct 2025 11:24:12 +0530 Subject: drm/i915/psr: Check if final vblank is sufficient for PSR features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, wake line latency checks rely on the vblank length, which does not account for either the extra vblank delay for icl/tgl or for the optimized guardband which will come into the picture later. Validate whether the final vblank (with extra vblank delay) or guardband is sufficient to support wake line latencies required by Panel Replay and PSR2 selective update. Disable the PSR features if their wake requirements cannot be accommodated. v2: Add comments clarifying wake line checks and rationale for not resetting SCL. (Jouni) v3: Reset other psr flags based on features that are dropped. (Jouni) v4: Update commit message. v5: Remove early return and simplify the checking for wakelines.
(Jouni) Signed-off-by: Ankit Nautiyal Cc: Animesh Manna Cc: Jouni Högander Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251016055415.2101347-9-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 71 ++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 383e6dc1ed63..703e5f6af04c 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1405,6 +1405,20 @@ int _intel_psr_min_set_context_latency(const struct intel_crtc_state *crtc_state return 1; } +static bool _wake_lines_fit_into_vblank(const struct intel_crtc_state *crtc_state, + int vblank, + int wake_lines) +{ + if (crtc_state->req_psr2_sdp_prior_scanline) + vblank -= 1; + + /* Vblank >= PSR2_CTL Block Count Number maximum line count */ + if (vblank < wake_lines) + return false; + + return true; +} + static bool wake_lines_fit_into_vblank(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, bool aux_less, @@ -1428,14 +1442,16 @@ static bool wake_lines_fit_into_vblank(struct intel_dp *intel_dp, crtc_state->alpm_state.fast_wake_lines) : crtc_state->alpm_state.io_wake_lines; - if (crtc_state->req_psr2_sdp_prior_scanline) - vblank -= 1; - - /* Vblank >= PSR2_CTL Block Count Number maximum line count */ - if (vblank < wake_lines) - return false; - - return true; + /* + * Guardband has not been computed yet, so we conservatively check if the + * full vblank duration is sufficient to accommodate wake line requirements + * for PSR features like Panel Replay and Selective Update. + * + * Once the actual guardband is available, a more accurate validation is + * performed in intel_psr_compute_config_late(), and PSR features are + * disabled if wake lines exceed the available guardband. + */ + return _wake_lines_fit_into_vblank(crtc_state, vblank, wake_lines); } static bool alpm_config_valid(struct intel_dp *intel_dp, @@ -4351,6 +4367,45 @@ void intel_psr_compute_config_late(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(intel_dp); + int vblank = intel_crtc_vblank_length(crtc_state); + int wake_lines; + + if (intel_psr_needs_alpm_aux_less(intel_dp, crtc_state)) + wake_lines = crtc_state->alpm_state.aux_less_wake_lines; + else if (intel_psr_needs_alpm(intel_dp, crtc_state)) + wake_lines = DISPLAY_VER(display) < 20 ? + psr2_block_count_lines(crtc_state->alpm_state.io_wake_lines, + crtc_state->alpm_state.fast_wake_lines) : + crtc_state->alpm_state.io_wake_lines; + else + wake_lines = 0; + + /* + * Disable the PSR features if wake lines exceed the available vblank. + * Though SCL is computed based on these PSR features, it is not reset + * even if the PSR features are disabled to avoid changing vblank start + * at this stage. + */ + if (wake_lines && !_wake_lines_fit_into_vblank(crtc_state, vblank, wake_lines)) { + drm_dbg_kms(display->drm, + "Adjusting PSR/PR mode: vblank too short for wake lines = %d\n", + wake_lines); + + if (crtc_state->has_panel_replay) { + crtc_state->has_panel_replay = false; + /* + * #TODO : Add fall back to PSR/PSR2 + * Since panel replay cannot be supported, we can fall back to PSR/PSR2. + * This will require calling compute_config for psr and psr2 with check for + * actual guardband instead of vblank_length. 
+ */ + crtc_state->has_psr = false; + } + + crtc_state->has_sel_update = false; + crtc_state->enable_psr2_su_region_et = false; + crtc_state->enable_psr2_sel_fetch = false; + } /* Wa_18037818876 */ if (intel_psr_needs_wa_18037818876(intel_dp, crtc_state)) { -- cgit v1.2.3 From 5cb2cf6aa8d6647bb41ea8c90498e10d192411c8 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 16 Oct 2025 11:24:13 +0530 Subject: drm/i915/display: Add vblank_start adjustment logic for always-on VRR TG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we move towards using a shorter, optimized guardband, we need to adjust how the delayed vblank start is computed. Adjust the crtc_vblank_start using Vmin Vtotal - guardband only when intel_vrr_always_use_vrr_tg() is true. Also update pipe_mode->crtc_vblank_start, which is derived from adjusted_mode->crtc_vblank_start in intel_crtc_compute_pipe_mode(). To maintain consistency between the computed and readout paths, update the readout logic in intel_vrr_get_config() to overwrite crtc_vblank_start with the same value (vtotal - guardband) on platforms with always-on VRR TG. This also paves the way for guardband optimization by handling the movement of the crtc_vblank_start for platforms that have VRR TG always active. v2: Drop the helper and add the adjustment directly to intel_vrr_compute_guardband(). (Ville) v3: Use adjusted_mode.crtc_vtotal instead of vmin and include the readout logic to keep the compute and readout paths in sync. (Ville) v4: Also set pipe_mode->crtc_vblank_start as it's derived from adjusted_mode. (Ville) v5: Add a comment about rationale behind updating pipe_mode->crtc_vblank_start. (Ville) Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20251016055415.2101347-10-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_vrr.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 8d71d7dc9d12..597008a6c744 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -436,7 +436,8 @@ intel_vrr_max_guardband(struct intel_crtc_state *crtc_state) void intel_vrr_compute_guardband(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + struct drm_display_mode *pipe_mode = &crtc_state->hw.pipe_mode; if (!intel_vrr_possible(crtc_state)) return; @@ -444,6 +445,17 @@ void intel_vrr_compute_guardband(struct intel_crtc_state *crtc_state) crtc_state->vrr.guardband = min(crtc_state->vrr.vmin - adjusted_mode->crtc_vdisplay, intel_vrr_max_guardband(crtc_state)); + if (intel_vrr_always_use_vrr_tg(display)) { + adjusted_mode->crtc_vblank_start = + adjusted_mode->crtc_vtotal - crtc_state->vrr.guardband; + /* + * pipe_mode has already been derived from the + * original adjusted_mode, keep the two in sync.
+ */ + pipe_mode->crtc_vblank_start = + adjusted_mode->crtc_vblank_start; + } + if (DISPLAY_VER(display) < 13) crtc_state->vrr.pipeline_full = intel_vrr_guardband_to_pipeline_full(crtc_state, @@ -821,6 +833,19 @@ void intel_vrr_get_config(struct intel_crtc_state *crtc_state) */ if (crtc_state->vrr.enable) crtc_state->mode_flags |= I915_MODE_FLAG_VRR; + + /* + * For platforms that always use the VRR timing generator, we overwrite + * crtc_vblank_start with vtotal - guardband to reflect the delayed + * vblank start. This works for both default and optimized guardband values. + * On other platforms, we keep the original value from + * intel_get_transcoder_timings() and apply adjustments only in VRR-specific + * paths as needed. + */ + if (intel_vrr_always_use_vrr_tg(display)) + crtc_state->hw.adjusted_mode.crtc_vblank_start = + crtc_state->hw.adjusted_mode.crtc_vtotal - + crtc_state->vrr.guardband; } int intel_vrr_safe_window_start(const struct intel_crtc_state *crtc_state) -- cgit v1.2.3 From 755e430b1eed8234d22c7aa6c277f2bef7202aa9 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 16 Oct 2025 11:24:14 +0530 Subject: drm/i915/display: Prepare for vblank_delay for LRR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update allow_vblank_delay_fastset() to permit vblank delay adjustments with LRR when the VRR TG is always active. Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20251016055415.2101347-11-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index ceee5ae99c2c..65a7da694ef6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4958,9 +4958,15 @@ static bool allow_vblank_delay_fastset(const struct intel_crtc_state *old_crtc_s * Allow fastboot to fix up vblank delay (handled via LRR * codepaths), a bit dodgy as the registers aren't * double buffered but seems to be working more or less... + * + * Also allow this when the VRR timing generator is always on, + * and optimized guardband is used. In such cases, + * vblank delay may vary even without inherited state, but it's + * still safe as VRR guardband is still same. */ - return HAS_LRR(display) && old_crtc_state->inherited && - !intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DSI); + return HAS_LRR(display) && + (old_crtc_state->inherited || intel_vrr_always_use_vrr_tg(display)) && + !intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DSI); } bool -- cgit v1.2.3 From 5021ccb44dc6a22a3508316442d1304371ca55db Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 14 Oct 2025 22:18:01 +0300 Subject: drm/i915: Reject modes with linetime > 64 usec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reject modes whose linetime exceeds 64 usec. The first reason is that WM_LINETIME is limited to (nearly) 64 usec. Additionally, knowing the linetime is bounded will help with determining whether overflows may be a concern during various calculations. I decided to round up, and accept the linetime==64 case. We use various rounding directions for this in other parts of the code, so I feel this provides the most consistent result all around.
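For illustration (example numbers, not from the patch): a standard 594 MHz HDMI mode with htotal = 4400 gives DIV_ROUND_UP(4400 * 1000, 594000) = 8 usec and passes, while a hypothetical mode with htotal = 2200 on a 25 MHz dotclock gives DIV_ROUND_UP(2200 * 1000, 25000) = 88 usec and is rejected.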
Reviewed-by: Uma Shankar Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251014191808.12326-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 65a7da694ef6..4367ecfab2b3 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7978,6 +7978,14 @@ enum drm_mode_status intel_mode_valid(struct drm_device *dev, mode->vtotal > vtotal_max) return MODE_V_ILLEGAL; + /* + * WM_LINETIME only goes up to (almost) 64 usec, and also + * knowing that the linetime is always bounded will ease the + * mind during various calculations. + */ + if (DIV_ROUND_UP(mode->htotal * 1000, mode->clock) > 64) + return MODE_H_ILLEGAL; + return MODE_OK; } -- cgit v1.2.3 From 2b4c2a5e4d7b26b884f3861108becae0098e3233 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 14 Oct 2025 22:18:02 +0300 Subject: drm/i915/cdclk: Add prefill helpers for CDCLK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add helpers to compute the CDCLK adjustment factor for prefill calculations. The adjustment factor is always <= 1.0. That is, a faster CDCLK speeds up the pipe prefill. intel_cdclk_prefill_adjustment_worst() gives out a worst case estimate, meant to be used during guardband sizing. We can actually do better than 1.0 here because the absolute minimum CDCLK is limited by the dotclock. This will still allow planes, pfit, etc. to be changed any which way without having to resize the guardband yet again. intel_cdclk_prefill_adjustment() is supposed to give a more accurate value based on the current min cdclk for the pipe, but currently that is not yet available when this gets called. So for now use the same worst case estimate here. The returned numbers are in .16 binary fixed point. TODO: the intel_cdclk_prefill_adjustment_worst() approach here can result in guardband changes for DRRS. But I'm thinking that is fine since M/N changes will always happen on the legacy timing generator so guardband doesn't actually matter. May need to think about this a bit more though...
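As a worked example of the .16 fixed point factor (hypothetical numbers): with a 300000 kHz pipe clock, 2 pixels per clock and a 307200 kHz cdclk, min(0x10000, DIV_ROUND_UP_ULL(300000ULL << 16, 2 * 307200)) = 32000 = 0x7d00, i.e. ~0.49, so the prefill completes in roughly half the time the dotclock alone would suggest.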
v2: Use the worst case estimate always for now Reviewed-by: Uma Shankar #v1 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251014191808.12326-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 68 +++++++++++++++++++++++++++++- drivers/gpu/drm/i915/display/intel_cdclk.h | 3 ++ 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index f2e092f89ddd..10abc2521fab 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2806,16 +2806,20 @@ static int intel_cdclk_guardband(struct intel_display *display) return 90; } -static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state) +static int _intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state, int pixel_rate) { struct intel_display *display = to_intel_display(crtc_state); int ppc = intel_cdclk_ppc(display, crtc_state->double_wide); int guardband = intel_cdclk_guardband(display); - int pixel_rate = crtc_state->pixel_rate; return DIV_ROUND_UP(pixel_rate * 100, guardband * ppc); } +static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state) +{ + return _intel_pixel_rate_to_cdclk(crtc_state, crtc_state->pixel_rate); +} + static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); @@ -4056,3 +4060,63 @@ void intel_cdclk_read_hw(struct intel_display *display) cdclk_state->actual = display->cdclk.hw; cdclk_state->logical = display->cdclk.hw; } + +static int calc_cdclk(const struct intel_crtc_state *crtc_state, int min_cdclk) +{ + struct intel_display *display = to_intel_display(crtc_state); + + if (DISPLAY_VER(display) >= 10 || display->platform.broxton) { + return bxt_calc_cdclk(display, min_cdclk); + } else if (DISPLAY_VER(display) == 9) { + int vco; + + vco = display->cdclk.skl_preferred_vco_freq; + if (vco == 0) + vco = 8100000; + + return skl_calc_cdclk(min_cdclk, vco); + } else if (display->platform.broadwell) { + return bdw_calc_cdclk(min_cdclk); + } else if (display->platform.cherryview || display->platform.valleyview) { + return vlv_calc_cdclk(display, min_cdclk); + } else { + return display->cdclk.max_cdclk_freq; + } +} + +static unsigned int _intel_cdclk_prefill_adj(const struct intel_crtc_state *crtc_state, + int clock, int min_cdclk) +{ + struct intel_display *display = to_intel_display(crtc_state); + int ppc = intel_cdclk_ppc(display, crtc_state->double_wide); + int cdclk = calc_cdclk(crtc_state, min_cdclk); + + return min(0x10000, DIV_ROUND_UP_ULL((u64)clock << 16, ppc * cdclk)); +} + +unsigned int intel_cdclk_prefill_adjustment(const struct intel_crtc_state *crtc_state) +{ + /* FIXME use the actual min_cdclk for the pipe here */ + return intel_cdclk_prefill_adjustment_worst(crtc_state); +} + +unsigned int intel_cdclk_prefill_adjustment_worst(const struct intel_crtc_state *crtc_state) +{ + int clock = crtc_state->hw.pipe_mode.crtc_clock; + int min_cdclk; + + /* + * FIXME could perhaps consider a few more of the factors + * that go the per-crtc min_cdclk. Namely anything that + * only changes during full modesets. + * + * FIXME this assumes 1:1 scaling, but the other _worst() stuff + * assumes max downscaling, so the final result will be + * unrealistically bad. Figure out where the actual maximum value + * lies and use that to compute a more realistic worst case + * estimate... 
+ */ + min_cdclk = _intel_pixel_rate_to_cdclk(crtc_state, clock); + + return _intel_cdclk_prefill_adj(crtc_state, clock, min_cdclk); +} diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 72963f6f399a..8774a320670b 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -70,4 +70,7 @@ bool intel_cdclk_pmdemand_needs_update(struct intel_atomic_state *state); void intel_cdclk_force_min_cdclk(struct intel_cdclk_state *cdclk_state, int force_min_cdclk); void intel_cdclk_read_hw(struct intel_display *display); +unsigned int intel_cdclk_prefill_adjustment(const struct intel_crtc_state *crtc_state); +unsigned int intel_cdclk_prefill_adjustment_worst(const struct intel_crtc_state *crtc_state); + #endif /* __INTEL_CDCLK_H__ */ -- cgit v1.2.3 From 4157c75604c7adc87e1a5ae94074621bfad5a3fe Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 14 Oct 2025 22:18:03 +0300 Subject: drm/i915/cdclk: Add intel_cdclk_min_cdclk_for_prefill() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a helper to compute the min required cdclk frequency for a given guardband size. This could be used to bump up the cdclk in case the vblank is so small that the normally computed minimum cdclk results in too slow a prefill. Reviewed-by: Uma Shankar Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251014191808.12326-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 12 ++++++++++++ drivers/gpu/drm/i915/display/intel_cdclk.h | 3 +++ 2 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 10abc2521fab..bd45b719d4f8 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -4120,3 +4120,15 @@ unsigned int intel_cdclk_prefill_adjustment_worst(const struct intel_crtc_state return _intel_cdclk_prefill_adj(crtc_state, clock, min_cdclk); } + +int intel_cdclk_min_cdclk_for_prefill(const struct intel_crtc_state *crtc_state, + unsigned int prefill_lines_unadjusted, + unsigned int prefill_lines_available) +{ + struct intel_display *display = to_intel_display(crtc_state); + const struct drm_display_mode *pipe_mode = &crtc_state->hw.pipe_mode; + int ppc = intel_cdclk_ppc(display, crtc_state->double_wide); + + return DIV_ROUND_UP_ULL(mul_u32_u32(pipe_mode->crtc_clock, prefill_lines_unadjusted), + ppc * prefill_lines_available); +} diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 8774a320670b..1c1140b53b17 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -72,5 +72,8 @@ void intel_cdclk_read_hw(struct intel_display *display); unsigned int intel_cdclk_prefill_adjustment(const struct intel_crtc_state *crtc_state); unsigned int intel_cdclk_prefill_adjustment_worst(const struct intel_crtc_state *crtc_state); +int intel_cdclk_min_cdclk_for_prefill(const struct intel_crtc_state *crtc_state, + unsigned int prefill_lines_unadjusted, + unsigned int prefill_lines_available); #endif /* __INTEL_CDCLK_H__ */ -- cgit v1.2.3 From d7cc4b6bc951c52d6dfc0e3cbcc81e900100c2a6 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 14 Oct 2025 22:18:04 +0300 Subject: drm/i915/dsc: Add prefill helper for DSC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Add intel_vdsc_prefill_lines() which tells us how many extra lines of latency the DSC adds to the pipe prefill. We shouldn't need a "worst case" vs. "current case" split here as the DSC state should only change during full modesets. The returned numbers are in .16 binary fixed point. Reviewed-by: Uma Shankar #v1 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251014191808.12326-6-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_vdsc.c | 8 ++++++++ drivers/gpu/drm/i915/display/intel_vdsc.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 8e799e225af1..bca747e24a7f 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -1077,3 +1077,11 @@ int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state) return min_cdclk; } + +unsigned int intel_vdsc_prefill_lines(const struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->dsc.compression_enable) + return 0; + + return 0x18000; /* 1.5 */ +} diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.h b/drivers/gpu/drm/i915/display/intel_vdsc.h index 9e2812f99dd7..2139391ff881 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc.h @@ -32,5 +32,6 @@ void intel_dsc_dp_pps_write(struct intel_encoder *encoder, void intel_vdsc_state_dump(struct drm_printer *p, int indent, const struct intel_crtc_state *crtc_state); int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state); +unsigned int intel_vdsc_prefill_lines(const struct intel_crtc_state *crtc_state); #endif /* __INTEL_VDSC_H__ */ -- cgit v1.2.3 From b95e31cdfc87c780249ac3fcec2198ceb50d140d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 15 Oct 2025 15:56:45 +0300 Subject: drm/i915/scaler: Add scaler prefill helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add helpers to compute the required prefill line count and adjustment factors for the scalers. The "1st" variants hand out numbers for the first scaler stage in the pipeline (pipe scaler if no plane scalers are enabled, or the max from all the plane scalers). The "2nd" variants deal with the second scaler stage (pipe scaler when plane scaling is also enabled, otherwise there is no second stage). The _worst() variants give out worst case estimates, meant for guardband sizing. The other variants are meant for the actual vblank/guardband length check vs. prefill+pkgc/sagv latency. The returned numbers are in .16 binary fixed point. TODO: pretty rough, should check the actual scaler max scaling factors instead of just assuming 3x everywhere TODO: Reorder scaler assignment vs.
vblank length check to get the actual scale factors v2: Drop debugs v3: Ignore scale factors for the vblank length check for now since we don't have the scalers assigned yet Reviewed-by: Uma Shankar #v1 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251015125645.11230-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_scaler.c | 111 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/display/skl_scaler.h | 11 +++ 2 files changed, 122 insertions(+) diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index c6cccf170ff1..630fcf1cb9ac 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -968,3 +968,114 @@ void adl_scaler_ecc_unmask(const struct intel_crtc_state *crtc_state) 1); intel_de_write(display, XELPD_DISPLAY_ERR_FATAL_MASK, 0); } + +unsigned int skl_scaler_1st_prefill_adjustment(const struct intel_crtc_state *crtc_state) +{ + /* + * FIXME don't have scalers assigned yet + * so can't look up the scale factors + */ + return 0x10000; +} + +unsigned int skl_scaler_2nd_prefill_adjustment(const struct intel_crtc_state *crtc_state) +{ + /* + * FIXME don't have scalers assigned yet + * so can't look up the scale factors + */ + return 0x10000; +} + +unsigned int skl_scaler_1st_prefill_lines(const struct intel_crtc_state *crtc_state) +{ + const struct intel_crtc_scaler_state *scaler_state = + &crtc_state->scaler_state; + int num_scalers = hweight32(scaler_state->scaler_users); + + if (num_scalers > 0) + return 4 << 16; + + return 0; +} + +unsigned int skl_scaler_2nd_prefill_lines(const struct intel_crtc_state *crtc_state) +{ + const struct intel_crtc_scaler_state *scaler_state = + &crtc_state->scaler_state; + int num_scalers = hweight32(scaler_state->scaler_users); + + if (num_scalers > 1 && crtc_state->pch_pfit.enabled) + return 4 << 16; + + return 0; +} + +static unsigned int _skl_scaler_max_scale(const struct intel_crtc_state *crtc_state, + unsigned int max_scale) +{ + struct intel_display *display = to_intel_display(crtc_state); + + /* + * Downscaling requires increasing cdclk, so max scale + * factor is limited to the max_dotclock/dotclock ratio. 
+ * + * FIXME find out the max downscale factors properly + */ + return min(max_scale, DIV_ROUND_UP_ULL((u64)display->cdclk.max_dotclk_freq << 16, + crtc_state->hw.pipe_mode.crtc_clock)); +} + +static unsigned int skl_scaler_max_scale(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + unsigned int max_scale; + + if (crtc->num_scalers < 1) + return 0x10000; + + /* FIXME find out the max downscale factors properly */ + max_scale = 9 << 16; + + return _skl_scaler_max_scale(crtc_state, max_scale); +} + +unsigned int skl_scaler_1st_prefill_adjustment_worst(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + + if (crtc->num_scalers > 0) + return skl_scaler_max_scale(crtc_state); + else + return 0x10000; +} + +unsigned int skl_scaler_2nd_prefill_adjustment_worst(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + + if (crtc->num_scalers > 1) + return skl_scaler_max_scale(crtc_state); + else + return 0x10000; +} + +unsigned int skl_scaler_1st_prefill_lines_worst(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + + if (crtc->num_scalers > 0) + return 4 << 16; + else + return 0; +} + +unsigned int skl_scaler_2nd_prefill_lines_worst(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + + if (crtc->num_scalers > 1) + return 4 << 16; + else + return 0; +} diff --git a/drivers/gpu/drm/i915/display/skl_scaler.h b/drivers/gpu/drm/i915/display/skl_scaler.h index 12a19016c5f6..6fab40d2b4ee 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.h +++ b/drivers/gpu/drm/i915/display/skl_scaler.h @@ -45,4 +45,15 @@ skl_scaler_mode_valid(struct intel_display *display, void adl_scaler_ecc_mask(const struct intel_crtc_state *crtc_state); void adl_scaler_ecc_unmask(const struct intel_crtc_state *crtc_state); + +unsigned int skl_scaler_1st_prefill_adjustment_worst(const struct intel_crtc_state *crtc_state); +unsigned int skl_scaler_2nd_prefill_adjustment_worst(const struct intel_crtc_state *crtc_state); +unsigned int skl_scaler_1st_prefill_lines_worst(const struct intel_crtc_state *crtc_state); +unsigned int skl_scaler_2nd_prefill_lines_worst(const struct intel_crtc_state *crtc_state); + +unsigned int skl_scaler_1st_prefill_adjustment(const struct intel_crtc_state *crtc_state); +unsigned int skl_scaler_2nd_prefill_adjustment(const struct intel_crtc_state *crtc_state); +unsigned int skl_scaler_1st_prefill_lines(const struct intel_crtc_state *crtc_state); +unsigned int skl_scaler_2nd_prefill_lines(const struct intel_crtc_state *crtc_state); + #endif -- cgit v1.2.3 From 9704c1cbedfd05a6a29017d580a2819964420511 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 14 Oct 2025 22:18:06 +0300 Subject: drm/i915/wm: Add WM0 prefill helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add skl_wm0_prefill_lines() (based on the actual state) and skl_wm0_prefill_lines_worst() (worst case estimate) which tell us how many extra lines are needed in prefill for WM0. The returned numbers are in .16 binary fixed point. 
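[Editor's note: the ".16 binary fixed point" convention shared by these prefill patches is never spelled out, so here is a minimal sketch of it. This is not part of the patch series; FP16_ONE and the fp16_* names are illustrative only, but the rounding mirrors the DIV_ROUND_UP()/mul_u32_u32() arithmetic the patches themselves use.

#include <linux/math.h>
#include <linux/math64.h>

#define FP16_ONE 0x10000u /* 1.0 in .16 fixed point */

/* whole scanlines -> .16 scanlines */
static unsigned int fp16_from_int(unsigned int x)
{
	return x << 16;
}

/* apply a .16 adjustment factor to a .16 value, rounding up */
static unsigned int fp16_mul_round_up(unsigned int value, unsigned int factor)
{
	return DIV_ROUND_UP_ULL(mul_u32_u32(value, factor), FP16_ONE);
}

/* .16 scanlines -> whole scanlines, rounding up */
static unsigned int fp16_to_int_round_up(unsigned int x)
{
	return DIV_ROUND_UP(x, FP16_ONE);
}

For example, the 1.5 lines of DSC latency added earlier is encoded as 0x18000, and a 3x scale factor as 3 << 16.]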
TODO: skl_wm0_prefill_lines_worst() is a bit rough still v2: Drop all pre-icl FIXMEs since this only gets used for VRR guardband Reviewed-by: Uma Shankar #v1 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251014191808.12326-8-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_scaler.c | 32 +++++++++++++++- drivers/gpu/drm/i915/display/skl_scaler.h | 4 ++ drivers/gpu/drm/i915/display/skl_watermark.c | 57 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/display/skl_watermark.h | 3 ++ 4 files changed, 95 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 630fcf1cb9ac..d29efcbf2319 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -1026,7 +1026,37 @@ static unsigned int _skl_scaler_max_scale(const struct intel_crtc_state *crtc_st crtc_state->hw.pipe_mode.crtc_clock)); } -static unsigned int skl_scaler_max_scale(const struct intel_crtc_state *crtc_state) +unsigned int skl_scaler_max_total_scale(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + unsigned int max_scale; + + if (crtc->num_scalers < 1) + return 0x10000; + + /* FIXME find out the max downscale factors properly */ + max_scale = 9 << 16; + if (crtc->num_scalers > 1) + max_scale *= 9; + + return _skl_scaler_max_scale(crtc_state, max_scale); +} + +unsigned int skl_scaler_max_hscale(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + unsigned int max_scale; + + if (crtc->num_scalers < 1) + return 0x10000; + + /* FIXME find out the max downscale factors properly */ + max_scale = 3 << 16; + + return _skl_scaler_max_scale(crtc_state, max_scale); +} + +unsigned int skl_scaler_max_scale(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); unsigned int max_scale; diff --git a/drivers/gpu/drm/i915/display/skl_scaler.h b/drivers/gpu/drm/i915/display/skl_scaler.h index 6fab40d2b4ee..5deabca909e6 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.h +++ b/drivers/gpu/drm/i915/display/skl_scaler.h @@ -46,6 +46,10 @@ void adl_scaler_ecc_mask(const struct intel_crtc_state *crtc_state); void adl_scaler_ecc_unmask(const struct intel_crtc_state *crtc_state); +unsigned int skl_scaler_max_total_scale(const struct intel_crtc_state *crtc_state); +unsigned int skl_scaler_max_scale(const struct intel_crtc_state *crtc_state); +unsigned int skl_scaler_max_hscale(const struct intel_crtc_state *crtc_state); + unsigned int skl_scaler_1st_prefill_adjustment_worst(const struct intel_crtc_state *crtc_state); unsigned int skl_scaler_2nd_prefill_adjustment_worst(const struct intel_crtc_state *crtc_state); unsigned int skl_scaler_1st_prefill_lines_worst(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 06e5e6c77d2e..700707a91c70 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -30,6 +30,7 @@ #include "intel_plane.h" #include "intel_vblank.h" #include "intel_wm.h" +#include "skl_scaler.h" #include "skl_universal_plane_regs.h" #include "skl_watermark.h" #include "skl_watermark_regs.h" @@ -2245,6 +2246,57 @@ skl_is_vblank_too_short(const struct intel_crtc_state *crtc_state, intel_crtc_vblank_length(crtc_state); } +unsigned int 
skl_wm0_prefill_lines_worst(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_plane *plane = to_intel_plane(crtc_state->uapi.crtc->primary); + const struct drm_display_mode *pipe_mode = &crtc_state->hw.pipe_mode; + int ret, pixel_rate, width, level = 0; + const struct drm_format_info *info; + struct skl_wm_level wm = {}; + struct skl_wm_params wp; + unsigned int latency; + u64 modifier; + u32 format; + + /* only expected to be used for VRR guardband calculation */ + drm_WARN_ON(display->drm, !HAS_VRR(display)); + + /* FIXME rather ugly to pick this by hand but maybe no better way? */ + format = DRM_FORMAT_XBGR16161616F; + if (HAS_4TILE(display)) + modifier = I915_FORMAT_MOD_4_TILED; + else + modifier = I915_FORMAT_MOD_Y_TILED; + + info = drm_get_format_info(display->drm, format, modifier); + + pixel_rate = DIV_ROUND_UP_ULL(mul_u32_u32(skl_scaler_max_total_scale(crtc_state), + pipe_mode->crtc_clock), + 0x10000); + + /* FIXME limit to max plane width? */ + width = DIV_ROUND_UP_ULL(mul_u32_u32(skl_scaler_max_hscale(crtc_state), + pipe_mode->crtc_hdisplay), + 0x10000); + + /* FIXME is 90/270 rotation worse than 0/180? */ + ret = skl_compute_wm_params(crtc_state, width, info, + modifier, DRM_MODE_ROTATE_0, + pixel_rate, &wp, 0, 1); + drm_WARN_ON(display->drm, ret); + + latency = skl_wm_latency(display, level, &wp); + + skl_compute_plane_wm(crtc_state, plane, level, latency, &wp, &wm, &wm); + + /* FIXME is this sane? */ + if (wm.min_ddb_alloc == U16_MAX) + wm.lines = skl_wm_max_lines(display); + + return wm.lines << 16; +} + static int skl_max_wm0_lines(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); @@ -2261,6 +2313,11 @@ static int skl_max_wm0_lines(const struct intel_crtc_state *crtc_state) return wm0_lines; } +unsigned int skl_wm0_prefill_lines(const struct intel_crtc_state *crtc_state) +{ + return skl_max_wm0_lines(crtc_state) << 16; +} + /* * TODO: In case we use PKG_C_LATENCY to allow C-states when the delayed vblank * size is too small for the package C exit latency we need to notify PSR about diff --git a/drivers/gpu/drm/i915/display/skl_watermark.h b/drivers/gpu/drm/i915/display/skl_watermark.h index 62790816f030..6bc2ec9164bf 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.h +++ b/drivers/gpu/drm/i915/display/skl_watermark.h @@ -79,5 +79,8 @@ void intel_program_dpkgc_latency(struct intel_atomic_state *state); bool intel_dbuf_pmdemand_needs_update(struct intel_atomic_state *state); +unsigned int skl_wm0_prefill_lines_worst(const struct intel_crtc_state *crtc_state); +unsigned int skl_wm0_prefill_lines(const struct intel_crtc_state *crtc_state); + #endif /* __SKL_WATERMARK_H__ */ -- cgit v1.2.3 From ba470a99f228a5bbc39d53f52d52a98ba97eed03 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 14 Oct 2025 22:18:07 +0300 Subject: drm/i915/prefill: Introduce skl_prefill.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new helper thingy to deal with the pipe prefill latency. We get three potentially useful things out of this: - skl_prefill_vblank_too_short() used for checking the actual vblank/guardband length - skl_prefill_min_guardband() to calculate a suitable guardband size based on some worst case scaling/etc.
estimates - skl_prefill_min_cdclk() used to calculate a minimum cdclk frequency required for very small vblank lengths (in case the otherwise computed minimum cdclk doesn't result in fast enough prefill). The internal arithmetic is done in terms of scanlines using .16 binary fixed point representation. v2: Add the missing <<16 for framestart_delay Drop the cdclk_state stuff in favor of crtc_state->min_cdclk Rename to skl_prefill since this is skl+ only Use intel_crtc_vblank_length() instead of hand rolling it memset(0) in prefill_init() Reviewed-by: Uma Shankar #v1 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251014191808.12326-9-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/display/skl_prefill.c | 157 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/display/skl_prefill.h | 46 +++++++++ drivers/gpu/drm/xe/Makefile | 1 + 4 files changed, 205 insertions(+) create mode 100644 drivers/gpu/drm/i915/display/skl_prefill.c create mode 100644 drivers/gpu/drm/i915/display/skl_prefill.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 6d7800e25e55..aa2f0fd95117 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -302,6 +302,7 @@ i915-y += \ display/intel_vblank.o \ display/intel_vga.o \ display/intel_wm.o \ + display/skl_prefill.o \ display/skl_scaler.o \ display/skl_universal_plane.o \ display/skl_watermark.o \ diff --git a/drivers/gpu/drm/i915/display/skl_prefill.c b/drivers/gpu/drm/i915/display/skl_prefill.c new file mode 100644 index 000000000000..4707c2e7127a --- /dev/null +++ b/drivers/gpu/drm/i915/display/skl_prefill.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include + +#include + +#include "intel_cdclk.h" +#include "intel_display_core.h" +#include "intel_display_types.h" +#include "intel_vblank.h" +#include "intel_vdsc.h" +#include "skl_prefill.h" +#include "skl_scaler.h" +#include "skl_watermark.h" + +static unsigned int prefill_usecs_to_lines(const struct intel_crtc_state *crtc_state, + unsigned int usecs) +{ + const struct drm_display_mode *pipe_mode = &crtc_state->hw.pipe_mode; + + return DIV_ROUND_UP_ULL(mul_u32_u32(pipe_mode->crtc_clock, usecs << 16), + pipe_mode->crtc_htotal * 1000); +} + +static void prefill_init(struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state) +{ + memset(ctx, 0, sizeof(*ctx)); + + ctx->prefill.fixed = crtc_state->framestart_delay << 16; + + /* 20 usec for translation walks/etc.
*/ + ctx->prefill.fixed += prefill_usecs_to_lines(crtc_state, 20); + + ctx->prefill.dsc = intel_vdsc_prefill_lines(crtc_state); +} + +static void prefill_init_nocdclk_worst(struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state) +{ + prefill_init(ctx, crtc_state); + + ctx->prefill.wm0 = skl_wm0_prefill_lines_worst(crtc_state); + ctx->prefill.scaler_1st = skl_scaler_1st_prefill_lines_worst(crtc_state); + ctx->prefill.scaler_2nd = skl_scaler_2nd_prefill_lines_worst(crtc_state); + + ctx->adj.scaler_1st = skl_scaler_1st_prefill_adjustment_worst(crtc_state); + ctx->adj.scaler_2nd = skl_scaler_2nd_prefill_adjustment_worst(crtc_state); +} + +static void prefill_init_nocdclk(struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state) +{ + prefill_init(ctx, crtc_state); + + ctx->prefill.wm0 = skl_wm0_prefill_lines(crtc_state); + ctx->prefill.scaler_1st = skl_scaler_1st_prefill_lines(crtc_state); + ctx->prefill.scaler_2nd = skl_scaler_2nd_prefill_lines(crtc_state); + + ctx->adj.scaler_1st = skl_scaler_1st_prefill_adjustment(crtc_state); + ctx->adj.scaler_2nd = skl_scaler_2nd_prefill_adjustment(crtc_state); +} + +static unsigned int prefill_adjust(unsigned int value, unsigned int factor) +{ + return DIV_ROUND_UP_ULL(mul_u32_u32(value, factor), 0x10000); +} + +static unsigned int prefill_lines_nocdclk(const struct skl_prefill_ctx *ctx) +{ + unsigned int prefill = 0; + + prefill += ctx->prefill.dsc; + prefill = prefill_adjust(prefill, ctx->adj.scaler_2nd); + + prefill += ctx->prefill.scaler_2nd; + prefill = prefill_adjust(prefill, ctx->adj.scaler_1st); + + prefill += ctx->prefill.scaler_1st; + prefill += ctx->prefill.wm0; + + return prefill; +} + +static unsigned int prefill_lines_cdclk(const struct skl_prefill_ctx *ctx) +{ + return prefill_adjust(prefill_lines_nocdclk(ctx), ctx->adj.cdclk); +} + +static unsigned int prefill_lines_full(const struct skl_prefill_ctx *ctx) +{ + return ctx->prefill.fixed + prefill_lines_cdclk(ctx); +} + +void skl_prefill_init_worst(struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state) +{ + prefill_init_nocdclk_worst(ctx, crtc_state); + + ctx->adj.cdclk = intel_cdclk_prefill_adjustment_worst(crtc_state); + + ctx->prefill.full = prefill_lines_full(ctx); +} + +void skl_prefill_init(struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state) +{ + prefill_init_nocdclk(ctx, crtc_state); + + ctx->adj.cdclk = intel_cdclk_prefill_adjustment(crtc_state); + + ctx->prefill.full = prefill_lines_full(ctx); +} + +static unsigned int prefill_lines_with_latency(const struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state, + unsigned int latency_us) +{ + return ctx->prefill.full + prefill_usecs_to_lines(crtc_state, latency_us); +} + +int skl_prefill_min_guardband(const struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state, + unsigned int latency_us) +{ + unsigned int prefill = prefill_lines_with_latency(ctx, crtc_state, latency_us); + + return DIV_ROUND_UP(prefill, 0x10000); +} + +static unsigned int prefill_guardband(const struct intel_crtc_state *crtc_state) +{ + return intel_crtc_vblank_length(crtc_state) << 16; +} + +bool skl_prefill_vblank_too_short(const struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state, + unsigned int latency_us) +{ + unsigned int guardband = prefill_guardband(crtc_state); + unsigned int prefill = prefill_lines_with_latency(ctx, crtc_state, latency_us); + + return guardband < prefill; +} + +int skl_prefill_min_cdclk(const struct 
skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state) +{ + unsigned int prefill_unadjusted = prefill_lines_nocdclk(ctx); + unsigned int prefill_available = prefill_guardband(crtc_state) - ctx->prefill.fixed; + + return intel_cdclk_min_cdclk_for_prefill(crtc_state, prefill_unadjusted, + prefill_available); +} diff --git a/drivers/gpu/drm/i915/display/skl_prefill.h b/drivers/gpu/drm/i915/display/skl_prefill.h new file mode 100644 index 000000000000..028ee19b64ce --- /dev/null +++ b/drivers/gpu/drm/i915/display/skl_prefill.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef __SKL_PREFILL_H__ +#define __SKL_PREFILL_H__ + +#include + +struct intel_crtc_state; + +struct skl_prefill_ctx { + /* .16 scanlines */ + struct { + unsigned int fixed; + unsigned int wm0; + unsigned int scaler_1st; + unsigned int scaler_2nd; + unsigned int dsc; + unsigned int full; + } prefill; + + /* .16 adjustment factors */ + struct { + unsigned int cdclk; + unsigned int scaler_1st; + unsigned int scaler_2nd; + } adj; +}; + +void skl_prefill_init_worst(struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state); +void skl_prefill_init(struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state); + +bool skl_prefill_vblank_too_short(const struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state, + unsigned int latency_us); +int skl_prefill_min_guardband(const struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state, + unsigned int latency_us); +int skl_prefill_min_cdclk(const struct skl_prefill_ctx *ctx, + const struct intel_crtc_state *crtc_state); + +#endif /* __SKL_PREFILL_H__ */ diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index ac65722e5d38..815f0df2f83e 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -308,6 +308,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_vga.o \ i915-display/intel_vrr.o \ i915-display/intel_wm.o \ + i915-display/skl_prefill.o \ i915-display/skl_scaler.o \ i915-display/skl_universal_plane.o \ i915-display/skl_watermark.o -- cgit v1.2.3 From 08c54f3c83e531d13f165ea341a312a49a4eb267 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 14 Oct 2025 22:18:08 +0300 Subject: drm/i915/wm: Use skl_prefill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the current ad-hoc prefill calculations with skl_prefill. 
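[Editor's note: a minimal usage sketch, not part of the patch, of how the skl_prefill interface introduced above is meant to be consumed. The check_vblank_latency() wrapper is a hypothetical name; the two calls are the real interface from skl_prefill.h.

#include "skl_prefill.h"

static bool check_vblank_latency(const struct intel_crtc_state *crtc_state,
				 unsigned int latency_us)
{
	struct skl_prefill_ctx ctx;

	/* compute the prefill terms from the actual crtc state */
	skl_prefill_init(&ctx, crtc_state);

	/* true if prefill + latency does not fit in the vblank/guardband */
	return skl_prefill_vblank_too_short(&ctx, crtc_state, latency_us);
}

The diff below shows skl_wm_check_vblank() doing essentially this, probing successively smaller latencies to find the deepest usable watermark level.]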
v2: cdclk_state no longer needed Rename to skl_prefill Reviewed-by: Uma Shankar #v1 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251014191808.12326-10-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/skl_watermark.c | 126 +++++---------------------- 1 file changed, 20 insertions(+), 106 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 700707a91c70..256162da9afc 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -30,6 +30,7 @@ #include "intel_plane.h" #include "intel_vblank.h" #include "intel_wm.h" +#include "skl_prefill.h" #include "skl_scaler.h" #include "skl_universal_plane_regs.h" #include "skl_watermark.h" @@ -2147,105 +2148,6 @@ static int icl_build_plane_wm(struct intel_crtc_state *crtc_state, return 0; } -static int -cdclk_prefill_adjustment(const struct intel_crtc_state *crtc_state) -{ - struct intel_display *display = to_intel_display(crtc_state); - struct intel_atomic_state *state = - to_intel_atomic_state(crtc_state->uapi.state); - const struct intel_cdclk_state *cdclk_state; - - cdclk_state = intel_atomic_get_cdclk_state(state); - if (IS_ERR(cdclk_state)) { - drm_WARN_ON(display->drm, PTR_ERR(cdclk_state)); - return 1; - } - - return min(1, DIV_ROUND_UP(crtc_state->pixel_rate, - 2 * intel_cdclk_logical(cdclk_state))); -} - -static int -dsc_prefill_latency(const struct intel_crtc_state *crtc_state) -{ - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - const struct intel_crtc_scaler_state *scaler_state = - &crtc_state->scaler_state; - int linetime = DIV_ROUND_UP(1000 * crtc_state->hw.adjusted_mode.htotal, - crtc_state->hw.adjusted_mode.clock); - int num_scaler_users = hweight32(scaler_state->scaler_users); - int chroma_downscaling_factor = - crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 ? 2 : 1; - u32 dsc_prefill_latency = 0; - - if (!crtc_state->dsc.compression_enable || - !num_scaler_users || - num_scaler_users > crtc->num_scalers) - return dsc_prefill_latency; - - dsc_prefill_latency = DIV_ROUND_UP(15 * linetime * chroma_downscaling_factor, 10); - - for (int i = 0; i < num_scaler_users; i++) { - u64 hscale_k, vscale_k; - - hscale_k = max(1000, mul_u32_u32(scaler_state->scalers[i].hscale, 1000) >> 16); - vscale_k = max(1000, mul_u32_u32(scaler_state->scalers[i].vscale, 1000) >> 16); - dsc_prefill_latency = DIV_ROUND_UP_ULL(dsc_prefill_latency * hscale_k * vscale_k, - 1000000); - } - - dsc_prefill_latency *= cdclk_prefill_adjustment(crtc_state); - - return intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, dsc_prefill_latency); -} - -static int -scaler_prefill_latency(const struct intel_crtc_state *crtc_state) -{ - const struct intel_crtc_scaler_state *scaler_state = - &crtc_state->scaler_state; - int num_scaler_users = hweight32(scaler_state->scaler_users); - int scaler_prefill_latency = 0; - int linetime = DIV_ROUND_UP(1000 * crtc_state->hw.adjusted_mode.htotal, - crtc_state->hw.adjusted_mode.clock); - - if (!num_scaler_users) - return scaler_prefill_latency; - - scaler_prefill_latency = 4 * linetime; - - if (num_scaler_users > 1) { - u64 hscale_k = max(1000, mul_u32_u32(scaler_state->scalers[0].hscale, 1000) >> 16); - u64 vscale_k = max(1000, mul_u32_u32(scaler_state->scalers[0].vscale, 1000) >> 16); - int chroma_downscaling_factor = - crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 ? 
2 : 1; - int latency; - - latency = DIV_ROUND_UP_ULL((4 * linetime * hscale_k * vscale_k * - chroma_downscaling_factor), 1000000); - scaler_prefill_latency += latency; - } - - scaler_prefill_latency *= cdclk_prefill_adjustment(crtc_state); - - return intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, scaler_prefill_latency); -} - -static bool -skl_is_vblank_too_short(const struct intel_crtc_state *crtc_state, - int wm0_lines, int latency) -{ - const struct drm_display_mode *adjusted_mode = - &crtc_state->hw.adjusted_mode; - - return crtc_state->framestart_delay + - intel_usecs_to_scanlines(adjusted_mode, latency) + - scaler_prefill_latency(crtc_state) + - dsc_prefill_latency(crtc_state) + - wm0_lines > - intel_crtc_vblank_length(crtc_state); -} - unsigned int skl_wm0_prefill_lines_worst(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); @@ -2324,9 +2226,10 @@ unsigned int skl_wm0_prefill_lines(const struct intel_crtc_state *crtc_state) * the scenario to apply Wa_16025596647. */ static int skl_max_wm_level_for_vblank(struct intel_crtc_state *crtc_state, - int wm0_lines) + const struct skl_prefill_ctx *ctx) { struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); int level; for (level = display->wm.num_levels - 1; level >= 0; level--) { @@ -2341,10 +2244,13 @@ static int skl_max_wm_level_for_vblank(struct intel_crtc_state *crtc_state, if (level == 0) latency = 0; - if (!skl_is_vblank_too_short(crtc_state, wm0_lines, latency)) + if (!skl_prefill_vblank_too_short(ctx, crtc_state, latency)) return level; } + drm_dbg_kms(display->drm, "[CRTC:%d:%s] Not enough time in vblank for prefill\n", + crtc->base.base.id, crtc->base.name); + return -EINVAL; } @@ -2352,14 +2258,15 @@ static int skl_wm_check_vblank(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - int wm0_lines, level; + struct skl_prefill_ctx ctx; + int level; if (!crtc_state->hw.active) return 0; - wm0_lines = skl_max_wm0_lines(crtc_state); + skl_prefill_init(&ctx, crtc_state); - level = skl_max_wm_level_for_vblank(crtc_state, wm0_lines); + level = skl_max_wm_level_for_vblank(crtc_state, &ctx); if (level < 0) return level; @@ -2369,6 +2276,13 @@ static int skl_wm_check_vblank(struct intel_crtc_state *crtc_state) */ crtc_state->wm_level_disabled = level < display->wm.num_levels - 1; + /* + * TODO: assert that we are in fact using the maximum guardband + * if we end up disabling any WM levels here. Otherwise we clearly + * failed in using a realistic worst case prefill estimate when + * determining the guardband size. 
+ */ + for (level++; level < display->wm.num_levels; level++) { enum plane_id plane_id; @@ -2387,8 +2301,8 @@ static int skl_wm_check_vblank(struct intel_crtc_state *crtc_state) if (DISPLAY_VER(display) >= 12 && display->sagv.block_time_us && - skl_is_vblank_too_short(crtc_state, wm0_lines, - display->sagv.block_time_us)) { + skl_prefill_vblank_too_short(&ctx, crtc_state, + display->sagv.block_time_us)) { enum plane_id plane_id; for_each_plane_id_on_crtc(crtc, plane_id) { -- cgit v1.2.3 From 27e21516914dc130a79aa895a5a26e18f0213a5a Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Thu, 9 Oct 2025 14:52:08 -0700 Subject: drm/i915: move and rename reg_in_range_table reg_in_range_table is a useful function that is used in multiple places, and will be needed for WA_BB implementation later. Let's move this function and i915_range struct to its own file, as we are trying to move away from i915_utils files. v2: move functions to their own file v3: use correct naming convention Suggested-by: Rodrigo Vivi Signed-off-by: Matt Atwood Link: https://lore.kernel.org/r/20251009215210.41000-1-matthew.s.atwood@intel.com Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 ++-- drivers/gpu/drm/i915/i915_mmio_range.c | 18 +++++++ drivers/gpu/drm/i915/i915_mmio_range.h | 19 ++++++++ drivers/gpu/drm/i915/i915_perf.c | 67 +++++++++++---------------- drivers/gpu/drm/i915/intel_uncore.c | 15 +++--- drivers/gpu/drm/i915/intel_uncore.h | 8 +--- drivers/gpu/drm/i915/selftests/intel_uncore.c | 4 +- 8 files changed, 82 insertions(+), 59 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_mmio_range.c create mode 100644 drivers/gpu/drm/i915/i915_mmio_range.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index aa2f0fd95117..14ed66bfed8e 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -31,6 +31,7 @@ i915-y += \ i915_ioctl.o \ i915_irq.o \ i915_mitigations.o \ + i915_mmio_range.o \ i915_module.o \ i915_params.o \ i915_pci.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 7d486dfa2fc1..ece88c612e27 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -5,6 +5,7 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "i915_mmio_range.h" #include "intel_context.h" #include "intel_engine_pm.h" #include "intel_engine_regs.h" @@ -2923,7 +2924,7 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine) wa_list_apply(&engine->wa_list); } -static const struct i915_range mcr_ranges_gen8[] = { +static const struct i915_mmio_range mcr_ranges_gen8[] = { { .start = 0x5500, .end = 0x55ff }, { .start = 0x7000, .end = 0x7fff }, { .start = 0x9400, .end = 0x97ff }, @@ -2932,7 +2933,7 @@ static const struct i915_range mcr_ranges_gen8[] = { {}, }; -static const struct i915_range mcr_ranges_gen12[] = { +static const struct i915_mmio_range mcr_ranges_gen12[] = { { .start = 0x8150, .end = 0x815f }, { .start = 0x9520, .end = 0x955f }, { .start = 0xb100, .end = 0xb3ff }, @@ -2941,7 +2942,7 @@ static const struct i915_range mcr_ranges_gen12[] = { {}, }; -static const struct i915_range mcr_ranges_xehp[] = { +static const struct i915_mmio_range mcr_ranges_xehp[] = { { .start = 0x4000, .end = 0x4aff }, { .start = 0x5200, .end = 0x52ff }, { .start = 0x5400, .end = 0x7fff }, @@ -2960,7 +2961,7 @@ static const struct i915_range mcr_ranges_xehp[] = { static bool 
mcr_range(struct drm_i915_private *i915, u32 offset) { - const struct i915_range *mcr_ranges; + const struct i915_mmio_range *mcr_ranges; int i; if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) diff --git a/drivers/gpu/drm/i915/i915_mmio_range.c b/drivers/gpu/drm/i915/i915_mmio_range.c new file mode 100644 index 000000000000..724041e81aa7 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mmio_range.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "i915_mmio_range.h" + +bool i915_mmio_range_table_contains(u32 addr, const struct i915_mmio_range *table) +{ + while (table->start || table->end) { + if (addr >= table->start && addr <= table->end) + return true; + + table++; + } + + return false; +} diff --git a/drivers/gpu/drm/i915/i915_mmio_range.h b/drivers/gpu/drm/i915/i915_mmio_range.h new file mode 100644 index 000000000000..f1c7086d3e3c --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mmio_range.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef __I915_MMIO_RANGE_H__ +#define __I915_MMIO_RANGE_H__ + +#include + +/* Other register ranges (e.g., shadow tables, MCR tables, etc.) */ +struct i915_mmio_range { + u32 start; + u32 end; +}; + +bool i915_mmio_range_table_contains(u32 addr, const struct i915_mmio_range *table); + +#endif /* __I915_MMIO_RANGE_H__ */ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 1658f1246c6f..0b9d9f3f7813 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -219,6 +219,7 @@ #include "i915_perf.h" #include "i915_perf_oa_regs.h" #include "i915_reg.h" +#include "i915_mmio_range.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -4320,29 +4321,17 @@ static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr) return false; } -static bool reg_in_range_table(u32 addr, const struct i915_range *table) -{ - while (table->start || table->end) { - if (addr >= table->start && addr <= table->end) - return true; - - table++; - } - - return false; -} - #define REG_EQUAL(addr, mmio) \ ((addr) == i915_mmio_reg_offset(mmio)) -static const struct i915_range gen7_oa_b_counters[] = { +static const struct i915_mmio_range gen7_oa_b_counters[] = { { .start = 0x2710, .end = 0x272c }, /* OASTARTTRIG[1-8] */ { .start = 0x2740, .end = 0x275c }, /* OAREPORTTRIG[1-8] */ { .start = 0x2770, .end = 0x27ac }, /* OACEC[0-7][0-1] */ {} }; -static const struct i915_range gen12_oa_b_counters[] = { +static const struct i915_mmio_range gen12_oa_b_counters[] = { { .start = 0x2b2c, .end = 0x2b2c }, /* GEN12_OAG_OA_PESS */ { .start = 0xd900, .end = 0xd91c }, /* GEN12_OAG_OASTARTTRIG[1-8] */ { .start = 0xd920, .end = 0xd93c }, /* GEN12_OAG_OAREPORTTRIG1[1-8] */ @@ -4353,7 +4342,7 @@ static const struct i915_range gen12_oa_b_counters[] = { {} }; -static const struct i915_range mtl_oam_b_counters[] = { +static const struct i915_mmio_range mtl_oam_b_counters[] = { { .start = 0x393000, .end = 0x39301c }, /* GEN12_OAM_STARTTRIG1[1-8] */ { .start = 0x393020, .end = 0x39303c }, /* GEN12_OAM_REPORTTRIG1[1-8] */ { .start = 0x393040, .end = 0x39307c }, /* GEN12_OAM_CEC[0-7][0-1] */ @@ -4361,43 +4350,43 @@ static const struct i915_range mtl_oam_b_counters[] = { {} }; -static const struct i915_range xehp_oa_b_counters[] = { +static const struct i915_mmio_range xehp_oa_b_counters[] = { { .start = 0xdc48, .end = 0xdc48 
}, /* OAA_ENABLE_REG */ { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ {} }; -static const struct i915_range gen7_oa_mux_regs[] = { +static const struct i915_mmio_range gen7_oa_mux_regs[] = { { .start = 0x91b8, .end = 0x91cc }, /* OA_PERFCNT[1-2], OA_PERFMATRIX */ { .start = 0x9800, .end = 0x9888 }, /* MICRO_BP0_0 - NOA_WRITE */ { .start = 0xe180, .end = 0xe180 }, /* HALF_SLICE_CHICKEN2 */ {} }; -static const struct i915_range hsw_oa_mux_regs[] = { +static const struct i915_mmio_range hsw_oa_mux_regs[] = { { .start = 0x09e80, .end = 0x09ea4 }, /* HSW_MBVID2_NOA[0-9] */ { .start = 0x09ec0, .end = 0x09ec0 }, /* HSW_MBVID2_MISR0 */ { .start = 0x25100, .end = 0x2ff90 }, {} }; -static const struct i915_range chv_oa_mux_regs[] = { +static const struct i915_mmio_range chv_oa_mux_regs[] = { { .start = 0x182300, .end = 0x1823a4 }, {} }; -static const struct i915_range gen8_oa_mux_regs[] = { +static const struct i915_mmio_range gen8_oa_mux_regs[] = { { .start = 0x0d00, .end = 0x0d2c }, /* RPM_CONFIG[0-1], NOA_CONFIG[0-8] */ { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */ {} }; -static const struct i915_range gen11_oa_mux_regs[] = { +static const struct i915_mmio_range gen11_oa_mux_regs[] = { { .start = 0x91c8, .end = 0x91dc }, /* OA_PERFCNT[3-4] */ {} }; -static const struct i915_range gen12_oa_mux_regs[] = { +static const struct i915_mmio_range gen12_oa_mux_regs[] = { { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ @@ -4410,7 +4399,7 @@ static const struct i915_range gen12_oa_mux_regs[] = { * Ref: 14010536224: * 0x20cc is repurposed on MTL, so use a separate array for MTL. */ -static const struct i915_range mtl_oa_mux_regs[] = { +static const struct i915_mmio_range mtl_oa_mux_regs[] = { { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ @@ -4421,61 +4410,61 @@ static const struct i915_range mtl_oa_mux_regs[] = { static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { - return reg_in_range_table(addr, gen7_oa_b_counters); + return i915_mmio_range_table_contains(addr, gen7_oa_b_counters); } static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return reg_in_range_table(addr, gen7_oa_mux_regs) || - reg_in_range_table(addr, gen8_oa_mux_regs); + return i915_mmio_range_table_contains(addr, gen7_oa_mux_regs) || + i915_mmio_range_table_contains(addr, gen8_oa_mux_regs); } static bool gen11_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return reg_in_range_table(addr, gen7_oa_mux_regs) || - reg_in_range_table(addr, gen8_oa_mux_regs) || - reg_in_range_table(addr, gen11_oa_mux_regs); + return i915_mmio_range_table_contains(addr, gen7_oa_mux_regs) || + i915_mmio_range_table_contains(addr, gen8_oa_mux_regs) || + i915_mmio_range_table_contains(addr, gen11_oa_mux_regs); } static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return reg_in_range_table(addr, gen7_oa_mux_regs) || - reg_in_range_table(addr, hsw_oa_mux_regs); + return i915_mmio_range_table_contains(addr, gen7_oa_mux_regs) || + i915_mmio_range_table_contains(addr, hsw_oa_mux_regs); } static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return reg_in_range_table(addr, gen7_oa_mux_regs) || - reg_in_range_table(addr, chv_oa_mux_regs); + return i915_mmio_range_table_contains(addr, 
gen7_oa_mux_regs) || + i915_mmio_range_table_contains(addr, chv_oa_mux_regs); } static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { - return reg_in_range_table(addr, gen12_oa_b_counters); + return i915_mmio_range_table_contains(addr, gen12_oa_b_counters); } static bool mtl_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 addr) { if (HAS_OAM(perf->i915) && GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70)) - return reg_in_range_table(addr, mtl_oam_b_counters); + return i915_mmio_range_table_contains(addr, mtl_oam_b_counters); return false; } static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { - return reg_in_range_table(addr, xehp_oa_b_counters) || - reg_in_range_table(addr, gen12_oa_b_counters) || + return i915_mmio_range_table_contains(addr, xehp_oa_b_counters) || + i915_mmio_range_table_contains(addr, gen12_oa_b_counters) || mtl_is_valid_oam_b_counter_addr(perf, addr); } static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70)) - return reg_in_range_table(addr, mtl_oa_mux_regs); + return i915_mmio_range_table_contains(addr, mtl_oa_mux_regs); else - return reg_in_range_table(addr, gen12_oa_mux_regs); + return i915_mmio_range_table_contains(addr, gen12_oa_mux_regs); } static u32 mask_reg_value(u32 reg, u32 val) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 8cb59f8d1f4c..aba90b80854f 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -35,6 +35,7 @@ #include "i915_reg.h" #include "i915_vgpu.h" #include "i915_wait_util.h" +#include "i915_mmio_range.h" #include "intel_uncore_trace.h" #define FORCEWAKE_ACK_TIMEOUT_MS 50 @@ -999,7 +1000,7 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset) * scanned for obvious mistakes or typos by the selftests. 
*/ -static const struct i915_range gen8_shadowed_regs[] = { +static const struct i915_mmio_range gen8_shadowed_regs[] = { { .start = 0x2030, .end = 0x2030 }, { .start = 0xA008, .end = 0xA00C }, { .start = 0x12030, .end = 0x12030 }, @@ -1007,7 +1008,7 @@ static const struct i915_range gen8_shadowed_regs[] = { { .start = 0x22030, .end = 0x22030 }, }; -static const struct i915_range gen11_shadowed_regs[] = { +static const struct i915_mmio_range gen11_shadowed_regs[] = { { .start = 0x2030, .end = 0x2030 }, { .start = 0x2550, .end = 0x2550 }, { .start = 0xA008, .end = 0xA00C }, @@ -1034,7 +1035,7 @@ static const struct i915_range gen11_shadowed_regs[] = { { .start = 0x1D8510, .end = 0x1D8550 }, }; -static const struct i915_range gen12_shadowed_regs[] = { +static const struct i915_mmio_range gen12_shadowed_regs[] = { { .start = 0x2030, .end = 0x2030 }, { .start = 0x2510, .end = 0x2550 }, { .start = 0xA008, .end = 0xA00C }, @@ -1078,7 +1079,7 @@ static const struct i915_range gen12_shadowed_regs[] = { { .start = 0x1F8510, .end = 0x1F8550 }, }; -static const struct i915_range dg2_shadowed_regs[] = { +static const struct i915_mmio_range dg2_shadowed_regs[] = { { .start = 0x2030, .end = 0x2030 }, { .start = 0x2510, .end = 0x2550 }, { .start = 0xA008, .end = 0xA00C }, @@ -1117,7 +1118,7 @@ static const struct i915_range dg2_shadowed_regs[] = { { .start = 0x1F8510, .end = 0x1F8550 }, }; -static const struct i915_range mtl_shadowed_regs[] = { +static const struct i915_mmio_range mtl_shadowed_regs[] = { { .start = 0x2030, .end = 0x2030 }, { .start = 0x2510, .end = 0x2550 }, { .start = 0xA008, .end = 0xA00C }, @@ -1135,7 +1136,7 @@ static const struct i915_range mtl_shadowed_regs[] = { { .start = 0x22510, .end = 0x22550 }, }; -static const struct i915_range xelpmp_shadowed_regs[] = { +static const struct i915_mmio_range xelpmp_shadowed_regs[] = { { .start = 0x1C0030, .end = 0x1C0030 }, { .start = 0x1C0510, .end = 0x1C0550 }, { .start = 0x1C8030, .end = 0x1C8030 }, @@ -1156,7 +1157,7 @@ static const struct i915_range xelpmp_shadowed_regs[] = { { .start = 0x38CFD4, .end = 0x38CFDC }, }; -static int mmio_range_cmp(u32 key, const struct i915_range *range) +static int mmio_range_cmp(u32 key, const struct i915_mmio_range *range) { if (key < range->start) return -1; diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 6048b99b96cb..fafc2ca9a237 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -123,12 +123,6 @@ struct intel_forcewake_range { enum forcewake_domains domains; }; -/* Other register ranges (e.g., shadow tables, MCR tables, etc.) */ -struct i915_range { - u32 start; - u32 end; -}; - struct intel_uncore { void __iomem *regs; @@ -162,7 +156,7 @@ struct intel_uncore { * Shadowed registers are special cases where we can safely write * to the register *without* grabbing forcewake. 
*/ - const struct i915_range *shadowed_reg_table; + const struct i915_mmio_range *shadowed_reg_table; unsigned int shadowed_reg_table_entries; struct notifier_block pmic_bus_access_nb; diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 58bcbdcef563..507bf42a1aaf 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -64,7 +64,7 @@ static int intel_fw_table_check(const struct intel_forcewake_range *ranges, static int intel_shadow_table_check(void) { struct { - const struct i915_range *regs; + const struct i915_mmio_range *regs; unsigned int size; } range_lists[] = { { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, @@ -74,7 +74,7 @@ static int intel_shadow_table_check(void) { mtl_shadowed_regs, ARRAY_SIZE(mtl_shadowed_regs) }, { xelpmp_shadowed_regs, ARRAY_SIZE(xelpmp_shadowed_regs) }, }; - const struct i915_range *range; + const struct i915_mmio_range *range; unsigned int i, j; s32 prev; -- cgit v1.2.3 From ac930bab1c8985da7edeb4d5a266ab4e0cae2df0 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 13 Oct 2025 23:12:28 +0300 Subject: drm/i915/bw: Untangle dbuf bw from the sagv/mem bw stuff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently intel_bw.c contains basically three completely independent parts: - SAGV/memory bandwidth handling - DBuf bandwidth handling - "Maximum pipe read bandwidth" calculation, which is some kind of internal per-pipe bandwidth limit. Carve out the DBuf bandwidth handling into a separate file since there is no actual dependency between it and the rest of intel_bw.c. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251013201236.30084-2-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/display/intel_bw.c | 191 ------------- drivers/gpu/drm/i915/display/intel_bw.h | 4 - drivers/gpu/drm/i915/display/intel_cdclk.c | 32 +- drivers/gpu/drm/i915/display/intel_cdclk.h | 7 +- drivers/gpu/drm/i915/display/intel_dbuf_bw.c | 295 +++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dbuf_bw.h | 37 +++ drivers/gpu/drm/i915/display/intel_display_core.h | 4 + .../gpu/drm/i915/display/intel_display_driver.c | 5 + drivers/gpu/drm/i915/display/intel_modeset_setup.c | 3 + drivers/gpu/drm/xe/Makefile | 1 + 11 files changed, 363 insertions(+), 217 deletions(-) create mode 100644 drivers/gpu/drm/i915/display/intel_dbuf_bw.c create mode 100644 drivers/gpu/drm/i915/display/intel_dbuf_bw.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 14ed66bfed8e..47bac9b2c611 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -242,6 +242,7 @@ i915-y += \ display/intel_crtc.o \ display/intel_crtc_state_dump.o \ display/intel_cursor.o \ + display/intel_dbuf_bw.o \ display/intel_display.o \ display/intel_display_conversion.o \ display/intel_display_driver.o \ diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index b53bcb693e79..a4d16711d336 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -3,16 +3,12 @@ * Copyright © 2019 Intel Corporation */ -#include - #include "soc/intel_dram.h" #include "i915_drv.h" #include "i915_reg.h" #include "i915_utils.h" -#include "intel_atomic.h" #include "intel_bw.h" -#include "intel_cdclk.h" #include "intel_crtc.h" #include
"intel_display_core.h" #include "intel_display_regs.h" @@ -22,14 +18,8 @@ #include "intel_uncore.h" #include "skl_watermark.h" -struct intel_dbuf_bw { - unsigned int max_bw[I915_MAX_DBUF_SLICES]; - u8 active_planes[I915_MAX_DBUF_SLICES]; -}; - struct intel_bw_state { struct intel_global_state base; - struct intel_dbuf_bw dbuf_bw[I915_MAX_PIPES]; /* * Contains a bit mask, used to determine, whether correspondent @@ -1264,184 +1254,6 @@ static int intel_bw_check_qgv_points(struct intel_display *display, old_bw_state, new_bw_state); } -static bool intel_dbuf_bw_changed(struct intel_display *display, - const struct intel_dbuf_bw *old_dbuf_bw, - const struct intel_dbuf_bw *new_dbuf_bw) -{ - enum dbuf_slice slice; - - for_each_dbuf_slice(display, slice) { - if (old_dbuf_bw->max_bw[slice] != new_dbuf_bw->max_bw[slice] || - old_dbuf_bw->active_planes[slice] != new_dbuf_bw->active_planes[slice]) - return true; - } - - return false; -} - -static bool intel_bw_state_changed(struct intel_display *display, - const struct intel_bw_state *old_bw_state, - const struct intel_bw_state *new_bw_state) -{ - enum pipe pipe; - - for_each_pipe(display, pipe) { - const struct intel_dbuf_bw *old_dbuf_bw = - &old_bw_state->dbuf_bw[pipe]; - const struct intel_dbuf_bw *new_dbuf_bw = - &new_bw_state->dbuf_bw[pipe]; - - if (intel_dbuf_bw_changed(display, old_dbuf_bw, new_dbuf_bw)) - return true; - } - - return false; -} - -static void skl_plane_calc_dbuf_bw(struct intel_dbuf_bw *dbuf_bw, - struct intel_crtc *crtc, - enum plane_id plane_id, - const struct skl_ddb_entry *ddb, - unsigned int data_rate) -{ - struct intel_display *display = to_intel_display(crtc); - unsigned int dbuf_mask = skl_ddb_dbuf_slice_mask(display, ddb); - enum dbuf_slice slice; - - /* - * The arbiter can only really guarantee an - * equal share of the total bw to each plane. - */ - for_each_dbuf_slice_in_mask(display, slice, dbuf_mask) { - dbuf_bw->max_bw[slice] = max(dbuf_bw->max_bw[slice], data_rate); - dbuf_bw->active_planes[slice] |= BIT(plane_id); - } -} - -static void skl_crtc_calc_dbuf_bw(struct intel_dbuf_bw *dbuf_bw, - const struct intel_crtc_state *crtc_state) -{ - struct intel_display *display = to_intel_display(crtc_state); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - enum plane_id plane_id; - - memset(dbuf_bw, 0, sizeof(*dbuf_bw)); - - if (!crtc_state->hw.active) - return; - - for_each_plane_id_on_crtc(crtc, plane_id) { - /* - * We assume cursors are small enough - * to not cause bandwidth problems. - */ - if (plane_id == PLANE_CURSOR) - continue; - - skl_plane_calc_dbuf_bw(dbuf_bw, crtc, plane_id, - &crtc_state->wm.skl.plane_ddb[plane_id], - crtc_state->data_rate[plane_id]); - - if (DISPLAY_VER(display) < 11) - skl_plane_calc_dbuf_bw(dbuf_bw, crtc, plane_id, - &crtc_state->wm.skl.plane_ddb_y[plane_id], - crtc_state->data_rate[plane_id]); - } -} - -/* "Maximum Data Buffer Bandwidth" */ -static int -intel_bw_dbuf_min_cdclk(struct intel_display *display, - const struct intel_bw_state *bw_state) -{ - unsigned int total_max_bw = 0; - enum dbuf_slice slice; - - for_each_dbuf_slice(display, slice) { - int num_active_planes = 0; - unsigned int max_bw = 0; - enum pipe pipe; - - /* - * The arbiter can only really guarantee an - * equal share of the total bw to each plane. 
- */ - for_each_pipe(display, pipe) { - const struct intel_dbuf_bw *dbuf_bw = &bw_state->dbuf_bw[pipe]; - - max_bw = max(dbuf_bw->max_bw[slice], max_bw); - num_active_planes += hweight8(dbuf_bw->active_planes[slice]); - } - max_bw *= num_active_planes; - - total_max_bw = max(total_max_bw, max_bw); - } - - return DIV_ROUND_UP(total_max_bw, 64); -} - -int intel_bw_min_cdclk(struct intel_display *display, - const struct intel_bw_state *bw_state) -{ - int min_cdclk; - - min_cdclk = intel_bw_dbuf_min_cdclk(display, bw_state); - - return min_cdclk; -} - -int intel_bw_calc_min_cdclk(struct intel_atomic_state *state, - bool *need_cdclk_calc) -{ - struct intel_display *display = to_intel_display(state); - struct intel_bw_state *new_bw_state = NULL; - const struct intel_bw_state *old_bw_state = NULL; - const struct intel_crtc_state *old_crtc_state; - const struct intel_crtc_state *new_crtc_state; - struct intel_crtc *crtc; - int ret, i; - - if (DISPLAY_VER(display) < 9) - return 0; - - for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, - new_crtc_state, i) { - struct intel_dbuf_bw old_dbuf_bw, new_dbuf_bw; - - skl_crtc_calc_dbuf_bw(&old_dbuf_bw, old_crtc_state); - skl_crtc_calc_dbuf_bw(&new_dbuf_bw, new_crtc_state); - - if (!intel_dbuf_bw_changed(display, &old_dbuf_bw, &new_dbuf_bw)) - continue; - - new_bw_state = intel_atomic_get_bw_state(state); - if (IS_ERR(new_bw_state)) - return PTR_ERR(new_bw_state); - - old_bw_state = intel_atomic_get_old_bw_state(state); - - new_bw_state->dbuf_bw[crtc->pipe] = new_dbuf_bw; - } - - if (!old_bw_state) - return 0; - - if (intel_bw_state_changed(display, old_bw_state, new_bw_state)) { - int ret = intel_atomic_lock_global_state(&new_bw_state->base); - if (ret) - return ret; - } - - ret = intel_cdclk_update_bw_min_cdclk(state, - intel_bw_min_cdclk(display, old_bw_state), - intel_bw_min_cdclk(display, new_bw_state), - need_cdclk_calc); - if (ret) - return ret; - - return 0; -} - static int intel_bw_check_data_rate(struct intel_atomic_state *state, bool *changed) { struct intel_display *display = to_intel_display(state); @@ -1647,8 +1459,6 @@ void intel_bw_update_hw_state(struct intel_display *display) if (DISPLAY_VER(display) >= 11) intel_bw_crtc_update(bw_state, crtc_state); - skl_crtc_calc_dbuf_bw(&bw_state->dbuf_bw[pipe], crtc_state); - /* initially SAGV has been forced off */ bw_state->pipe_sagv_reject |= BIT(pipe); } @@ -1666,7 +1476,6 @@ void intel_bw_crtc_disable_noatomic(struct intel_crtc *crtc) bw_state->data_rate[pipe] = 0; bw_state->num_active_planes[pipe] = 0; - memset(&bw_state->dbuf_bw[pipe], 0, sizeof(bw_state->dbuf_bw[pipe])); } static struct intel_global_state * diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index 4bb3a637b295..051e163f2f15 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -30,10 +30,6 @@ void intel_bw_init_hw(struct intel_display *display); int intel_bw_init(struct intel_display *display); int intel_bw_atomic_check(struct intel_atomic_state *state); int intel_bw_crtc_min_cdclk(const struct intel_crtc_state *crtc_state); -int intel_bw_calc_min_cdclk(struct intel_atomic_state *state, - bool *need_cdclk_calc); -int intel_bw_min_cdclk(struct intel_display *display, - const struct intel_bw_state *bw_state); void intel_bw_update_hw_state(struct intel_display *display); void intel_bw_crtc_disable_noatomic(struct intel_crtc *crtc); diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c 
b/drivers/gpu/drm/i915/display/intel_cdclk.c index bd45b719d4f8..c41d1a45a798 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -38,6 +38,7 @@ #include "intel_bw.h" #include "intel_cdclk.h" #include "intel_crtc.h" +#include "intel_dbuf_bw.h" #include "intel_de.h" #include "intel_display_regs.h" #include "intel_display_types.h" @@ -133,8 +134,8 @@ struct intel_cdclk_state { */ struct intel_cdclk_config actual; - /* minimum acceptable cdclk to satisfy bandwidth requirements */ - int bw_min_cdclk; + /* minimum acceptable cdclk to satisfy DBUF bandwidth requirements */ + int dbuf_bw_min_cdclk; /* minimum acceptable cdclk for each pipe */ int min_cdclk[I915_MAX_PIPES]; /* minimum acceptable voltage level for each pipe */ @@ -2895,9 +2896,9 @@ static int intel_cdclk_update_crtc_min_cdclk(struct intel_atomic_state *state, return 0; } -int intel_cdclk_update_bw_min_cdclk(struct intel_atomic_state *state, - int old_min_cdclk, int new_min_cdclk, - bool *need_cdclk_calc) +int intel_cdclk_update_dbuf_bw_min_cdclk(struct intel_atomic_state *state, + int old_min_cdclk, int new_min_cdclk, + bool *need_cdclk_calc) { struct intel_display *display = to_intel_display(state); struct intel_cdclk_state *cdclk_state; @@ -2914,7 +2915,7 @@ int intel_cdclk_update_bw_min_cdclk(struct intel_atomic_state *state, if (IS_ERR(cdclk_state)) return PTR_ERR(cdclk_state); - old_min_cdclk = cdclk_state->bw_min_cdclk; + old_min_cdclk = cdclk_state->dbuf_bw_min_cdclk; if (new_min_cdclk == old_min_cdclk) return 0; @@ -2922,7 +2923,7 @@ int intel_cdclk_update_bw_min_cdclk(struct intel_atomic_state *state, if (!allow_cdclk_decrease && new_min_cdclk < old_min_cdclk) return 0; - cdclk_state->bw_min_cdclk = new_min_cdclk; + cdclk_state->dbuf_bw_min_cdclk = new_min_cdclk; ret = intel_atomic_lock_global_state(&cdclk_state->base); if (ret) @@ -2931,7 +2932,7 @@ int intel_cdclk_update_bw_min_cdclk(struct intel_atomic_state *state, *need_cdclk_calc = true; drm_dbg_kms(display->drm, - "bandwidth min cdclk: %d kHz -> %d kHz\n", + "dbuf bandwidth min cdclk: %d kHz -> %d kHz\n", old_min_cdclk, new_min_cdclk); return 0; @@ -2954,7 +2955,7 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) int min_cdclk; min_cdclk = cdclk_state->force_min_cdclk; - min_cdclk = max(min_cdclk, cdclk_state->bw_min_cdclk); + min_cdclk = max(min_cdclk, cdclk_state->dbuf_bw_min_cdclk); for_each_pipe(display, pipe) min_cdclk = max(min_cdclk, cdclk_state->min_cdclk[pipe]); @@ -3480,7 +3481,7 @@ int intel_cdclk_atomic_check(struct intel_atomic_state *state) if (ret) return ret; - ret = intel_bw_calc_min_cdclk(state, &need_cdclk_calc); + ret = intel_dbuf_bw_calc_min_cdclk(state, &need_cdclk_calc); if (ret) return ret; @@ -3507,8 +3508,8 @@ int intel_cdclk_atomic_check(struct intel_atomic_state *state) void intel_cdclk_update_hw_state(struct intel_display *display) { - const struct intel_bw_state *bw_state = - to_intel_bw_state(display->bw.obj.state); + const struct intel_dbuf_bw_state *dbuf_bw_state = + to_intel_dbuf_bw_state(display->dbuf_bw.obj.state); struct intel_cdclk_state *cdclk_state = to_intel_cdclk_state(display->cdclk.obj.state); struct intel_crtc *crtc; @@ -3530,7 +3531,7 @@ void intel_cdclk_update_hw_state(struct intel_display *display) cdclk_state->min_voltage_level[pipe] = crtc_state->min_voltage_level; } - cdclk_state->bw_min_cdclk = intel_bw_min_cdclk(display, bw_state); + cdclk_state->dbuf_bw_min_cdclk = intel_dbuf_bw_min_cdclk(display, dbuf_bw_state); } void 
intel_cdclk_crtc_disable_noatomic(struct intel_crtc *crtc) @@ -4024,11 +4025,6 @@ int intel_cdclk_min_cdclk(const struct intel_cdclk_state *cdclk_state, enum pipe return cdclk_state->min_cdclk[pipe]; } -int intel_cdclk_bw_min_cdclk(const struct intel_cdclk_state *cdclk_state) -{ - return cdclk_state->bw_min_cdclk; -} - bool intel_cdclk_pmdemand_needs_update(struct intel_atomic_state *state) { const struct intel_cdclk_state *new_cdclk_state, *old_cdclk_state; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 1c1140b53b17..6b4cbb6d817b 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -46,9 +46,9 @@ struct intel_cdclk_state * intel_atomic_get_cdclk_state(struct intel_atomic_state *state); void intel_cdclk_update_hw_state(struct intel_display *display); void intel_cdclk_crtc_disable_noatomic(struct intel_crtc *crtc); -int intel_cdclk_update_bw_min_cdclk(struct intel_atomic_state *state, - int old_min_cdclk, int new_min_cdclk, - bool *need_cdclk_calc); +int intel_cdclk_update_dbuf_bw_min_cdclk(struct intel_atomic_state *state, + int old_min_cdclk, int new_min_cdclk, + bool *need_cdclk_calc); #define to_intel_cdclk_state(global_state) \ container_of_const((global_state), struct intel_cdclk_state, base) @@ -65,7 +65,6 @@ int intel_cdclk_logical(const struct intel_cdclk_state *cdclk_state); int intel_cdclk_actual(const struct intel_cdclk_state *cdclk_state); int intel_cdclk_actual_voltage_level(const struct intel_cdclk_state *cdclk_state); int intel_cdclk_min_cdclk(const struct intel_cdclk_state *cdclk_state, enum pipe pipe); -int intel_cdclk_bw_min_cdclk(const struct intel_cdclk_state *cdclk_state); bool intel_cdclk_pmdemand_needs_update(struct intel_atomic_state *state); void intel_cdclk_force_min_cdclk(struct intel_cdclk_state *cdclk_state, int force_min_cdclk); void intel_cdclk_read_hw(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/display/intel_dbuf_bw.c b/drivers/gpu/drm/i915/display/intel_dbuf_bw.c new file mode 100644 index 000000000000..8b8894c37f63 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dbuf_bw.c @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include + +#include "intel_dbuf_bw.h" +#include "intel_display_core.h" +#include "intel_display_types.h" +#include "skl_watermark.h" + +struct intel_dbuf_bw { + unsigned int max_bw[I915_MAX_DBUF_SLICES]; + u8 active_planes[I915_MAX_DBUF_SLICES]; +}; + +struct intel_dbuf_bw_state { + struct intel_global_state base; + struct intel_dbuf_bw dbuf_bw[I915_MAX_PIPES]; +}; + +struct intel_dbuf_bw_state *to_intel_dbuf_bw_state(struct intel_global_state *obj_state) +{ + return container_of(obj_state, struct intel_dbuf_bw_state, base); +} + +struct intel_dbuf_bw_state * +intel_atomic_get_old_dbuf_bw_state(struct intel_atomic_state *state) +{ + struct intel_display *display = to_intel_display(state); + struct intel_global_state *dbuf_bw_state; + + dbuf_bw_state = intel_atomic_get_old_global_obj_state(state, &display->dbuf_bw.obj); + + return to_intel_dbuf_bw_state(dbuf_bw_state); +} + +struct intel_dbuf_bw_state * +intel_atomic_get_new_dbuf_bw_state(struct intel_atomic_state *state) +{ + struct intel_display *display = to_intel_display(state); + struct intel_global_state *dbuf_bw_state; + + dbuf_bw_state = intel_atomic_get_new_global_obj_state(state, &display->dbuf_bw.obj); + + return to_intel_dbuf_bw_state(dbuf_bw_state); +} + +struct intel_dbuf_bw_state * 
+intel_atomic_get_dbuf_bw_state(struct intel_atomic_state *state) +{ + struct intel_display *display = to_intel_display(state); + struct intel_global_state *dbuf_bw_state; + + dbuf_bw_state = intel_atomic_get_global_obj_state(state, &display->dbuf_bw.obj); + if (IS_ERR(dbuf_bw_state)) + return ERR_CAST(dbuf_bw_state); + + return to_intel_dbuf_bw_state(dbuf_bw_state); +} + +static bool intel_dbuf_bw_changed(struct intel_display *display, + const struct intel_dbuf_bw *old_dbuf_bw, + const struct intel_dbuf_bw *new_dbuf_bw) +{ + enum dbuf_slice slice; + + for_each_dbuf_slice(display, slice) { + if (old_dbuf_bw->max_bw[slice] != new_dbuf_bw->max_bw[slice] || + old_dbuf_bw->active_planes[slice] != new_dbuf_bw->active_planes[slice]) + return true; + } + + return false; +} + +static bool intel_dbuf_bw_state_changed(struct intel_display *display, + const struct intel_dbuf_bw_state *old_dbuf_bw_state, + const struct intel_dbuf_bw_state *new_dbuf_bw_state) +{ + enum pipe pipe; + + for_each_pipe(display, pipe) { + const struct intel_dbuf_bw *old_dbuf_bw = + &old_dbuf_bw_state->dbuf_bw[pipe]; + const struct intel_dbuf_bw *new_dbuf_bw = + &new_dbuf_bw_state->dbuf_bw[pipe]; + + if (intel_dbuf_bw_changed(display, old_dbuf_bw, new_dbuf_bw)) + return true; + } + + return false; +} + +static void skl_plane_calc_dbuf_bw(struct intel_dbuf_bw *dbuf_bw, + struct intel_crtc *crtc, + enum plane_id plane_id, + const struct skl_ddb_entry *ddb, + unsigned int data_rate) +{ + struct intel_display *display = to_intel_display(crtc); + unsigned int dbuf_mask = skl_ddb_dbuf_slice_mask(display, ddb); + enum dbuf_slice slice; + + /* + * The arbiter can only really guarantee an + * equal share of the total bw to each plane. + */ + for_each_dbuf_slice_in_mask(display, slice, dbuf_mask) { + dbuf_bw->max_bw[slice] = max(dbuf_bw->max_bw[slice], data_rate); + dbuf_bw->active_planes[slice] |= BIT(plane_id); + } +} + +static void skl_crtc_calc_dbuf_bw(struct intel_dbuf_bw *dbuf_bw, + const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + enum plane_id plane_id; + + memset(dbuf_bw, 0, sizeof(*dbuf_bw)); + + if (!crtc_state->hw.active) + return; + + for_each_plane_id_on_crtc(crtc, plane_id) { + /* + * We assume cursors are small enough + * to not cause bandwidth problems. + */ + if (plane_id == PLANE_CURSOR) + continue; + + skl_plane_calc_dbuf_bw(dbuf_bw, crtc, plane_id, + &crtc_state->wm.skl.plane_ddb[plane_id], + crtc_state->data_rate[plane_id]); + + if (DISPLAY_VER(display) < 11) + skl_plane_calc_dbuf_bw(dbuf_bw, crtc, plane_id, + &crtc_state->wm.skl.plane_ddb_y[plane_id], + crtc_state->data_rate[plane_id]); + } +} + +/* "Maximum Data Buffer Bandwidth" */ +int intel_dbuf_bw_min_cdclk(struct intel_display *display, + const struct intel_dbuf_bw_state *dbuf_bw_state) +{ + unsigned int total_max_bw = 0; + enum dbuf_slice slice; + + for_each_dbuf_slice(display, slice) { + int num_active_planes = 0; + unsigned int max_bw = 0; + enum pipe pipe; + + /* + * The arbiter can only really guarantee an + * equal share of the total bw to each plane. 
+ */ + for_each_pipe(display, pipe) { + const struct intel_dbuf_bw *dbuf_bw = &dbuf_bw_state->dbuf_bw[pipe]; + + max_bw = max(dbuf_bw->max_bw[slice], max_bw); + num_active_planes += hweight8(dbuf_bw->active_planes[slice]); + } + max_bw *= num_active_planes; + + total_max_bw = max(total_max_bw, max_bw); + } + + return DIV_ROUND_UP(total_max_bw, 64); +} + +int intel_dbuf_bw_calc_min_cdclk(struct intel_atomic_state *state, + bool *need_cdclk_calc) +{ + struct intel_display *display = to_intel_display(state); + struct intel_dbuf_bw_state *new_dbuf_bw_state = NULL; + const struct intel_dbuf_bw_state *old_dbuf_bw_state = NULL; + const struct intel_crtc_state *old_crtc_state; + const struct intel_crtc_state *new_crtc_state; + struct intel_crtc *crtc; + int ret, i; + + if (DISPLAY_VER(display) < 9) + return 0; + + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + struct intel_dbuf_bw old_dbuf_bw, new_dbuf_bw; + + skl_crtc_calc_dbuf_bw(&old_dbuf_bw, old_crtc_state); + skl_crtc_calc_dbuf_bw(&new_dbuf_bw, new_crtc_state); + + if (!intel_dbuf_bw_changed(display, &old_dbuf_bw, &new_dbuf_bw)) + continue; + + new_dbuf_bw_state = intel_atomic_get_dbuf_bw_state(state); + if (IS_ERR(new_dbuf_bw_state)) + return PTR_ERR(new_dbuf_bw_state); + + old_dbuf_bw_state = intel_atomic_get_old_dbuf_bw_state(state); + + new_dbuf_bw_state->dbuf_bw[crtc->pipe] = new_dbuf_bw; + } + + if (!old_dbuf_bw_state) + return 0; + + if (intel_dbuf_bw_state_changed(display, old_dbuf_bw_state, new_dbuf_bw_state)) { + ret = intel_atomic_lock_global_state(&new_dbuf_bw_state->base); + if (ret) + return ret; + } + + ret = intel_cdclk_update_dbuf_bw_min_cdclk(state, + intel_dbuf_bw_min_cdclk(display, old_dbuf_bw_state), + intel_dbuf_bw_min_cdclk(display, new_dbuf_bw_state), + need_cdclk_calc); + if (ret) + return ret; + + return 0; +} + +void intel_dbuf_bw_update_hw_state(struct intel_display *display) +{ + struct intel_dbuf_bw_state *dbuf_bw_state = + to_intel_dbuf_bw_state(display->dbuf_bw.obj.state); + struct intel_crtc *crtc; + + if (DISPLAY_VER(display) < 9) + return; + + for_each_intel_crtc(display->drm, crtc) { + const struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + skl_crtc_calc_dbuf_bw(&dbuf_bw_state->dbuf_bw[crtc->pipe], crtc_state); + } +} + +void intel_dbuf_bw_crtc_disable_noatomic(struct intel_crtc *crtc) +{ + struct intel_display *display = to_intel_display(crtc); + struct intel_dbuf_bw_state *dbuf_bw_state = + to_intel_dbuf_bw_state(display->dbuf_bw.obj.state); + enum pipe pipe = crtc->pipe; + + if (DISPLAY_VER(display) < 9) + return; + + memset(&dbuf_bw_state->dbuf_bw[pipe], 0, sizeof(dbuf_bw_state->dbuf_bw[pipe])); +} + +static struct intel_global_state * +intel_dbuf_bw_duplicate_state(struct intel_global_obj *obj) +{ + struct intel_dbuf_bw_state *state; + + state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + return &state->base; +} + +static void intel_dbuf_bw_destroy_state(struct intel_global_obj *obj, + struct intel_global_state *state) +{ + kfree(state); +} + +static const struct intel_global_state_funcs intel_dbuf_bw_funcs = { + .atomic_duplicate_state = intel_dbuf_bw_duplicate_state, + .atomic_destroy_state = intel_dbuf_bw_destroy_state, +}; + +int intel_dbuf_bw_init(struct intel_display *display) +{ + struct intel_dbuf_bw_state *state; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + intel_atomic_global_obj_init(display, &display->dbuf_bw.obj, + &state->base, 
&intel_dbuf_bw_funcs); + + return 0; +} diff --git a/drivers/gpu/drm/i915/display/intel_dbuf_bw.h b/drivers/gpu/drm/i915/display/intel_dbuf_bw.h new file mode 100644 index 000000000000..61875b9d5969 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dbuf_bw.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef __INTEL_DBUF_BW_H__ +#define __INTEL_DBUF_BW_H__ + +#include + +struct intel_atomic_state; +struct intel_dbuf_bw_state; +struct intel_crtc; +struct intel_display; +struct intel_global_state; + +struct intel_dbuf_bw_state * +to_intel_dbuf_bw_state(struct intel_global_state *obj_state); + +struct intel_dbuf_bw_state * +intel_atomic_get_old_dbuf_bw_state(struct intel_atomic_state *state); + +struct intel_dbuf_bw_state * +intel_atomic_get_new_dbuf_bw_state(struct intel_atomic_state *state); + +struct intel_dbuf_bw_state * +intel_atomic_get_dbuf_bw_state(struct intel_atomic_state *state); + +int intel_dbuf_bw_init(struct intel_display *display); +int intel_dbuf_bw_calc_min_cdclk(struct intel_atomic_state *state, + bool *need_cdclk_calc); +int intel_dbuf_bw_min_cdclk(struct intel_display *display, + const struct intel_dbuf_bw_state *dbuf_bw_state); +void intel_dbuf_bw_update_hw_state(struct intel_display *display); +void intel_dbuf_bw_crtc_disable_noatomic(struct intel_crtc *crtc); + +#endif /* __INTEL_DBUF_BW_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index df4da52cbdb3..32664098b407 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -369,6 +369,10 @@ struct intel_display { struct intel_global_obj obj; } dbuf; + struct { + struct intel_global_obj obj; + } dbuf_bw; + struct { /* * dkl.phy_lock protects against concurrent access of the diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index f84a0b26b7a6..38672d2896e3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -28,6 +28,7 @@ #include "intel_cdclk.h" #include "intel_color.h" #include "intel_crtc.h" +#include "intel_dbuf_bw.h" #include "intel_display_core.h" #include "intel_display_debugfs.h" #include "intel_display_driver.h" @@ -285,6 +286,10 @@ int intel_display_driver_probe_noirq(struct intel_display *display) if (ret) goto cleanup_wq_unordered; + ret = intel_dbuf_bw_init(display); + if (ret) + goto cleanup_wq_unordered; + ret = intel_bw_init(display); if (ret) goto cleanup_wq_unordered; diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index 8415f3d703ed..deb877b2aebd 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -19,6 +19,7 @@ #include "intel_color.h" #include "intel_crtc.h" #include "intel_crtc_state_dump.h" +#include "intel_dbuf_bw.h" #include "intel_ddi.h" #include "intel_de.h" #include "intel_display.h" @@ -176,6 +177,7 @@ static void intel_crtc_disable_noatomic_complete(struct intel_crtc *crtc) intel_cdclk_crtc_disable_noatomic(crtc); skl_wm_crtc_disable_noatomic(crtc); intel_bw_crtc_disable_noatomic(crtc); + intel_dbuf_bw_crtc_disable_noatomic(crtc); intel_pmdemand_update_port_clock(display, pmdemand_state, pipe, 0); } @@ -872,6 +874,7 @@ static void intel_modeset_readout_hw_state(struct intel_display *display) 
intel_wm_get_hw_state(display); intel_bw_update_hw_state(display); + intel_dbuf_bw_update_hw_state(display); intel_cdclk_update_hw_state(display); intel_pmdemand_init_pmdemand_params(display, pmdemand_state); diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 815f0df2f83e..5d6e80d541e0 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -240,6 +240,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_crtc_state_dump.o \ i915-display/intel_cursor.o \ i915-display/intel_cx0_phy.o \ + i915-display/intel_dbuf_bw.o \ i915-display/intel_ddi.o \ i915-display/intel_ddi_buf_trans.o \ i915-display/intel_display.o \ -- cgit v1.2.3 From 2955b247446d279b87b63926e725ddca3c978d8c Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 13 Oct 2025 23:12:29 +0300 Subject: drm/i915: s/"not not"/"not"/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminate the repeated "not not" in the bw code comments. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251013201236.30084-3-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_bw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index a4d16711d336..d03da1ed4541 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -831,7 +831,7 @@ static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_stat { /* * We assume cursors are small enough - * to not not cause bandwidth problems. + * to not cause bandwidth problems. */ return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR)); } @@ -846,7 +846,7 @@ static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_ for_each_plane_id_on_crtc(crtc, plane_id) { /* * We assume cursors are small enough - * to not not cause bandwidth problems. + * to not cause bandwidth problems. */ if (plane_id == PLANE_CURSOR) continue; -- cgit v1.2.3 From 1c67c4366955c73debb0704f6793b779ccf0db59 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 13 Oct 2025 23:12:30 +0300 Subject: drm/i915/bw: Relocate intel_bw_crtc_min_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_bw_crtc_min_cdclk() (aka. the thing that deals with what bspec calls "Maximum Pipe Read Bandwidth") doesn't really have anything to do with the rest of intel_bw.c (which is all about SAGV/QGV and memory bandwidth). Move it into intel_crtc.c (for the lack of a better place). And I don't really want to call intel_bw.c functions from intel_crtc.c, so move out intel_bw_crtc_data_rate() as well. And when we move that we pretty much have to move intel_bw_crtc_num_active_planes() as well since the two are meant to be used as a pair (they both implement the same "ignore the cursor" logic). And in an effort to keep the namespaces at least semi-sensible we flip the intel_bw_crtc_ prefix into intel_crtc_bw_.
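As a back-of-the-envelope check of the "Maximum Pipe Read Bandwidth" rule the relocated helper implements (pipe read bandwidth may not exceed 51.2 bytes per cdclk cycle, hence the data_rate * 10 / 512 integer math, gen12+ only), here is a minimal standalone sketch with an illustrative plane configuration, not driver code:

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP_ULL(n, d) (((n) + (d) - 1) / (d))

/* cdclk (kHz) must be at least data_rate (kB/s) / 51.2 */
static uint64_t crtc_bw_min_cdclk(uint64_t data_rate)
{
	return DIV_ROUND_UP_ULL(data_rate * 10, 512);
}

int main(void)
{
	/* hypothetical 3840x2160@60 ARGB8888 plane: ~1990656 kB/s */
	uint64_t data_rate = 3840ULL * 2160 * 60 * 4 / 1000;

	/* prints "min cdclk: 38880 kHz" */
	printf("min cdclk: %llu kHz\n",
	       (unsigned long long)crtc_bw_min_cdclk(data_rate));
	return 0;
}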
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251013201236.30084-4-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_bw.c | 56 ++++-------------------------- drivers/gpu/drm/i915/display/intel_bw.h | 1 - drivers/gpu/drm/i915/display/intel_cdclk.c | 3 +- drivers/gpu/drm/i915/display/intel_crtc.c | 44 +++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_crtc.h | 4 +++ 5 files changed, 55 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index d03da1ed4541..92a060e02cf3 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -827,50 +827,6 @@ void intel_bw_init_hw(struct intel_display *display) icl_get_bw_info(display, dram_info, &icl_sa_info); } -static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state) -{ - /* - * We assume cursors are small enough - * to not cause bandwidth problems. - */ - return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR)); -} - -static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state) -{ - struct intel_display *display = to_intel_display(crtc_state); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - unsigned int data_rate = 0; - enum plane_id plane_id; - - for_each_plane_id_on_crtc(crtc, plane_id) { - /* - * We assume cursors are small enough - * to not cause bandwidth problems. - */ - if (plane_id == PLANE_CURSOR) - continue; - - data_rate += crtc_state->data_rate[plane_id]; - - if (DISPLAY_VER(display) < 11) - data_rate += crtc_state->data_rate_y[plane_id]; - } - - return data_rate; -} - -/* "Maximum Pipe Read Bandwidth" */ -int intel_bw_crtc_min_cdclk(const struct intel_crtc_state *crtc_state) -{ - struct intel_display *display = to_intel_display(crtc_state); - - if (DISPLAY_VER(display) < 12) - return 0; - - return DIV_ROUND_UP_ULL(mul_u32_u32(intel_bw_crtc_data_rate(crtc_state), 10), 512); -} - static unsigned int intel_bw_num_active_planes(struct intel_display *display, const struct intel_bw_state *bw_state) { @@ -1264,13 +1220,13 @@ static int intel_bw_check_data_rate(struct intel_atomic_state *state, bool *chan for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { unsigned int old_data_rate = - intel_bw_crtc_data_rate(old_crtc_state); + intel_crtc_bw_data_rate(old_crtc_state); unsigned int new_data_rate = - intel_bw_crtc_data_rate(new_crtc_state); + intel_crtc_bw_data_rate(new_crtc_state); unsigned int old_active_planes = - intel_bw_crtc_num_active_planes(old_crtc_state); + intel_crtc_bw_num_active_planes(old_crtc_state); unsigned int new_active_planes = - intel_bw_crtc_num_active_planes(new_crtc_state); + intel_crtc_bw_num_active_planes(new_crtc_state); struct intel_bw_state *new_bw_state; /* @@ -1426,9 +1382,9 @@ static void intel_bw_crtc_update(struct intel_bw_state *bw_state, struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); bw_state->data_rate[crtc->pipe] = - intel_bw_crtc_data_rate(crtc_state); + intel_crtc_bw_data_rate(crtc_state); bw_state->num_active_planes[crtc->pipe] = - intel_bw_crtc_num_active_planes(crtc_state); + intel_crtc_bw_num_active_planes(crtc_state); drm_dbg_kms(display->drm, "pipe %c data rate %u num active planes %u\n", pipe_name(crtc->pipe), diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index 051e163f2f15..99b447388245 100644 --- 
a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -29,7 +29,6 @@ intel_atomic_get_bw_state(struct intel_atomic_state *state); void intel_bw_init_hw(struct intel_display *display); int intel_bw_init(struct intel_display *display); int intel_bw_atomic_check(struct intel_atomic_state *state); -int intel_bw_crtc_min_cdclk(const struct intel_crtc_state *crtc_state); void intel_bw_update_hw_state(struct intel_display *display); void intel_bw_crtc_disable_noatomic(struct intel_crtc *crtc); diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index c41d1a45a798..0cef55b56b86 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -35,7 +35,6 @@ #include "i915_utils.h" #include "intel_atomic.h" #include "intel_audio.h" -#include "intel_bw.h" #include "intel_cdclk.h" #include "intel_crtc.h" #include "intel_dbuf_bw.h" @@ -2842,7 +2841,7 @@ static int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_stat return 0; min_cdclk = intel_pixel_rate_to_cdclk(crtc_state); - min_cdclk = max(min_cdclk, intel_bw_crtc_min_cdclk(crtc_state)); + min_cdclk = max(min_cdclk, intel_crtc_bw_min_cdclk(crtc_state)); min_cdclk = max(min_cdclk, hsw_ips_min_cdclk(crtc_state)); min_cdclk = max(min_cdclk, intel_audio_min_cdclk(crtc_state)); min_cdclk = max(min_cdclk, vlv_dsi_min_cdclk(crtc_state)); diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index 7b39c3a5887c..d300ba1dcd2c 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -795,3 +795,47 @@ bool intel_any_crtc_active_changed(struct intel_atomic_state *state) return false; } + +unsigned int intel_crtc_bw_num_active_planes(const struct intel_crtc_state *crtc_state) +{ + /* + * We assume cursors are small enough + * to not cause bandwidth problems. + */ + return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR)); +} + +unsigned int intel_crtc_bw_data_rate(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + unsigned int data_rate = 0; + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) { + /* + * We assume cursors are small enough + * to not cause bandwidth problems. 
+ */ + if (plane_id == PLANE_CURSOR) + continue; + + data_rate += crtc_state->data_rate[plane_id]; + + if (DISPLAY_VER(display) < 11) + data_rate += crtc_state->data_rate_y[plane_id]; + } + + return data_rate; +} + +/* "Maximum Pipe Read Bandwidth" */ +int intel_crtc_bw_min_cdclk(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + if (DISPLAY_VER(display) < 12) + return 0; + + return DIV_ROUND_UP_ULL(mul_u32_u32(intel_crtc_bw_data_rate(crtc_state), 10), 512); +} diff --git a/drivers/gpu/drm/i915/display/intel_crtc.h b/drivers/gpu/drm/i915/display/intel_crtc.h index cee09e7cd3dc..07917e8a9ae3 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.h +++ b/drivers/gpu/drm/i915/display/intel_crtc.h @@ -65,4 +65,8 @@ bool intel_any_crtc_active_changed(struct intel_atomic_state *state); bool intel_crtc_active_changed(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state); +unsigned int intel_crtc_bw_num_active_planes(const struct intel_crtc_state *crtc_state); +unsigned int intel_crtc_bw_data_rate(const struct intel_crtc_state *crtc_state); +int intel_crtc_bw_min_cdclk(const struct intel_crtc_state *crtc_state); + #endif -- cgit v1.2.3 From 45bcbfe637786682903f52a4d1e0646a58342bbb Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 13 Oct 2025 23:12:31 +0300 Subject: drm/i915/ips: Eliminate the cdclk_state stuff from hsw_ips_compute_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reorganize the IPS CDCLK handling such that the computed CDCLK frequency will always satisfy the IPS requirements. The only exceptional case is if IPS would push the CDCLK above the platform max, but in that case we can simply disable IPS. To make this 100% race free we must move the enable_ips modparam check out from the min CDCLK computation path so that there is no chance of hsw_min_cdclk() and hsw_ips_compute_config() observing a different enable_ips value during the same commit. This allows us to completely remove the cdclk_state stuff from the IPS code. We only ever have to compare the IPS min CDCLK against the platform max CDCLK. Thus we eliminate any ordering requirements between intel_cdclk_atomic_check() and hsw_ips_compute_config(). Additionally we reduce the three copies of the code doing the 95% calculation into just one. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251013201236.30084-5-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/hsw_ips.c | 59 ++++++++++++++++------------------ 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/display/hsw_ips.c b/drivers/gpu/drm/i915/display/hsw_ips.c index 927fe56aec77..f444c5b7a27b 100644 --- a/drivers/gpu/drm/i915/display/hsw_ips.c +++ b/drivers/gpu/drm/i915/display/hsw_ips.c @@ -191,45 +191,46 @@ bool hsw_crtc_supports_ips(struct intel_crtc *crtc) static bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state) { - struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - /* IPS only exists on ULT machines and is tied to pipe A. */ if (!hsw_crtc_supports_ips(crtc)) return false; - if (!display->params.enable_ips) - return false; - if (crtc_state->pipe_bpp > 24) return false; - /* - * We compare against max which means we must take - * the increased cdclk requirement into account when - * calculating the new cdclk. 
- * - * Should measure whether using a lower cdclk w/o IPS - */ - if (display->platform.broadwell && - crtc_state->pixel_rate > display->cdclk.max_cdclk_freq * 95 / 100) - return false; - return true; } +static int _hsw_ips_min_cdclk(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + if (display->platform.broadwell) + return DIV_ROUND_UP(crtc_state->pixel_rate * 100, 95); + + /* no IPS specific limits to worry about */ + return 0; +} + int hsw_ips_min_cdclk(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); + int min_cdclk; - if (!display->platform.broadwell) + if (!hsw_crtc_state_ips_capable(crtc_state)) return 0; - if (!hsw_crtc_state_ips_capable(crtc_state)) + min_cdclk = _hsw_ips_min_cdclk(crtc_state); + + /* + * Do not ask for more than the max CDCLK frequency, + * if that is not enough IPS will simply not be used. + */ + if (min_cdclk > display->cdclk.max_cdclk_freq) return 0; - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - return DIV_ROUND_UP(crtc_state->pixel_rate * 100, 95); + return min_cdclk; } int hsw_ips_compute_config(struct intel_atomic_state *state, @@ -244,6 +245,12 @@ int hsw_ips_compute_config(struct intel_atomic_state *state, if (!hsw_crtc_state_ips_capable(crtc_state)) return 0; + if (_hsw_ips_min_cdclk(crtc_state) > display->cdclk.max_cdclk_freq) + return 0; + + if (!display->params.enable_ips) + return 0; + /* * When IPS gets enabled, the pipe CRC changes. Since IPS gets * enabled and disabled dynamically based on package C states, @@ -257,18 +264,6 @@ int hsw_ips_compute_config(struct intel_atomic_state *state, if (!(crtc_state->active_planes & ~BIT(PLANE_CURSOR))) return 0; - if (display->platform.broadwell) { - const struct intel_cdclk_state *cdclk_state; - - cdclk_state = intel_atomic_get_cdclk_state(state); - if (IS_ERR(cdclk_state)) - return PTR_ERR(cdclk_state); - - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (crtc_state->pixel_rate > intel_cdclk_logical(cdclk_state) * 95 / 100) - return 0; - } - crtc_state->ips_enabled = true; return 0; -- cgit v1.2.3 From 68de4d6544425443bec666b644300f098a5c7618 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 13 Oct 2025 23:12:32 +0300 Subject: drm/i915/fbc: Decouple FBC from intel_cdclk_atomic_check() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always account for FBC requirements in intel_crtc_compute_min_cdclk() so that we don't have to worry about the actual CDCLK frequency in intel_fbc_check_plane() any longer. 
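Both this FBC check and the IPS check in the previous patch now boil down to the same "pixel rate mustn't exceed 95% of cdclk" rule. A minimal standalone sketch of the resulting bound, with an assumed pixel rate rather than real mode data:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* HSW/BDW: the feature needs pixel_rate <= 95% of cdclk, so the
 * minimum acceptable cdclk is pixel_rate * 100 / 95, rounded up */
static int min_cdclk_95(int pixel_rate)
{
	return DIV_ROUND_UP(pixel_rate * 100, 95);
}

int main(void)
{
	int pixel_rate = 533250; /* kHz, e.g. a 2560x1440@60 mode */

	/* prints "min cdclk: 561316 kHz"; when this exceeds the platform
	 * max cdclk, the feature is simply left disabled instead of
	 * failing the atomic check */
	printf("min cdclk: %d kHz\n", min_cdclk_95(pixel_rate));
	return 0;
}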
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251013201236.30084-6-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 1 + drivers/gpu/drm/i915/display/intel_fbc.c | 49 ++++++++++++++++++++++-------- drivers/gpu/drm/i915/display/intel_fbc.h | 1 + 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 0cef55b56b86..8fb19e605832 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2842,6 +2842,7 @@ static int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_stat min_cdclk = intel_pixel_rate_to_cdclk(crtc_state); min_cdclk = max(min_cdclk, intel_crtc_bw_min_cdclk(crtc_state)); + min_cdclk = max(min_cdclk, intel_fbc_min_cdclk(crtc_state)); min_cdclk = max(min_cdclk, hsw_ips_min_cdclk(crtc_state)); min_cdclk = max(min_cdclk, intel_audio_min_cdclk(crtc_state)); min_cdclk = max(min_cdclk, vlv_dsi_min_cdclk(crtc_state)); diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 83903bb696ff..10ef3136dadc 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -53,7 +53,6 @@ #include "i915_vgpu.h" #include "i915_vma.h" #include "i9xx_plane_regs.h" -#include "intel_cdclk.h" #include "intel_de.h" #include "intel_display_device.h" #include "intel_display_regs.h" @@ -1420,6 +1419,18 @@ intel_fbc_prepare_dirty_rect(struct intel_atomic_state *state, } } +static int _intel_fbc_min_cdclk(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + /* WaFbcExceedCdClockThreshold:hsw,bdw */ + if (display->platform.haswell || display->platform.broadwell) + return DIV_ROUND_UP(crtc_state->pixel_rate * 100, 95); + + /* no FBC specific limits to worry about */ + return 0; +} + static int intel_fbc_check_plane(struct intel_atomic_state *state, struct intel_plane *plane) { @@ -1559,18 +1570,9 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, return 0; } - /* WaFbcExceedCdClockThreshold:hsw,bdw */ - if (display->platform.haswell || display->platform.broadwell) { - const struct intel_cdclk_state *cdclk_state; - - cdclk_state = intel_atomic_get_cdclk_state(state); - if (IS_ERR(cdclk_state)) - return PTR_ERR(cdclk_state); - - if (crtc_state->pixel_rate >= intel_cdclk_logical(cdclk_state) * 95 / 100) { - plane_state->no_fbc_reason = "pixel rate too high"; - return 0; - } + if (_intel_fbc_min_cdclk(crtc_state) > display->cdclk.max_cdclk_freq) { + plane_state->no_fbc_reason = "pixel rate too high"; + return 0; } plane_state->no_fbc_reason = NULL; @@ -1578,6 +1580,27 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, return 0; } +int intel_fbc_min_cdclk(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_plane *plane = to_intel_plane(crtc->base.primary); + int min_cdclk; + + if (!plane->fbc) + return 0; + + min_cdclk = _intel_fbc_min_cdclk(crtc_state); + + /* + * Do not ask for more than the max CDCLK frequency, + * if that is not enough FBC will simply not be used. 
+ */ + if (min_cdclk > display->cdclk.max_cdclk_freq) + return 0; + + return min_cdclk; +} static bool intel_fbc_can_flip_nuke(struct intel_atomic_state *state, struct intel_crtc *crtc, diff --git a/drivers/gpu/drm/i915/display/intel_fbc.h b/drivers/gpu/drm/i915/display/intel_fbc.h index 0e715cb6b4e6..c86562404a00 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.h +++ b/drivers/gpu/drm/i915/display/intel_fbc.h @@ -28,6 +28,7 @@ enum intel_fbc_id { }; int intel_fbc_atomic_check(struct intel_atomic_state *state); +int intel_fbc_min_cdclk(const struct intel_crtc_state *crtc_state); bool intel_fbc_pre_update(struct intel_atomic_state *state, struct intel_crtc *crtc); void intel_fbc_post_update(struct intel_atomic_state *state, -- cgit v1.2.3 From 5785ace8e1a07678ac914ed18803460db10bf87a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 13 Oct 2025 23:12:33 +0300 Subject: drm/i915: s/min_cdck[]/plane_min_cdclk[]/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename crtc_state->min_cdclk[] into crtc_state->plane_min_cdclk[] to better reflect what it represents. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251013201236.30084-7-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 2 +- drivers/gpu/drm/i915/display/intel_display_types.h | 2 +- drivers/gpu/drm/i915/display/intel_modeset_setup.c | 6 +++--- drivers/gpu/drm/i915/display/intel_plane.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 8fb19e605832..6c477b6a7b8a 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2828,7 +2828,7 @@ static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state) int min_cdclk = 0; for_each_intel_plane_on_crtc(display->drm, crtc, plane) - min_cdclk = max(min_cdclk, crtc_state->min_cdclk[plane->id]); + min_cdclk = max(min_cdclk, crtc_state->plane_min_cdclk[plane->id]); return min_cdclk; } diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 87b7cec35320..f77d120733fd 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1192,7 +1192,7 @@ struct intel_crtc_state { struct intel_crtc_wm_state wm; - int min_cdclk[I915_MAX_PLANES]; + int plane_min_cdclk[I915_MAX_PLANES]; /* for packed/planar CbCr */ u32 data_rate[I915_MAX_PLANES]; diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index deb877b2aebd..d5c432b613ce 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -853,16 +853,16 @@ static void intel_modeset_readout_hw_state(struct intel_display *display) */ if (plane_state->uapi.visible && plane->min_cdclk) { if (crtc_state->double_wide || DISPLAY_VER(display) >= 10) - crtc_state->min_cdclk[plane->id] = + crtc_state->plane_min_cdclk[plane->id] = DIV_ROUND_UP(crtc_state->pixel_rate, 2); else - crtc_state->min_cdclk[plane->id] = + crtc_state->plane_min_cdclk[plane->id] = crtc_state->pixel_rate; } drm_dbg_kms(display->drm, "[PLANE:%d:%s] min_cdclk %d kHz\n", plane->base.base.id, plane->base.name, - crtc_state->min_cdclk[plane->id]); + crtc_state->plane_min_cdclk[plane->id]); } intel_pmdemand_update_port_clock(display, 
pmdemand_state, pipe, diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index 074de9275951..78329deb395a 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -304,7 +304,7 @@ static void intel_plane_calc_min_cdclk(struct intel_atomic_state *state, new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - new_crtc_state->min_cdclk[plane->id] = + new_crtc_state->plane_min_cdclk[plane->id] = plane->min_cdclk(new_crtc_state, plane_state); } @@ -391,7 +391,7 @@ void intel_plane_set_invisible(struct intel_crtc_state *crtc_state, crtc_state->data_rate_y[plane->id] = 0; crtc_state->rel_data_rate[plane->id] = 0; crtc_state->rel_data_rate_y[plane->id] = 0; - crtc_state->min_cdclk[plane->id] = 0; + crtc_state->plane_min_cdclk[plane->id] = 0; plane_state->uapi.visible = false; } -- cgit v1.2.3 From 7a8d9cfa6db0446246769ebb29b423ef4e6ed8eb Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 13 Oct 2025 23:12:34 +0300 Subject: drm/i915: Compute per-crtc min_cdclk earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we compute the min_cdclk for each pipe during intel_cdclk_atomic_check(). But that is too late for the pipe prefill vs. vblank length checks (done during intel_compute_global_watermarks). We can't just reorder these things due to other dependencies, so instead pull only the per-crtc minimum cdclk calculation ahead. We should have enough information for that as soon as we've computed the min cdclk for the planes. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251013201236.30084-8-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_cdclk.c | 8 ++++---- drivers/gpu/drm/i915/display/intel_cdclk.h | 2 ++ drivers/gpu/drm/i915/display/intel_display.c | 3 +++ drivers/gpu/drm/i915/display/intel_display_types.h | 2 ++ drivers/gpu/drm/i915/display/intel_modeset_setup.c | 5 +++++ 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 6c477b6a7b8a..e92e7fd9fd13 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2833,7 +2833,7 @@ static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state) return min_cdclk; } -static int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) +int intel_crtc_min_cdclk(const struct intel_crtc_state *crtc_state) { int min_cdclk; @@ -3306,8 +3306,8 @@ static int intel_crtcs_calc_min_cdclk(struct intel_atomic_state *state, for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { ret = intel_cdclk_update_crtc_min_cdclk(state, crtc, - intel_crtc_compute_min_cdclk(old_crtc_state), - intel_crtc_compute_min_cdclk(new_crtc_state), + old_crtc_state->min_cdclk, + new_crtc_state->min_cdclk, need_cdclk_calc); if (ret) return ret; @@ -3527,7 +3527,7 @@ void intel_cdclk_update_hw_state(struct intel_display *display) if (crtc_state->hw.active) cdclk_state->active_pipes |= BIT(pipe); - cdclk_state->min_cdclk[pipe] = intel_crtc_compute_min_cdclk(crtc_state); + cdclk_state->min_cdclk[pipe] = crtc_state->min_cdclk; cdclk_state->min_voltage_level[pipe] = crtc_state->min_voltage_level; } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 6b4cbb6d817b..1ff7d078b42c 100644 --- 
a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -75,4 +75,6 @@ int intel_cdclk_min_cdclk_for_prefill(const struct intel_crtc_state *crtc_state, unsigned int prefill_lines_unadjusted, unsigned int prefill_lines_available); +int intel_crtc_min_cdclk(const struct intel_crtc_state *crtc_state); + #endif /* __INTEL_CDCLK_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4367ecfab2b3..bbb6ff929d64 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6449,6 +6449,9 @@ int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) + new_crtc_state->min_cdclk = intel_crtc_min_cdclk(new_crtc_state); + ret = intel_compute_global_watermarks(state); if (ret) goto fail; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index f77d120733fd..203dd38a9ec4 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1192,6 +1192,8 @@ struct intel_crtc_state { struct intel_crtc_wm_state wm; + int min_cdclk; + int plane_min_cdclk[I915_MAX_PLANES]; /* for packed/planar CbCr */ diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index d5c432b613ce..0dcb0597879a 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -865,6 +865,11 @@ static void intel_modeset_readout_hw_state(struct intel_display *display) crtc_state->plane_min_cdclk[plane->id]); } + crtc_state->min_cdclk = intel_crtc_min_cdclk(crtc_state); + + drm_dbg_kms(display->drm, "[CRTC:%d:%s] min_cdclk %d kHz\n", + crtc->base.base.id, crtc->base.name, crtc_state->min_cdclk); + intel_pmdemand_update_port_clock(display, pmdemand_state, pipe, crtc_state->port_clock); } -- cgit v1.2.3 From 75255a03bdfb107e78cb7b9da2ef03cb6ba8e454 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 13 Oct 2025 23:12:35 +0300 Subject: drm/i915: Include the per-crtc minimum cdclk in the crtc state dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include the crtc minimum cdclk in the crtc state dump. Might help figuring out who needed how much cdclk. 
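With DRM_RECT_FMT expanding to "%dx%d%+d%+d", the extended dump line would render roughly as follows (values purely illustrative):

port clock: 270000, pipe src: 2560x1440+0+0, pixel rate 241500, min cdclk 241500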
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251013201236.30084-9-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/display/intel_crtc_state_dump.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index a14bcda4446c..23e25e97d060 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -314,9 +314,9 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config, drm_printf(&p, "pipe mode: " DRM_MODE_FMT "\n", DRM_MODE_ARG(&pipe_config->hw.pipe_mode)); intel_dump_crtc_timings(&p, &pipe_config->hw.pipe_mode); - drm_printf(&p, "port clock: %d, pipe src: " DRM_RECT_FMT ", pixel rate %d\n", + drm_printf(&p, "port clock: %d, pipe src: " DRM_RECT_FMT ", pixel rate %d, min cdclk %d\n", pipe_config->port_clock, DRM_RECT_ARG(&pipe_config->pipe_src), - pipe_config->pixel_rate); + pipe_config->pixel_rate, pipe_config->min_cdclk); drm_printf(&p, "linetime: %d, ips linetime: %d\n", pipe_config->linetime, pipe_config->ips_linetime); -- cgit v1.2.3 From 2ada9cb1df3f5405a01d013b708b1b0914efccfe Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Wed, 15 Oct 2025 17:03:51 -0700 Subject: drm/i915: Fix conversion between clock ticks and nanoseconds When tick values are large, the multiplication by NSEC_PER_SEC overflows 64 bits and results in bad conversions. The issue is seen in PMU busyness counters that look like they have wrapped around due to bad conversion. The i915 PMU implementation returns monotonically increasing counters. If a count is less than the previous one, it will only return the larger value until the smaller value catches up. The user will see this as a zero delta between two measurements even though the engines are busy.
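The wraparound is easy to reproduce in isolation. A standalone sketch with an assumed 19.2 MHz timestamp clock; the 128-bit intermediate below models the property that mul_u64_u32_div() provides:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	uint64_t freq = 19200000;		/* assumed 19.2 MHz clock */
	uint64_t count = 20000000000ULL;	/* ~17 min worth of ticks */

	/* count * NSEC_PER_SEC needs ~65 bits here, so the u64 product
	 * wraps and the converted time collapses to ~80 s */
	uint64_t bad = count * NSEC_PER_SEC / freq;

	/* widening the intermediate (gcc/clang __int128) yields the
	 * expected ~17 minutes */
	uint64_t good = (uint64_t)((unsigned __int128)count *
				   NSEC_PER_SEC / freq);

	printf("bad:  %llu s\n", (unsigned long long)(bad / NSEC_PER_SEC));
	printf("good: %llu s\n", (unsigned long long)(good / NSEC_PER_SEC));
	return 0;
}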
Fix it by using mul_u64_u32_div() Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14955 Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://lore.kernel.org/r/20251016000350.1152382-2-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 88b147fa5cb1..c90b35881a26 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -205,7 +205,7 @@ static u64 div_u64_roundup(u64 nom, u32 den) u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count) { - return div_u64_roundup(count * NSEC_PER_SEC, gt->clock_frequency); + return mul_u64_u32_div(count, NSEC_PER_SEC, gt->clock_frequency); } u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count) @@ -215,7 +215,7 @@ u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count) u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns) { - return div_u64_roundup(gt->clock_frequency * ns, NSEC_PER_SEC); + return mul_u64_u32_div(ns, gt->clock_frequency, NSEC_PER_SEC); } u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns) -- cgit v1.2.3 From c88e70dc8bfca9a2be74a100387b1b66de973128 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 19:19:28 +0300 Subject: drm/i915/dsc: Add helper to enable the DSC configuration for a CRTC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper to enable the DSC compression configuration for a CRTC. Follow-up changes will introduce tracking for the same DSC state on the whole link, which will need to be set whenever DSC is enabled for the CRTC. Also, according to the above, when querying the DSC state on the link, both the CRTC's and the link's DSC state must be considered. Setting the DSC configuration for a CRTC and querying the DSC configuration for the link (added by follow-up changes) is better done via helper functions based on the above, prepare for that here. 
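The conversion for callers is mechanical; a hypothetical encoder compute path not touched by this patch would change along these lines:

-	crtc_state->dsc.compression_enable = true;
+	intel_dsc_enable_on_crtc(crtc_state);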
Signed-off-by: Imre Deak Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251015161934.262108-2-imre.deak@intel.com --- drivers/gpu/drm/i915/display/icl_dsi.c | 2 +- drivers/gpu/drm/i915/display/intel_dp.c | 3 ++- drivers/gpu/drm/i915/display/intel_vdsc.c | 5 +++++ drivers/gpu/drm/i915/display/intel_vdsc.h | 1 + 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 37faa8f19f6e..297368ff42a5 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1655,7 +1655,7 @@ static int gen11_dsi_dsc_compute_config(struct intel_encoder *encoder, if (ret) return ret; - crtc_state->dsc.compression_enable = true; + intel_dsc_enable_on_crtc(crtc_state); return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 7059d55687cf..fc1949e0c4de 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2475,7 +2475,8 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, return ret; } - pipe_config->dsc.compression_enable = true; + intel_dsc_enable_on_crtc(pipe_config); + drm_dbg_kms(display->drm, "DP DSC computed with Input Bpp = %d " "Compressed Bpp = " FXP_Q4_FMT " Slice Count = %d\n", pipe_config->pipe_bpp, diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index bca747e24a7f..803f3b395c79 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -372,6 +372,11 @@ int intel_dsc_compute_params(struct intel_crtc_state *pipe_config) return 0; } +void intel_dsc_enable_on_crtc(struct intel_crtc_state *crtc_state) +{ + crtc_state->dsc.compression_enable = true; +} + enum intel_display_power_domain intel_dsc_power_domain(struct intel_crtc *crtc, enum transcoder cpu_transcoder) { diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.h b/drivers/gpu/drm/i915/display/intel_vdsc.h index 2139391ff881..8c7c7fb652c3 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc.h @@ -20,6 +20,7 @@ void intel_uncompressed_joiner_enable(const struct intel_crtc_state *crtc_state) void intel_dsc_enable(const struct intel_crtc_state *crtc_state); void intel_dsc_disable(const struct intel_crtc_state *crtc_state); int intel_dsc_compute_params(struct intel_crtc_state *pipe_config); +void intel_dsc_enable_on_crtc(struct intel_crtc_state *crtc_state); void intel_dsc_get_config(struct intel_crtc_state *crtc_state); enum intel_display_power_domain intel_dsc_power_domain(struct intel_crtc *crtc, enum transcoder cpu_transcoder); -- cgit v1.2.3 From 69df31263bcabc527a5b526fb8972cb080a179b3 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 19:19:29 +0300 Subject: drm/i915/dp: Ensure the FEC state stays disabled for UHBR links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atm, in the DP SST case the FEC state is computed before intel_crtc_state::port_clock is initialized, hence intel_dp_is_uhbr() will always return false and the FEC state will be always computed assuming a non-UHBR link. This happens to work, since the FEC state is recomputed later in intel_dp_mtp_tu_compute_config(), where port_clock will be set already, so intel_crtc_state::fec_enable will be reset as expected for UHBR. This also depends on link rates being tried in an increasing order (i.e. 
from non-UHBR -> UHBR link rates) in dsc_compute_link_config(), thus intel_crtc_state::fec_enable being set for the non-UHBR rates and getting reset for the first UHBR rate as expected. A follow-up change will reuse intel_dp_fec_compute_config() for the DP MST state computation, prepare for that here, making sure that the function determines the correct intel_crtc_state::fec_enable=false state for UHBR link rates based on the above. The DP SST and MST state computation should be further unified to avoid computing/setting the intel_crtc_state::fec_enable state multiple times, but that's left for a follow-up change. For now add only code comments about this. Signed-off-by: Imre Deak Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251015161934.262108-3-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 10 +++++++--- drivers/gpu/drm/i915/display/intel_dp_mst.c | 5 +++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index fc1949e0c4de..7c568c23134f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2368,6 +2368,9 @@ static int intel_edp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, static void intel_dp_fec_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { + if (intel_dp_is_uhbr(crtc_state)) + return; + if (crtc_state->fec_enable) return; @@ -2379,9 +2382,6 @@ static void intel_dp_fec_compute_config(struct intel_dp *intel_dp, if (intel_dp_is_edp(intel_dp)) return; - if (intel_dp_is_uhbr(crtc_state)) - return; - crtc_state->fec_enable = true; } @@ -2400,6 +2400,10 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, bool is_mst = intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST); int ret; + /* + * FIXME: set the FEC enabled state once pipe_config->port_clock is + * already known, so the UHBR/non-UHBR mode can be determined. + */ intel_dp_fec_compute_config(intel_dp, pipe_config); if (!intel_dp_dsc_supports_format(connector, pipe_config->output_format)) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index f2266b265304..27e952a67c34 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -293,6 +293,11 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, mst_stream_update_slots(crtc_state, mst_state); } + /* + * NOTE: The following must reset crtc_state->fec_enable for UHBR/DSC + * after it was set by intel_dp_dsc_compute_config() -> + * intel_dp_fec_compute_config(). + */ if (dsc) { if (!intel_dp_supports_fec(intel_dp, connector, crtc_state)) return -EINVAL; -- cgit v1.2.3 From cb6c8f1f6f46ac2cbfb42ce8eb8b18257aeaa91a Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 19:19:30 +0300 Subject: drm/i915/dp: Export helper to determine if FEC on non-UHBR links is required MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export the helper function to determine if FEC is required on a non-UHBR (8b10b) SST or MST link. A follow up change will take this into use for MST as well. While at it determine the output type from the CRTC state, which allows dropping the intel_dp argument. Also make the function return the required FEC state, instead of setting this in the CRTC state, which allows only querying this requirement, without changing the state. 
Also rename the function to intel_dp_needs_8b10b_fec(), to clarify that the function determines if FEC is required on an 8b10b link (on 128b132b links FEC is always enabled by the HW implicitly, so the function will return false for that case). Signed-off-by: Imre Deak Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251015161934.262108-4-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 21 +++++++++++++-------- drivers/gpu/drm/i915/display/intel_dp.h | 2 ++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 7c568c23134f..844118735998 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2365,24 +2365,29 @@ static int intel_edp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, return 0; } -static void intel_dp_fec_compute_config(struct intel_dp *intel_dp, - struct intel_crtc_state *crtc_state) +/* + * Return whether FEC must be enabled for 8b10b SST or MST links. On 128b132b + * links FEC is always enabled implicitly by the HW, so this function returns + * false for that case. + */ +bool intel_dp_needs_8b10b_fec(const struct intel_crtc_state *crtc_state, + bool dsc_enabled_on_crtc) { if (intel_dp_is_uhbr(crtc_state)) - return; + return false; if (crtc_state->fec_enable) - return; + return true; /* * Though eDP v1.5 supports FEC with DSC, unlike DP, it is optional. * Since, FEC is a bandwidth overhead, continue to not enable it for * eDP. Until, there is a good reason to do so. */ - if (intel_dp_is_edp(intel_dp)) - return; + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) + return false; - crtc_state->fec_enable = true; + return dsc_enabled_on_crtc; } int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, @@ -2404,7 +2409,7 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, * FIXME: set the FEC enabled state once pipe_config->port_clock is * already known, so the UHBR/non-UHBR mode can be determined. */ - intel_dp_fec_compute_config(intel_dp, pipe_config); + pipe_config->fec_enable = intel_dp_needs_8b10b_fec(pipe_config, true); if (!intel_dp_dsc_supports_format(connector, pipe_config->output_format)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 281ced3a3b39..0537be20fe7b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -73,6 +73,8 @@ void intel_dp_encoder_flush_work(struct drm_encoder *encoder); int intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state); +bool intel_dp_needs_8b10b_fec(const struct intel_crtc_state *crtc_state, + bool dsc_enabled_on_crtc); int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 27e952a67c34..d0590b5ffffd 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -296,7 +296,7 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, /* * NOTE: The following must reset crtc_state->fec_enable for UHBR/DSC * after it was set by intel_dp_dsc_compute_config() -> - * intel_dp_fec_compute_config(). + * intel_dp_needs_8b10b_fec(). 
*/ if (dsc) { if (!intel_dp_supports_fec(intel_dp, connector, crtc_state)) return -EINVAL; -- cgit v1.2.3 From b762ae48293e2cc2145cdc91eb596d057f1aff11 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 19:19:31 +0300 Subject: drm/i915/dp_mst: Reuse the DP-SST helper function to compute FEC config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reuse the DP-SST helper to compute the FEC enabled state for DP-MST as well. Signed-off-by: Imre Deak Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251015161934.262108-5-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index d0590b5ffffd..0cbb4c3a8e22 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -298,12 +298,10 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, * after it was set by intel_dp_dsc_compute_config() -> * intel_dp_needs_8b10b_fec(). */ - if (dsc) { - if (!intel_dp_supports_fec(intel_dp, connector, crtc_state)) - return -EINVAL; - - crtc_state->fec_enable = !intel_dp_is_uhbr(crtc_state); - } + crtc_state->fec_enable = intel_dp_needs_8b10b_fec(crtc_state, dsc); + if (crtc_state->fec_enable && + !intel_dp_supports_fec(intel_dp, connector, crtc_state)) + return -EINVAL; max_dpt_bpp_x16 = fxp_q4_from_int(intel_dp_mst_max_dpt_bpp(crtc_state, dsc)); if (max_dpt_bpp_x16 && max_bpp_x16 > max_dpt_bpp_x16) { -- cgit v1.2.3 From 7c027070e98d7b515e4d3a94b54d288c61cb5918 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 19:19:32 +0300 Subject: drm/i915/dp_mst: Track DSC enabled status on the MST link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track whether DSC is enabled on any CRTC on a link. On DP-SST (and DSI) this will always match the CRTC's DSC state, those links having only a single stream (aka CRTC). For instance, on DP-MST if DSC is enabled for CRTC#0, but disabled for CRTC#1, the DSC/FEC state for these CRTCs will be as follows: CRTC#0: - compression_enable = true - compression_enabled_on_link = true - fec_enable = true for 8b10b, false for 128b132b CRTC#1: - compression_enable = false - compression_enabled_on_link = true - fec_enable = true for 8b10b, false for 128b132b This patch only sets compression_enabled_on_link for CRTC#0 above and enables FEC on CRTC#0 if DSC was enabled on any other CRTC on the 8b10b MST link. A follow-up change will make sure that the state of all the CRTCs (CRTC#1 above) on an MST link is recomputed if DSC gets enabled on any CRTC, setting compression_enabled_on_link and fec_enable for these.
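A minimal standalone model of how the two flags relate in the CRTC#0/CRTC#1 example above (userspace C with stub types, not the driver structs; propagating the link flag to CRTC#1 is exactly what the follow-up change automates):

#include <stdbool.h>
#include <stdio.h>

struct crtc_state {
	bool compression_enable;		/* DSC on this CRTC */
	bool compression_enabled_on_link;	/* DSC anywhere on the link */
};

static void dsc_enable_on_crtc(struct crtc_state *s)
{
	s->compression_enabled_on_link = true;
	s->compression_enable = true;
}

int main(void)
{
	struct crtc_state crtc0 = {0}, crtc1 = {0};

	dsc_enable_on_crtc(&crtc0);
	/* recomputing the sibling stream propagates only the link flag */
	crtc1.compression_enabled_on_link = true;

	/* prints enable=1 on_link=1 for crtc0, enable=0 on_link=1 for crtc1 */
	printf("crtc0: enable=%d on_link=%d\n",
	       crtc0.compression_enable, crtc0.compression_enabled_on_link);
	printf("crtc1: enable=%d on_link=%d\n",
	       crtc1.compression_enable, crtc1.compression_enabled_on_link);
	return 0;
}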
Signed-off-by: Imre Deak Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251015161934.262108-6-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display_types.h | 2 ++ drivers/gpu/drm/i915/display/intel_dp.c | 2 +- drivers/gpu/drm/i915/display/intel_vdsc.c | 11 +++++++++++ drivers/gpu/drm/i915/display/intel_vdsc.h | 1 + 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 203dd38a9ec4..20747fa4d3da 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1279,6 +1279,8 @@ struct intel_crtc_state { /* Display Stream compression state */ struct { + /* Only used for state computation, not read out from the HW. */ + bool compression_enabled_on_link; bool compression_enable; int num_streams; /* Compressed Bpp in U6.4 format (first 4 bits for fractional part) */ diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 844118735998..95884af242b3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2387,7 +2387,7 @@ bool intel_dp_needs_8b10b_fec(const struct intel_crtc_state *crtc_state, if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) return false; - return dsc_enabled_on_crtc; + return dsc_enabled_on_crtc || intel_dsc_enabled_on_link(crtc_state); } int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 803f3b395c79..dbf2cf1b896d 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -374,9 +374,20 @@ int intel_dsc_compute_params(struct intel_crtc_state *pipe_config) void intel_dsc_enable_on_crtc(struct intel_crtc_state *crtc_state) { + crtc_state->dsc.compression_enabled_on_link = true; crtc_state->dsc.compression_enable = true; } +bool intel_dsc_enabled_on_link(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + drm_WARN_ON(display->drm, crtc_state->dsc.compression_enable && + !crtc_state->dsc.compression_enabled_on_link); + + return crtc_state->dsc.compression_enabled_on_link; +} + enum intel_display_power_domain intel_dsc_power_domain(struct intel_crtc *crtc, enum transcoder cpu_transcoder) { diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.h b/drivers/gpu/drm/i915/display/intel_vdsc.h index 8c7c7fb652c3..99f64ac54b27 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc.h @@ -21,6 +21,7 @@ void intel_dsc_enable(const struct intel_crtc_state *crtc_state); void intel_dsc_disable(const struct intel_crtc_state *crtc_state); int intel_dsc_compute_params(struct intel_crtc_state *pipe_config); void intel_dsc_enable_on_crtc(struct intel_crtc_state *crtc_state); +bool intel_dsc_enabled_on_link(const struct intel_crtc_state *crtc_state); void intel_dsc_get_config(struct intel_crtc_state *crtc_state); enum intel_display_power_domain intel_dsc_power_domain(struct intel_crtc *crtc, enum transcoder cpu_transcoder); -- cgit v1.2.3 From 470b84af457e34cbfa4d2c17dab78746736ab898 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 19:19:33 +0300 Subject: drm/i915/dp_mst: Recompute all MST link CRTCs if DSC gets enabled on the link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 
state of all the CRTCs on an MST link must be recomputed if DSC gets enabled on any of the CRTCs on the link. For instance, an MST docking station's Panel Replay capability may depend on whether DSC is enabled on any of the dock's streams (aka CRTCs). To assist the Panel Replay state computation for a CRTC based on the above, track in the CRTC state if DSC is enabled on any CRTC on an MST link. The intel_link_bw_limits::force_fec_pipes mask is used for a reason similar to the above: enable FEC on all CRTCs of a non-UHBR (8b10b) MST link if DSC is enabled on any of the link's CRTCs. The FEC enabled state for a CRTC doesn't indicate if DSC is enabled on a UHBR MST link (FEC is always enabled by the HW for UHBR, hence it's not tracked by the intel_crtc_state::fec_enable flag for such links, where this flag is always false). Based on the above, to be able to determine the DSC state on both non-UHBR and UHBR MST links, track the more generic DSC-enabled-on-link state (instead of the FEC-enabled-on-link state) for each CRTC in intel_link_bw_limits. Signed-off-by: Imre Deak Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251015161934.262108-7-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/display/intel_dp_mst.c | 16 ++++++++-------- drivers/gpu/drm/i915/display/intel_link_bw.c | 17 +++++++++-------- drivers/gpu/drm/i915/display/intel_link_bw.h | 2 +- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index bbb6ff929d64..a8b4619de347 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4603,7 +4603,7 @@ intel_modeset_pipe_config(struct intel_atomic_state *state, if (ret) return ret; - crtc_state->fec_enable = limits->force_fec_pipes & BIT(crtc->pipe); + crtc_state->dsc.compression_enabled_on_link = limits->link_dsc_pipes & BIT(crtc->pipe); crtc_state->max_link_bpp_x16 = limits->max_bpp_x16[crtc->pipe]; if (crtc_state->pipe_bpp > fxp_q4_to_int(crtc_state->max_link_bpp_x16)) { diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 0cbb4c3a8e22..a845b2612a3f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -814,14 +814,14 @@ static u8 get_pipes_downstream_of_mst_port(struct intel_atomic_state *state, return mask; } -static int intel_dp_mst_check_fec_change(struct intel_atomic_state *state, +static int intel_dp_mst_check_dsc_change(struct intel_atomic_state *state, struct drm_dp_mst_topology_mgr *mst_mgr, struct intel_link_bw_limits *limits) { struct intel_display *display = to_intel_display(state); struct intel_crtc *crtc; u8 mst_pipe_mask; - u8 fec_pipe_mask = 0; + u8 dsc_pipe_mask = 0; int ret; mst_pipe_mask = get_pipes_downstream_of_mst_port(state, mst_mgr, NULL); @@ -834,16 +834,16 @@ static int intel_dp_mst_check_fec_change(struct intel_atomic_state *state, if (drm_WARN_ON(display->drm, !crtc_state)) return -EINVAL; - if (crtc_state->fec_enable) - fec_pipe_mask |= BIT(crtc->pipe); + if (intel_dsc_enabled_on_link(crtc_state)) + dsc_pipe_mask |= BIT(crtc->pipe); } - if (!fec_pipe_mask || mst_pipe_mask == fec_pipe_mask) + if (!dsc_pipe_mask || mst_pipe_mask == dsc_pipe_mask) return 0; - limits->force_fec_pipes |= mst_pipe_mask; + limits->link_dsc_pipes |= mst_pipe_mask; - ret = intel_modeset_pipes_in_mask_early(state, "MST FEC", + ret =
intel_modeset_pipes_in_mask_early(state, "MST DSC", mst_pipe_mask); return ret ? : -EAGAIN; @@ -897,7 +897,7 @@ int intel_dp_mst_atomic_check_link(struct intel_atomic_state *state, int i; for_each_new_mst_mgr_in_state(&state->base, mgr, mst_state, i) { - ret = intel_dp_mst_check_fec_change(state, mgr, limits); + ret = intel_dp_mst_check_dsc_change(state, mgr, limits); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.c b/drivers/gpu/drm/i915/display/intel_link_bw.c index f52dee0ea412..d2862de894fa 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.c +++ b/drivers/gpu/drm/i915/display/intel_link_bw.c @@ -20,6 +20,7 @@ #include "intel_dp_tunnel.h" #include "intel_fdi.h" #include "intel_link_bw.h" +#include "intel_vdsc.h" static int get_forced_link_bpp_x16(struct intel_atomic_state *state, const struct intel_crtc *crtc) @@ -55,7 +56,7 @@ void intel_link_bw_init_limits(struct intel_atomic_state *state, struct intel_display *display = to_intel_display(state); enum pipe pipe; - limits->force_fec_pipes = 0; + limits->link_dsc_pipes = 0; limits->bpp_limit_reached_pipes = 0; for_each_pipe(display, pipe) { struct intel_crtc *crtc = intel_crtc_for_pipe(display, pipe); @@ -65,8 +66,8 @@ void intel_link_bw_init_limits(struct intel_atomic_state *state, if (state->base.duplicated && crtc_state) { limits->max_bpp_x16[pipe] = crtc_state->max_link_bpp_x16; - if (crtc_state->fec_enable) - limits->force_fec_pipes |= BIT(pipe); + if (intel_dsc_enabled_on_link(crtc_state)) + limits->link_dsc_pipes |= BIT(pipe); } else { limits->max_bpp_x16[pipe] = INT_MAX; } @@ -265,10 +266,10 @@ assert_link_limit_change_valid(struct intel_display *display, bool bpps_changed = false; enum pipe pipe; - /* FEC can't be forced off after it was forced on. */ + /* DSC can't be disabled after it was enabled. */ if (drm_WARN_ON(display->drm, - (old_limits->force_fec_pipes & new_limits->force_fec_pipes) != - old_limits->force_fec_pipes)) + (old_limits->link_dsc_pipes & new_limits->link_dsc_pipes) != + old_limits->link_dsc_pipes)) return false; for_each_pipe(display, pipe) { @@ -286,8 +287,8 @@ assert_link_limit_change_valid(struct intel_display *display, /* At least one limit must change. */ if (drm_WARN_ON(display->drm, !bpps_changed && - new_limits->force_fec_pipes == - old_limits->force_fec_pipes)) + new_limits->link_dsc_pipes == + old_limits->link_dsc_pipes)) return false; return true; diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.h b/drivers/gpu/drm/i915/display/intel_link_bw.h index 95ab7c50c61d..cb18e171037c 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.h +++ b/drivers/gpu/drm/i915/display/intel_link_bw.h @@ -15,7 +15,7 @@ struct intel_connector; struct intel_crtc_state; struct intel_link_bw_limits { - u8 force_fec_pipes; + u8 link_dsc_pipes; u8 bpp_limit_reached_pipes; /* in 1/16 bpp units */ int max_bpp_x16[I915_MAX_PIPES]; -- cgit v1.2.3 From c390bf07961b3a39f3417d75850a4e721b87e595 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 15 Oct 2025 19:19:34 +0300 Subject: drm/i915/dp: Fix panel replay when DSC is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent enabling panel replay if the sink doesn't support this due to DSC being enabled. Panel replay has two modes, updating full frames or only selected regions of the frame. If the sink doesn't support Panel Replay in full frame update mode with DSC, prevent Panel Replay completely if DSC is enabled.
If the sink lacks support only for the selective update mode while DSC is enabled, it still supports Panel Replay in the full frame update mode, so only prevent selective updates in this case. v2: - Use Panel Replay instead of PR in debug prints. (Jouni) - Rebase on change tracking the link DSC state in the crtc state. Cc: Jouni Högander Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14869 Reviewed-by: Jouni Högander Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20251015161934.262108-8-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display_types.h | 9 +++ drivers/gpu/drm/i915/display/intel_dp.c | 2 + drivers/gpu/drm/i915/display/intel_psr.c | 93 ++++++++++++++++++++-- 3 files changed, 99 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 20747fa4d3da..1e0cd81c9474 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -955,6 +955,12 @@ struct intel_csc_matrix { u16 postoff[3]; }; +enum intel_panel_replay_dsc_support { + INTEL_DP_PANEL_REPLAY_DSC_NOT_SUPPORTED, + INTEL_DP_PANEL_REPLAY_DSC_FULL_FRAME_ONLY, + INTEL_DP_PANEL_REPLAY_DSC_SELECTIVE_UPDATE, +}; + struct intel_crtc_state { /* * uapi (drm) state. This is the software state shown to userspace. @@ -1133,6 +1139,8 @@ struct intel_crtc_state { bool has_panel_replay; bool wm_level_disabled; bool pkg_c_latency_used; + /* Only used for state verification. */ + enum intel_panel_replay_dsc_support panel_replay_dsc_support; u32 dc3co_exitline; u16 su_y_granularity; u8 active_non_psr_pipes; @@ -1706,6 +1714,7 @@ struct intel_psr { bool source_panel_replay_support; bool sink_panel_replay_support; bool sink_panel_replay_su_support; + enum intel_panel_replay_dsc_support sink_panel_replay_dsc_support; bool panel_replay_enabled; u32 dc3co_exitline; u32 dc3co_exit_delay; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 95884af242b3..215ad690ab07 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -6053,6 +6053,8 @@ intel_dp_detect(struct drm_connector *_connector, memset(connector->dp.dsc_dpcd, 0, sizeof(connector->dp.dsc_dpcd)); intel_dp->psr.sink_panel_replay_support = false; intel_dp->psr.sink_panel_replay_su_support = false; + intel_dp->psr.sink_panel_replay_dsc_support = + INTEL_DP_PANEL_REPLAY_DSC_NOT_SUPPORTED; intel_dp_mst_disconnect(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 703e5f6af04c..3e99a65ec988 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -29,6 +29,7 @@ #include #include "i915_reg.h" +#include "i915_utils.h" #include "intel_alpm.h" #include "intel_atomic.h" #include "intel_crtc.h" @@ -50,6 +51,7 @@ #include "intel_snps_phy.h" #include "intel_step.h" #include "intel_vblank.h" +#include "intel_vdsc.h" #include "intel_vrr.h" #include "skl_universal_plane.h" @@ -580,6 +582,44 @@ exit: intel_dp->psr.su_y_granularity = y; } +static enum intel_panel_replay_dsc_support +compute_pr_dsc_support(struct intel_dp *intel_dp) +{ + u8 pr_dsc_mode; + u8 val; + + val = intel_dp->pr_dpcd[INTEL_PR_DPCD_INDEX(DP_PANEL_REPLAY_CAP_CAPABILITY)]; + pr_dsc_mode = REG_FIELD_GET8(DP_PANEL_REPLAY_DSC_DECODE_CAPABILITY_IN_PR_MASK, val); + + switch (pr_dsc_mode) { + case
DP_DSC_DECODE_CAPABILITY_IN_PR_FULL_FRAME_ONLY: + return INTEL_DP_PANEL_REPLAY_DSC_FULL_FRAME_ONLY; + case DP_DSC_DECODE_CAPABILITY_IN_PR_SUPPORTED: + return INTEL_DP_PANEL_REPLAY_DSC_SELECTIVE_UPDATE; + default: + MISSING_CASE(pr_dsc_mode); + fallthrough; + case DP_DSC_DECODE_CAPABILITY_IN_PR_NOT_SUPPORTED: + case DP_DSC_DECODE_CAPABILITY_IN_PR_RESERVED: + return INTEL_DP_PANEL_REPLAY_DSC_NOT_SUPPORTED; + } +} + +static const char *panel_replay_dsc_support_str(enum intel_panel_replay_dsc_support dsc_support) +{ + switch (dsc_support) { + case INTEL_DP_PANEL_REPLAY_DSC_NOT_SUPPORTED: + return "not supported"; + case INTEL_DP_PANEL_REPLAY_DSC_FULL_FRAME_ONLY: + return "full frame only"; + case INTEL_DP_PANEL_REPLAY_DSC_SELECTIVE_UPDATE: + return "selective update"; + default: + MISSING_CASE(dsc_support); + return "n/a"; + }; +} + static void _panel_replay_init_dpcd(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); @@ -615,10 +655,13 @@ static void _panel_replay_init_dpcd(struct intel_dp *intel_dp) DP_PANEL_REPLAY_SU_SUPPORT) intel_dp->psr.sink_panel_replay_su_support = true; + intel_dp->psr.sink_panel_replay_dsc_support = compute_pr_dsc_support(intel_dp); + drm_dbg_kms(display->drm, - "Panel replay %sis supported by panel\n", + "Panel replay %sis supported by panel (in DSC mode: %s)\n", intel_dp->psr.sink_panel_replay_su_support ? - "selective_update " : ""); + "selective_update " : "", + panel_replay_dsc_support_str(intel_dp->psr.sink_panel_replay_dsc_support)); } static void _psr_init_dpcd(struct intel_dp *intel_dp) @@ -1606,9 +1649,21 @@ static bool intel_sel_update_config_valid(struct intel_dp *intel_dp, goto unsupported; } - if (crtc_state->has_panel_replay && (DISPLAY_VER(display) < 14 || - !intel_dp->psr.sink_panel_replay_su_support)) - goto unsupported; + if (crtc_state->has_panel_replay) { + if (DISPLAY_VER(display) < 14) + goto unsupported; + + if (!intel_dp->psr.sink_panel_replay_su_support) + goto unsupported; + + if (intel_dsc_enabled_on_link(crtc_state) && + intel_dp->psr.sink_panel_replay_dsc_support != + INTEL_DP_PANEL_REPLAY_DSC_SELECTIVE_UPDATE) { + drm_dbg_kms(display->drm, + "Selective update with Panel Replay not enabled because it's not supported with DSC\n"); + goto unsupported; + } + } if (crtc_state->crc_enabled) { drm_dbg_kms(display->drm, @@ -1685,6 +1740,14 @@ _panel_replay_compute_config(struct intel_dp *intel_dp, return false; } + if (intel_dsc_enabled_on_link(crtc_state) && + intel_dp->psr.sink_panel_replay_dsc_support == + INTEL_DP_PANEL_REPLAY_DSC_NOT_SUPPORTED) { + drm_dbg_kms(display->drm, + "Panel Replay not enabled because it's not supported with DSC\n"); + return false; + } + if (!intel_dp_is_edp(intel_dp)) return true; @@ -1790,6 +1853,8 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, return; } + /* Only used for state verification. 
*/ + crtc_state->panel_replay_dsc_support = intel_dp->psr.sink_panel_replay_dsc_support; crtc_state->has_panel_replay = _panel_replay_compute_config(intel_dp, crtc_state, conn_state); @@ -2991,6 +3056,20 @@ void intel_psr_pre_plane_update(struct intel_atomic_state *state, } } +static void +verify_panel_replay_dsc_state(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + if (!crtc_state->has_panel_replay) + return; + + drm_WARN_ON(display->drm, + intel_dsc_enabled_on_link(crtc_state) && + crtc_state->panel_replay_dsc_support == + INTEL_DP_PANEL_REPLAY_DSC_NOT_SUPPORTED); +} + void intel_psr_post_plane_update(struct intel_atomic_state *state, struct intel_crtc *crtc) { @@ -3002,6 +3081,8 @@ void intel_psr_post_plane_update(struct intel_atomic_state *state, if (!crtc_state->has_psr) return; + verify_panel_replay_dsc_state(crtc_state); + for_each_intel_encoder_mask_with_psr(state->base.dev, encoder, crtc_state->uapi.encoder_mask) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); @@ -4031,6 +4112,8 @@ static void intel_psr_sink_capability(struct intel_dp *intel_dp, seq_printf(m, ", Panel Replay = %s", str_yes_no(psr->sink_panel_replay_support)); seq_printf(m, ", Panel Replay Selective Update = %s", str_yes_no(psr->sink_panel_replay_su_support)); + seq_printf(m, ", Panel Replay DSC support = %s", + panel_replay_dsc_support_str(psr->sink_panel_replay_dsc_support)); if (intel_dp->pr_dpcd[INTEL_PR_DPCD_INDEX(DP_PANEL_REPLAY_CAP_SUPPORT)] & DP_PANEL_REPLAY_EARLY_TRANSPORT_SUPPORT) seq_printf(m, " (Early Transport)"); -- cgit v1.2.3 From 77fb33cb7ccb7c76e3847795b7ffb29906d14b0b Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Fri, 17 Oct 2025 18:05:00 +0530 Subject: drm/i915/psr: Add helper to get min psr guardband MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a helper to compute the max link wake latency when using Auxless/Aux wake mechanism for PSR/Panel Replay/LOBF features. This will be used to compute the minimum guardband so that the link wake latencies are accounted for and these features work smoothly for higher refresh rate panels. v2: - Account for flag `req_psr2_sdp_prior_scanline` and scl lines while computing min guardband. (Jouni) - Use wake lines only for eDP with panel_replay and sel_update flags set. (Jouni) Bspec: 70151, 71477 Signed-off-by: Ankit Nautiyal Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251017123504.2247954-2-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 27 +++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_psr.h | 1 + 2 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 3e99a65ec988..9c7364d6d69d 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -4499,3 +4499,30 @@ void intel_psr_compute_config_late(struct intel_dp *intel_dp, intel_psr_set_non_psr_pipes(intel_dp, crtc_state); } + +int intel_psr_min_guardband(struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + int psr_min_guardband; + int wake_lines; + + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) + return 0; + + if (crtc_state->has_panel_replay) + wake_lines = crtc_state->alpm_state.aux_less_wake_lines; + else if (crtc_state->has_sel_update) + wake_lines = DISPLAY_VER(display) < 20 ?
+ psr2_block_count_lines(crtc_state->alpm_state.io_wake_lines, + crtc_state->alpm_state.fast_wake_lines) : + crtc_state->alpm_state.io_wake_lines; + else + return 0; + + psr_min_guardband = wake_lines + crtc_state->set_context_latency; + + if (crtc_state->req_psr2_sdp_prior_scanline) + psr_min_guardband++; + + return psr_min_guardband; +} diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index b17ce312dc37..620b35928832 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -85,5 +85,6 @@ bool intel_psr_needs_alpm_aux_less(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); void intel_psr_compute_config_late(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state); +int intel_psr_min_guardband(struct intel_crtc_state *crtc_state); #endif /* __INTEL_PSR_H__ */ -- cgit v1.2.3 From 52ecd48b8d3f5206049758d95ca5b291397b3209 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Fri, 17 Oct 2025 18:05:01 +0530 Subject: drm/i915/dp: Add helper to get min sdp guardband MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper to compute vblank time needed for transmitting specific DisplayPort SDPs like PPS, GAMUT_METADATA, and VSC_EXT. Latency is based on line count per packet type. This will be used to ensure adequate guardband when features like DSC/HDR are enabled. v2: Correct the lines required for PPS SDP. (Jouni) Bspec: 70151 Signed-off-by: Ankit Nautiyal Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251017123504.2247954-3-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 36 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dp.h | 2 ++ 2 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 215ad690ab07..9441ef685200 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -7002,3 +7002,39 @@ int intel_dp_compute_config_late(struct intel_encoder *encoder, return 0; } + +static +int intel_dp_get_lines_for_sdp(u32 type) +{ + switch (type) { + case DP_SDP_VSC_EXT_VESA: + case DP_SDP_VSC_EXT_CEA: + return 10; + case HDMI_PACKET_TYPE_GAMUT_METADATA: + return 8; + case DP_SDP_PPS: + return 7; + default: + break; + } + + return 0; +} + +int intel_dp_sdp_min_guardband(const struct intel_crtc_state *crtc_state, + bool assume_all_enabled) +{ + int sdp_guardband = 0; + + if (assume_all_enabled || + crtc_state->infoframes.enable & + intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA)) + sdp_guardband = max(sdp_guardband, + intel_dp_get_lines_for_sdp(HDMI_PACKET_TYPE_GAMUT_METADATA)); + + if (assume_all_enabled || + crtc_state->dsc.compression_enable) + sdp_guardband = max(sdp_guardband, intel_dp_get_lines_for_sdp(DP_SDP_PPS)); + + return sdp_guardband; +} diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 0537be20fe7b..200a8b267f64 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -223,5 +223,7 @@ bool intel_dp_in_hdr_mode(const struct drm_connector_state *conn_state); int intel_dp_compute_config_late(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state); +int intel_dp_sdp_min_guardband(const struct intel_crtc_state *crtc_state, + bool assume_all_enabled); #endif /* __INTEL_DP_H__ */ -- cgit v1.2.3 
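[Editor's note: the intel_dp_sdp_min_guardband() helper above boils down to a max() over the line cost of each SDP that will be transmitted during vblank. A standalone sketch of that bookkeeping follows; the line counts (VSC_EXT 10, GAMUT_METADATA 8, PPS 7) come from the diff above, the hunk shown only folds in the gamut and PPS costs, and everything else here is a simplified stand-in, not the driver API.]

#include <stdbool.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Line costs per SDP type, as listed in the helper above. */
enum { LINES_VSC_EXT = 10, LINES_GAMUT_METADATA = 8, LINES_PPS = 7 };

/* Worst single SDP among those that will be sent; mirrors the max()
 * accumulation in intel_dp_sdp_min_guardband(). */
static int sdp_min_guardband(bool gamut_enabled, bool dsc_enabled,
			     bool assume_all_enabled)
{
	int guardband = 0;

	if (assume_all_enabled || gamut_enabled)
		guardband = MAX(guardband, LINES_GAMUT_METADATA);
	if (assume_all_enabled || dsc_enabled)
		guardband = MAX(guardband, LINES_PPS); /* PPS rides with DSC */

	return guardband;
}

int main(void)
{
	printf("DSC only: %d lines\n", sdp_min_guardband(false, true, false));
	printf("gamut only: %d lines\n", sdp_min_guardband(true, false, false));
	printf("assume all: %d lines\n", sdp_min_guardband(false, false, true));
	return 0;
}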
From 69d640edc2630d903f25ab8ff8bf7d96d0776ce2 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Fri, 17 Oct 2025 18:05:02 +0530 Subject: drm/i915/dp: Check if guardband can accommodate sdp latencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check if guardband is sufficient for all DP SDP latencies. If it's not, fail .compute_config_late(). Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20251017123504.2247954-4-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 9441ef685200..b0aeb6c2de86 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -93,6 +93,7 @@ #include "intel_psr.h" #include "intel_quirks.h" #include "intel_tc.h" +#include "intel_vblank.h" #include "intel_vdsc.h" #include "intel_vrr.h" @@ -6992,14 +6993,35 @@ void intel_dp_mst_resume(struct intel_display *display) } } +static +int intel_dp_sdp_compute_config_late(struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + int guardband = intel_crtc_vblank_length(crtc_state); + int min_sdp_guardband = intel_dp_sdp_min_guardband(crtc_state, false); + + if (guardband < min_sdp_guardband) { + drm_dbg_kms(display->drm, "guardband %d < min sdp guardband %d\n", + guardband, min_sdp_guardband); + return -EINVAL; + } + + return 0; +} + int intel_dp_compute_config_late(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + int ret; intel_psr_compute_config_late(intel_dp, crtc_state); + ret = intel_dp_sdp_compute_config_late(crtc_state); + if (ret) + return ret; + return 0; } -- cgit v1.2.3 From b2f88d7d5a977120bb9e42af686eb5454d128682 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Fri, 17 Oct 2025 18:05:03 +0530 Subject: drm/i915/vrr: Use the min static optimized guardband MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the current VRR implementation, vrr.vmin and vrr.guardband are set such that they do not need to change when switching from fixed refresh rate to variable refresh rate. Specifically, vrr.guardband is always set to match the vblank length. This approach works for most cases, but not for LRR, where the guardband would need to change while the VRR timing generator is still active. With the VRR TG always active, live updates to guardband are unsafe and not recommended. To ensure hardware safety, guardband was moved out of the !fastset block, meaning any change now requires a full modeset. This breaks seamless LRR switching, which was previously supported. Since the problem arises from guardband being matched to the vblank length, the solution is to use a minimal, sufficient static value instead. So we use a static guardband defined during mode-set that fits within the smallest expected vblank and remains unchanged in case of features like LRR where vtotal changes. To compute this minimum guardband we take into account latencies/delays due to different features as mentioned in the Bspec. Introduce a helper to compute the minimal sufficient guardband.
On platforms where the VRR timing generator is always ON, we optimize the guardband regardless of whether the display is operating in fixed or variable refresh rate mode. v2: - Use max of sagv latency and skl_wm_latency(1) for PM delay computation. (Ville) - Avoid guardband optimization for HDMI for now. (Ville) - Allow guardband optimization only for platforms with intel_vrr_always_use_vrr_tg = true. (Ville) - Add comments for PM delay and a #TODO note for HDMI. v3: Drop the variable prefill_min_guardband. (Ville) Bspec: 70151 Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20251017123504.2247954-5-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_vrr.c | 58 ++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 597008a6c744..3da84a247193 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -10,8 +10,11 @@ #include "intel_display_regs.h" #include "intel_display_types.h" #include "intel_dp.h" +#include "intel_psr.h" #include "intel_vrr.h" #include "intel_vrr_regs.h" +#include "skl_prefill.h" +#include "skl_watermark.h" #define FIXED_POINT_PRECISION 100 #define CMRR_PRECISION_TOLERANCE 10 @@ -433,17 +436,68 @@ intel_vrr_max_guardband(struct intel_crtc_state *crtc_state) intel_vrr_max_vblank_guardband(crtc_state)); } +static +int intel_vrr_compute_optimized_guardband(struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct skl_prefill_ctx prefill_ctx; + int prefill_latency_us; + int guardband = 0; + + skl_prefill_init_worst(&prefill_ctx, crtc_state); + + /* + * The SoC power controller runs SAGV mutually exclusive with package C states, + * so the max of package C and SAGV latencies is used to compute the min prefill guardband. + * PM delay = max(sagv_latency, pkgc_max_latency (highest enabled wm level 1 and up)) + */ + prefill_latency_us = max(display->sagv.block_time_us, + skl_watermark_max_latency(display, 1)); + + guardband = skl_prefill_min_guardband(&prefill_ctx, + crtc_state, + prefill_latency_us); + + if (intel_crtc_has_dp_encoder(crtc_state)) { + guardband = max(guardband, intel_psr_min_guardband(crtc_state)); + guardband = max(guardband, intel_dp_sdp_min_guardband(crtc_state, true)); + } + + return guardband; +} + +static bool intel_vrr_use_optimized_guardband(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + /* + * #TODO: Enable optimized guardband for HDMI + * For HDMI lot of infoframes are transmitted a line or two after vsync. + * Since with optimized guardband the double bufferring point is at delayed vblank, + * we need to ensure that vsync happens after delayed vblank for the HDMI case. 
+ */ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return false; + + return intel_vrr_always_use_vrr_tg(display); +} + void intel_vrr_compute_guardband(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; struct drm_display_mode *pipe_mode = &crtc_state->hw.pipe_mode; + int guardband; if (!intel_vrr_possible(crtc_state)) return; - crtc_state->vrr.guardband = min(crtc_state->vrr.vmin - adjusted_mode->crtc_vdisplay, - intel_vrr_max_guardband(crtc_state)); + if (intel_vrr_use_optimized_guardband(crtc_state)) + guardband = intel_vrr_compute_optimized_guardband(crtc_state); + else + guardband = crtc_state->vrr.vmin - adjusted_mode->crtc_vdisplay; + + crtc_state->vrr.guardband = min(guardband, intel_vrr_max_guardband(crtc_state)); if (intel_vrr_always_use_vrr_tg(display)) { adjusted_mode->crtc_vblank_start = -- cgit v1.2.3 From 4ba596539e9836b8941b408769b04e1befc74364 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Fri, 17 Oct 2025 18:05:04 +0530 Subject: drm/i915/vrr: Use optimized guardband whenever VRR TG is active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the guardband is optimized only for platforms where the VRR timing generator is always ON. Extend the usage of optimized guardband to all VRR supporting platforms. v2: Drop check for `crtc_state->vrr.enable` and just return true unconditionally from intel_vrr_use_optimized_guardband(). (Ville) Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20251017123504.2247954-6-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_vrr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 3da84a247193..92fb72b56f16 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -468,8 +468,6 @@ int intel_vrr_compute_optimized_guardband(struct intel_crtc_state *crtc_state) static bool intel_vrr_use_optimized_guardband(const struct intel_crtc_state *crtc_state) { - struct intel_display *display = to_intel_display(crtc_state); - /* * #TODO: Enable optimized guardband for HDMI * For HDMI lot of infoframes are transmitted a line or two after vsync. @@ -479,7 +477,7 @@ static bool intel_vrr_use_optimized_guardband(const struct intel_crtc_state *crt if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return false; - return intel_vrr_always_use_vrr_tg(display); + return true; } void intel_vrr_compute_guardband(struct intel_crtc_state *crtc_state) -- cgit v1.2.3 From a63db39a578b543f5e5719b9f14dd82d3b8648d1 Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane Date: Thu, 16 Oct 2025 18:45:17 +0530 Subject: drm/i915/xe3lpd: Load DMC for Xe3_LPD version 30.02 Load the DMC for Xe3_LPD version 30.02. 
Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Gustavo Sousa Reviewed-by: Chaitanya Kumar Borah Link: https://lore.kernel.org/r/20251016131517.2032684-1-dnyaneshwar.bhadane@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/i915/display/intel_dmc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 517bebb0b4aa..be6d37ea1139 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -127,6 +127,9 @@ static bool dmc_firmware_param_disabled(struct intel_display *display) #define DISPLAY_VER13_DMC_MAX_FW_SIZE 0x20000 #define DISPLAY_VER12_DMC_MAX_FW_SIZE ICL_DMC_MAX_FW_SIZE +#define XE3LPD_3002_DMC_PATH DMC_PATH(xe3lpd_3002) +MODULE_FIRMWARE(XE3LPD_3002_DMC_PATH); + #define XE3LPD_DMC_PATH DMC_PATH(xe3lpd) MODULE_FIRMWARE(XE3LPD_DMC_PATH); @@ -183,9 +186,10 @@ static const char *dmc_firmware_default(struct intel_display *display, u32 *size { const char *fw_path = NULL; u32 max_fw_size = 0; - - if (DISPLAY_VERx100(display) == 3002 || - DISPLAY_VERx100(display) == 3000) { + if (DISPLAY_VERx100(display) == 3002) { + fw_path = XE3LPD_3002_DMC_PATH; + max_fw_size = XE2LPD_DMC_MAX_FW_SIZE; + } else if (DISPLAY_VERx100(display) == 3000) { fw_path = XE3LPD_DMC_PATH; max_fw_size = XE2LPD_DMC_MAX_FW_SIZE; } else if (DISPLAY_VERx100(display) == 2000) { -- cgit v1.2.3 From 3d72d2e5f4c47d88f292c915ce31ef14092df3f2 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Sun, 28 Sep 2025 14:25:27 +0800 Subject: drm/amdgpu: Intercept ras interrupts to ras module Intercept ras interrupts to ras module. V2: Change function names in ras module. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 ++++++++++++++++ drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c | 4 ++-- drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h | 4 ++-- 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index a2879d2b7c8e..644f79f3c9af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -36,6 +36,7 @@ #include "amdgpu_ras.h" #include "amdgpu_umc.h" #include "amdgpu_reset.h" +#include "amdgpu_ras_mgr.h" /* Total memory size in system memory and all GPU VRAM. 
Used to * estimate worst case amount of memory to reserve for page tables @@ -746,6 +747,20 @@ void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad enum amdgpu_ras_block block, uint16_t pasid, pasid_notify pasid_fn, void *data, uint32_t reset) { + + if (amdgpu_uniras_enabled(adev)) { + struct ras_ih_info ih_info; + + memset(&ih_info, 0, sizeof(ih_info)); + ih_info.block = block; + ih_info.pasid = pasid; + ih_info.reset = reset; + ih_info.pasid_fn = pasid_fn; + ih_info.data = data; + amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info); + return; + } + amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index e0ee21150860..9aa4b93ac6af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -41,6 +41,7 @@ #include "atom.h" #include "amdgpu_reset.h" #include "amdgpu_psp.h" +#include "amdgpu_ras_mgr.h" #ifdef CONFIG_X86_MCE_AMD #include @@ -2241,6 +2242,11 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY)) return; + if (amdgpu_uniras_enabled(adev)) { + amdgpu_ras_mgr_handle_fatal_interrupt(adev, NULL); + return; + } + if (adev->nbio.ras && adev->nbio.ras->handle_ras_controller_intr_no_bifring) adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev); @@ -2411,6 +2417,16 @@ int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, struct ras_manager *obj; struct ras_ih_data *data; + if (amdgpu_uniras_enabled(adev)) { + struct ras_ih_info ih_info; + + memset(&ih_info, 0, sizeof(ih_info)); + ih_info.block = info->head.block; + memcpy(&ih_info.iv_entry, info->entry, sizeof(struct amdgpu_iv_entry)); + + return amdgpu_ras_mgr_handle_controller_interrupt(adev, &ih_info); + } + obj = amdgpu_ras_find_obj(adev, &info->head); if (!obj) return -EINVAL; diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c index e66d915831a9..a8d02bd42f90 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c @@ -381,7 +381,7 @@ static const struct amd_ip_funcs __maybe_unused ras_v1_0_ip_funcs = { .hw_fini = amdgpu_ras_mgr_hw_fini, }; -int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable) +int amdgpu_enable_uniras(struct amdgpu_device *adev, bool enable) { struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); @@ -395,7 +395,7 @@ int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable) return ras_core_set_status(ras_mgr->ras_core, enable); } -bool amdgpu_unified_ras_enabled(struct amdgpu_device *adev) +bool amdgpu_uniras_enabled(struct amdgpu_device *adev) { struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h index fa761de381c1..8d6a1873b666 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h @@ -56,8 +56,8 @@ struct amdgpu_ras_mgr { struct amdgpu_ras_mgr *amdgpu_ras_mgr_get_context( struct amdgpu_device *adev); -int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable); -bool amdgpu_unified_ras_enabled(struct amdgpu_device *adev); +int amdgpu_enable_uniras(struct amdgpu_device *adev, bool enable); +bool amdgpu_uniras_enabled(struct amdgpu_device *adev); int 
amdgpu_ras_mgr_handle_fatal_interrupt(struct amdgpu_device *adev, void *data); int amdgpu_ras_mgr_handle_controller_interrupt(struct amdgpu_device *adev, void *data); int amdgpu_ras_mgr_handle_consumer_interrupt(struct amdgpu_device *adev, void *data); -- cgit v1.2.3 From 408bd841ad24fd9891a1cd9ca11cf4f48d9667dc Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 21 Jul 2025 15:14:03 +0800 Subject: drm/amdgpu: Improve ras fatal error handling function In the multi-GPU case, a fatal error generates several fatal error interrupts. With this improvement, the ras module can reuse this function and handle only the first interrupt. V3: Initialize event_id using RAS_EVENT_INVALID_ID. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 ++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c | 5 +++++ 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9aa4b93ac6af..9e632adc7746 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -4650,20 +4650,18 @@ u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type return id; } -void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) +int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); enum ras_event_type type = RAS_EVENT_TYPE_FATAL; - u64 event_id; + u64 event_id = RAS_EVENT_INVALID_ID; - if (amdgpu_ras_mark_ras_event(adev, type)) { - dev_err(adev->dev, - "uncorrectable hardware error (ERREVENT_ATHUB_INTERRUPT) detected!\n"); - return; - } + if (amdgpu_uniras_enabled(adev)) + return 0; - event_id = amdgpu_ras_acquire_event_id(adev, type); + if (!amdgpu_ras_mark_ras_event(adev, type)) + event_id = amdgpu_ras_acquire_event_id(adev, type); RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error" "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); @@ -4672,6 +4670,8 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; amdgpu_ras_reset_gpu(adev); } + + return -EBUSY; } bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 9f21b6cf8724..556cf4d7b5ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -910,7 +910,7 @@ static inline void amdgpu_ras_intr_cleared(void) atomic_set(&amdgpu_ras_in_intr, 0); } -void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); +int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready); diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c index 40071b876333..f21cd55a25be 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c @@ -29,8 +29,13 @@ static int amdgpu_ras_sys_detect_fatal_event(struct ras_core_context *ras_core, void *data) { struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; + int ret; uint64_t seq_no; + ret = amdgpu_ras_global_ras_isr(adev); + if (ret) + return ret; + seq_no = amdgpu_ras_mgr_gen_ras_event_seqno(adev,
RAS_SEQNO_TYPE_UE); RAS_DEV_INFO(adev, "{%llu} Uncorrectable hardware error(ERREVENT_ATHUB_INTERRUPT) detected!\n", -- cgit v1.2.3 From fe0f51d6d8fb4dced30e523dccb05ea896b06749 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 17 Jun 2025 15:16:59 +0800 Subject: drm/amdgpu: add ras module rma check Add ras module rma check. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9e632adc7746..91c746c5dc18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -5424,6 +5424,9 @@ bool amdgpu_ras_is_rma(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + if (amdgpu_uniras_enabled(adev)) + return amdgpu_ras_mgr_is_rma(adev); + if (!con) return false; -- cgit v1.2.3 From 47ba675a195b3d7c9836c85398c8c94b552cd8c7 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 21 Jul 2025 15:15:53 +0800 Subject: drm/amdgpu: Avoid loading bad pages into legacy ras When ras module is enabled, the bad pages will be loaded by ras module. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 91c746c5dc18..5b610f668fcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3606,6 +3606,9 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) if (!con || amdgpu_sriov_vf(adev)) return 0; + if (amdgpu_uniras_enabled(adev)) + return 0; + control = &con->eeprom_control; ret = amdgpu_ras_eeprom_init(control); control->is_eeprom_valid = !ret; -- cgit v1.2.3 From 1d87afd61055e0d5400b34bc126ec529c27f6a53 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 24 Mar 2025 18:20:17 +0800 Subject: drm/amdgpu: Add poison consumption sequence numbers for gfx and sdma Add poison consumption sequence numbers for gfx and sdma. V3: Use RAS_EVENT_LOG to print ras log info. 
Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 4ceb251312a6..d76fb61869c7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -28,6 +28,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_smi_events.h" #include "amdgpu_ras.h" +#include "amdgpu_ras_mgr.h" /* * GFX9 SQ Interrupts @@ -228,7 +229,11 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, kfd_signal_poison_consumed_event(dev, pasid); - event_id = amdgpu_ras_acquire_event_id(dev->adev, type); + if (amdgpu_uniras_enabled(dev->adev)) + event_id = amdgpu_ras_mgr_gen_ras_event_seqno(dev->adev, + RAS_SEQNO_TYPE_POISON_CONSUMPTION); + else + event_id = amdgpu_ras_acquire_event_id(dev->adev, type); RAS_EVENT_LOG(dev->adev, event_id, "poison is consumed by client %d, kick off gpu reset flow\n", client_id); -- cgit v1.2.3 From 43a90c0732ddb1eb5e6b1e6a9e454d66bd33f657 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 25 Mar 2025 14:11:10 +0800 Subject: drm/amdgpu: Avoid hive seqno increment in legacy ras The hive->event_mgr variable is used by both the ras module and legacy ras. To keep the hive seqno growing continuously, legacy ras must not touch the event_mgr variable once the ras module is enabled. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 5b610f668fcc..bc3f34c2c2d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -4603,6 +4603,9 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_ struct ras_event_state *event_state; int ret = 0; + if (amdgpu_uniras_enabled(adev)) + return 0; + if (type >= RAS_EVENT_TYPE_COUNT) { ret = -EINVAL; goto out; -- cgit v1.2.3 From 04226ae1bc236e3307d9662c7ae6f73ce3ffba71 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Wed, 26 Mar 2025 18:03:49 +0800 Subject: drm/amdgpu: Add ras module eeprom safety watermark check Add ras module eeprom safety watermark check.
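[Editor's note: the recurring shape in this amdgpu series is a guard at the top of each legacy RAS entry point that forwards to the ras module when it is enabled and otherwise leaves the legacy path untouched, as the eeprom diff below also shows. A minimal standalone sketch of that dispatch pattern; all names here are stand-ins, not the amdgpu API.]

#include <stdbool.h>
#include <stdio.h>

/* Stand-in device; 'uniras_enabled' models amdgpu_uniras_enabled(). */
struct dev_model {
	bool uniras_enabled;
};

/* Stand-in for the unified ras module's implementation. */
static bool ras_mgr_check_watermark(struct dev_model *dev)
{
	(void)dev; /* unused in the sketch */
	puts("ras module path");
	return false;
}

static bool legacy_check_err_threshold(struct dev_model *dev)
{
	/* The guard added at the top of the legacy entry point: the ras
	 * module takes over when enabled; the legacy code below is kept. */
	if (dev->uniras_enabled)
		return ras_mgr_check_watermark(dev);

	puts("legacy ras path");
	return false;
}

int main(void)
{
	struct dev_model legacy = { .uniras_enabled = false };
	struct dev_model uni = { .uniras_enabled = true };

	legacy_check_err_threshold(&legacy);
	legacy_check_err_threshold(&uni);
	return 0;
}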
Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 3eb3fb55ccb0..5a7bf0661dbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -32,6 +32,7 @@ #include #include "amdgpu_reset.h" +#include "amdgpu_ras_mgr.h" /* These are memory addresses as would be seen by one or more EEPROM * chips strung on the I2C bus, usually by manipulating pins 1-3 of a @@ -556,6 +557,9 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + if (amdgpu_uniras_enabled(adev)) + return amdgpu_ras_mgr_check_eeprom_safety_watermark(adev); + if (!__is_ras_eeprom_supported(adev) || !amdgpu_bad_page_threshold) return false; -- cgit v1.2.3 From 9dddfac98e3aa2e0c5164af948edd1fc4b550713 Mon Sep 17 00:00:00 2001 From: Harry VanZyllDeJong Date: Wed, 17 Sep 2025 16:46:13 -0400 Subject: drm/amd/display: fix duplicate aux command with AMD aux backlight When using AMD aux backlight control, avoid sending backlight update commands to DMUB firmware, because the backlight is controlled by aux commands in the driver. Reviewed-by: Iswara Nagulendran Reviewed-by: Aric Cyr Signed-off-by: Harry VanZyllDeJong Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index b822f2dffff0..d1ecdb92b072 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -710,7 +710,8 @@ bool dcn31_set_backlight_level(struct pipe_ctx *pipe_ctx, panel_cntl->inst, panel_cntl->pwrseq_inst); - dmub_abm_set_backlight(dc, backlight_level_params, panel_cntl->inst); + if (backlight_level_params->control_type != BACKLIGHT_CONTROL_AMD_AUX) + dmub_abm_set_backlight(dc, backlight_level_params, panel_cntl->inst); return true; } -- cgit v1.2.3 From f9491b67630ea7eeb9d19091ba9c0f3f6af82a81 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Thu, 25 Sep 2025 15:01:23 -0400 Subject: drm/amd/display: Add debug option to override EASF scaler taps [Why & How] Add a new option, override_easf, to use in_taps instead of the internal taps policy for debugging. Reviewed-by: Alvin Lee Signed-off-by: Samson Tam Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_spl_translate.c | 2 ++ drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c | 15 +++++++++++++++ drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h | 1 + 3 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 55704d4457ef..37d1a79e8241 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -147,6 +147,8 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->prefer_easf = false; else if (pipe_ctx->stream->ctx->dc->debug.force_easf == 2) spl_in->disable_easf = true; + else if (pipe_ctx->stream->ctx->dc->debug.force_easf == 3) + spl_in->override_easf = true; /* Translate adaptive sharpening preference */
unsigned int sharpness_setting = pipe_ctx->stream->ctx->dc->debug.force_sharpness; unsigned int force_sharpness_level = pipe_ctx->stream->ctx->dc->debug.force_sharpness_level; diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c index b1fb0f8a253a..7a839984dbc0 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c @@ -1018,6 +1018,21 @@ static bool spl_get_optimal_number_of_taps( spl_scratch->scl_data.taps.h_taps_c = 6; spl_scratch->scl_data.taps.v_taps_c = 6; } + + /* Override mode: keep EASF enabled but use input taps if valid */ + if (spl_in->override_easf) { + spl_scratch->scl_data.taps.h_taps = (in_taps->h_taps != 0) ? in_taps->h_taps : spl_scratch->scl_data.taps.h_taps; + spl_scratch->scl_data.taps.v_taps = (in_taps->v_taps != 0) ? in_taps->v_taps : spl_scratch->scl_data.taps.v_taps; + spl_scratch->scl_data.taps.h_taps_c = (in_taps->h_taps_c != 0) ? in_taps->h_taps_c : spl_scratch->scl_data.taps.h_taps_c; + spl_scratch->scl_data.taps.v_taps_c = (in_taps->v_taps_c != 0) ? in_taps->v_taps_c : spl_scratch->scl_data.taps.v_taps_c; + + if ((spl_scratch->scl_data.taps.h_taps > 6) || (spl_scratch->scl_data.taps.v_taps > 6)) + skip_easf = true; + if ((spl_scratch->scl_data.taps.h_taps > 1) && (spl_scratch->scl_data.taps.h_taps % 2)) + spl_scratch->scl_data.taps.h_taps--; + if ((spl_scratch->scl_data.taps.h_taps_c > 1) && (spl_scratch->scl_data.taps.h_taps_c % 2)) + spl_scratch->scl_data.taps.h_taps_c--; + } } /*Ensure we can support the requested number of vtaps*/ diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h index 23d254dea18f..20e4e52a77ac 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h @@ -545,6 +545,7 @@ struct spl_in { enum linear_light_scaling lls_pref; // Linear Light Scaling bool prefer_easf; bool disable_easf; + bool override_easf; /* If true, keep EASF enabled but use provided in_taps */ struct spl_debug debug; bool is_fullscreen; bool is_hdr_on; -- cgit v1.2.3 From 279d519fdaf170eaca15037ed6a6e2faad91dbd4 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Fri, 26 Sep 2025 15:51:15 -0400 Subject: drm/amd/display: add dispclk ramping to dcn35. [why] This logic is required by the HW programming guide. Tested/ported on dcn401.
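[Editor's note: the patch below adds dccg35_wait_for_dentist_change_done(), which writes the current DENTIST_DISPCLK_CNTL value back and then polls the DENTIST_DISPCLK_CHG_DONE bit via REG_WAIT. A standalone sketch of that write-back-then-poll shape against a fake register; the bit position and the stub's immediate completion are assumptions of the sketch, not the hardware or the REG_WAIT macro.]

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CHG_DONE_BIT (1u << 8) /* hypothetical bit position */

static uint32_t fake_dentist_reg; /* stands in for DENTIST_DISPCLK_CNTL */

static uint32_t reg_read(void) { return fake_dentist_reg; }

static void reg_write(uint32_t val)
{
	/* The stub pretends the divider change completes immediately. */
	fake_dentist_reg = val | CHG_DONE_BIT;
}

/* Write the current value back to latch the change, then poll the done
 * bit with a bounded retry count, like REG_WAIT(..., 1, 50, 2000). */
static bool wait_for_dentist_change_done(int max_tries)
{
	reg_write(reg_read());

	for (int i = 0; i < max_tries; i++) {
		if (reg_read() & CHG_DONE_BIT)
			return true;
		/* the real macro sleeps between polls (~50us per try) */
	}
	return false;
}

int main(void)
{
	printf("done=%d\n", wait_for_dentist_change_done(2000));
	return 0;
}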
Reviewed-by: Yihan Zhu Signed-off-by: Charlene Liu Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index c899c09ea31b..e097d52956b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -1114,6 +1114,16 @@ static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) if (dispclk_rdivider_value != 0) REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_rdivider_value); } +static void dccg35_wait_for_dentist_change_done( + struct dccg *dccg) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + uint32_t dentist_dispclk_value = REG_READ(DENTIST_DISPCLK_CNTL); + + REG_WRITE(DENTIST_DISPCLK_CNTL, dentist_dispclk_value); + REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000); +} static void dcn35_set_dppclk_enable(struct dccg *dccg, uint32_t dpp_inst, uint32_t enable) @@ -1300,6 +1310,8 @@ static void dccg35_set_pixel_rate_div( BREAK_TO_DEBUGGER(); return; } + if (otg_inst < 4) + dccg35_wait_for_dentist_change_done(dccg); } static void dccg35_set_dtbclk_p_src( -- cgit v1.2.3 From dc69b48988b171d6ccb3a083607e4dff015e2c0d Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Mon, 29 Sep 2025 14:28:34 -0400 Subject: drm/amd/display: Fix NULL pointer dereference [Why] On an MST branch with a multi-display setup, the dc context is obsolete after updating the first stream. Referencing the same dc context to fetch the dc pointer for the next stream update leads to a NULL pointer dereference. [How] Get the dc pointer from the link rather than the context. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Charlene Liu Signed-off-by: Meenakshikumar Somasundaram Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 1593412354cf..1045c268672e 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -78,6 +78,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link, struct audio_output audio_output[MAX_PIPES]; struct dc_stream_state *streams_on_link[MAX_PIPES]; int num_streams_on_link = 0; + struct dc *dc = (struct dc *)link->dc; needs_divider_update = (link->dc->link_srv->dp_get_encoding_format(link_setting) != link->dc->link_srv->dp_get_encoding_format((const struct dc_link_settings *) &link->cur_link_settings)); @@ -150,7 +151,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link, if (streams_on_link[i] && streams_on_link[i]->link && streams_on_link[i]->link == link) { stream_update.stream = streams_on_link[i]; stream_update.dpms_off = &dpms_off; - dc_update_planes_and_stream(state->clk_mgr->ctx->dc, NULL, 0, streams_on_link[i], &stream_update); + dc_update_planes_and_stream(dc, NULL, 0, streams_on_link[i], &stream_update); } } } -- cgit v1.2.3 From cb689a474bdec2bc3ed9ff8481e2e075c9791815 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 29 Sep 2025 14:47:51 -0400 Subject: drm/amd/display: Remove unused field in DML Remove unused fields.
Reviewed-by: Austin Zheng Signed-off-by: Alvin Lee Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h index 91955bbe24b8..8e5a30287220 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -46,7 +46,6 @@ struct dml2_display_dlg_regs { uint32_t dst_y_delta_drq_limit; uint32_t refcyc_per_vm_dmdata; uint32_t dmdata_dl_delta; - uint32_t dst_y_svp_drq_limit; // MRQ uint32_t refcyc_per_meta_chunk_vblank_l; -- cgit v1.2.3 From a1829659b5f48a8ac88ea1f1a48de2e13a21dbae Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Mon, 29 Sep 2025 15:15:13 -0400 Subject: drm/amd/display: add dccg dfs mask def [why] add some register masks for DCCG Reviewed-by: Yihan Zhu Reviewed-by: Alvin Lee Signed-off-by: Charlene Liu Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h index 51f98c5c51c4..ad7a5850d31b 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h @@ -231,6 +231,14 @@ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_STEP_DELAY, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_STEP_SIZE, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_FREQ_RAMP_DONE, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_MAX_ERRDET_CYCLES, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_RESET, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_STATE, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_OVR_EN, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_CHG_FWD_CORR_DISABLE, mask_sh),\ struct dccg *dccg35_create( struct dc_context *ctx, -- cgit v1.2.3 From f7124dd568a6614866ba1c890e53e30fa09ae6ad Mon Sep 17 00:00:00 2001 From: Adi Gollamudi Date: Sun, 12 Oct 2025 12:13:19 -0700 Subject: drm/amd/display: fix typo in display_mode_core_structs.h Fix a typo in a comment, change "enviroment" to "environment" in drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h Signed-off-by: Aditya Gollamudi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h index dbeb08466092..d57717b023eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h @@ -1894,7 +1894,7 @@ struct display_mode_lib_scratch_st { struct CalculatePrefetchSchedule_params_st CalculatePrefetchSchedule_params; }; -/// @brief Represent the overall soc/ip enviroment. 
It contains data structure represent the soc/ip characteristic and also structures that hold calculation output +/// @brief Represent the overall soc/ip environment. It contains data structure represent the soc/ip characteristic and also structures that hold calculation output struct display_mode_lib_st { dml_uint_t project; -- cgit v1.2.3 From 3471b9a31ce352ffb343cf02a991261880aac3a7 Mon Sep 17 00:00:00 2001 From: Relja Vojvodic Date: Wed, 24 Sep 2025 09:33:35 -0400 Subject: drm/amd/display: Rework HDMI data channel reads Fix the HDMI data channel reads to respect the scdc_present field in order to pass the compliance test. Reviewed-by: Wenjing Liu Signed-off-by: Relja Vojvodic Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + drivers/gpu/drm/amd/display/dc/link/link_detection.c | 4 ++++ drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index b5aa03a3e39c..8f8ccde7ad94 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -212,6 +212,7 @@ struct dc_edid_caps { bool edid_hdmi; bool hdr_supported; bool rr_capable; + bool scdc_present; struct dc_panel_patch panel_patch; }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 85303167a553..82a9e52d5ae5 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -270,6 +270,10 @@ static void read_scdc_caps(struct ddc_service *ddc_service, uint8_t slave_address = HDMI_SCDC_ADDRESS; uint8_t offset = HDMI_SCDC_MANUFACTURER_OUI; + if (ddc_service->link->local_sink && + !ddc_service->link->local_sink->edid_caps.scdc_present) + return; + link_query_ddc_data(ddc_service, slave_address, &offset, sizeof(offset), sink->scdc_caps.manufacturer_OUI.byte, sizeof(sink->scdc_caps.manufacturer_OUI.byte)); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c index 267180e7bc48..5d2bcce2f669 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c @@ -549,7 +549,8 @@ void write_scdc_data(struct ddc_service *ddc_service, /*Lower than 340 Scramble bit from SCDC caps*/ if (ddc_service->link->local_sink && - ddc_service->link->local_sink->edid_caps.panel_patch.skip_scdc_overwrite) + (ddc_service->link->local_sink->edid_caps.panel_patch.skip_scdc_overwrite || + !ddc_service->link->local_sink->edid_caps.scdc_present)) return; link_query_ddc_data(ddc_service, slave_address, &offset, -- cgit v1.2.3 From d7f5a61e1b04ed87b008c8d327649d184dc5bb45 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Mon, 29 Sep 2025 20:29:30 -0400 Subject: drm/amd/display: increase max link count and fix link->enc NULL pointer access [why] 1.) The dc->links[MAX_LINKS] array size is smaller than actually requested: max_connector + max_dpia + 4 virtual = 14; increase from 12 to 14. 2.) hw_init() accesses a null LINK_ENC for dpia links that are not PHY display endpoints.
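Item 2.) is addressed by an endpoint-type guard in dcn401_init_hw(): only PHY endpoints own a link encoder, while dpia and virtual links leave link_enc NULL. Condensed, the guard pattern used in the hunk below is:

    /* Skip links without a physical encoder before touching link_enc;
     * dereferencing it for a dpia/virtual endpoint is the NULL access
     * described above.
     */
    for (i = 0; i < dc->link_count; i++) {
            struct dc_link *link = dc->links[i];

            if (link->ep_type != DISPLAY_ENDPOINT_PHY)
                    continue;

            link->link_enc->funcs->hw_init(link->link_enc);
    }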
Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Meenakshikumar Somasundaram Reviewed-by: Chris Park Signed-off-by: Charlene Liu Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 3 +++ drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 71c2cfa4df75..2b49b8327374 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -200,6 +200,9 @@ void dcn401_init_hw(struct dc *dc) */ struct dc_link *link = dc->links[i]; + if (link->ep_type != DISPLAY_ENDPOINT_PHY) + continue; + link->link_enc->funcs->hw_init(link->link_enc); /* Check for enabled DIG to identify enabled display */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index 41c76ba9ba56..62a39204fe0b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -44,7 +44,13 @@ */ #define MAX_PIPES 6 #define MAX_PHANTOM_PIPES (MAX_PIPES / 2) -#define MAX_LINKS (MAX_PIPES * 2 +2) + +#define MAX_DPIA 6 +#define MAX_CONNECTOR 6 +#define MAX_VIRTUAL_LINKS 4 + +#define MAX_LINKS (MAX_DPIA + MAX_CONNECTOR + MAX_VIRTUAL_LINKS) + #define MAX_DIG_LINK_ENCODERS 7 #define MAX_DWB_PIPES 1 #define MAX_HPO_DP2_ENCODERS 4 -- cgit v1.2.3 From 11d672db1bb16a51ed300f3e8579d56e2ca5920f Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 30 Sep 2025 17:28:54 -0400 Subject: drm/amd/display: Update DCN401 path for cursor offload [Description] The DCN401 cursor offload path needs to take into account use_mall_for_cursor, and also needs to ensure the dcn32 function assigns the cursor cache fields (DCN401 uses the dcn32 implementation).
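The rework below follows a cache-always, program-conditionally shape: the software cursor cache (hubp->att, hubp->cur_rect, hubp->use_mall_for_cursor) is refreshed on every call so the offload path can replay it later, while direct register writes happen only when cursor offload is off. A condensed sketch of that control flow, using hypothetical helper names rather than real DC functions:

    void set_cursor_attributes(struct hubp *hubp,
                               const struct dc_cursor_attributes *attr)
    {
            /* 16 KB is the MALL threshold used in the real hunk */
            bool use_mall = cursor_size_bytes(attr) > 16384;

            if (!hubp->cursor_offload)
                    program_cursor_registers(hubp, attr, use_mall); /* HW writes */

            cache_cursor_attributes(hubp, attr);  /* fill hubp->att / cur_rect */
            hubp->use_mall_for_cursor = use_mall; /* consumed by offload path */
    }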
Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alvin Lee Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c | 70 +++++++++++++--------- .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 1 + drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 1 + 3 files changed, 45 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c index a5f23bb2a76a..41c8f78efdc3 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c @@ -118,29 +118,7 @@ void hubp32_cursor_set_attributes( uint32_t cursor_width = ((attr->width + 63) / 64) * 64; uint32_t cursor_height = attr->height; uint32_t cursor_size = cursor_width * cursor_height; - - hubp->curs_attr = *attr; - - REG_UPDATE(CURSOR_SURFACE_ADDRESS_HIGH, - CURSOR_SURFACE_ADDRESS_HIGH, attr->address.high_part); - REG_UPDATE(CURSOR_SURFACE_ADDRESS, - CURSOR_SURFACE_ADDRESS, attr->address.low_part); - - REG_UPDATE_2(CURSOR_SIZE, - CURSOR_WIDTH, attr->width, - CURSOR_HEIGHT, attr->height); - - REG_UPDATE_4(CURSOR_CONTROL, - CURSOR_MODE, attr->color_format, - CURSOR_2X_MAGNIFY, attr->attribute_flags.bits.ENABLE_MAGNIFICATION, - CURSOR_PITCH, hw_pitch, - CURSOR_LINES_PER_CHUNK, lpc); - - REG_SET_2(CURSOR_SETTINGS, 0, - /* no shift of the cursor HDL schedule */ - CURSOR0_DST_Y_OFFSET, 0, - /* used to shift the cursor chunk request deadline */ - CURSOR0_CHUNK_HDL_ADJUST, 3); + bool use_mall_for_cursor; switch (attr->color_format) { case CURSOR_MODE_MONO: @@ -158,11 +136,49 @@ void hubp32_cursor_set_attributes( cursor_size *= 8; break; } + use_mall_for_cursor = cursor_size > 16384 ? 1 : 0; + + hubp->curs_attr = *attr; - if (cursor_size > 16384) - REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, true); - else - REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, false); + if (!hubp->cursor_offload) { + REG_UPDATE(CURSOR_SURFACE_ADDRESS_HIGH, + CURSOR_SURFACE_ADDRESS_HIGH, attr->address.high_part); + REG_UPDATE(CURSOR_SURFACE_ADDRESS, + CURSOR_SURFACE_ADDRESS, attr->address.low_part); + + REG_UPDATE_2(CURSOR_SIZE, + CURSOR_WIDTH, attr->width, + CURSOR_HEIGHT, attr->height); + + REG_UPDATE_4(CURSOR_CONTROL, + CURSOR_MODE, attr->color_format, + CURSOR_2X_MAGNIFY, attr->attribute_flags.bits.ENABLE_MAGNIFICATION, + CURSOR_PITCH, hw_pitch, + CURSOR_LINES_PER_CHUNK, lpc); + + REG_SET_2(CURSOR_SETTINGS, 0, + /* no shift of the cursor HDL schedule */ + CURSOR0_DST_Y_OFFSET, 0, + /* used to shift the cursor chunk request deadline */ + CURSOR0_CHUNK_HDL_ADJUST, 3); + + REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, use_mall_for_cursor); + } + hubp->att.SURFACE_ADDR_HIGH = attr->address.high_part; + hubp->att.SURFACE_ADDR = attr->address.low_part; + hubp->att.size.bits.width = attr->width; + hubp->att.size.bits.height = attr->height; + hubp->att.cur_ctl.bits.mode = attr->color_format; + + hubp->cur_rect.w = attr->width; + hubp->cur_rect.h = attr->height; + + hubp->att.cur_ctl.bits.pitch = hw_pitch; + hubp->att.cur_ctl.bits.line_per_chunk = lpc; + hubp->att.cur_ctl.bits.cur_2x_magnify = attr->attribute_flags.bits.ENABLE_MAGNIFICATION; + hubp->att.settings.bits.dst_y_offset = 0; + hubp->att.settings.bits.chunk_hdl_adjust = 3; + hubp->use_mall_for_cursor = use_mall_for_cursor; } void hubp32_init(struct hubp *hubp) { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 
2b49b8327374..41699ff7256f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -2727,6 +2727,7 @@ void dcn401_update_cursor_offload_pipe(struct dc *dc, const struct pipe_ctx *pip p->HUBPREQ0_CURSOR_SETTINGS__CURSOR0_DST_Y_OFFSET = hubp->att.settings.bits.dst_y_offset; p->HUBPREQ0_CURSOR_SETTINGS__CURSOR0_CHUNK_HDL_ADJUST = hubp->att.settings.bits.chunk_hdl_adjust; + p->HUBP0_DCHUBP_MALL_CONFIG__USE_MALL_FOR_CURSOR = hubp->use_mall_for_cursor; cs->offload_streams[stream_idx].payloads[payload_idx].pipe_mask |= (1u << pipe->pipe_idx); } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 5998a20a18c4..b0c13b506c11 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -132,6 +132,7 @@ struct hubp { struct cursor_position_cache_hubp pos; struct cursor_attribute_cache_hubp att; struct cursor_rect cur_rect; + bool use_mall_for_cursor; }; struct surface_flip_registers { -- cgit v1.2.3 From b9a21a379ad0bb15acf7f2855f0e3ccdc83ce83e Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 1 Oct 2025 13:40:57 -0400 Subject: drm/amd/display: Update spacing in struct Update spacing so that fields with longer names will still be aligned correctly (new fields to be added). Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alvin Lee Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/inc/hw/cursor_reg_cache.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/cursor_reg_cache.h b/drivers/gpu/drm/amd/display/dc/inc/hw/cursor_reg_cache.h index 081831230821..7ce2f417f86a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/cursor_reg_cache.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/cursor_reg_cache.h @@ -57,9 +57,9 @@ struct cursor_attribute_cache_hubp { } size; union reg_cursor_settings_cfg { struct { - uint32_t dst_y_offset: 8; - uint32_t chunk_hdl_adjust: 2; - uint32_t reserved: 22; + uint32_t dst_y_offset: 8; + uint32_t chunk_hdl_adjust: 2; + uint32_t reserved: 22; } bits; uint32_t raw; } settings; -- cgit v1.2.3 From b466ad5574c0511a18328a48f8409422615fa7b1 Mon Sep 17 00:00:00 2001 From: Peichen Huang Date: Tue, 30 Sep 2025 13:39:02 +0800 Subject: drm/amd/display: not skip hpd irq for bw alloc mode [WHY] The driver only processes hpd irq for a branch device or when the link is established. This causes some irqs for the bw_alloc mode of dp tunneling to be ignored. [HOW] The driver should process hpd irq if bw_alloc and dp tunneling are enabled.
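Concretely, the accept condition in dp_should_allow_hpd_rx_irq() becomes a three-way OR, mirrored by this sketch of the irq handler hunk below:

    /* Handle an RX IRQ when any of the following holds:
     *  1) the link is trained (lane count known),
     *  2) a branch device is attached (SST or MST), or
     *  3) BW allocation is enabled, since tunneling IRQs can arrive
     *     before the link is established.
     */
    return (link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) ||
           is_dp_branch_device(link) ||
           link->dpia_bw_alloc_config.bw_alloc_enabled;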
Reviewed-by: Cruise Hung Signed-off-by: Peichen Huang Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../amd/display/dc/link/protocols/link_dp_dpia_bw.c | 19 +++++++++++++++---- .../display/dc/link/protocols/link_dp_irq_handler.c | 4 +++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index 8a3c18ae97a7..b16eb97ae11c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -273,17 +273,28 @@ bool link_dpia_enable_usb4_dp_bw_alloc_mode(struct dc_link *link) */ void link_dp_dpia_handle_bw_alloc_status(struct dc_link *link, uint8_t status) { - link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); - if (status & DP_TUNNELING_BW_REQUEST_SUCCEEDED) { DC_LOG_DEBUG("%s: BW Allocation request succeeded on link(%d)", __func__, link->link_index); - } else if (status & DP_TUNNELING_BW_REQUEST_FAILED) { + } + + if (status & DP_TUNNELING_BW_REQUEST_FAILED) { DC_LOG_DEBUG("%s: BW Allocation request failed on link(%d) allocated/estimated BW=%d", __func__, link->link_index, link->dpia_bw_alloc_config.estimated_bw); link_dpia_send_bw_alloc_request(link, link->dpia_bw_alloc_config.estimated_bw); - } else if (status & DP_TUNNELING_ESTIMATED_BW_CHANGED) { + } + + if (status & DP_TUNNELING_BW_ALLOC_CAP_CHANGED) { + link->dpia_bw_alloc_config.bw_granularity = get_bw_granularity(link); + + DC_LOG_DEBUG("%s: Granularity changed on link(%d) new granularity=%d", + __func__, link->link_index, link->dpia_bw_alloc_config.bw_granularity); + } + + if (status & DP_TUNNELING_ESTIMATED_BW_CHANGED) { + link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); + DC_LOG_DEBUG("%s: Estimated BW changed on link(%d) new estimated BW=%d", __func__, link->link_index, link->dpia_bw_alloc_config.estimated_bw); } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index 693477413347..4b01ab0a5a7f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -398,10 +398,12 @@ bool dp_should_allow_hpd_rx_irq(const struct dc_link *link) * Don't handle RX IRQ unless one of following is met: * 1) The link is established (cur_link_settings != unknown) * 2) We know we're dealing with a branch device, SST or MST + * 3) The link is bw_alloc enabled. */ if ((link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) || - is_dp_branch_device(link)) + is_dp_branch_device(link) || + link->dpia_bw_alloc_config.bw_alloc_enabled) return true; return false; -- cgit v1.2.3 From 3b98fd0efabbfcbe99d6936dfdd5e92ffa21a32a Mon Sep 17 00:00:00 2001 From: Yihan Zhu Date: Fri, 26 Sep 2025 10:07:46 -0400 Subject: drm/amd/display: fix dppclk rcg poweron check [WHY & HOW] dppclk rcg power down flips the poweron flag in the cache, so the dppclk rcg ungate sequence is never run again under some conditions. Wait 10us to let the dpp dto fully ramp.
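To spell out the polarity restored by the one-character change in dccg35_dpp_root_clock_control() below: dccg->dpp_clock_gated caches the gated state, which is the inverse of the requested clock_on, so the only safe early return is when the two values differ:

    /* gated  clock_on  state vs. request       action
     * false  true      running, want running   skip (no-op)
     * true   false     gated,   want gated     skip (no-op)
     * true   true      gated,   want running   ungate + wait
     * false  false     running, want gated     gate
     */
    if (dccg->dpp_clock_gated[dpp_inst] != clock_on)
            return;

The old == comparison returned early in exactly the two cases where work was required, so once the cached flag flipped, the ungate sequence could never run again.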
Reviewed-by: Ovidiu (Ovi) Bunea Reviewed-by: Nicholas Kazlauskas Signed-off-by: Yihan Zhu Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index e097d52956b6..856615e7648b 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -1187,6 +1187,7 @@ static void dccg35_update_dpp_dto(struct dccg *dccg, int dpp_inst, /*we have this in hwss: disable_plane*/ //dccg35_set_dppclk_rcg(dccg, dpp_inst, true); } + udelay(10); dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; } @@ -1676,7 +1677,7 @@ static void dccg35_dpp_root_clock_control( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (dccg->dpp_clock_gated[dpp_inst] == clock_on) + if (dccg->dpp_clock_gated[dpp_inst] != clock_on) return; if (clock_on) { @@ -1697,6 +1698,9 @@ static void dccg35_dpp_root_clock_control( //dccg35_set_dppclk_rcg(dccg, dpp_inst, true); } + // wait for clock to fully ramp + udelay(10); + dccg->dpp_clock_gated[dpp_inst] = !clock_on; DC_LOG_DEBUG("%s: dpp_inst(%d) clock_on = %d\n", __func__, dpp_inst, clock_on); } -- cgit v1.2.3 From d745900b40cc688d71c7e5de3a56acd65e4214f9 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Thu, 2 Oct 2025 14:20:02 -0400 Subject: drm/amd/display: update perfmon measurement interfaces [how] This commit updates the interfaces for dchubbub perfmon measurement to better reflect our requirements. Reviewed-by: Alvin Lee Signed-off-by: Wenjing Liu Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h | 36 +++++++++++++++++++----- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index 843a18287c83..2ce47c403840 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -244,15 +244,37 @@ struct hubbub_funcs { bool (*program_arbiter)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower); void (*get_det_sizes)(struct hubbub *hubbub, uint32_t *curr_det_sizes, uint32_t *target_det_sizes); uint32_t (*compbuf_config_error)(struct hubbub *hubbub); - struct hubbub_perfmon_funcs{ - void (*start_system_latency_measurement)(struct hubbub *hubbub); - void (*get_system_latency_result)(struct hubbub *hubbub, uint32_t refclk_mhz, struct hubbub_system_latencies *latencies); - void (*start_in_order_bandwidth_measurement)(struct hubbub *hubbub); - void (*get_in_order_bandwidth_result)(struct hubbub *hubbub, uint32_t refclk_mhz, uint32_t *bandwidth_mbps); - void (*start_urgent_ramp_latency_measurement)(struct hubbub *hubbub, const struct hubbub_urgent_latency_params *params); - void (*get_urgent_ramp_latency_result)(struct hubbub *hubbub, uint32_t refclk_mhz, uint32_t *latency_ns); + struct hubbub_perfmon_funcs { void (*reset)(struct hubbub *hubbub); + void (*start_measuring_max_memory_latency_ns)( + struct hubbub *hubbub); + uint32_t (*get_max_memory_latency_ns)(struct hubbub *hubbub, + uint32_t refclk_mhz, uint32_t *sample_count); + void (*start_measuring_average_memory_latency_ns)( + struct hubbub *hubbub); + uint32_t (*get_average_memory_latency_ns)(struct hubbub *hubbub, + uint32_t refclk_mhz, uint32_t *sample_count); + void
(*start_measuring_urgent_ramp_latency_ns)( + struct hubbub *hubbub, + const struct hubbub_urgent_latency_params *params); + uint32_t (*get_urgent_ramp_latency_ns)(struct hubbub *hubbub, + uint32_t refclk_mhz); + void (*start_measuring_unbounded_bandwidth_mbps)( + struct hubbub *hubbub); + uint32_t (*get_unbounded_bandwidth_mbps)(struct hubbub *hubbub, + uint32_t refclk_mhz, uint32_t *duration_ns); + void (*start_measuring_average_bandwidth_mbps)( + struct hubbub *hubbub); + uint32_t (*get_average_bandwidth_mbps)(struct hubbub *hubbub, + uint32_t refclk_mhz, uint32_t min_duration_ns, + uint32_t *duration_ns); } perfmon; + + struct hubbub_qos_funcs { + void (*force_display_nominal_profile)(struct hubbub *hubbub); + void (*force_display_urgent_profile)(struct hubbub *hubbub); + void (*reset_display_qos_profile)(struct hubbub *hubbub); + } qos; }; struct hubbub { -- cgit v1.2.3 From e6c0e853f08a04d399b366640ad142ba2a7b3175 Mon Sep 17 00:00:00 2001 From: Ovidiu Bunea Date: Thu, 2 Oct 2025 17:47:36 -0400 Subject: drm/amd/display: Move all DCCG RCG into HWSS root_clock_control [why & how] Enabling/disabling DCCG RCG should be done as a last-level step when enabling/disabling blocks. This is handled by HWSS root_clock_control already during optimize_bandwidth. However, dccg35_dpp_root_clock_control was missing the RCG enable call on the disable path. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Ovidiu Bunea Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c | 5 ++--- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 5 ----- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 856615e7648b..0aa41759f79d 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -1184,8 +1184,7 @@ static void dccg35_update_dpp_dto(struct dccg *dccg, int dpp_inst, dcn35_set_dppclk_enable(dccg, dpp_inst, true); } else { dcn35_set_dppclk_enable(dccg, dpp_inst, false); - /*we have this in hwss: disable_plane*/ - //dccg35_set_dppclk_rcg(dccg, dpp_inst, true); + dccg35_set_dppclk_rcg(dccg, dpp_inst, true); } udelay(10); dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; @@ -1695,7 +1694,7 @@ static void dccg35_dpp_root_clock_control( DPPCLK0_DTO_PHASE, 0, DPPCLK0_DTO_MODULO, 1); /*we have this in hwss: disable_plane*/ - //dccg35_set_dppclk_rcg(dccg, dpp_inst, true); + dccg35_set_dppclk_rcg(dccg, dpp_inst, true); } // wait for clock to fully ramp diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 404ff00c7130..9333b7fde3bc 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -817,8 +817,6 @@ void dcn35_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context) { struct dpp *dpp = pipe_ctx->plane_res.dpp; - struct dccg *dccg = dc->res_pool->dccg; - /* enable DCFCLK current DCHUB */ pipe_ctx->plane_res.hubp->funcs->hubp_clk_cntl(pipe_ctx->plane_res.hubp, true); @@ -826,7 +824,6 @@ /* initialize HUBP on power up */ pipe_ctx->plane_res.hubp->funcs->hubp_init(pipe_ctx->plane_res.hubp); /*make sure DPPCLK is on*/ - dccg->funcs->dccg_root_gate_disable_control(dccg, dpp->inst, true);
dpp->funcs->dpp_dppclk_control(dpp, false, true); /* make sure OPP_PIPE_CLOCK_EN = 1 */ pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control( @@ -860,7 +857,6 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) { struct hubp *hubp = pipe_ctx->plane_res.hubp; struct dpp *dpp = pipe_ctx->plane_res.dpp; - struct dccg *dccg = dc->res_pool->dccg; dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe_ctx); @@ -879,7 +875,6 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) hubp->funcs->hubp_clk_cntl(hubp, false); dpp->funcs->dpp_dppclk_control(dpp, false, false); - dccg->funcs->dccg_root_gate_disable_control(dccg, dpp->inst, false); hubp->power_gated = true; -- cgit v1.2.3 From 08f68d93fae8e47421fd5c7e20cf9deeb8c19f85 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Mon, 29 Sep 2025 16:06:28 -0400 Subject: drm/amd/display: Add sink/link debug logs Add some extra logs to better help triage blackscreen issues. * Dump all the links to see if they have sinks associated. * Print the edid manufacturer & product id associated with a stream that was just created. Reviewed-by: Jerry Zuo Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 73 +++++++++++++++++++++++ drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 6 +- 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6597475e245d..1f52eaabbcfe 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3392,6 +3392,67 @@ static void apply_delay_after_dpcd_poweroff(struct amdgpu_device *adev, } } +/** + * amdgpu_dm_dump_links_and_sinks - Debug dump of all DC links and their sinks + * @adev: amdgpu device pointer + * + * Iterates through all DC links and dumps information about local and remote + * (MST) sinks. Should be called after connector detection is complete to see + * the final state of all links. + */ +static void amdgpu_dm_dump_links_and_sinks(struct amdgpu_device *adev) +{ + struct dc *dc = adev->dm.dc; + struct drm_device *dev = adev_to_drm(adev); + int li; + + if (!dc) + return; + + for (li = 0; li < dc->link_count; li++) { + struct dc_link *l = dc->links[li]; + const char *name = NULL; + int rs; + + if (!l) + continue; + if (l->local_sink && l->local_sink->edid_caps.display_name[0]) + name = l->local_sink->edid_caps.display_name; + else + name = "n/a"; + + drm_dbg_kms(dev, + "LINK_DUMP[%d]: local_sink=%p type=%d sink_signal=%d sink_count=%u edid_name=%s mst_capable=%d mst_alloc_streams=%d\n", + li, + l->local_sink, + l->type, + l->local_sink ? 
l->local_sink->sink_signal : SIGNAL_TYPE_NONE, + l->sink_count, + name, + l->dpcd_caps.is_mst_capable, + l->mst_stream_alloc_table.stream_count); + + /* Dump remote (MST) sinks if any */ + for (rs = 0; rs < l->sink_count; rs++) { + struct dc_sink *rsink = l->remote_sinks[rs]; + const char *rname = NULL; + + if (!rsink) + continue; + if (rsink->edid_caps.display_name[0]) + rname = rsink->edid_caps.display_name; + else + rname = "n/a"; + drm_dbg_kms(dev, + " REMOTE_SINK[%d:%d]: sink=%p signal=%d edid_name=%s\n", + li, rs, + rsink, + rsink->sink_signal, + rname); + } + } +} + static int dm_resume(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -3576,6 +3637,12 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) } drm_connector_list_iter_end(&iter); + /* Debug dump: list all DC links and their associated sinks after detection + * is complete for all connectors. This provides a comprehensive view of the + * final state without repeating the dump for each connector. + */ + amdgpu_dm_dump_links_and_sinks(adev); + amdgpu_dm_irq_resume_late(adev); amdgpu_dm_smu_write_watermarks_table(adev); @@ -5407,6 +5474,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) amdgpu_set_panel_orientation(&aconnector->base); } + /* Debug dump: list all DC links and their associated sinks after detection + * is complete for all connectors. This provides a comprehensive view of the + * final state without repeating the dump for each connector. + */ + amdgpu_dm_dump_links_and_sinks(adev); + /* Software is initialized. Now we can register interrupt handlers. */ switch (adev->asic_type) { #if defined(CONFIG_DRM_AMD_DC_SI) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index ccaf37d3e7e4..096fbb8ad24c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -879,9 +879,11 @@ void dc_stream_log(const struct dc *dc, const struct dc_stream_state *stream) stream->sink->sink_signal != SIGNAL_TYPE_NONE) { DC_LOG_DC( - "\tdispname: %s signal: %x\n", + "\tsignal: %x dispname: %s manufacturer_id: 0x%x product_id: 0x%x\n", + stream->signal, stream->sink->edid_caps.display_name, - stream->signal); + stream->sink->edid_caps.manufacturer_id, + stream->sink->edid_caps.product_id); } } } -- cgit v1.2.3 From c04812cbe2f247a1c1e53a9b6c5e659963fe4065 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 25 Sep 2025 10:23:59 -0400 Subject: drm/amd/display: use GFP_NOWAIT for allocation in interrupt handler schedule_dc_vmin_vmax() is called by dm_crtc_high_irq(). Hence, we cannot have the former sleep. Use GFP_NOWAIT for allocation in this function. 
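The general rule behind this fix: code reachable from an interrupt handler must never sleep, and GFP_KERNEL allocations may block in reclaim. GFP_NOWAIT attempts the allocation without blocking and simply returns NULL under memory pressure, so the caller needs a graceful bail-out, which schedule_dc_vmin_vmax() already has. A generic sketch of the pattern (illustrative names, not the driver code):

    #include <linux/slab.h>

    struct my_work {
            int payload;
    };

    /* Safe to call from hardirq/softirq context: no sleeping allocator. */
    static struct my_work *alloc_from_irq(void)
    {
            /* GFP_NOWAIT: no direct reclaim or I/O. GFP_ATOMIC would also
             * dip into emergency reserves for more critical allocations.
             */
            struct my_work *w = kzalloc(sizeof(*w), GFP_NOWAIT);

            return w; /* may be NULL; caller must tolerate the drop */
    }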
Fixes: c210b757b400 ("drm/amd/display: fix dmub access race condition") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Sun peng (Leo) Li Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1f52eaabbcfe..3ea17a3d307d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -551,13 +551,13 @@ static void schedule_dc_vmin_vmax(struct amdgpu_device *adev, struct dc_stream_state *stream, struct dc_crtc_timing_adjust *adjust) { - struct vupdate_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_KERNEL); + struct vupdate_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_NOWAIT); if (!offload_work) { drm_dbg_driver(adev_to_drm(adev), "Failed to allocate vupdate_offload_work\n"); return; } - struct dc_crtc_timing_adjust *adjust_copy = kzalloc(sizeof(*adjust_copy), GFP_KERNEL); + struct dc_crtc_timing_adjust *adjust_copy = kzalloc(sizeof(*adjust_copy), GFP_NOWAIT); if (!adjust_copy) { drm_dbg_driver(adev_to_drm(adev), "Failed to allocate adjust_copy\n"); kfree(offload_work); -- cgit v1.2.3 From 03a593b1acbaf5adabf766738da8f9a619151511 Mon Sep 17 00:00:00 2001 From: Dominik Kaszewski Date: Tue, 15 Jul 2025 14:02:40 +0200 Subject: drm/amd/display: Remove dc state from check_update [Why] dc_check_update_surfaces_for_stream should not have access to entire DC, especially not a mutable one. Concurrent checks should be able to run independently of one another, without risk of changing state. [How] * Remove access to dc state other than debug and capacity. * Move some checks from DC to DM caller. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Dominik Kaszewski Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 81 ++++++++++++++++---------------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 2c2635b6cb0f..34e417a12304 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2725,21 +2725,32 @@ static enum surface_update_type get_plane_info_update_type(const struct dc *dc, elevate_update_type(&update_type, UPDATE_TYPE_MED); } + const struct dc_tiling_info *tiling = &u->plane_info->tiling_info; - if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info, - sizeof(struct dc_tiling_info)) != 0) { + if (memcmp(tiling, &u->surface->tiling_info, sizeof(*tiling)) != 0) { update_flags->bits.swizzle_change = 1; elevate_update_type(&update_type, UPDATE_TYPE_MED); - /* todo: below are HW dependent, we should add a hook to - * DCE/N resource and validated there. 
- */ - if (!dc->debug.skip_full_updated_if_possible) { - /* swizzled mode requires RQ to be setup properly, - * thus need to run DML to calculate RQ settings - */ - update_flags->bits.bandwidth_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_FULL); + switch (tiling->gfxversion) { + case DcGfxVersion9: + case DcGfxVersion10: + case DcGfxVersion11: + if (tiling->gfx9.swizzle != DC_SW_LINEAR) { + elevate_update_type(&update_type, UPDATE_TYPE_FULL); + update_flags->bits.bandwidth_change = 1; + } + break; + case DcGfxAddr3: + if (tiling->gfx_addr3.swizzle != DC_ADDR3_SW_LINEAR) { + elevate_update_type(&update_type, UPDATE_TYPE_FULL); + update_flags->bits.bandwidth_change = 1; + } + break; + case DcGfxVersion7: + case DcGfxVersion8: + case DcGfxVersionUnknown: + default: + break; } } @@ -2808,12 +2819,11 @@ static enum surface_update_type get_scaling_info_update_type( static enum surface_update_type det_surface_update(const struct dc *dc, const struct dc_surface_update *u) { - const struct dc_state *context = dc->current_state; enum surface_update_type type; enum surface_update_type overall_type = UPDATE_TYPE_FAST; union surface_update_flags *update_flags = &u->surface->update_flags; - if (!is_surface_in_context(context, u->surface) || u->surface->force_full_update) { + if (u->surface->force_full_update) { update_flags->raw = 0xFFFFFFFF; return UPDATE_TYPE_FULL; } @@ -2944,12 +2954,6 @@ static enum surface_update_type check_update_surfaces_for_stream( int i; enum surface_update_type overall_type = UPDATE_TYPE_FAST; - if (dc->idle_optimizations_allowed || dc_can_clear_cursor_limit(dc)) - overall_type = UPDATE_TYPE_FULL; - - if (stream_status == NULL || stream_status->plane_count != surface_count) - overall_type = UPDATE_TYPE_FULL; - if (stream_update && stream_update->pending_test_pattern) { overall_type = UPDATE_TYPE_FULL; } @@ -3046,27 +3050,6 @@ enum surface_update_type dc_check_update_surfaces_for_stream( updates[i].surface->update_flags.raw = 0; type = check_update_surfaces_for_stream(dc, updates, surface_count, stream_update, stream_status); - if (type == UPDATE_TYPE_FULL) { - if (stream_update) { - uint32_t dsc_changed = stream_update->stream->update_flags.bits.dsc_changed; - stream_update->stream->update_flags.raw = 0xFFFFFFFF; - stream_update->stream->update_flags.bits.dsc_changed = dsc_changed; - } - for (i = 0; i < surface_count; i++) - updates[i].surface->update_flags.raw = 0xFFFFFFFF; - } - - if (type == UPDATE_TYPE_FAST) { - // If there's an available clock comparator, we use that. - if (dc->clk_mgr->funcs->are_clock_states_equal) { - if (!dc->clk_mgr->funcs->are_clock_states_equal(&dc->clk_mgr->clks, &dc->current_state->bw_ctx.bw.dcn.clk)) - dc->optimized_required = true; - // Else we fallback to mem compare. - } else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) { - dc->optimized_required = true; - } - } - return type; } @@ -3436,6 +3419,12 @@ static void update_seamless_boot_flags(struct dc *dc, } } +static bool full_update_required(struct dc *dc, + struct dc_surface_update *srf_updates, + int surface_count, + struct dc_stream_update *stream_update, + struct dc_stream_state *stream); + /** * update_planes_and_stream_state() - The function takes planes and stream * updates as inputs and determines the appropriate update type. 
If update type @@ -3483,6 +3472,8 @@ static bool update_planes_and_stream_state(struct dc *dc, context = dc->current_state; update_type = dc_check_update_surfaces_for_stream( dc, srf_updates, surface_count, stream_update, stream_status); + if (full_update_required(dc, srf_updates, surface_count, stream_update, stream)) + update_type = UPDATE_TYPE_FULL; /* It is possible to receive a flip for one plane while there are multiple flip_immediate planes in the same stream. * E.g. Desktop and MPO plane are flip_immediate but only the MPO plane received a flip * Force the other flip_immediate planes to flip so GSL doesn't wait for a flip that won't come. @@ -3514,6 +3505,16 @@ static bool update_planes_and_stream_state(struct dc *dc, } } + if (update_type == UPDATE_TYPE_FULL) { + if (stream_update) { + uint32_t dsc_changed = stream_update->stream->update_flags.bits.dsc_changed; + stream_update->stream->update_flags.raw = 0xFFFFFFFF; + stream_update->stream->update_flags.bits.dsc_changed = dsc_changed; + } + for (i = 0; i < surface_count; i++) + srf_updates[i].surface->update_flags.raw = 0xFFFFFFFF; + } + if (update_type >= update_surface_trace_level) update_surface_trace(dc, srf_updates, surface_count); -- cgit v1.2.3 From bf95cf7f7a06850648169bfbe7935073f35f6414 Mon Sep 17 00:00:00 2001 From: Dominik Kaszewski Date: Fri, 3 Oct 2025 11:50:55 +0200 Subject: drm/amd/display: Fix performance regression from full updates [Why] full_update_required is too strict at update_planes_and_stream_state, causing a performance regression due to too many updates being full. [How] * Carve out weak version of full_update_required for use inside update_planes_and_stream_state. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Dominik Kaszewski Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 54 ++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 34e417a12304..36b046611d02 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3419,7 +3419,7 @@ static void update_seamless_boot_flags(struct dc *dc, } } -static bool full_update_required(struct dc *dc, +static bool full_update_required_weak(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_update *stream_update, @@ -3472,8 +3472,10 @@ static bool update_planes_and_stream_state(struct dc *dc, context = dc->current_state; update_type = dc_check_update_surfaces_for_stream( dc, srf_updates, surface_count, stream_update, stream_status); - if (full_update_required(dc, srf_updates, surface_count, stream_update, stream)) + + if (full_update_required_weak(dc, srf_updates, surface_count, stream_update, stream)) update_type = UPDATE_TYPE_FULL; + /* It is possible to receive a flip for one plane while there are multiple flip_immediate planes in the same stream. * E.g. Desktop and MPO plane are flip_immediate but only the MPO plane received a flip * Force the other flip_immediate planes to flip so GSL doesn't wait for a flip that won't come. 
@@ -5047,18 +5049,42 @@ bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_ return false; } -static bool full_update_required(struct dc *dc, +static bool full_update_required_weak(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_update *stream_update, struct dc_stream_state *stream) { - - int i; - struct dc_stream_status *stream_status; const struct dc_state *context = dc->current_state; + if (srf_updates) + for (int i = 0; i < surface_count; i++) + if (!is_surface_in_context(context, srf_updates[i].surface)) + return true; - for (i = 0; i < surface_count; i++) { + if (stream) { + const struct dc_stream_status *stream_status = dc_stream_get_status(stream); + if (stream_status == NULL || stream_status->plane_count != surface_count) + return true; + } + if (dc->idle_optimizations_allowed) + return true; + + if (dc_can_clear_cursor_limit(dc)) + return true; + + return false; +} + +static bool full_update_required(struct dc *dc, + struct dc_surface_update *srf_updates, + int surface_count, + struct dc_stream_update *stream_update, + struct dc_stream_state *stream) +{ + if (full_update_required_weak(dc, srf_updates, surface_count, stream_update, stream)) + return true; + + for (int i = 0; i < surface_count; i++) { if (srf_updates && (srf_updates[i].plane_info || srf_updates[i].scaling_info || @@ -5074,8 +5100,7 @@ static bool full_update_required(struct dc *dc, srf_updates[i].flip_addr->address.tmz_surface != srf_updates[i].surface->address.tmz_surface) || (srf_updates[i].cm2_params && (srf_updates[i].cm2_params->component_settings.shaper_3dlut_setting != srf_updates[i].surface->mcm_shaper_3dlut_setting || - srf_updates[i].cm2_params->component_settings.lut1d_enable != srf_updates[i].surface->mcm_lut1d_enable)) || - !is_surface_in_context(context, srf_updates[i].surface))) + srf_updates[i].cm2_params->component_settings.lut1d_enable != srf_updates[i].surface->mcm_lut1d_enable)))) return true; } @@ -5111,17 +5136,6 @@ static bool full_update_required(struct dc *dc, stream_update->hw_cursor_req)) return true; - if (stream) { - stream_status = dc_stream_get_status(stream); - if (stream_status == NULL || stream_status->plane_count != surface_count) - return true; - } - if (dc->idle_optimizations_allowed) - return true; - - if (dc_can_clear_cursor_limit(dc)) - return true; - return false; } -- cgit v1.2.3 From 8c6a0234739e33c8be8830c2eee13a49acfd59ea Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Thu, 2 Oct 2025 11:02:39 -0400 Subject: drm/amd/display: add additional hdcp traces [why] Current hdcp trace only tracks hdcp errors. We need to expand the trace structure for more tracing information. 
[how] Add following traces for hdcp1: - attempt_count - downstream_device_count Add following traces for hdcp2: - attempt_count - downstream_device_count - hdcp1_device_downstream - hdcp2_legacy_device_downstream Reviewed-by: Sung Lee Signed-off-by: Wenjing Liu Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c | 6 +++--- drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h | 2 +- .../drm/amd/display/modules/hdcp/hdcp1_execution.c | 13 +++++++++++-- .../drm/amd/display/modules/hdcp/hdcp2_execution.c | 19 ++++++++++++++++--- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h | 6 ++++++ drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h | 14 ++++++++++++++ 6 files changed, 51 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c index c760216a6240..ca402ddcdacc 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c @@ -354,7 +354,7 @@ enum mod_hdcp_status mod_hdcp_add_display(struct mod_hdcp *hdcp, /* reset retry counters */ reset_retry_counts(hdcp); - /* reset error trace */ + /* reset trace */ memset(&hdcp->connection.trace, 0, sizeof(hdcp->connection.trace)); /* add display to connection */ @@ -400,7 +400,7 @@ enum mod_hdcp_status mod_hdcp_remove_display(struct mod_hdcp *hdcp, /* clear retry counters */ reset_retry_counts(hdcp); - /* reset error trace */ + /* reset trace */ memset(&hdcp->connection.trace, 0, sizeof(hdcp->connection.trace)); /* remove display */ @@ -464,7 +464,7 @@ enum mod_hdcp_status mod_hdcp_update_display(struct mod_hdcp *hdcp, /* clear retry counters */ reset_retry_counts(hdcp); - /* reset error trace */ + /* reset trace */ memset(&hdcp->connection.trace, 0, sizeof(hdcp->connection.trace)); /* set new adjustment */ diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h index a37634942b07..b883d626f1c3 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h @@ -508,7 +508,7 @@ static inline void set_auth_complete(struct mod_hdcp *hdcp, struct mod_hdcp_output *output) { output->auth_complete = 1; - mod_hdcp_log_ddc_trace(hdcp); + HDCP_AUTH_COMPLETE_TRACE(hdcp); } /* connection topology helpers */ diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 8bc377560787..1bbd728d4345 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -29,6 +29,7 @@ static inline enum mod_hdcp_status validate_bksv(struct mod_hdcp *hdcp) { uint64_t n = 0; uint8_t count = 0; + enum mod_hdcp_status status; u8 bksv[sizeof(n)] = { }; memcpy(bksv, hdcp->auth.msg.hdcp1.bksv, sizeof(hdcp->auth.msg.hdcp1.bksv)); @@ -38,8 +39,14 @@ static inline enum mod_hdcp_status validate_bksv(struct mod_hdcp *hdcp) count++; n &= (n - 1); } - return (count == 20) ? 
MOD_HDCP_STATUS_SUCCESS : - MOD_HDCP_STATUS_HDCP1_INVALID_BKSV; + + if (count == 20) { + hdcp->connection.trace.hdcp1.attempt_count++; + status = MOD_HDCP_STATUS_SUCCESS; + } else { + status = MOD_HDCP_STATUS_HDCP1_INVALID_BKSV; + } + return status; } static inline enum mod_hdcp_status check_ksv_ready(struct mod_hdcp *hdcp) @@ -135,6 +142,8 @@ static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp) if (get_device_count(hdcp) == 0) return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE; + hdcp->connection.trace.hdcp1.downstream_device_count = get_device_count(hdcp); + /* Some MST display may choose to report the internal panel as an HDCP RX. * To update this condition with 1(because the immediate repeater's internal * panel is possibly not included in DEVICE_COUNT) + get_device_count(hdcp). diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index bb8ae80b37f8..5628f0ef73fd 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -48,6 +48,7 @@ static inline enum mod_hdcp_status check_receiver_id_list_ready(struct mod_hdcp static inline enum mod_hdcp_status check_hdcp2_capable(struct mod_hdcp *hdcp) { enum mod_hdcp_status status; + struct mod_hdcp_trace *trace = &hdcp->connection.trace; if (is_dp_hdcp(hdcp)) status = (hdcp->auth.msg.hdcp2.rxcaps_dp[0] == HDCP_2_2_RX_CAPS_VERSION_VAL) && @@ -55,9 +56,14 @@ static inline enum mod_hdcp_status check_hdcp2_capable(struct mod_hdcp *hdcp) MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE; else - status = (hdcp->auth.msg.hdcp2.hdcp2version_hdmi & HDCP_2_2_HDMI_SUPPORT_MASK) ? - MOD_HDCP_STATUS_SUCCESS : - MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE; + status = (hdcp->auth.msg.hdcp2.hdcp2version_hdmi + & HDCP_2_2_HDMI_SUPPORT_MASK) + ? MOD_HDCP_STATUS_SUCCESS + : MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE; + + if (status == MOD_HDCP_STATUS_SUCCESS) + trace->hdcp2.attempt_count++; + return status; } @@ -201,10 +207,17 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp) static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp) { + struct mod_hdcp_trace *trace = &hdcp->connection.trace; + /* Avoid device count == 0 to do authentication */ if (get_device_count(hdcp) == 0) return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE; + trace->hdcp2.downstream_device_count = get_device_count(hdcp); + trace->hdcp2.hdcp1_device_downstream = + HDCP_2_2_HDCP1_DEVICE_CONNECTED(hdcp->auth.msg.hdcp2.rx_id_list[2]); + trace->hdcp2.hdcp2_legacy_device_downstream = + HDCP_2_2_HDCP_2_0_REP_CONNECTED(hdcp->auth.msg.hdcp2.rx_id_list[2]); /* Some MST display may choose to report the internal panel as an HDCP RX. */ /* To update this condition with 1(because the immediate repeater's internal */ /* panel is possibly not included in DEVICE_COUNT) + get_device_count(hdcp). */ diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h index 1d83c1b9da10..26553aa4c5ca 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h @@ -31,6 +31,7 @@ #define HDCP_LOG_FSM(hdcp, ...) DRM_DEBUG_KMS(__VA_ARGS__) #define HDCP_LOG_TOP(hdcp, ...) pr_debug("[HDCP_TOP]:"__VA_ARGS__) #define HDCP_LOG_DDC(hdcp, ...) 
pr_debug("[HDCP_DDC]:"__VA_ARGS__) +#define HDCP_LOG_TRA(hdcp) do {} while (0) /* default logs */ #define HDCP_ERROR_TRACE(hdcp, status) \ @@ -131,4 +132,9 @@ HDCP_LOG_TOP(hdcp, "[Link %d] %s display %d", hdcp->config.index, __func__, i); \ } while (0) +#define HDCP_AUTH_COMPLETE_TRACE(hdcp) do { \ + mod_hdcp_log_ddc_trace(hdcp); \ + HDCP_LOG_TRA(hdcp); \ +} while (0) + #endif // MOD_HDCP_LOG_H_ diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h index b51ddf2846df..46e52fb3a118 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h @@ -230,9 +230,23 @@ struct mod_hdcp_error { uint8_t state_id; }; +struct mod_hdcp1_trace { + uint8_t attempt_count; + uint8_t downstream_device_count; +}; + +struct mod_hdcp2_trace { + uint8_t attempt_count; + uint8_t downstream_device_count; + uint8_t hdcp1_device_downstream; + uint8_t hdcp2_legacy_device_downstream; +}; + struct mod_hdcp_trace { struct mod_hdcp_error errors[MAX_NUM_OF_ERROR_TRACE]; uint8_t error_count; + struct mod_hdcp1_trace hdcp1; + struct mod_hdcp2_trace hdcp2; }; enum mod_hdcp_encryption_status { -- cgit v1.2.3 From f96012baa5d3b4336092855e4e6d24830ea50736 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Wed, 3 Sep 2025 14:07:41 -0400 Subject: drm/amd/display: add new block sequence-building/executing functions [Why/How] Create functions for building/executing HW block programming steps Reviewed-by: Alvin Lee Signed-off-by: Ilya Bakoulin Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 3161 ++++++++++++++++++-- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 1352 ++++++++- .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 98 + .../drm/amd/display/dc/hwss/dcn401/dcn401_init.c | 17 + drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h | 1409 ++++++++- .../drm/amd/display/dc/hwss/hw_sequencer_private.h | 36 + 7 files changed, 5878 insertions(+), 196 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 1bed3b14a287..16d916986fed 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -32,6 +32,12 @@ #include "resource.h" #include "dc_dmub_srv.h" #include "dc_state_priv.h" +#include "opp.h" +#include "dsc.h" +#include "dchubbub.h" +#include "dccg.h" +#include "abm.h" +#include "dcn10/dcn10_hubbub.h" #define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) #define MAX_NUM_MCACHE 8 @@ -784,7 +790,7 @@ void hwss_build_fast_sequence(struct dc *dc, while (current_mpc_pipe) { if (current_mpc_pipe->plane_state) { if (dc->hwss.set_flip_control_gsl && current_mpc_pipe->plane_state->update_flags.raw) { - block_sequence[*num_steps].params.set_flip_control_gsl_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.set_flip_control_gsl_params.hubp = current_mpc_pipe->plane_res.hubp; block_sequence[*num_steps].params.set_flip_control_gsl_params.flip_immediate = current_mpc_pipe->plane_state->flip_immediate; block_sequence[*num_steps].func = HUBP_SET_FLIP_CONTROL_GSL; (*num_steps)++; @@ -949,8 +955,9 @@ void hwss_execute_sequence(struct dc *dc, params->pipe_control_lock_params.lock); break; case HUBP_SET_FLIP_CONTROL_GSL: - dc->hwss.set_flip_control_gsl(params->set_flip_control_gsl_params.pipe_ctx, - 
params->set_flip_control_gsl_params.flip_immediate); + params->set_flip_control_gsl_params.hubp->funcs->hubp_set_flip_control_surface_gsl( + params->set_flip_control_gsl_params.hubp, + params->set_flip_control_gsl_params.flip_immediate); break; case HUBP_PROGRAM_TRIPLEBUFFER: dc->hwss.program_triplebuffer(params->program_triplebuffer_params.dc, @@ -1011,11 +1018,296 @@ void hwss_execute_sequence(struct dc *dc, case DMUB_HW_CONTROL_LOCK_FAST: dc->hwss.dmub_hw_control_lock_fast(params); break; + case HUBP_PROGRAM_SURFACE_CONFIG: + hwss_program_surface_config(params); + break; + case HUBP_PROGRAM_MCACHE_ID: + hwss_program_mcache_id_and_split_coordinate(params); + break; case PROGRAM_CURSOR_UPDATE_NOW: dc->hwss.program_cursor_offload_now( params->program_cursor_update_now_params.dc, params->program_cursor_update_now_params.pipe_ctx); break; + case HUBP_WAIT_PIPE_READ_START: + params->hubp_wait_pipe_read_start_params.hubp->funcs->hubp_wait_pipe_read_start( + params->hubp_wait_pipe_read_start_params.hubp); + break; + case HWS_APPLY_UPDATE_FLAGS_FOR_PHANTOM: + dc->hwss.apply_update_flags_for_phantom(params->apply_update_flags_for_phantom_params.pipe_ctx); + break; + case HWS_UPDATE_PHANTOM_VP_POSITION: + dc->hwss.update_phantom_vp_position(params->update_phantom_vp_position_params.dc, + params->update_phantom_vp_position_params.context, + params->update_phantom_vp_position_params.pipe_ctx); + break; + case OPTC_SET_ODM_COMBINE: + hwss_set_odm_combine(params); + break; + case OPTC_SET_ODM_BYPASS: + hwss_set_odm_bypass(params); + break; + case OPP_PIPE_CLOCK_CONTROL: + hwss_opp_pipe_clock_control(params); + break; + case OPP_PROGRAM_LEFT_EDGE_EXTRA_PIXEL: + hwss_opp_program_left_edge_extra_pixel(params); + break; + case DCCG_SET_DTO_DSCCLK: + hwss_dccg_set_dto_dscclk(params); + break; + case DSC_SET_CONFIG: + hwss_dsc_set_config(params); + break; + case DSC_ENABLE: + hwss_dsc_enable(params); + break; + case TG_SET_DSC_CONFIG: + hwss_tg_set_dsc_config(params); + break; + case DSC_DISCONNECT: + hwss_dsc_disconnect(params); + break; + case DSC_READ_STATE: + hwss_dsc_read_state(params); + break; + case DSC_CALCULATE_AND_SET_CONFIG: + hwss_dsc_calculate_and_set_config(params); + break; + case DSC_ENABLE_WITH_OPP: + hwss_dsc_enable_with_opp(params); + break; + case TG_PROGRAM_GLOBAL_SYNC: + hwss_tg_program_global_sync(params); + break; + case TG_WAIT_FOR_STATE: + hwss_tg_wait_for_state(params); + break; + case TG_SET_VTG_PARAMS: + hwss_tg_set_vtg_params(params); + break; + case TG_SETUP_VERTICAL_INTERRUPT2: + hwss_tg_setup_vertical_interrupt2(params); + break; + case DPP_SET_HDR_MULTIPLIER: + hwss_dpp_set_hdr_multiplier(params); + break; + case HUBP_PROGRAM_DET_SIZE: + hwss_program_det_size(params); + break; + case HUBP_PROGRAM_DET_SEGMENTS: + hwss_program_det_segments(params); + break; + case OPP_SET_DYN_EXPANSION: + hwss_opp_set_dyn_expansion(params); + break; + case OPP_PROGRAM_FMT: + hwss_opp_program_fmt(params); + break; + case OPP_PROGRAM_BIT_DEPTH_REDUCTION: + hwss_opp_program_bit_depth_reduction(params); + break; + case OPP_SET_DISP_PATTERN_GENERATOR: + hwss_opp_set_disp_pattern_generator(params); + break; + case ABM_SET_PIPE: + hwss_set_abm_pipe(params); + break; + case ABM_SET_LEVEL: + hwss_set_abm_level(params); + break; + case ABM_SET_IMMEDIATE_DISABLE: + hwss_set_abm_immediate_disable(params); + break; + case MPC_REMOVE_MPCC: + hwss_mpc_remove_mpcc(params); + break; + case OPP_SET_MPCC_DISCONNECT_PENDING: + hwss_opp_set_mpcc_disconnect_pending(params); + break; + case 
DC_SET_OPTIMIZED_REQUIRED: + hwss_dc_set_optimized_required(params); + break; + case HUBP_DISCONNECT: + hwss_hubp_disconnect(params); + break; + case HUBBUB_FORCE_PSTATE_CHANGE_CONTROL: + hwss_hubbub_force_pstate_change_control(params); + break; + case TG_ENABLE_CRTC: + hwss_tg_enable_crtc(params); + break; + case TG_SET_GSL: + hwss_tg_set_gsl(params); + break; + case TG_SET_GSL_SOURCE_SELECT: + hwss_tg_set_gsl_source_select(params); + break; + case HUBP_WAIT_FLIP_PENDING: + hwss_hubp_wait_flip_pending(params); + break; + case TG_WAIT_DOUBLE_BUFFER_PENDING: + hwss_tg_wait_double_buffer_pending(params); + break; + case UPDATE_FORCE_PSTATE: + hwss_update_force_pstate(params); + break; + case HUBBUB_APPLY_DEDCN21_147_WA: + hwss_hubbub_apply_dedcn21_147_wa(params); + break; + case HUBBUB_ALLOW_SELF_REFRESH_CONTROL: + hwss_hubbub_allow_self_refresh_control(params); + break; + case TG_GET_FRAME_COUNT: + hwss_tg_get_frame_count(params); + break; + case MPC_SET_DWB_MUX: + hwss_mpc_set_dwb_mux(params); + break; + case MPC_DISABLE_DWB_MUX: + hwss_mpc_disable_dwb_mux(params); + break; + case MCIF_WB_CONFIG_BUF: + hwss_mcif_wb_config_buf(params); + break; + case MCIF_WB_CONFIG_ARB: + hwss_mcif_wb_config_arb(params); + break; + case MCIF_WB_ENABLE: + hwss_mcif_wb_enable(params); + break; + case MCIF_WB_DISABLE: + hwss_mcif_wb_disable(params); + break; + case DWBC_ENABLE: + hwss_dwbc_enable(params); + break; + case DWBC_DISABLE: + hwss_dwbc_disable(params); + break; + case DWBC_UPDATE: + hwss_dwbc_update(params); + break; + case HUBP_UPDATE_MALL_SEL: + hwss_hubp_update_mall_sel(params); + break; + case HUBP_PREPARE_SUBVP_BUFFERING: + hwss_hubp_prepare_subvp_buffering(params); + break; + case HUBP_SET_BLANK_EN: + hwss_hubp_set_blank_en(params); + break; + case HUBP_DISABLE_CONTROL: + hwss_hubp_disable_control(params); + break; + case HUBBUB_SOFT_RESET: + hwss_hubbub_soft_reset(params); + break; + case HUBP_CLK_CNTL: + hwss_hubp_clk_cntl(params); + break; + case HUBP_INIT: + hwss_hubp_init(params); + break; + case HUBP_SET_VM_SYSTEM_APERTURE_SETTINGS: + hwss_hubp_set_vm_system_aperture_settings(params); + break; + case HUBP_SET_FLIP_INT: + hwss_hubp_set_flip_int(params); + break; + case DPP_DPPCLK_CONTROL: + hwss_dpp_dppclk_control(params); + break; + case DISABLE_PHANTOM_CRTC: + hwss_disable_phantom_crtc(params); + break; + case DSC_PG_STATUS: + hwss_dsc_pg_status(params); + break; + case DSC_WAIT_DISCONNECT_PENDING_CLEAR: + hwss_dsc_wait_disconnect_pending_clear(params); + break; + case DSC_DISABLE: + hwss_dsc_disable(params); + break; + case DCCG_SET_REF_DSCCLK: + hwss_dccg_set_ref_dscclk(params); + break; + case DPP_PG_CONTROL: + hwss_dpp_pg_control(params); + break; + case HUBP_PG_CONTROL: + hwss_hubp_pg_control(params); + break; + case HUBP_RESET: + hwss_hubp_reset(params); + break; + case DPP_RESET: + hwss_dpp_reset(params); + break; + case DPP_ROOT_CLOCK_CONTROL: + hwss_dpp_root_clock_control(params); + break; + case DC_IP_REQUEST_CNTL: + hwss_dc_ip_request_cntl(params); + break; + case DCCG_UPDATE_DPP_DTO: + hwss_dccg_update_dpp_dto(params); + break; + case HUBP_VTG_SEL: + hwss_hubp_vtg_sel(params); + break; + case HUBP_SETUP2: + hwss_hubp_setup2(params); + break; + case HUBP_SETUP: + hwss_hubp_setup(params); + break; + case HUBP_SET_UNBOUNDED_REQUESTING: + hwss_hubp_set_unbounded_requesting(params); + break; + case HUBP_SETUP_INTERDEPENDENT2: + hwss_hubp_setup_interdependent2(params); + break; + case HUBP_SETUP_INTERDEPENDENT: + hwss_hubp_setup_interdependent(params); + break; + case 
DPP_SET_CURSOR_MATRIX: + hwss_dpp_set_cursor_matrix(params); + break; + case MPC_UPDATE_BLENDING: + hwss_mpc_update_blending(params); + break; + case MPC_ASSERT_IDLE_MPCC: + hwss_mpc_assert_idle_mpcc(params); + break; + case MPC_INSERT_PLANE: + hwss_mpc_insert_plane(params); + break; + case DPP_SET_SCALER: + hwss_dpp_set_scaler(params); + break; + case HUBP_MEM_PROGRAM_VIEWPORT: + hwss_hubp_mem_program_viewport(params); + break; + case SET_CURSOR_ATTRIBUTE: + hwss_set_cursor_attribute(params); + break; + case SET_CURSOR_POSITION: + hwss_set_cursor_position(params); + break; + case SET_CURSOR_SDR_WHITE_LEVEL: + hwss_set_cursor_sdr_white_level(params); + break; + case PROGRAM_OUTPUT_CSC: + hwss_program_output_csc(params); + break; + case HUBP_SET_BLANK: + hwss_hubp_set_blank(params); + break; + case PHANTOM_HUBP_POST_ENABLE: + hwss_phantom_hubp_post_enable(params); + break; default: ASSERT(false); break; @@ -1023,256 +1315,2749 @@ void hwss_execute_sequence(struct dc *dc, } } -void hwss_send_dmcub_cmd(union block_sequence_params *params) +/** + * Helper function to add OPTC pipe control lock to block sequence + */ +void hwss_add_optc_pipe_control_lock(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx, + bool lock) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.pipe_control_lock_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.pipe_control_lock_params.pipe_ctx = pipe_ctx; + seq_state->steps[*seq_state->num_steps].params.pipe_control_lock_params.lock = lock; + seq_state->steps[*seq_state->num_steps].func = OPTC_PIPE_CONTROL_LOCK; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add HUBP set flip control GSL to block sequence + */ +void hwss_add_hubp_set_flip_control_gsl(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool flip_immediate) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.set_flip_control_gsl_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.set_flip_control_gsl_params.flip_immediate = flip_immediate; + seq_state->steps[*seq_state->num_steps].func = HUBP_SET_FLIP_CONTROL_GSL; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add HUBP program triplebuffer to block sequence + */ +void hwss_add_hubp_program_triplebuffer(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx, + bool enableTripleBuffer) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.program_triplebuffer_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.program_triplebuffer_params.pipe_ctx = pipe_ctx; + seq_state->steps[*seq_state->num_steps].params.program_triplebuffer_params.enableTripleBuffer = enableTripleBuffer; + seq_state->steps[*seq_state->num_steps].func = HUBP_PROGRAM_TRIPLEBUFFER; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add HUBP update plane address to block sequence + */ +void hwss_add_hubp_update_plane_addr(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.update_plane_addr_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.update_plane_addr_params.pipe_ctx = pipe_ctx; + seq_state->steps[*seq_state->num_steps].func = HUBP_UPDATE_PLANE_ADDR; + 
(*seq_state->num_steps)++; + } +} + +/** + * Helper function to add DPP set input transfer function to block sequence + */ +void hwss_add_dpp_set_input_transfer_func(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_plane_state *plane_state) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.set_input_transfer_func_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.set_input_transfer_func_params.pipe_ctx = pipe_ctx; + seq_state->steps[*seq_state->num_steps].params.set_input_transfer_func_params.plane_state = plane_state; + seq_state->steps[*seq_state->num_steps].func = DPP_SET_INPUT_TRANSFER_FUNC; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add DPP program gamut remap to block sequence + */ +void hwss_add_dpp_program_gamut_remap(struct block_sequence_state *seq_state, + struct pipe_ctx *pipe_ctx) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.program_gamut_remap_params.pipe_ctx = pipe_ctx; + seq_state->steps[*seq_state->num_steps].func = DPP_PROGRAM_GAMUT_REMAP; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add DPP program bias and scale to block sequence + */ +void hwss_add_dpp_program_bias_and_scale(struct block_sequence_state *seq_state, struct pipe_ctx *pipe_ctx) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.program_bias_and_scale_params.pipe_ctx = pipe_ctx; + seq_state->steps[*seq_state->num_steps].func = DPP_PROGRAM_BIAS_AND_SCALE; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add OPTC program manual trigger to block sequence + */ +void hwss_add_optc_program_manual_trigger(struct block_sequence_state *seq_state, + struct pipe_ctx *pipe_ctx) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.program_manual_trigger_params.pipe_ctx = pipe_ctx; + seq_state->steps[*seq_state->num_steps].func = OPTC_PROGRAM_MANUAL_TRIGGER; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add DPP set output transfer function to block sequence + */ +void hwss_add_dpp_set_output_transfer_func(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_stream_state *stream) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.set_output_transfer_func_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.set_output_transfer_func_params.pipe_ctx = pipe_ctx; + seq_state->steps[*seq_state->num_steps].params.set_output_transfer_func_params.stream = stream; + seq_state->steps[*seq_state->num_steps].func = DPP_SET_OUTPUT_TRANSFER_FUNC; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add MPC update visual confirm to block sequence + */ +void hwss_add_mpc_update_visual_confirm(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx, + int mpcc_id) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.update_visual_confirm_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.update_visual_confirm_params.pipe_ctx = pipe_ctx; + seq_state->steps[*seq_state->num_steps].params.update_visual_confirm_params.mpcc_id = mpcc_id; + seq_state->steps[*seq_state->num_steps].func = MPC_UPDATE_VISUAL_CONFIRM; + 
(*seq_state->num_steps)++; + } +} + +/** + * Helper function to add MPC power on MPC mem PWR to block sequence + */ +void hwss_add_mpc_power_on_mpc_mem_pwr(struct block_sequence_state *seq_state, + struct mpc *mpc, + int mpcc_id, + bool power_on) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.power_on_mpc_mem_pwr_params.mpc = mpc; + seq_state->steps[*seq_state->num_steps].params.power_on_mpc_mem_pwr_params.mpcc_id = mpcc_id; + seq_state->steps[*seq_state->num_steps].params.power_on_mpc_mem_pwr_params.power_on = power_on; + seq_state->steps[*seq_state->num_steps].func = MPC_POWER_ON_MPC_MEM_PWR; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add MPC set output CSC to block sequence + */ +void hwss_add_mpc_set_output_csc(struct block_sequence_state *seq_state, + struct mpc *mpc, + int opp_id, + const uint16_t *regval, + enum mpc_output_csc_mode ocsc_mode) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.set_output_csc_params.mpc = mpc; + seq_state->steps[*seq_state->num_steps].params.set_output_csc_params.opp_id = opp_id; + seq_state->steps[*seq_state->num_steps].params.set_output_csc_params.regval = regval; + seq_state->steps[*seq_state->num_steps].params.set_output_csc_params.ocsc_mode = ocsc_mode; + seq_state->steps[*seq_state->num_steps].func = MPC_SET_OUTPUT_CSC; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add MPC set OCSC default to block sequence + */ +void hwss_add_mpc_set_ocsc_default(struct block_sequence_state *seq_state, + struct mpc *mpc, + int opp_id, + enum dc_color_space colorspace, + enum mpc_output_csc_mode ocsc_mode) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.set_ocsc_default_params.mpc = mpc; + seq_state->steps[*seq_state->num_steps].params.set_ocsc_default_params.opp_id = opp_id; + seq_state->steps[*seq_state->num_steps].params.set_ocsc_default_params.color_space = colorspace; + seq_state->steps[*seq_state->num_steps].params.set_ocsc_default_params.ocsc_mode = ocsc_mode; + seq_state->steps[*seq_state->num_steps].func = MPC_SET_OCSC_DEFAULT; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add DMUB send DMCUB command to block sequence + */ +void hwss_add_dmub_send_dmcub_cmd(struct block_sequence_state *seq_state, + struct dc_context *ctx, + union dmub_rb_cmd *cmd, + enum dm_dmub_wait_type wait_type) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.send_dmcub_cmd_params.ctx = ctx; + seq_state->steps[*seq_state->num_steps].params.send_dmcub_cmd_params.cmd = cmd; + seq_state->steps[*seq_state->num_steps].params.send_dmcub_cmd_params.wait_type = wait_type; + seq_state->steps[*seq_state->num_steps].func = DMUB_SEND_DMCUB_CMD; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add DMUB SubVP save surface address to block sequence + */ +void hwss_add_dmub_subvp_save_surf_addr(struct block_sequence_state *seq_state, + struct dc_dmub_srv *dc_dmub_srv, + struct dc_plane_address *addr, + uint8_t subvp_index) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc_dmub_srv; + seq_state->steps[*seq_state->num_steps].params.subvp_save_surf_addr.addr = addr; + seq_state->steps[*seq_state->num_steps].params.subvp_save_surf_addr.subvp_index = subvp_index; + 
seq_state->steps[*seq_state->num_steps].func = DMUB_SUBVP_SAVE_SURF_ADDR; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add HUBP wait for DCC meta propagation to block sequence + */ +void hwss_add_hubp_wait_for_dcc_meta_prop(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *top_pipe_to_program) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.wait_for_dcc_meta_propagation_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.wait_for_dcc_meta_propagation_params.top_pipe_to_program = top_pipe_to_program; + seq_state->steps[*seq_state->num_steps].func = HUBP_WAIT_FOR_DCC_META_PROP; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add HUBP wait pipe read start to block sequence + */ +void hwss_add_hubp_wait_pipe_read_start(struct block_sequence_state *seq_state, + struct hubp *hubp) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.hubp_wait_pipe_read_start_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].func = HUBP_WAIT_PIPE_READ_START; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add HWS apply update flags for phantom to block sequence + */ +void hwss_add_hws_apply_update_flags_for_phantom(struct block_sequence_state *seq_state, + struct pipe_ctx *pipe_ctx) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.apply_update_flags_for_phantom_params.pipe_ctx = pipe_ctx; + seq_state->steps[*seq_state->num_steps].func = HWS_APPLY_UPDATE_FLAGS_FOR_PHANTOM; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add HWS update phantom VP position to block sequence + */ +void hwss_add_hws_update_phantom_vp_position(struct block_sequence_state *seq_state, + struct dc *dc, + struct dc_state *context, + struct pipe_ctx *pipe_ctx) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.update_phantom_vp_position_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.update_phantom_vp_position_params.context = context; + seq_state->steps[*seq_state->num_steps].params.update_phantom_vp_position_params.pipe_ctx = pipe_ctx; + seq_state->steps[*seq_state->num_steps].func = HWS_UPDATE_PHANTOM_VP_POSITION; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add OPTC set ODM combine to block sequence + */ +void hwss_add_optc_set_odm_combine(struct block_sequence_state *seq_state, + struct timing_generator *tg, int opp_inst[MAX_PIPES], int opp_head_count, + int odm_slice_width, int last_odm_slice_width) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.set_odm_combine_params.tg = tg; + memcpy(seq_state->steps[*seq_state->num_steps].params.set_odm_combine_params.opp_inst, opp_inst, sizeof(int) * MAX_PIPES); + seq_state->steps[*seq_state->num_steps].params.set_odm_combine_params.opp_head_count = opp_head_count; + seq_state->steps[*seq_state->num_steps].params.set_odm_combine_params.odm_slice_width = odm_slice_width; + seq_state->steps[*seq_state->num_steps].params.set_odm_combine_params.last_odm_slice_width = last_odm_slice_width; + seq_state->steps[*seq_state->num_steps].func = OPTC_SET_ODM_COMBINE; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add OPTC set ODM bypass to block sequence + */ +void hwss_add_optc_set_odm_bypass(struct 
block_sequence_state *seq_state, + struct timing_generator *tg, struct dc_crtc_timing *timing) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.set_odm_bypass_params.tg = tg; + seq_state->steps[*seq_state->num_steps].params.set_odm_bypass_params.timing = timing; + seq_state->steps[*seq_state->num_steps].func = OPTC_SET_ODM_BYPASS; + (*seq_state->num_steps)++; + } +} + +void hwss_send_dmcub_cmd(union block_sequence_params *params) +{ + struct dc_context *ctx = params->send_dmcub_cmd_params.ctx; + union dmub_rb_cmd *cmd = params->send_dmcub_cmd_params.cmd; + enum dm_dmub_wait_type wait_type = params->send_dmcub_cmd_params.wait_type; + + dc_wake_and_execute_dmub_cmd(ctx, cmd, wait_type); +} + +/** + * Helper function to add TG program global sync to block sequence + */ +void hwss_add_tg_program_global_sync(struct block_sequence_state *seq_state, + struct timing_generator *tg, + int vready_offset, + unsigned int vstartup_lines, + unsigned int vupdate_offset_pixels, + unsigned int vupdate_vupdate_width_pixels, + unsigned int pstate_keepout_start_lines) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.tg_program_global_sync_params.tg = tg; + seq_state->steps[*seq_state->num_steps].params.tg_program_global_sync_params.vready_offset = vready_offset; + seq_state->steps[*seq_state->num_steps].params.tg_program_global_sync_params.vstartup_lines = vstartup_lines; + seq_state->steps[*seq_state->num_steps].params.tg_program_global_sync_params.vupdate_offset_pixels = vupdate_offset_pixels; + seq_state->steps[*seq_state->num_steps].params.tg_program_global_sync_params.vupdate_vupdate_width_pixels = vupdate_vupdate_width_pixels; + seq_state->steps[*seq_state->num_steps].params.tg_program_global_sync_params.pstate_keepout_start_lines = pstate_keepout_start_lines; + seq_state->steps[*seq_state->num_steps].func = TG_PROGRAM_GLOBAL_SYNC; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add TG wait for state to block sequence + */ +void hwss_add_tg_wait_for_state(struct block_sequence_state *seq_state, + struct timing_generator *tg, + enum crtc_state state) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.tg_wait_for_state_params.tg = tg; + seq_state->steps[*seq_state->num_steps].params.tg_wait_for_state_params.state = state; + seq_state->steps[*seq_state->num_steps].func = TG_WAIT_FOR_STATE; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add TG set VTG params to block sequence + */ +void hwss_add_tg_set_vtg_params(struct block_sequence_state *seq_state, + struct timing_generator *tg, + struct dc_crtc_timing *dc_crtc_timing, + bool program_fp2) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.tg_set_vtg_params_params.tg = tg; + seq_state->steps[*seq_state->num_steps].params.tg_set_vtg_params_params.timing = dc_crtc_timing; + seq_state->steps[*seq_state->num_steps].params.tg_set_vtg_params_params.program_fp2 = program_fp2; + seq_state->steps[*seq_state->num_steps].func = TG_SET_VTG_PARAMS; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add TG setup vertical interrupt2 to block sequence + */ +void hwss_add_tg_setup_vertical_interrupt2(struct block_sequence_state *seq_state, + struct timing_generator *tg, int start_line) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + 
seq_state->steps[*seq_state->num_steps].params.tg_setup_vertical_interrupt2_params.tg = tg;
+ seq_state->steps[*seq_state->num_steps].params.tg_setup_vertical_interrupt2_params.start_line = start_line;
+ seq_state->steps[*seq_state->num_steps].func = TG_SETUP_VERTICAL_INTERRUPT2;
+ (*seq_state->num_steps)++;
+ }
+}
+
+/**
+ * Helper function to add DPP set HDR multiplier to block sequence
+ */
+void hwss_add_dpp_set_hdr_multiplier(struct block_sequence_state *seq_state,
+ struct dpp *dpp, uint32_t hw_mult)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].params.dpp_set_hdr_multiplier_params.dpp = dpp;
+ seq_state->steps[*seq_state->num_steps].params.dpp_set_hdr_multiplier_params.hw_mult = hw_mult;
+ seq_state->steps[*seq_state->num_steps].func = DPP_SET_HDR_MULTIPLIER;
+ (*seq_state->num_steps)++;
+ }
+}
+
+/**
+ * Helper function to add HUBP program DET size to block sequence
+ */
+void hwss_add_hubp_program_det_size(struct block_sequence_state *seq_state,
+ struct hubbub *hubbub,
+ unsigned int hubp_inst,
+ unsigned int det_buffer_size_kb)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].params.program_det_size_params.hubbub = hubbub;
+ seq_state->steps[*seq_state->num_steps].params.program_det_size_params.hubp_inst = hubp_inst;
+ seq_state->steps[*seq_state->num_steps].params.program_det_size_params.det_buffer_size_kb = det_buffer_size_kb;
+ seq_state->steps[*seq_state->num_steps].func = HUBP_PROGRAM_DET_SIZE;
+ (*seq_state->num_steps)++;
+ }
+}
+
+/**
+ * Helper function to add HUBP program mcache ID and split coordinate to block sequence
+ */
+void hwss_add_hubp_program_mcache_id(struct block_sequence_state *seq_state,
+ struct hubp *hubp,
+ struct dml2_hubp_pipe_mcache_regs *mcache_regs)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].params.program_mcache_id_and_split_coordinate.hubp = hubp;
+ seq_state->steps[*seq_state->num_steps].params.program_mcache_id_and_split_coordinate.mcache_regs = mcache_regs;
+ seq_state->steps[*seq_state->num_steps].func = HUBP_PROGRAM_MCACHE_ID;
+ (*seq_state->num_steps)++;
+ }
+}
+
+/**
+ * Helper function to add HUBBUB force pstate change control to block sequence
+ */
+void hwss_add_hubbub_force_pstate_change_control(struct block_sequence_state *seq_state,
+ struct hubbub *hubbub,
+ bool enable,
+ bool wait)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].params.hubbub_force_pstate_change_control_params.hubbub = hubbub;
+ seq_state->steps[*seq_state->num_steps].params.hubbub_force_pstate_change_control_params.enable = enable;
+ seq_state->steps[*seq_state->num_steps].params.hubbub_force_pstate_change_control_params.wait = wait;
+ seq_state->steps[*seq_state->num_steps].func = HUBBUB_FORCE_PSTATE_CHANGE_CONTROL;
+ (*seq_state->num_steps)++;
+ }
+}
+
+/**
+ * Helper function to add HUBP program DET segments to block sequence
+ */
+void hwss_add_hubp_program_det_segments(struct block_sequence_state *seq_state,
+ struct hubbub *hubbub,
+ unsigned int hubp_inst,
+ unsigned int det_size)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].params.program_det_segments_params.hubbub = hubbub;
+ seq_state->steps[*seq_state->num_steps].params.program_det_segments_params.hubp_inst = hubp_inst;
+ seq_state->steps[*seq_state->num_steps].params.program_det_segments_params.det_size = det_size;
+ seq_state->steps[*seq_state->num_steps].func = HUBP_PROGRAM_DET_SEGMENTS;
+ (*seq_state->num_steps)++;
+ }
+}
+
+/**
+ * Helper function to add OPP set dynamic expansion to
block sequence
+ */
+void hwss_add_opp_set_dyn_expansion(struct block_sequence_state *seq_state,
+ struct output_pixel_processor *opp,
+ enum dc_color_space color_space,
+ enum dc_color_depth color_depth,
+ enum signal_type signal)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].params.opp_set_dyn_expansion_params.opp = opp;
+ seq_state->steps[*seq_state->num_steps].params.opp_set_dyn_expansion_params.color_space = color_space;
+ seq_state->steps[*seq_state->num_steps].params.opp_set_dyn_expansion_params.color_depth = color_depth;
+ seq_state->steps[*seq_state->num_steps].params.opp_set_dyn_expansion_params.signal = signal;
+ seq_state->steps[*seq_state->num_steps].func = OPP_SET_DYN_EXPANSION;
+ (*seq_state->num_steps)++;
+ }
+}
+
+/**
+ * Helper function to add OPP program FMT to block sequence
+ */
+void hwss_add_opp_program_fmt(struct block_sequence_state *seq_state,
+ struct output_pixel_processor *opp,
+ struct bit_depth_reduction_params *fmt_bit_depth,
+ struct clamping_and_pixel_encoding_params *clamping)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].params.opp_program_fmt_params.opp = opp;
+ seq_state->steps[*seq_state->num_steps].params.opp_program_fmt_params.fmt_bit_depth = fmt_bit_depth;
+ seq_state->steps[*seq_state->num_steps].params.opp_program_fmt_params.clamping = clamping;
+ seq_state->steps[*seq_state->num_steps].func = OPP_PROGRAM_FMT;
+ (*seq_state->num_steps)++;
+ }
+}
+
+/**
+ * Helper function to add OPP program left edge extra pixel to block sequence
+ */
+void hwss_add_opp_program_left_edge_extra_pixel(struct block_sequence_state *seq_state,
+ struct output_pixel_processor *opp,
+ enum dc_pixel_encoding pixel_encoding,
+ bool is_otg_master)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].func = OPP_PROGRAM_LEFT_EDGE_EXTRA_PIXEL;
+ seq_state->steps[*seq_state->num_steps].params.opp_program_left_edge_extra_pixel_params.opp = opp;
+ seq_state->steps[*seq_state->num_steps].params.opp_program_left_edge_extra_pixel_params.pixel_encoding = pixel_encoding;
+ seq_state->steps[*seq_state->num_steps].params.opp_program_left_edge_extra_pixel_params.is_otg_master = is_otg_master;
+ (*seq_state->num_steps)++;
+ }
+}
+
+/**
+ * Helper function to add ABM set pipe to block sequence
+ */
+void hwss_add_abm_set_pipe(struct block_sequence_state *seq_state,
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].params.set_abm_pipe_params.dc = dc;
+ seq_state->steps[*seq_state->num_steps].params.set_abm_pipe_params.pipe_ctx = pipe_ctx;
+ seq_state->steps[*seq_state->num_steps].func = ABM_SET_PIPE;
+ (*seq_state->num_steps)++;
+ }
+}
+
+/**
+ * Helper function to add ABM set level to block sequence
+ */
+void hwss_add_abm_set_level(struct block_sequence_state *seq_state,
+ struct abm *abm,
+ uint32_t abm_level)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+ seq_state->steps[*seq_state->num_steps].params.set_abm_level_params.abm = abm;
+ seq_state->steps[*seq_state->num_steps].params.set_abm_level_params.abm_level = abm_level;
+ seq_state->steps[*seq_state->num_steps].func = ABM_SET_LEVEL;
+ (*seq_state->num_steps)++;
+ }
+}
+
+/**
+ * Helper function to add TG enable CRTC to block sequence
+ */
+void hwss_add_tg_enable_crtc(struct block_sequence_state *seq_state,
+ struct timing_generator *tg)
+{
+ if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) {
+
seq_state->steps[*seq_state->num_steps].params.tg_enable_crtc_params.tg = tg; + seq_state->steps[*seq_state->num_steps].func = TG_ENABLE_CRTC; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add HUBP wait flip pending to block sequence + */ +void hwss_add_hubp_wait_flip_pending(struct block_sequence_state *seq_state, + struct hubp *hubp, + unsigned int timeout_us, + unsigned int polling_interval_us) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.hubp_wait_flip_pending_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_wait_flip_pending_params.timeout_us = timeout_us; + seq_state->steps[*seq_state->num_steps].params.hubp_wait_flip_pending_params.polling_interval_us = polling_interval_us; + seq_state->steps[*seq_state->num_steps].func = HUBP_WAIT_FLIP_PENDING; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add TG wait double buffer pending to block sequence + */ +void hwss_add_tg_wait_double_buffer_pending(struct block_sequence_state *seq_state, + struct timing_generator *tg, + unsigned int timeout_us, + unsigned int polling_interval_us) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].params.tg_wait_double_buffer_pending_params.tg = tg; + seq_state->steps[*seq_state->num_steps].params.tg_wait_double_buffer_pending_params.timeout_us = timeout_us; + seq_state->steps[*seq_state->num_steps].params.tg_wait_double_buffer_pending_params.polling_interval_us = polling_interval_us; + seq_state->steps[*seq_state->num_steps].func = TG_WAIT_DOUBLE_BUFFER_PENDING; + (*seq_state->num_steps)++; + } +} + +void hwss_program_manual_trigger(union block_sequence_params *params) +{ + struct pipe_ctx *pipe_ctx = params->program_manual_trigger_params.pipe_ctx; + + if (pipe_ctx->stream_res.tg->funcs->program_manual_trigger) + pipe_ctx->stream_res.tg->funcs->program_manual_trigger(pipe_ctx->stream_res.tg); +} + +void hwss_setup_dpp(union block_sequence_params *params) +{ + struct pipe_ctx *pipe_ctx = params->setup_dpp_params.pipe_ctx; + struct dpp *dpp = pipe_ctx->plane_res.dpp; + struct dc_plane_state *plane_state = pipe_ctx->plane_state; + + if (!plane_state) + return; + + if (dpp && dpp->funcs->dpp_setup) { + // program the input csc + dpp->funcs->dpp_setup(dpp, + plane_state->format, + EXPANSION_MODE_ZERO, + plane_state->input_csc_color_matrix, + plane_state->color_space, + NULL); + } +} + +void hwss_program_bias_and_scale(union block_sequence_params *params) +{ + struct pipe_ctx *pipe_ctx = params->program_bias_and_scale_params.pipe_ctx; + struct dpp *dpp = pipe_ctx->plane_res.dpp; + struct dc_plane_state *plane_state = pipe_ctx->plane_state; + struct dc_bias_and_scale bns_params = plane_state->bias_and_scale; + + //TODO :for CNVC set scale and bias registers if necessary + if (dpp->funcs->dpp_program_bias_and_scale) { + dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); + } +} + +void hwss_power_on_mpc_mem_pwr(union block_sequence_params *params) +{ + struct mpc *mpc = params->power_on_mpc_mem_pwr_params.mpc; + int mpcc_id = params->power_on_mpc_mem_pwr_params.mpcc_id; + bool power_on = params->power_on_mpc_mem_pwr_params.power_on; + + if (mpc->funcs->power_on_mpc_mem_pwr) + mpc->funcs->power_on_mpc_mem_pwr(mpc, mpcc_id, power_on); +} + +void hwss_set_output_csc(union block_sequence_params *params) +{ + struct mpc *mpc = params->set_output_csc_params.mpc; + int opp_id = params->set_output_csc_params.opp_id; + const 
uint16_t *matrix = params->set_output_csc_params.regval; + enum mpc_output_csc_mode ocsc_mode = params->set_output_csc_params.ocsc_mode; + + if (mpc->funcs->set_output_csc != NULL) + mpc->funcs->set_output_csc(mpc, + opp_id, + matrix, + ocsc_mode); +} + +void hwss_set_ocsc_default(union block_sequence_params *params) +{ + struct mpc *mpc = params->set_ocsc_default_params.mpc; + int opp_id = params->set_ocsc_default_params.opp_id; + enum dc_color_space colorspace = params->set_ocsc_default_params.color_space; + enum mpc_output_csc_mode ocsc_mode = params->set_ocsc_default_params.ocsc_mode; + + if (mpc->funcs->set_ocsc_default != NULL) + mpc->funcs->set_ocsc_default(mpc, + opp_id, + colorspace, + ocsc_mode); +} + +void hwss_subvp_save_surf_addr(union block_sequence_params *params) +{ + struct dc_dmub_srv *dc_dmub_srv = params->subvp_save_surf_addr.dc_dmub_srv; + const struct dc_plane_address *addr = params->subvp_save_surf_addr.addr; + uint8_t subvp_index = params->subvp_save_surf_addr.subvp_index; + + dc_dmub_srv_subvp_save_surf_addr(dc_dmub_srv, addr, subvp_index); +} + +void hwss_program_surface_config(union block_sequence_params *params) +{ + struct hubp *hubp = params->program_surface_config_params.hubp; + enum surface_pixel_format format = params->program_surface_config_params.format; + struct dc_tiling_info *tiling_info = params->program_surface_config_params.tiling_info; + struct plane_size size = params->program_surface_config_params.plane_size; + enum dc_rotation_angle rotation = params->program_surface_config_params.rotation; + struct dc_plane_dcc_param *dcc = params->program_surface_config_params.dcc; + bool horizontal_mirror = params->program_surface_config_params.horizontal_mirror; + int compat_level = params->program_surface_config_params.compat_level; + + hubp->funcs->hubp_program_surface_config( + hubp, + format, + tiling_info, + &size, + rotation, + dcc, + horizontal_mirror, + compat_level); + + hubp->power_gated = false; +} + +void hwss_program_mcache_id_and_split_coordinate(union block_sequence_params *params) +{ + struct hubp *hubp = params->program_mcache_id_and_split_coordinate.hubp; + struct dml2_hubp_pipe_mcache_regs *mcache_regs = params->program_mcache_id_and_split_coordinate.mcache_regs; + + hubp->funcs->hubp_program_mcache_id_and_split_coordinate(hubp, mcache_regs); + +} + +void get_surface_tile_visual_confirm_color( + struct pipe_ctx *pipe_ctx, + struct tg_color *color) +{ + uint32_t color_value = MAX_TG_COLOR_VALUE; + /* Determine the overscan color based on the bottom-most plane's context */ + struct pipe_ctx *bottom_pipe_ctx = pipe_ctx; + + while (bottom_pipe_ctx->bottom_pipe != NULL) + bottom_pipe_ctx = bottom_pipe_ctx->bottom_pipe; + + switch (bottom_pipe_ctx->plane_state->tiling_info.gfx9.swizzle) { + case DC_SW_LINEAR: + /* LINEAR Surface - set border color to red */ + color->color_r_cr = color_value; + break; + default: + break; + } +} + +/** + * hwss_wait_for_all_blank_complete - wait for all active OPPs to finish pending blank + * pattern updates + * + * @dc: [in] dc reference + * @context: [in] hardware context in use + */ +void hwss_wait_for_all_blank_complete(struct dc *dc, + struct dc_state *context) +{ + struct pipe_ctx *opp_head; + struct dce_hwseq *hws = dc->hwseq; + int i; + + if (!hws->funcs.wait_for_blank_complete) + return; + + for (i = 0; i < MAX_PIPES; i++) { + opp_head = &context->res_ctx.pipe_ctx[i]; + + if (!resource_is_pipe_type(opp_head, OPP_HEAD) || + dc_state_get_pipe_subvp_type(context, opp_head) == SUBVP_PHANTOM) + continue; + + 
hws->funcs.wait_for_blank_complete(opp_head->stream_res.opp); + } +} + +void hwss_wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *context) +{ + struct pipe_ctx *otg_master; + struct timing_generator *tg; + int i; + + for (i = 0; i < MAX_PIPES; i++) { + otg_master = &context->res_ctx.pipe_ctx[i]; + if (!resource_is_pipe_type(otg_master, OTG_MASTER) || + dc_state_get_pipe_subvp_type(context, otg_master) == SUBVP_PHANTOM) + continue; + tg = otg_master->stream_res.tg; + if (tg->funcs->wait_odm_doublebuffer_pending_clear) + tg->funcs->wait_odm_doublebuffer_pending_clear(tg); + if (tg->funcs->wait_otg_disable) + tg->funcs->wait_otg_disable(tg); + } + + /* ODM update may require to reprogram blank pattern for each OPP */ + hwss_wait_for_all_blank_complete(dc, context); +} + +void hwss_wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) +{ + int i; + for (i = 0; i < MAX_PIPES; i++) { + int count = 0; + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) + continue; + + /* Timeout 100 ms */ + while (count < 100000) { + /* Must set to false to start with, due to OR in update function */ + pipe->plane_state->status.is_flip_pending = false; + dc->hwss.update_pending_status(pipe); + if (!pipe->plane_state->status.is_flip_pending) + break; + udelay(1); + count++; + } + ASSERT(!pipe->plane_state->status.is_flip_pending); + } +} + +void hwss_wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) +{ +/* + * This function calls HWSS to wait for any potentially double buffered + * operations to complete. It should be invoked as a pre-amble prior + * to full update programming before asserting any HW locks. + */ + int pipe_idx; + int opp_inst; + int opp_count = dc->res_pool->res_cap->num_opp; + struct hubp *hubp; + int mpcc_inst; + const struct pipe_ctx *pipe_ctx; + + for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) { + pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx]; + + if (!pipe_ctx->stream) + continue; + + /* For full update we must wait for all double buffer updates, not just DRR updates. This + * is particularly important for minimal transitions. 
Only check for OTG_MASTER pipes,
+ * as non-OTG Master pipes share the same OTG as their OTG master.
+ */
+ if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && dc->hwss.wait_for_all_pending_updates) {
+ dc->hwss.wait_for_all_pending_updates(pipe_ctx);
+ }
+
+ hubp = pipe_ctx->plane_res.hubp;
+ if (!hubp)
+ continue;
+
+ mpcc_inst = hubp->inst;
+ // MPCC inst is equal to pipe index in practice
+ for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
+ if ((dc->res_pool->opps[opp_inst] != NULL) &&
+ (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst])) {
+ dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
+ dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false;
+ break;
+ }
+ }
+ }
+ hwss_wait_for_odm_update_pending_complete(dc, dc_context);
+}
+
+void hwss_process_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context)
+{
+ /* wait for outstanding updates */
+ hwss_wait_for_outstanding_hw_updates(dc, dc_context);
+
+ /* perform outstanding post update programming */
+ if (dc->hwss.program_outstanding_updates)
+ dc->hwss.program_outstanding_updates(dc, dc_context);
+}
+
+void hwss_set_odm_combine(union block_sequence_params *params)
+{
+ struct timing_generator *tg = params->set_odm_combine_params.tg;
+ int *opp_inst = params->set_odm_combine_params.opp_inst;
+ int opp_head_count = params->set_odm_combine_params.opp_head_count;
+ int odm_slice_width = params->set_odm_combine_params.odm_slice_width;
+ int last_odm_slice_width = params->set_odm_combine_params.last_odm_slice_width;
+
+ if (tg && tg->funcs->set_odm_combine)
+ tg->funcs->set_odm_combine(tg, opp_inst, opp_head_count,
+ odm_slice_width, last_odm_slice_width);
+}
+
+void hwss_set_odm_bypass(union block_sequence_params *params)
+{
+ struct timing_generator *tg = params->set_odm_bypass_params.tg;
+ const struct dc_crtc_timing *timing = params->set_odm_bypass_params.timing;
+
+ if (tg && tg->funcs->set_odm_bypass)
+ tg->funcs->set_odm_bypass(tg, timing);
+}
+
+void hwss_opp_pipe_clock_control(union block_sequence_params *params)
+{
+ struct output_pixel_processor *opp = params->opp_pipe_clock_control_params.opp;
+ bool enable = params->opp_pipe_clock_control_params.enable;
+
+ if (opp && opp->funcs->opp_pipe_clock_control)
+ opp->funcs->opp_pipe_clock_control(opp, enable);
+}
+
+void hwss_opp_program_left_edge_extra_pixel(union block_sequence_params *params)
+{
+ struct output_pixel_processor *opp = params->opp_program_left_edge_extra_pixel_params.opp;
+ enum dc_pixel_encoding pixel_encoding = params->opp_program_left_edge_extra_pixel_params.pixel_encoding;
+ bool is_otg_master = params->opp_program_left_edge_extra_pixel_params.is_otg_master;
+
+ if (opp && opp->funcs->opp_program_left_edge_extra_pixel)
+ opp->funcs->opp_program_left_edge_extra_pixel(opp, pixel_encoding, is_otg_master);
+}
+
+void hwss_dccg_set_dto_dscclk(union block_sequence_params *params)
+{
+ struct dccg *dccg = params->dccg_set_dto_dscclk_params.dccg;
+ int inst = params->dccg_set_dto_dscclk_params.inst;
+ int num_slices_h = params->dccg_set_dto_dscclk_params.num_slices_h;
+
+ if (dccg && dccg->funcs->set_dto_dscclk)
+ dccg->funcs->set_dto_dscclk(dccg, inst, num_slices_h);
+}
+
+void hwss_dsc_set_config(union block_sequence_params *params)
+{
+ struct display_stream_compressor *dsc = params->dsc_set_config_params.dsc;
+ struct dsc_config *dsc_cfg = params->dsc_set_config_params.dsc_cfg;
+ struct dsc_optc_config *dsc_optc_cfg = params->dsc_set_config_params.dsc_optc_cfg;
+
+ if (dsc && dsc->funcs->dsc_set_config)
+
dsc->funcs->dsc_set_config(dsc, dsc_cfg, dsc_optc_cfg);
+}
+
+void hwss_dsc_enable(union block_sequence_params *params)
+{
+ struct display_stream_compressor *dsc = params->dsc_enable_params.dsc;
+ int opp_inst = params->dsc_enable_params.opp_inst;
+
+ if (dsc && dsc->funcs->dsc_enable)
+ dsc->funcs->dsc_enable(dsc, opp_inst);
+}
+
+void hwss_tg_set_dsc_config(union block_sequence_params *params)
+{
+ struct timing_generator *tg = params->tg_set_dsc_config_params.tg;
+ enum optc_dsc_mode optc_dsc_mode = OPTC_DSC_DISABLED;
+ uint32_t bytes_per_pixel = 0;
+ uint32_t slice_width = 0;
+
+ if (params->tg_set_dsc_config_params.enable) {
+ struct dsc_optc_config *dsc_optc_cfg = params->tg_set_dsc_config_params.dsc_optc_cfg;
+
+ if (dsc_optc_cfg) {
+ bytes_per_pixel = dsc_optc_cfg->bytes_per_pixel;
+ slice_width = dsc_optc_cfg->slice_width;
+ optc_dsc_mode = dsc_optc_cfg->is_pixel_format_444 ?
+ OPTC_DSC_ENABLED_444 : OPTC_DSC_ENABLED_NATIVE_SUBSAMPLED;
+ }
+ }
+
+ if (tg && tg->funcs->set_dsc_config)
+ tg->funcs->set_dsc_config(tg, optc_dsc_mode, bytes_per_pixel, slice_width);
+}
+
+void hwss_dsc_disconnect(union block_sequence_params *params)
+{
+ struct display_stream_compressor *dsc = params->dsc_disconnect_params.dsc;
+
+ if (dsc && dsc->funcs->dsc_disconnect)
+ dsc->funcs->dsc_disconnect(dsc);
+}
+
+void hwss_dsc_read_state(union block_sequence_params *params)
+{
+ struct display_stream_compressor *dsc = params->dsc_read_state_params.dsc;
+ struct dcn_dsc_state *dsc_state = params->dsc_read_state_params.dsc_state;
+
+ if (dsc && dsc->funcs->dsc_read_state)
+ dsc->funcs->dsc_read_state(dsc, dsc_state);
+}
+
+void hwss_dsc_calculate_and_set_config(union block_sequence_params *params)
+{
+ struct pipe_ctx *pipe_ctx = params->dsc_calculate_and_set_config_params.pipe_ctx;
+ struct pipe_ctx *top_pipe = pipe_ctx;
+ bool enable = params->dsc_calculate_and_set_config_params.enable;
+ int opp_cnt = params->dsc_calculate_and_set_config_params.opp_cnt;
+
+ struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
+ struct dc_stream_state *stream = pipe_ctx->stream;
+ struct dsc_config dsc_cfg;
+
+ if (!dsc || !enable)
+ return;
+
+ /* Calculate DSC configuration - extracted from dcn32_update_dsc_on_stream */
+ while (top_pipe->prev_odm_pipe)
+ top_pipe = top_pipe->prev_odm_pipe;
+
+ dsc_cfg.pic_width = (stream->timing.h_addressable + top_pipe->dsc_padding_params.dsc_hactive_padding +
+ stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
+ dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
+ dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
+ dsc_cfg.color_depth = stream->timing.display_color_depth;
+ dsc_cfg.is_odm = top_pipe->next_odm_pipe ?
true : false;
+ dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg;
+ dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt;
+ dsc_cfg.dsc_padding = top_pipe->dsc_padding_params.dsc_hactive_padding;
+
+ /* Set DSC configuration */
+ if (dsc->funcs->dsc_set_config)
+ dsc->funcs->dsc_set_config(dsc, &dsc_cfg,
+ &params->dsc_calculate_and_set_config_params.dsc_optc_cfg);
+}
+
+void hwss_dsc_enable_with_opp(union block_sequence_params *params)
+{
+ struct pipe_ctx *pipe_ctx = params->dsc_enable_with_opp_params.pipe_ctx;
+ struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
+
+ if (dsc && dsc->funcs->dsc_enable)
+ dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst);
+}
+
+void hwss_tg_program_global_sync(union block_sequence_params *params)
+{
+ struct timing_generator *tg = params->tg_program_global_sync_params.tg;
+ int vready_offset = params->tg_program_global_sync_params.vready_offset;
+ unsigned int vstartup_lines = params->tg_program_global_sync_params.vstartup_lines;
+ unsigned int vupdate_offset_pixels = params->tg_program_global_sync_params.vupdate_offset_pixels;
+ unsigned int vupdate_vupdate_width_pixels = params->tg_program_global_sync_params.vupdate_vupdate_width_pixels;
+ unsigned int pstate_keepout_start_lines = params->tg_program_global_sync_params.pstate_keepout_start_lines;
+
+ if (tg->funcs->program_global_sync) {
+ tg->funcs->program_global_sync(tg, vready_offset, vstartup_lines,
+ vupdate_offset_pixels, vupdate_vupdate_width_pixels, pstate_keepout_start_lines);
+ }
+}
+
+void hwss_tg_wait_for_state(union block_sequence_params *params)
+{
+ struct timing_generator *tg = params->tg_wait_for_state_params.tg;
+ enum crtc_state state = params->tg_wait_for_state_params.state;
+
+ if (tg->funcs->wait_for_state) {
+ tg->funcs->wait_for_state(tg, state);
+ }
+}
+
+void hwss_tg_set_vtg_params(union block_sequence_params *params)
+{
+ struct timing_generator *tg = params->tg_set_vtg_params_params.tg;
+ struct dc_crtc_timing *timing = params->tg_set_vtg_params_params.timing;
+ bool program_fp2 = params->tg_set_vtg_params_params.program_fp2;
+
+ if (tg->funcs->set_vtg_params) {
+ tg->funcs->set_vtg_params(tg, timing, program_fp2);
+ }
+}
+
+void hwss_tg_setup_vertical_interrupt2(union block_sequence_params *params)
+{
+ struct timing_generator *tg = params->tg_setup_vertical_interrupt2_params.tg;
+ int start_line = params->tg_setup_vertical_interrupt2_params.start_line;
+
+ if (tg->funcs->setup_vertical_interrupt2) {
+ tg->funcs->setup_vertical_interrupt2(tg, start_line);
+ }
+}
+
+void hwss_dpp_set_hdr_multiplier(union block_sequence_params *params)
+{
+ struct dpp *dpp = params->dpp_set_hdr_multiplier_params.dpp;
+ uint32_t hw_mult = params->dpp_set_hdr_multiplier_params.hw_mult;
+
+ if (dpp->funcs->dpp_set_hdr_multiplier) {
+ dpp->funcs->dpp_set_hdr_multiplier(dpp, hw_mult);
+ }
+}
+
+void hwss_program_det_size(union block_sequence_params *params)
+{
+ struct hubbub *hubbub = params->program_det_size_params.hubbub;
+ unsigned int hubp_inst = params->program_det_size_params.hubp_inst;
+ unsigned int det_buffer_size_kb = params->program_det_size_params.det_buffer_size_kb;
+
+ if (hubbub->funcs->program_det_size) {
+ hubbub->funcs->program_det_size(hubbub, hubp_inst, det_buffer_size_kb);
+ }
+}
+
+void hwss_program_det_segments(union block_sequence_params *params)
+{
+ struct hubbub *hubbub = params->program_det_segments_params.hubbub;
+ unsigned int hubp_inst = params->program_det_segments_params.hubp_inst;
+ unsigned int det_size = params->program_det_segments_params.det_size;
+
+ if
(hubbub->funcs->program_det_segments) { + hubbub->funcs->program_det_segments(hubbub, hubp_inst, det_size); + } +} + +void hwss_opp_set_dyn_expansion(union block_sequence_params *params) +{ + struct output_pixel_processor *opp = params->opp_set_dyn_expansion_params.opp; + enum dc_color_space color_space = params->opp_set_dyn_expansion_params.color_space; + enum dc_color_depth color_depth = params->opp_set_dyn_expansion_params.color_depth; + enum signal_type signal = params->opp_set_dyn_expansion_params.signal; + + if (opp->funcs->opp_set_dyn_expansion) { + opp->funcs->opp_set_dyn_expansion(opp, color_space, color_depth, signal); + } +} + +void hwss_opp_program_fmt(union block_sequence_params *params) +{ + struct output_pixel_processor *opp = params->opp_program_fmt_params.opp; + struct bit_depth_reduction_params *fmt_bit_depth = params->opp_program_fmt_params.fmt_bit_depth; + struct clamping_and_pixel_encoding_params *clamping = params->opp_program_fmt_params.clamping; + + if (opp->funcs->opp_program_fmt) { + opp->funcs->opp_program_fmt(opp, fmt_bit_depth, clamping); + } +} + +void hwss_opp_program_bit_depth_reduction(union block_sequence_params *params) +{ + struct output_pixel_processor *opp = params->opp_program_bit_depth_reduction_params.opp; + bool use_default_params = params->opp_program_bit_depth_reduction_params.use_default_params; + struct pipe_ctx *pipe_ctx = params->opp_program_bit_depth_reduction_params.pipe_ctx; + struct bit_depth_reduction_params bit_depth_params; + + if (use_default_params) { + memset(&bit_depth_params, 0, sizeof(bit_depth_params)); + } else { + resource_build_bit_depth_reduction_params(pipe_ctx->stream, &bit_depth_params); + } + + if (opp->funcs->opp_program_bit_depth_reduction) { + opp->funcs->opp_program_bit_depth_reduction(opp, &bit_depth_params); + } +} + +void hwss_opp_set_disp_pattern_generator(union block_sequence_params *params) +{ + struct output_pixel_processor *opp = params->opp_set_disp_pattern_generator_params.opp; + enum controller_dp_test_pattern test_pattern = params->opp_set_disp_pattern_generator_params.test_pattern; + enum controller_dp_color_space color_space = params->opp_set_disp_pattern_generator_params.color_space; + enum dc_color_depth color_depth = params->opp_set_disp_pattern_generator_params.color_depth; + struct tg_color *solid_color = params->opp_set_disp_pattern_generator_params.use_solid_color ? 
+ &params->opp_set_disp_pattern_generator_params.solid_color : NULL;
+ int width = params->opp_set_disp_pattern_generator_params.width;
+ int height = params->opp_set_disp_pattern_generator_params.height;
+ int offset = params->opp_set_disp_pattern_generator_params.offset;
+
+ if (opp && opp->funcs->opp_set_disp_pattern_generator) {
+ opp->funcs->opp_set_disp_pattern_generator(opp, test_pattern, color_space,
+ color_depth, solid_color, width, height, offset);
+ }
+}
+
+void hwss_set_abm_pipe(union block_sequence_params *params)
+{
+ struct dc *dc = params->set_abm_pipe_params.dc;
+ struct pipe_ctx *pipe_ctx = params->set_abm_pipe_params.pipe_ctx;
+
+ dc->hwss.set_pipe(pipe_ctx);
+}
+
+void hwss_set_abm_level(union block_sequence_params *params)
+{
+ struct abm *abm = params->set_abm_level_params.abm;
+ unsigned int abm_level = params->set_abm_level_params.abm_level;
+
+ if (abm->funcs->set_abm_level) {
+ abm->funcs->set_abm_level(abm, abm_level);
+ }
+}
+
+void hwss_set_abm_immediate_disable(union block_sequence_params *params)
+{
+ struct dc *dc = params->set_abm_immediate_disable_params.dc;
+ struct pipe_ctx *pipe_ctx = params->set_abm_immediate_disable_params.pipe_ctx;
+
+ if (dc && dc->hwss.set_abm_immediate_disable) {
+ dc->hwss.set_abm_immediate_disable(pipe_ctx);
+ }
+}
+
+void hwss_mpc_remove_mpcc(union block_sequence_params *params)
+{
+ struct mpc *mpc = params->mpc_remove_mpcc_params.mpc;
+ struct mpc_tree *mpc_tree_params = params->mpc_remove_mpcc_params.mpc_tree_params;
+ struct mpcc *mpcc_to_remove = params->mpc_remove_mpcc_params.mpcc_to_remove;
+
+ mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove);
+}
+
+void hwss_opp_set_mpcc_disconnect_pending(union block_sequence_params *params)
+{
+ struct output_pixel_processor *opp = params->opp_set_mpcc_disconnect_pending_params.opp;
+ int mpcc_inst = params->opp_set_mpcc_disconnect_pending_params.mpcc_inst;
+ bool pending = params->opp_set_mpcc_disconnect_pending_params.pending;
+
+ opp->mpcc_disconnect_pending[mpcc_inst] = pending;
+}
+
+void hwss_dc_set_optimized_required(union block_sequence_params *params)
+{
+ struct dc *dc = params->dc_set_optimized_required_params.dc;
+ bool optimized_required = params->dc_set_optimized_required_params.optimized_required;
+
+ dc->optimized_required = optimized_required;
+}
+
+void hwss_hubp_disconnect(union block_sequence_params *params)
+{
+ struct hubp *hubp = params->hubp_disconnect_params.hubp;
+
+ if (hubp->funcs->hubp_disconnect)
+ hubp->funcs->hubp_disconnect(hubp);
+}
+
+void hwss_hubbub_force_pstate_change_control(union block_sequence_params *params)
+{
+ struct hubbub *hubbub = params->hubbub_force_pstate_change_control_params.hubbub;
+ bool enable = params->hubbub_force_pstate_change_control_params.enable;
+ bool wait = params->hubbub_force_pstate_change_control_params.wait;
+
+ if (hubbub->funcs->force_pstate_change_control) {
+ hubbub->funcs->force_pstate_change_control(hubbub, enable, wait);
+ /* Add delay when enabling pstate change control */
+ if (enable)
+ udelay(500);
+ }
+}
+
+void hwss_tg_enable_crtc(union block_sequence_params *params)
+{
+ struct timing_generator *tg = params->tg_enable_crtc_params.tg;
+
+ if (tg->funcs->enable_crtc)
+ tg->funcs->enable_crtc(tg);
+}
+
+void hwss_tg_set_gsl(union block_sequence_params *params)
+{
+ struct timing_generator *tg = params->tg_set_gsl_params.tg;
+ struct gsl_params *gsl = &params->tg_set_gsl_params.gsl;
+
+ if (tg->funcs->set_gsl)
+ tg->funcs->set_gsl(tg, gsl);
+}
+
+void hwss_tg_set_gsl_source_select(union
block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_set_gsl_source_select_params.tg;
+	int group_idx = params->tg_set_gsl_source_select_params.group_idx;
+	uint32_t gsl_ready_signal = params->tg_set_gsl_source_select_params.gsl_ready_signal;
+
+	if (tg->funcs->set_gsl_source_select)
+		tg->funcs->set_gsl_source_select(tg, group_idx, gsl_ready_signal);
+}
+
+void hwss_hubp_wait_flip_pending(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_wait_flip_pending_params.hubp;
+	unsigned int timeout_us = params->hubp_wait_flip_pending_params.timeout_us;
+	unsigned int polling_interval_us = params->hubp_wait_flip_pending_params.polling_interval_us;
+	int j = 0;
+
+	for (j = 0; j < timeout_us / polling_interval_us
+			&& hubp->funcs->hubp_is_flip_pending(hubp); j++)
+		udelay(polling_interval_us);
+}
+
+void hwss_tg_wait_double_buffer_pending(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_wait_double_buffer_pending_params.tg;
+	unsigned int timeout_us = params->tg_wait_double_buffer_pending_params.timeout_us;
+	unsigned int polling_interval_us = params->tg_wait_double_buffer_pending_params.polling_interval_us;
+	int j = 0;
+
+	if (tg->funcs->get_optc_double_buffer_pending) {
+		for (j = 0; j < timeout_us / polling_interval_us
+				&& tg->funcs->get_optc_double_buffer_pending(tg); j++)
+			udelay(polling_interval_us);
+	}
+}
+
+void hwss_update_force_pstate(union block_sequence_params *params)
+{
+	struct dc *dc = params->update_force_pstate_params.dc;
+	struct dc_state *context = params->update_force_pstate_params.context;
+	struct dce_hwseq *hwseq = dc->hwseq;
+
+	if (hwseq->funcs.update_force_pstate)
+		hwseq->funcs.update_force_pstate(dc, context);
+}
+
+void hwss_hubbub_apply_dedcn21_147_wa(union block_sequence_params *params)
+{
+	struct hubbub *hubbub = params->hubbub_apply_dedcn21_147_wa_params.hubbub;
+
+	if (hubbub && hubbub->funcs->apply_DEDCN21_147_wa)
+		hubbub->funcs->apply_DEDCN21_147_wa(hubbub);
+}
+
+void hwss_hubbub_allow_self_refresh_control(union block_sequence_params *params)
+{
+	struct hubbub *hubbub = params->hubbub_allow_self_refresh_control_params.hubbub;
+	bool allow = params->hubbub_allow_self_refresh_control_params.allow;
+
+	if (hubbub && hubbub->funcs->allow_self_refresh_control) {
+		hubbub->funcs->allow_self_refresh_control(hubbub, allow);
+
+		if (!allow && params->hubbub_allow_self_refresh_control_params.disallow_self_refresh_applied)
+			*params->hubbub_allow_self_refresh_control_params.disallow_self_refresh_applied = true;
+	}
+}
+
+void hwss_tg_get_frame_count(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->tg_get_frame_count_params.tg;
+	unsigned int *frame_count = params->tg_get_frame_count_params.frame_count;
+
+	*frame_count = tg->funcs->get_frame_count(tg);
+}
+
+void hwss_mpc_set_dwb_mux(union block_sequence_params *params)
+{
+	struct mpc *mpc = params->mpc_set_dwb_mux_params.mpc;
+	int dwb_id = params->mpc_set_dwb_mux_params.dwb_id;
+	int mpcc_id = params->mpc_set_dwb_mux_params.mpcc_id;
+
+	if (mpc->funcs->set_dwb_mux)
+		mpc->funcs->set_dwb_mux(mpc, dwb_id, mpcc_id);
+}
+
+void hwss_mpc_disable_dwb_mux(union block_sequence_params *params)
+{
+	struct mpc *mpc = params->mpc_disable_dwb_mux_params.mpc;
+	unsigned int dwb_id = params->mpc_disable_dwb_mux_params.dwb_id;
+
+	if (mpc->funcs->disable_dwb_mux)
+		mpc->funcs->disable_dwb_mux(mpc, dwb_id);
+}
+
+void hwss_mcif_wb_config_buf(union block_sequence_params *params)
+{
+	struct mcif_wb *mcif_wb = params->mcif_wb_config_buf_params.mcif_wb;
+	struct mcif_buf_params *mcif_buf_params = params->mcif_wb_config_buf_params.mcif_buf_params;
+	unsigned int dest_height = params->mcif_wb_config_buf_params.dest_height;
+
+	if (mcif_wb->funcs->config_mcif_buf)
+		mcif_wb->funcs->config_mcif_buf(mcif_wb, mcif_buf_params, dest_height);
+}
+
+void hwss_mcif_wb_config_arb(union block_sequence_params *params)
+{
+	struct mcif_wb *mcif_wb = params->mcif_wb_config_arb_params.mcif_wb;
+	struct mcif_arb_params *mcif_arb_params = params->mcif_wb_config_arb_params.mcif_arb_params;
+
+	if (mcif_wb->funcs->config_mcif_arb)
+		mcif_wb->funcs->config_mcif_arb(mcif_wb, mcif_arb_params);
+}
+
+void hwss_mcif_wb_enable(union block_sequence_params *params)
+{
+	struct mcif_wb *mcif_wb = params->mcif_wb_enable_params.mcif_wb;
+
+	if (mcif_wb->funcs->enable_mcif)
+		mcif_wb->funcs->enable_mcif(mcif_wb);
+}
+
+void hwss_mcif_wb_disable(union block_sequence_params *params)
+{
+	struct mcif_wb *mcif_wb = params->mcif_wb_disable_params.mcif_wb;
+
+	if (mcif_wb->funcs->disable_mcif)
+		mcif_wb->funcs->disable_mcif(mcif_wb);
+}
+
+void hwss_dwbc_enable(union block_sequence_params *params)
+{
+	struct dwbc *dwb = params->dwbc_enable_params.dwb;
+	struct dc_dwb_params *dwb_params = params->dwbc_enable_params.dwb_params;
+
+	if (dwb->funcs->enable)
+		dwb->funcs->enable(dwb, dwb_params);
+}
+
+void hwss_dwbc_disable(union block_sequence_params *params)
+{
+	struct dwbc *dwb = params->dwbc_disable_params.dwb;
+
+	if (dwb->funcs->disable)
+		dwb->funcs->disable(dwb);
+}
+
+void hwss_dwbc_update(union block_sequence_params *params)
+{
+	struct dwbc *dwb = params->dwbc_update_params.dwb;
+	struct dc_dwb_params *dwb_params = params->dwbc_update_params.dwb_params;
+
+	if (dwb->funcs->update)
+		dwb->funcs->update(dwb, dwb_params);
+}
+
+void hwss_hubp_update_mall_sel(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_update_mall_sel_params.hubp;
+	uint32_t mall_sel = params->hubp_update_mall_sel_params.mall_sel;
+	bool cache_cursor = params->hubp_update_mall_sel_params.cache_cursor;
+
+	if (hubp && hubp->funcs->hubp_update_mall_sel)
+		hubp->funcs->hubp_update_mall_sel(hubp, mall_sel, cache_cursor);
+}
+
+void hwss_hubp_prepare_subvp_buffering(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_prepare_subvp_buffering_params.hubp;
+	bool enable = params->hubp_prepare_subvp_buffering_params.enable;
+
+	if (hubp && hubp->funcs->hubp_prepare_subvp_buffering)
+		hubp->funcs->hubp_prepare_subvp_buffering(hubp, enable);
+}
+
+void hwss_hubp_set_blank_en(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_set_blank_en_params.hubp;
+	bool enable = params->hubp_set_blank_en_params.enable;
+
+	if (hubp && hubp->funcs->set_hubp_blank_en)
+		hubp->funcs->set_hubp_blank_en(hubp, enable);
+}
+
+void hwss_hubp_disable_control(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_disable_control_params.hubp;
+	bool disable = params->hubp_disable_control_params.disable;
+
+	if (hubp && hubp->funcs->hubp_disable_control)
+		hubp->funcs->hubp_disable_control(hubp, disable);
+}
+
+void hwss_hubbub_soft_reset(union block_sequence_params *params)
+{
+	struct hubbub *hubbub = params->hubbub_soft_reset_params.hubbub;
+	bool reset = params->hubbub_soft_reset_params.reset;
+
+	if (hubbub && params->hubbub_soft_reset_params.hubbub_soft_reset)
+		params->hubbub_soft_reset_params.hubbub_soft_reset(hubbub, reset);
+}
+
+void hwss_hubp_clk_cntl(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_clk_cntl_params.hubp;
+	bool enable = params->hubp_clk_cntl_params.enable;
+
+	if (hubp && hubp->funcs->hubp_clk_cntl) {
+		hubp->funcs->hubp_clk_cntl(hubp, enable);
+		hubp->power_gated = !enable;
+	}
+}
+
+void hwss_hubp_init(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_init_params.hubp;
+
+	if (hubp && hubp->funcs->hubp_init)
+		hubp->funcs->hubp_init(hubp);
+}
+
+void hwss_hubp_set_vm_system_aperture_settings(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_set_vm_system_aperture_settings_params.hubp;
+	struct vm_system_aperture_param apt;
+
+	apt.sys_default = params->hubp_set_vm_system_aperture_settings_params.sys_default;
+	apt.sys_high = params->hubp_set_vm_system_aperture_settings_params.sys_high;
+	apt.sys_low = params->hubp_set_vm_system_aperture_settings_params.sys_low;
+
+	if (hubp && hubp->funcs->hubp_set_vm_system_aperture_settings)
+		hubp->funcs->hubp_set_vm_system_aperture_settings(hubp, &apt);
+}
+
+void hwss_hubp_set_flip_int(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_set_flip_int_params.hubp;
+
+	if (hubp && hubp->funcs->hubp_set_flip_int)
+		hubp->funcs->hubp_set_flip_int(hubp);
+}
+
+void hwss_dpp_dppclk_control(union block_sequence_params *params)
+{
+	struct dpp *dpp = params->dpp_dppclk_control_params.dpp;
+	bool dppclk_div = params->dpp_dppclk_control_params.dppclk_div;
+	bool enable = params->dpp_dppclk_control_params.enable;
+
+	if (dpp && dpp->funcs->dpp_dppclk_control)
+		dpp->funcs->dpp_dppclk_control(dpp, dppclk_div, enable);
+}
+
+void hwss_disable_phantom_crtc(union block_sequence_params *params)
+{
+	struct timing_generator *tg = params->disable_phantom_crtc_params.tg;
+
+	if (tg && tg->funcs->disable_phantom_crtc)
+		tg->funcs->disable_phantom_crtc(tg);
+}
+
+void hwss_dsc_pg_status(union block_sequence_params *params)
+{
+	struct dce_hwseq *hws = params->dsc_pg_status_params.hws;
+	int dsc_inst = params->dsc_pg_status_params.dsc_inst;
+
+	if (hws && hws->funcs.dsc_pg_status)
+		params->dsc_pg_status_params.is_ungated = hws->funcs.dsc_pg_status(hws, dsc_inst);
+}
+
+void hwss_dsc_wait_disconnect_pending_clear(union block_sequence_params *params)
+{
+	struct display_stream_compressor *dsc = params->dsc_wait_disconnect_pending_clear_params.dsc;
+
+	if (!params->dsc_wait_disconnect_pending_clear_params.is_ungated ||
+	    !*params->dsc_wait_disconnect_pending_clear_params.is_ungated)
+		return;
+
+	if (dsc && dsc->funcs->dsc_wait_disconnect_pending_clear)
+		dsc->funcs->dsc_wait_disconnect_pending_clear(dsc);
+}
+
+void hwss_dsc_disable(union block_sequence_params *params)
+{
+	struct display_stream_compressor *dsc = params->dsc_disable_params.dsc;
+
+	if (!params->dsc_disable_params.is_ungated ||
+	    !*params->dsc_disable_params.is_ungated)
+		return;
+
+	if (dsc && dsc->funcs->dsc_disable)
+		dsc->funcs->dsc_disable(dsc);
+}
+
+void hwss_dccg_set_ref_dscclk(union block_sequence_params *params)
+{
+	struct dccg *dccg = params->dccg_set_ref_dscclk_params.dccg;
+	int dsc_inst = params->dccg_set_ref_dscclk_params.dsc_inst;
+
+	if (!params->dccg_set_ref_dscclk_params.is_ungated ||
+	    !*params->dccg_set_ref_dscclk_params.is_ungated)
+		return;
+
+	if (dccg && dccg->funcs->set_ref_dscclk)
+		dccg->funcs->set_ref_dscclk(dccg, dsc_inst);
+}
+
+void hwss_dpp_pg_control(union block_sequence_params *params)
+{
+	struct dce_hwseq *hws =
params->dpp_pg_control_params.hws; + unsigned int dpp_inst = params->dpp_pg_control_params.dpp_inst; + bool power_on = params->dpp_pg_control_params.power_on; + + if (hws->funcs.dpp_pg_control) + hws->funcs.dpp_pg_control(hws, dpp_inst, power_on); +} + +void hwss_hubp_pg_control(union block_sequence_params *params) +{ + struct dce_hwseq *hws = params->hubp_pg_control_params.hws; + unsigned int hubp_inst = params->hubp_pg_control_params.hubp_inst; + bool power_on = params->hubp_pg_control_params.power_on; + + if (hws->funcs.hubp_pg_control) + hws->funcs.hubp_pg_control(hws, hubp_inst, power_on); +} + +void hwss_hubp_reset(union block_sequence_params *params) +{ + struct hubp *hubp = params->hubp_reset_params.hubp; + + if (hubp && hubp->funcs->hubp_reset) + hubp->funcs->hubp_reset(hubp); +} + +void hwss_dpp_reset(union block_sequence_params *params) +{ + struct dpp *dpp = params->dpp_reset_params.dpp; + + if (dpp && dpp->funcs->dpp_reset) + dpp->funcs->dpp_reset(dpp); +} + +void hwss_dpp_root_clock_control(union block_sequence_params *params) +{ + struct dce_hwseq *hws = params->dpp_root_clock_control_params.hws; + unsigned int dpp_inst = params->dpp_root_clock_control_params.dpp_inst; + bool clock_on = params->dpp_root_clock_control_params.clock_on; + + if (hws->funcs.dpp_root_clock_control) + hws->funcs.dpp_root_clock_control(hws, dpp_inst, clock_on); +} + +void hwss_dc_ip_request_cntl(union block_sequence_params *params) +{ + struct dc *dc = params->dc_ip_request_cntl_params.dc; + bool enable = params->dc_ip_request_cntl_params.enable; + struct dce_hwseq *hws = dc->hwseq; + + if (hws->funcs.dc_ip_request_cntl) + hws->funcs.dc_ip_request_cntl(dc, enable); +} + +void hwss_dccg_update_dpp_dto(union block_sequence_params *params) +{ + struct dccg *dccg = params->dccg_update_dpp_dto_params.dccg; + int dpp_inst = params->dccg_update_dpp_dto_params.dpp_inst; + int dppclk_khz = params->dccg_update_dpp_dto_params.dppclk_khz; + + if (dccg && dccg->funcs->update_dpp_dto) + dccg->funcs->update_dpp_dto(dccg, dpp_inst, dppclk_khz); +} + +void hwss_hubp_vtg_sel(union block_sequence_params *params) +{ + struct hubp *hubp = params->hubp_vtg_sel_params.hubp; + uint32_t otg_inst = params->hubp_vtg_sel_params.otg_inst; + + if (hubp && hubp->funcs->hubp_vtg_sel) + hubp->funcs->hubp_vtg_sel(hubp, otg_inst); +} + +void hwss_hubp_setup2(union block_sequence_params *params) +{ + struct hubp *hubp = params->hubp_setup2_params.hubp; + struct dml2_dchub_per_pipe_register_set *hubp_regs = params->hubp_setup2_params.hubp_regs; + union dml2_global_sync_programming *global_sync = params->hubp_setup2_params.global_sync; + struct dc_crtc_timing *timing = params->hubp_setup2_params.timing; + + if (hubp && hubp->funcs->hubp_setup2) + hubp->funcs->hubp_setup2(hubp, hubp_regs, global_sync, timing); +} + +void hwss_hubp_setup(union block_sequence_params *params) +{ + struct hubp *hubp = params->hubp_setup_params.hubp; + struct _vcs_dpi_display_dlg_regs_st *dlg_regs = params->hubp_setup_params.dlg_regs; + struct _vcs_dpi_display_ttu_regs_st *ttu_regs = params->hubp_setup_params.ttu_regs; + struct _vcs_dpi_display_rq_regs_st *rq_regs = params->hubp_setup_params.rq_regs; + struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest = params->hubp_setup_params.pipe_dest; + + if (hubp && hubp->funcs->hubp_setup) + hubp->funcs->hubp_setup(hubp, dlg_regs, ttu_regs, rq_regs, pipe_dest); +} + +void hwss_hubp_set_unbounded_requesting(union block_sequence_params *params) +{ + struct hubp *hubp = 
params->hubp_set_unbounded_requesting_params.hubp;
+	bool unbounded_req = params->hubp_set_unbounded_requesting_params.unbounded_req;
+
+	if (hubp && hubp->funcs->set_unbounded_requesting)
+		hubp->funcs->set_unbounded_requesting(hubp, unbounded_req);
+}
+
+void hwss_hubp_setup_interdependent2(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_setup_interdependent2_params.hubp;
+	struct dml2_dchub_per_pipe_register_set *hubp_regs = params->hubp_setup_interdependent2_params.hubp_regs;
+
+	if (hubp && hubp->funcs->hubp_setup_interdependent2)
+		hubp->funcs->hubp_setup_interdependent2(hubp, hubp_regs);
+}
+
+void hwss_hubp_setup_interdependent(union block_sequence_params *params)
+{
+	struct hubp *hubp = params->hubp_setup_interdependent_params.hubp;
+	struct _vcs_dpi_display_dlg_regs_st *dlg_regs = params->hubp_setup_interdependent_params.dlg_regs;
+	struct _vcs_dpi_display_ttu_regs_st *ttu_regs = params->hubp_setup_interdependent_params.ttu_regs;
+
+	if (hubp && hubp->funcs->hubp_setup_interdependent)
+		hubp->funcs->hubp_setup_interdependent(hubp, dlg_regs, ttu_regs);
+}
+
+void hwss_dpp_set_cursor_matrix(union block_sequence_params *params)
+{
+	struct dpp *dpp = params->dpp_set_cursor_matrix_params.dpp;
+	enum dc_color_space color_space = params->dpp_set_cursor_matrix_params.color_space;
+	struct dc_csc_transform *cursor_csc_color_matrix = params->dpp_set_cursor_matrix_params.cursor_csc_color_matrix;
+
+	if (dpp && dpp->funcs->set_cursor_matrix)
+		dpp->funcs->set_cursor_matrix(dpp, color_space, *cursor_csc_color_matrix);
+}
+
+void hwss_mpc_update_mpcc(union block_sequence_params *params)
+{
+	struct dc *dc = params->mpc_update_mpcc_params.dc;
+	struct pipe_ctx *pipe_ctx = params->mpc_update_mpcc_params.pipe_ctx;
+	struct dce_hwseq *hws = dc->hwseq;
+
+	if (hws->funcs.update_mpcc)
+		hws->funcs.update_mpcc(dc, pipe_ctx);
+}
+
+void hwss_mpc_update_blending(union block_sequence_params *params)
+{
+	struct mpc *mpc = params->mpc_update_blending_params.mpc;
+	struct mpcc_blnd_cfg *blnd_cfg = &params->mpc_update_blending_params.blnd_cfg;
+	int mpcc_id = params->mpc_update_blending_params.mpcc_id;
+
+	if (mpc && mpc->funcs->update_blending)
+		mpc->funcs->update_blending(mpc, blnd_cfg, mpcc_id);
+}
+
+void hwss_mpc_assert_idle_mpcc(union block_sequence_params *params)
+{
+	struct mpc *mpc = params->mpc_assert_idle_mpcc_params.mpc;
+	int mpcc_id = params->mpc_assert_idle_mpcc_params.mpcc_id;
+
+	if (mpc && mpc->funcs->wait_for_idle)
+		mpc->funcs->wait_for_idle(mpc, mpcc_id);
+
+	/* Clearing opp->mpcc_disconnect_pending[mpcc_id] is handled by a
+	 * separate OPP_SET_MPCC_DISCONNECT_PENDING step.
+	 */
+}
+
+void hwss_mpc_insert_plane(union block_sequence_params *params)
+{
+	struct mpc *mpc = params->mpc_insert_plane_params.mpc;
+	struct mpc_tree *tree = params->mpc_insert_plane_params.mpc_tree_params;
+	struct mpcc_blnd_cfg *blnd_cfg = &params->mpc_insert_plane_params.blnd_cfg;
+	struct mpcc_sm_cfg *sm_cfg = params->mpc_insert_plane_params.sm_cfg;
+	struct mpcc *insert_above_mpcc = params->mpc_insert_plane_params.insert_above_mpcc;
+	int mpcc_id = params->mpc_insert_plane_params.mpcc_id;
+	int dpp_id = params->mpc_insert_plane_params.dpp_id;
+
+	if (mpc && mpc->funcs->insert_plane)
+		mpc->funcs->insert_plane(mpc, tree, blnd_cfg, sm_cfg, insert_above_mpcc,
+				dpp_id, mpcc_id);
+}
+
+void hwss_dpp_set_scaler(union block_sequence_params *params)
+{
+	struct dpp *dpp = params->dpp_set_scaler_params.dpp;
+	const struct scaler_data *scl_data =
params->dpp_set_scaler_params.scl_data; + + if (dpp && dpp->funcs->dpp_set_scaler) + dpp->funcs->dpp_set_scaler(dpp, scl_data); +} + +void hwss_hubp_mem_program_viewport(union block_sequence_params *params) +{ + struct hubp *hubp = params->hubp_mem_program_viewport_params.hubp; + const struct rect *viewport = params->hubp_mem_program_viewport_params.viewport; + const struct rect *viewport_c = params->hubp_mem_program_viewport_params.viewport_c; + + if (hubp && hubp->funcs->mem_program_viewport) + hubp->funcs->mem_program_viewport(hubp, viewport, viewport_c); +} + +void hwss_set_cursor_attribute(union block_sequence_params *params) +{ + struct dc *dc = params->set_cursor_attribute_params.dc; + struct pipe_ctx *pipe_ctx = params->set_cursor_attribute_params.pipe_ctx; + + if (dc && dc->hwss.set_cursor_attribute) + dc->hwss.set_cursor_attribute(pipe_ctx); +} + +void hwss_set_cursor_position(union block_sequence_params *params) +{ + struct dc *dc = params->set_cursor_position_params.dc; + struct pipe_ctx *pipe_ctx = params->set_cursor_position_params.pipe_ctx; + + if (dc && dc->hwss.set_cursor_position) + dc->hwss.set_cursor_position(pipe_ctx); +} + +void hwss_set_cursor_sdr_white_level(union block_sequence_params *params) +{ + struct dc *dc = params->set_cursor_sdr_white_level_params.dc; + struct pipe_ctx *pipe_ctx = params->set_cursor_sdr_white_level_params.pipe_ctx; + + if (dc && dc->hwss.set_cursor_sdr_white_level) + dc->hwss.set_cursor_sdr_white_level(pipe_ctx); +} + +void hwss_program_output_csc(union block_sequence_params *params) +{ + struct dc *dc = params->program_output_csc_params.dc; + struct pipe_ctx *pipe_ctx = params->program_output_csc_params.pipe_ctx; + enum dc_color_space colorspace = params->program_output_csc_params.colorspace; + uint16_t *matrix = params->program_output_csc_params.matrix; + int opp_id = params->program_output_csc_params.opp_id; + + if (dc && dc->hwss.program_output_csc) + dc->hwss.program_output_csc(dc, pipe_ctx, colorspace, matrix, opp_id); +} + +void hwss_hubp_set_blank(union block_sequence_params *params) +{ + struct hubp *hubp = params->hubp_set_blank_params.hubp; + bool blank = params->hubp_set_blank_params.blank; + + if (hubp && hubp->funcs->set_blank) + hubp->funcs->set_blank(hubp, blank); +} + +void hwss_phantom_hubp_post_enable(union block_sequence_params *params) +{ + struct hubp *hubp = params->phantom_hubp_post_enable_params.hubp; + + if (hubp && hubp->funcs->phantom_hubp_post_enable) + hubp->funcs->phantom_hubp_post_enable(hubp); +} + +void hwss_add_dccg_set_dto_dscclk(struct block_sequence_state *seq_state, + struct dccg *dccg, int inst, int num_slices_h) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DCCG_SET_DTO_DSCCLK; + seq_state->steps[*seq_state->num_steps].params.dccg_set_dto_dscclk_params.dccg = dccg; + seq_state->steps[*seq_state->num_steps].params.dccg_set_dto_dscclk_params.inst = inst; + seq_state->steps[*seq_state->num_steps].params.dccg_set_dto_dscclk_params.num_slices_h = num_slices_h; + (*seq_state->num_steps)++; + } +} + +void hwss_add_dsc_calculate_and_set_config(struct block_sequence_state *seq_state, + struct pipe_ctx *pipe_ctx, bool enable, int opp_cnt) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DSC_CALCULATE_AND_SET_CONFIG; + seq_state->steps[*seq_state->num_steps].params.dsc_calculate_and_set_config_params.pipe_ctx = pipe_ctx; + 
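+	/*
+	 * dsc_optc_cfg inside these params is an output: it is expected to be
+	 * filled in when the DSC_CALCULATE_AND_SET_CONFIG step executes, and a
+	 * later TG_SET_DSC_CONFIG step may point at the stored result.
+	 */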
seq_state->steps[*seq_state->num_steps].params.dsc_calculate_and_set_config_params.enable = enable; + seq_state->steps[*seq_state->num_steps].params.dsc_calculate_and_set_config_params.opp_cnt = opp_cnt; + (*seq_state->num_steps)++; + } +} + +void hwss_add_mpc_remove_mpcc(struct block_sequence_state *seq_state, + struct mpc *mpc, struct mpc_tree *mpc_tree_params, struct mpcc *mpcc_to_remove) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = MPC_REMOVE_MPCC; + seq_state->steps[*seq_state->num_steps].params.mpc_remove_mpcc_params.mpc = mpc; + seq_state->steps[*seq_state->num_steps].params.mpc_remove_mpcc_params.mpc_tree_params = mpc_tree_params; + seq_state->steps[*seq_state->num_steps].params.mpc_remove_mpcc_params.mpcc_to_remove = mpcc_to_remove; + (*seq_state->num_steps)++; + } +} + +void hwss_add_opp_set_mpcc_disconnect_pending(struct block_sequence_state *seq_state, + struct output_pixel_processor *opp, int mpcc_inst, bool pending) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = OPP_SET_MPCC_DISCONNECT_PENDING; + seq_state->steps[*seq_state->num_steps].params.opp_set_mpcc_disconnect_pending_params.opp = opp; + seq_state->steps[*seq_state->num_steps].params.opp_set_mpcc_disconnect_pending_params.mpcc_inst = mpcc_inst; + seq_state->steps[*seq_state->num_steps].params.opp_set_mpcc_disconnect_pending_params.pending = pending; + (*seq_state->num_steps)++; + } +} + +void hwss_add_hubp_disconnect(struct block_sequence_state *seq_state, + struct hubp *hubp) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_DISCONNECT; + seq_state->steps[*seq_state->num_steps].params.hubp_disconnect_params.hubp = hubp; + (*seq_state->num_steps)++; + } +} + +void hwss_add_dsc_enable_with_opp(struct block_sequence_state *seq_state, + struct pipe_ctx *pipe_ctx) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DSC_ENABLE_WITH_OPP; + seq_state->steps[*seq_state->num_steps].params.dsc_enable_with_opp_params.pipe_ctx = pipe_ctx; + (*seq_state->num_steps)++; + } +} + +void hwss_add_tg_set_dsc_config(struct block_sequence_state *seq_state, + struct timing_generator *tg, struct dsc_optc_config *dsc_optc_cfg, bool enable) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = TG_SET_DSC_CONFIG; + seq_state->steps[*seq_state->num_steps].params.tg_set_dsc_config_params.tg = tg; + seq_state->steps[*seq_state->num_steps].params.tg_set_dsc_config_params.dsc_optc_cfg = dsc_optc_cfg; + seq_state->steps[*seq_state->num_steps].params.tg_set_dsc_config_params.enable = enable; + (*seq_state->num_steps)++; + } +} + +void hwss_add_dsc_disconnect(struct block_sequence_state *seq_state, + struct display_stream_compressor *dsc) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DSC_DISCONNECT; + seq_state->steps[*seq_state->num_steps].params.dsc_disconnect_params.dsc = dsc; + (*seq_state->num_steps)++; + } +} + +void hwss_add_dc_set_optimized_required(struct block_sequence_state *seq_state, + struct dc *dc, bool optimized_required) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DC_SET_OPTIMIZED_REQUIRED; + seq_state->steps[*seq_state->num_steps].params.dc_set_optimized_required_params.dc = dc; 
+ seq_state->steps[*seq_state->num_steps].params.dc_set_optimized_required_params.optimized_required = optimized_required; + (*seq_state->num_steps)++; + } +} + +void hwss_add_abm_set_immediate_disable(struct block_sequence_state *seq_state, + struct dc *dc, struct pipe_ctx *pipe_ctx) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = ABM_SET_IMMEDIATE_DISABLE; + seq_state->steps[*seq_state->num_steps].params.set_abm_immediate_disable_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.set_abm_immediate_disable_params.pipe_ctx = pipe_ctx; + (*seq_state->num_steps)++; + } +} + +void hwss_add_opp_set_disp_pattern_generator(struct block_sequence_state *seq_state, + struct output_pixel_processor *opp, + enum controller_dp_test_pattern test_pattern, + enum controller_dp_color_space color_space, + enum dc_color_depth color_depth, + struct tg_color solid_color, + bool use_solid_color, + int width, + int height, + int offset) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = OPP_SET_DISP_PATTERN_GENERATOR; + seq_state->steps[*seq_state->num_steps].params.opp_set_disp_pattern_generator_params.opp = opp; + seq_state->steps[*seq_state->num_steps].params.opp_set_disp_pattern_generator_params.test_pattern = test_pattern; + seq_state->steps[*seq_state->num_steps].params.opp_set_disp_pattern_generator_params.color_space = color_space; + seq_state->steps[*seq_state->num_steps].params.opp_set_disp_pattern_generator_params.color_depth = color_depth; + seq_state->steps[*seq_state->num_steps].params.opp_set_disp_pattern_generator_params.solid_color = solid_color; + seq_state->steps[*seq_state->num_steps].params.opp_set_disp_pattern_generator_params.use_solid_color = use_solid_color; + seq_state->steps[*seq_state->num_steps].params.opp_set_disp_pattern_generator_params.width = width; + seq_state->steps[*seq_state->num_steps].params.opp_set_disp_pattern_generator_params.height = height; + seq_state->steps[*seq_state->num_steps].params.opp_set_disp_pattern_generator_params.offset = offset; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add MPC update blending to block sequence + */ +void hwss_add_mpc_update_blending(struct block_sequence_state *seq_state, + struct mpc *mpc, + struct mpcc_blnd_cfg blnd_cfg, + int mpcc_id) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = MPC_UPDATE_BLENDING; + seq_state->steps[*seq_state->num_steps].params.mpc_update_blending_params.mpc = mpc; + seq_state->steps[*seq_state->num_steps].params.mpc_update_blending_params.blnd_cfg = blnd_cfg; + seq_state->steps[*seq_state->num_steps].params.mpc_update_blending_params.mpcc_id = mpcc_id; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add MPC insert plane to block sequence + */ +void hwss_add_mpc_insert_plane(struct block_sequence_state *seq_state, + struct mpc *mpc, + struct mpc_tree *mpc_tree_params, + struct mpcc_blnd_cfg blnd_cfg, + struct mpcc_sm_cfg *sm_cfg, + struct mpcc *insert_above_mpcc, + int dpp_id, + int mpcc_id) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = MPC_INSERT_PLANE; + seq_state->steps[*seq_state->num_steps].params.mpc_insert_plane_params.mpc = mpc; + seq_state->steps[*seq_state->num_steps].params.mpc_insert_plane_params.mpc_tree_params = mpc_tree_params; + 
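+	/*
+	 * blnd_cfg is copied by value into the step params, so the caller's
+	 * copy need not stay alive until the sequence executes;
+	 * hwss_mpc_insert_plane() later passes the address of this stored copy.
+	 */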
seq_state->steps[*seq_state->num_steps].params.mpc_insert_plane_params.blnd_cfg = blnd_cfg; + seq_state->steps[*seq_state->num_steps].params.mpc_insert_plane_params.sm_cfg = sm_cfg; + seq_state->steps[*seq_state->num_steps].params.mpc_insert_plane_params.insert_above_mpcc = insert_above_mpcc; + seq_state->steps[*seq_state->num_steps].params.mpc_insert_plane_params.dpp_id = dpp_id; + seq_state->steps[*seq_state->num_steps].params.mpc_insert_plane_params.mpcc_id = mpcc_id; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add MPC assert idle MPCC to block sequence + */ +void hwss_add_mpc_assert_idle_mpcc(struct block_sequence_state *seq_state, + struct mpc *mpc, + int mpcc_id) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = MPC_ASSERT_IDLE_MPCC; + seq_state->steps[*seq_state->num_steps].params.mpc_assert_idle_mpcc_params.mpc = mpc; + seq_state->steps[*seq_state->num_steps].params.mpc_assert_idle_mpcc_params.mpcc_id = mpcc_id; + (*seq_state->num_steps)++; + } +} + +/** + * Helper function to add HUBP set blank to block sequence + */ +void hwss_add_hubp_set_blank(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool blank) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_SET_BLANK; + seq_state->steps[*seq_state->num_steps].params.hubp_set_blank_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_set_blank_params.blank = blank; + (*seq_state->num_steps)++; + } +} + +void hwss_add_opp_program_bit_depth_reduction(struct block_sequence_state *seq_state, + struct output_pixel_processor *opp, + bool use_default_params, + struct pipe_ctx *pipe_ctx) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = OPP_PROGRAM_BIT_DEPTH_REDUCTION; + seq_state->steps[*seq_state->num_steps].params.opp_program_bit_depth_reduction_params.opp = opp; + seq_state->steps[*seq_state->num_steps].params.opp_program_bit_depth_reduction_params.use_default_params = use_default_params; + seq_state->steps[*seq_state->num_steps].params.opp_program_bit_depth_reduction_params.pipe_ctx = pipe_ctx; + (*seq_state->num_steps)++; + } +} + +void hwss_add_dc_ip_request_cntl(struct block_sequence_state *seq_state, + struct dc *dc, + bool enable) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DC_IP_REQUEST_CNTL; + seq_state->steps[*seq_state->num_steps].params.dc_ip_request_cntl_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.dc_ip_request_cntl_params.enable = enable; + (*seq_state->num_steps)++; + } +} + +void hwss_add_dwbc_update(struct block_sequence_state *seq_state, + struct dwbc *dwb, + struct dc_dwb_params *dwb_params) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DWBC_UPDATE; + seq_state->steps[*seq_state->num_steps].params.dwbc_update_params.dwb = dwb; + seq_state->steps[*seq_state->num_steps].params.dwbc_update_params.dwb_params = dwb_params; + (*seq_state->num_steps)++; + } +} + +void hwss_add_mcif_wb_config_buf(struct block_sequence_state *seq_state, + struct mcif_wb *mcif_wb, + struct mcif_buf_params *mcif_buf_params, + unsigned int dest_height) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = MCIF_WB_CONFIG_BUF; + 
seq_state->steps[*seq_state->num_steps].params.mcif_wb_config_buf_params.mcif_wb = mcif_wb; + seq_state->steps[*seq_state->num_steps].params.mcif_wb_config_buf_params.mcif_buf_params = mcif_buf_params; + seq_state->steps[*seq_state->num_steps].params.mcif_wb_config_buf_params.dest_height = dest_height; + (*seq_state->num_steps)++; + } +} + +void hwss_add_mcif_wb_config_arb(struct block_sequence_state *seq_state, + struct mcif_wb *mcif_wb, + struct mcif_arb_params *mcif_arb_params) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = MCIF_WB_CONFIG_ARB; + seq_state->steps[*seq_state->num_steps].params.mcif_wb_config_arb_params.mcif_wb = mcif_wb; + seq_state->steps[*seq_state->num_steps].params.mcif_wb_config_arb_params.mcif_arb_params = mcif_arb_params; + (*seq_state->num_steps)++; + } +} + +void hwss_add_mcif_wb_enable(struct block_sequence_state *seq_state, + struct mcif_wb *mcif_wb) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = MCIF_WB_ENABLE; + seq_state->steps[*seq_state->num_steps].params.mcif_wb_enable_params.mcif_wb = mcif_wb; + (*seq_state->num_steps)++; + } +} + +void hwss_add_mcif_wb_disable(struct block_sequence_state *seq_state, + struct mcif_wb *mcif_wb) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = MCIF_WB_DISABLE; + seq_state->steps[*seq_state->num_steps].params.mcif_wb_disable_params.mcif_wb = mcif_wb; + (*seq_state->num_steps)++; + } +} + +void hwss_add_mpc_set_dwb_mux(struct block_sequence_state *seq_state, + struct mpc *mpc, + int dwb_id, + int mpcc_id) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = MPC_SET_DWB_MUX; + seq_state->steps[*seq_state->num_steps].params.mpc_set_dwb_mux_params.mpc = mpc; + seq_state->steps[*seq_state->num_steps].params.mpc_set_dwb_mux_params.dwb_id = dwb_id; + seq_state->steps[*seq_state->num_steps].params.mpc_set_dwb_mux_params.mpcc_id = mpcc_id; + (*seq_state->num_steps)++; + } +} + +void hwss_add_mpc_disable_dwb_mux(struct block_sequence_state *seq_state, + struct mpc *mpc, + unsigned int dwb_id) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = MPC_DISABLE_DWB_MUX; + seq_state->steps[*seq_state->num_steps].params.mpc_disable_dwb_mux_params.mpc = mpc; + seq_state->steps[*seq_state->num_steps].params.mpc_disable_dwb_mux_params.dwb_id = dwb_id; + (*seq_state->num_steps)++; + } +} + +void hwss_add_dwbc_enable(struct block_sequence_state *seq_state, + struct dwbc *dwb, + struct dc_dwb_params *dwb_params) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DWBC_ENABLE; + seq_state->steps[*seq_state->num_steps].params.dwbc_enable_params.dwb = dwb; + seq_state->steps[*seq_state->num_steps].params.dwbc_enable_params.dwb_params = dwb_params; + (*seq_state->num_steps)++; + } +} + +void hwss_add_dwbc_disable(struct block_sequence_state *seq_state, + struct dwbc *dwb) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DWBC_DISABLE; + seq_state->steps[*seq_state->num_steps].params.dwbc_disable_params.dwb = dwb; + (*seq_state->num_steps)++; + } +} + +void hwss_add_tg_set_gsl(struct block_sequence_state *seq_state, + struct timing_generator *tg, + struct gsl_params gsl) +{ + if (*seq_state->num_steps < 
MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = TG_SET_GSL; + seq_state->steps[*seq_state->num_steps].params.tg_set_gsl_params.tg = tg; + seq_state->steps[*seq_state->num_steps].params.tg_set_gsl_params.gsl = gsl; + (*seq_state->num_steps)++; + } +} + +void hwss_add_tg_set_gsl_source_select(struct block_sequence_state *seq_state, + struct timing_generator *tg, + int group_idx, + uint32_t gsl_ready_signal) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = TG_SET_GSL_SOURCE_SELECT; + seq_state->steps[*seq_state->num_steps].params.tg_set_gsl_source_select_params.tg = tg; + seq_state->steps[*seq_state->num_steps].params.tg_set_gsl_source_select_params.group_idx = group_idx; + seq_state->steps[*seq_state->num_steps].params.tg_set_gsl_source_select_params.gsl_ready_signal = gsl_ready_signal; + (*seq_state->num_steps)++; + } +} + +void hwss_add_hubp_update_mall_sel(struct block_sequence_state *seq_state, + struct hubp *hubp, + uint32_t mall_sel, + bool cache_cursor) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_UPDATE_MALL_SEL; + seq_state->steps[*seq_state->num_steps].params.hubp_update_mall_sel_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_update_mall_sel_params.mall_sel = mall_sel; + seq_state->steps[*seq_state->num_steps].params.hubp_update_mall_sel_params.cache_cursor = cache_cursor; + (*seq_state->num_steps)++; + } +} + +void hwss_add_hubp_prepare_subvp_buffering(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool enable) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_PREPARE_SUBVP_BUFFERING; + seq_state->steps[*seq_state->num_steps].params.hubp_prepare_subvp_buffering_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_prepare_subvp_buffering_params.enable = enable; + (*seq_state->num_steps)++; + } +} + +void hwss_add_hubp_set_blank_en(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool enable) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_SET_BLANK_EN; + seq_state->steps[*seq_state->num_steps].params.hubp_set_blank_en_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_set_blank_en_params.enable = enable; + (*seq_state->num_steps)++; + } +} + +void hwss_add_hubp_disable_control(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool disable) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_DISABLE_CONTROL; + seq_state->steps[*seq_state->num_steps].params.hubp_disable_control_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_disable_control_params.disable = disable; + (*seq_state->num_steps)++; + } +} + +void hwss_add_hubbub_soft_reset(struct block_sequence_state *seq_state, + struct hubbub *hubbub, + void (*hubbub_soft_reset)(struct hubbub *hubbub, bool reset), + bool reset) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBBUB_SOFT_RESET; + seq_state->steps[*seq_state->num_steps].params.hubbub_soft_reset_params.hubbub = hubbub; + seq_state->steps[*seq_state->num_steps].params.hubbub_soft_reset_params.hubbub_soft_reset = hubbub_soft_reset; + 
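+	/*
+	 * Unlike most steps, the reset callback is stored explicitly here
+	 * rather than looked up through hubbub->funcs when the step executes.
+	 */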
seq_state->steps[*seq_state->num_steps].params.hubbub_soft_reset_params.reset = reset; + (*seq_state->num_steps)++; + } +} + +void hwss_add_hubp_clk_cntl(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool enable) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_CLK_CNTL; + seq_state->steps[*seq_state->num_steps].params.hubp_clk_cntl_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_clk_cntl_params.enable = enable; + (*seq_state->num_steps)++; + } +} + +void hwss_add_dpp_dppclk_control(struct block_sequence_state *seq_state, + struct dpp *dpp, + bool dppclk_div, + bool enable) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DPP_DPPCLK_CONTROL; + seq_state->steps[*seq_state->num_steps].params.dpp_dppclk_control_params.dpp = dpp; + seq_state->steps[*seq_state->num_steps].params.dpp_dppclk_control_params.dppclk_div = dppclk_div; + seq_state->steps[*seq_state->num_steps].params.dpp_dppclk_control_params.enable = enable; + (*seq_state->num_steps)++; + } +} + +void hwss_add_disable_phantom_crtc(struct block_sequence_state *seq_state, + struct timing_generator *tg) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DISABLE_PHANTOM_CRTC; + seq_state->steps[*seq_state->num_steps].params.disable_phantom_crtc_params.tg = tg; + (*seq_state->num_steps)++; + } +} + +void hwss_add_dsc_pg_status(struct block_sequence_state *seq_state, + struct dce_hwseq *hws, + int dsc_inst, + bool is_ungated) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DSC_PG_STATUS; + seq_state->steps[*seq_state->num_steps].params.dsc_pg_status_params.hws = hws; + seq_state->steps[*seq_state->num_steps].params.dsc_pg_status_params.dsc_inst = dsc_inst; + seq_state->steps[*seq_state->num_steps].params.dsc_pg_status_params.is_ungated = is_ungated; + (*seq_state->num_steps)++; + } +} + +void hwss_add_dsc_wait_disconnect_pending_clear(struct block_sequence_state *seq_state, + struct display_stream_compressor *dsc, + bool *is_ungated) { - struct dc_context *ctx = params->send_dmcub_cmd_params.ctx; - union dmub_rb_cmd *cmd = params->send_dmcub_cmd_params.cmd; - enum dm_dmub_wait_type wait_type = params->send_dmcub_cmd_params.wait_type; - - dc_wake_and_execute_dmub_cmd(ctx, cmd, wait_type); + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DSC_WAIT_DISCONNECT_PENDING_CLEAR; + seq_state->steps[*seq_state->num_steps].params.dsc_wait_disconnect_pending_clear_params.dsc = dsc; + seq_state->steps[*seq_state->num_steps].params.dsc_wait_disconnect_pending_clear_params.is_ungated = is_ungated; + (*seq_state->num_steps)++; + } } -void hwss_program_manual_trigger(union block_sequence_params *params) +void hwss_add_dsc_disable(struct block_sequence_state *seq_state, + struct display_stream_compressor *dsc, + bool *is_ungated) { - struct pipe_ctx *pipe_ctx = params->program_manual_trigger_params.pipe_ctx; - - if (pipe_ctx->stream_res.tg->funcs->program_manual_trigger) - pipe_ctx->stream_res.tg->funcs->program_manual_trigger(pipe_ctx->stream_res.tg); + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DSC_DISABLE; + seq_state->steps[*seq_state->num_steps].params.dsc_disable_params.dsc = dsc; + 
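+	/*
+	 * is_ungated is only dereferenced at execution time, typically after an
+	 * earlier DSC_PG_STATUS step has populated the flag it points to.
+	 */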
seq_state->steps[*seq_state->num_steps].params.dsc_disable_params.is_ungated = is_ungated; + (*seq_state->num_steps)++; + } } -void hwss_setup_dpp(union block_sequence_params *params) +void hwss_add_dccg_set_ref_dscclk(struct block_sequence_state *seq_state, + struct dccg *dccg, + int dsc_inst, + bool *is_ungated) { - struct pipe_ctx *pipe_ctx = params->setup_dpp_params.pipe_ctx; - struct dpp *dpp = pipe_ctx->plane_res.dpp; - struct dc_plane_state *plane_state = pipe_ctx->plane_state; - - if (!plane_state) - return; - - if (dpp && dpp->funcs->dpp_setup) { - // program the input csc - dpp->funcs->dpp_setup(dpp, - plane_state->format, - EXPANSION_MODE_ZERO, - plane_state->input_csc_color_matrix, - plane_state->color_space, - NULL); + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DCCG_SET_REF_DSCCLK; + seq_state->steps[*seq_state->num_steps].params.dccg_set_ref_dscclk_params.dccg = dccg; + seq_state->steps[*seq_state->num_steps].params.dccg_set_ref_dscclk_params.dsc_inst = dsc_inst; + seq_state->steps[*seq_state->num_steps].params.dccg_set_ref_dscclk_params.is_ungated = is_ungated; + (*seq_state->num_steps)++; } +} - if (dpp && dpp->funcs->set_cursor_matrix) { - dpp->funcs->set_cursor_matrix(dpp, - plane_state->color_space, - plane_state->cursor_csc_color_matrix); +void hwss_add_dpp_root_clock_control(struct block_sequence_state *seq_state, + struct dce_hwseq *hws, + unsigned int dpp_inst, + bool clock_on) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DPP_ROOT_CLOCK_CONTROL; + seq_state->steps[*seq_state->num_steps].params.dpp_root_clock_control_params.hws = hws; + seq_state->steps[*seq_state->num_steps].params.dpp_root_clock_control_params.dpp_inst = dpp_inst; + seq_state->steps[*seq_state->num_steps].params.dpp_root_clock_control_params.clock_on = clock_on; + (*seq_state->num_steps)++; } } -void hwss_program_bias_and_scale(union block_sequence_params *params) +void hwss_add_dpp_pg_control(struct block_sequence_state *seq_state, + struct dce_hwseq *hws, + unsigned int dpp_inst, + bool power_on) { - struct pipe_ctx *pipe_ctx = params->program_bias_and_scale_params.pipe_ctx; - struct dpp *dpp = pipe_ctx->plane_res.dpp; - struct dc_plane_state *plane_state = pipe_ctx->plane_state; - struct dc_bias_and_scale bns_params = plane_state->bias_and_scale; - - //TODO :for CNVC set scale and bias registers if necessary - if (dpp->funcs->dpp_program_bias_and_scale) { - dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DPP_PG_CONTROL; + seq_state->steps[*seq_state->num_steps].params.dpp_pg_control_params.hws = hws; + seq_state->steps[*seq_state->num_steps].params.dpp_pg_control_params.dpp_inst = dpp_inst; + seq_state->steps[*seq_state->num_steps].params.dpp_pg_control_params.power_on = power_on; + (*seq_state->num_steps)++; } } -void hwss_power_on_mpc_mem_pwr(union block_sequence_params *params) +void hwss_add_hubp_pg_control(struct block_sequence_state *seq_state, + struct dce_hwseq *hws, + unsigned int hubp_inst, + bool power_on) { - struct mpc *mpc = params->power_on_mpc_mem_pwr_params.mpc; - int mpcc_id = params->power_on_mpc_mem_pwr_params.mpcc_id; - bool power_on = params->power_on_mpc_mem_pwr_params.power_on; - - if (mpc->funcs->power_on_mpc_mem_pwr) - mpc->funcs->power_on_mpc_mem_pwr(mpc, mpcc_id, power_on); + if (*seq_state->num_steps < 
MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_PG_CONTROL; + seq_state->steps[*seq_state->num_steps].params.hubp_pg_control_params.hws = hws; + seq_state->steps[*seq_state->num_steps].params.hubp_pg_control_params.hubp_inst = hubp_inst; + seq_state->steps[*seq_state->num_steps].params.hubp_pg_control_params.power_on = power_on; + (*seq_state->num_steps)++; + } } -void hwss_set_output_csc(union block_sequence_params *params) +void hwss_add_hubp_init(struct block_sequence_state *seq_state, + struct hubp *hubp) { - struct mpc *mpc = params->set_output_csc_params.mpc; - int opp_id = params->set_output_csc_params.opp_id; - const uint16_t *matrix = params->set_output_csc_params.regval; - enum mpc_output_csc_mode ocsc_mode = params->set_output_csc_params.ocsc_mode; - - if (mpc->funcs->set_output_csc != NULL) - mpc->funcs->set_output_csc(mpc, - opp_id, - matrix, - ocsc_mode); + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_INIT; + seq_state->steps[*seq_state->num_steps].params.hubp_init_params.hubp = hubp; + (*seq_state->num_steps)++; + } } -void hwss_set_ocsc_default(union block_sequence_params *params) +void hwss_add_hubp_reset(struct block_sequence_state *seq_state, + struct hubp *hubp) { - struct mpc *mpc = params->set_ocsc_default_params.mpc; - int opp_id = params->set_ocsc_default_params.opp_id; - enum dc_color_space colorspace = params->set_ocsc_default_params.color_space; - enum mpc_output_csc_mode ocsc_mode = params->set_ocsc_default_params.ocsc_mode; + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_RESET; + seq_state->steps[*seq_state->num_steps].params.hubp_reset_params.hubp = hubp; + (*seq_state->num_steps)++; + } +} - if (mpc->funcs->set_ocsc_default != NULL) - mpc->funcs->set_ocsc_default(mpc, - opp_id, - colorspace, - ocsc_mode); +void hwss_add_dpp_reset(struct block_sequence_state *seq_state, + struct dpp *dpp) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DPP_RESET; + seq_state->steps[*seq_state->num_steps].params.dpp_reset_params.dpp = dpp; + (*seq_state->num_steps)++; + } } -void hwss_subvp_save_surf_addr(union block_sequence_params *params) +void hwss_add_opp_pipe_clock_control(struct block_sequence_state *seq_state, + struct output_pixel_processor *opp, + bool enable) { - struct dc_dmub_srv *dc_dmub_srv = params->subvp_save_surf_addr.dc_dmub_srv; - const struct dc_plane_address *addr = params->subvp_save_surf_addr.addr; - uint8_t subvp_index = params->subvp_save_surf_addr.subvp_index; + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = OPP_PIPE_CLOCK_CONTROL; + seq_state->steps[*seq_state->num_steps].params.opp_pipe_clock_control_params.opp = opp; + seq_state->steps[*seq_state->num_steps].params.opp_pipe_clock_control_params.enable = enable; + (*seq_state->num_steps)++; + } +} - dc_dmub_srv_subvp_save_surf_addr(dc_dmub_srv, addr, subvp_index); +void hwss_add_hubp_set_vm_system_aperture_settings(struct block_sequence_state *seq_state, + struct hubp *hubp, + uint64_t sys_default, + uint64_t sys_low, + uint64_t sys_high) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_SET_VM_SYSTEM_APERTURE_SETTINGS; + seq_state->steps[*seq_state->num_steps].params.hubp_set_vm_system_aperture_settings_params.hubp = hubp; + 
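+	/*
+	 * The raw 64-bit aperture addresses are stored via quad_part;
+	 * hwss_hubp_set_vm_system_aperture_settings() reassembles them into a
+	 * struct vm_system_aperture_param when the step runs.
+	 */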
seq_state->steps[*seq_state->num_steps].params.hubp_set_vm_system_aperture_settings_params.sys_default.quad_part = sys_default; + seq_state->steps[*seq_state->num_steps].params.hubp_set_vm_system_aperture_settings_params.sys_low.quad_part = sys_low; + seq_state->steps[*seq_state->num_steps].params.hubp_set_vm_system_aperture_settings_params.sys_high.quad_part = sys_high; + (*seq_state->num_steps)++; + } } -void get_surface_tile_visual_confirm_color( - struct pipe_ctx *pipe_ctx, - struct tg_color *color) +void hwss_add_hubp_set_flip_int(struct block_sequence_state *seq_state, + struct hubp *hubp) { - uint32_t color_value = MAX_TG_COLOR_VALUE; - /* Determine the overscan color based on the bottom-most plane's context */ - struct pipe_ctx *bottom_pipe_ctx = pipe_ctx; + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_SET_FLIP_INT; + seq_state->steps[*seq_state->num_steps].params.hubp_set_flip_int_params.hubp = hubp; + (*seq_state->num_steps)++; + } +} - while (bottom_pipe_ctx->bottom_pipe != NULL) - bottom_pipe_ctx = bottom_pipe_ctx->bottom_pipe; +void hwss_add_dccg_update_dpp_dto(struct block_sequence_state *seq_state, + struct dccg *dccg, + int dpp_inst, + int dppclk_khz) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DCCG_UPDATE_DPP_DTO; + seq_state->steps[*seq_state->num_steps].params.dccg_update_dpp_dto_params.dccg = dccg; + seq_state->steps[*seq_state->num_steps].params.dccg_update_dpp_dto_params.dpp_inst = dpp_inst; + seq_state->steps[*seq_state->num_steps].params.dccg_update_dpp_dto_params.dppclk_khz = dppclk_khz; + (*seq_state->num_steps)++; + } +} - switch (bottom_pipe_ctx->plane_state->tiling_info.gfx9.swizzle) { - case DC_SW_LINEAR: - /* LINEAR Surface - set border color to red */ - color->color_r_cr = color_value; - break; - default: - break; +void hwss_add_hubp_vtg_sel(struct block_sequence_state *seq_state, + struct hubp *hubp, + uint32_t otg_inst) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_VTG_SEL; + seq_state->steps[*seq_state->num_steps].params.hubp_vtg_sel_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_vtg_sel_params.otg_inst = otg_inst; + (*seq_state->num_steps)++; } } -/** - * hwss_wait_for_all_blank_complete - wait for all active OPPs to finish pending blank - * pattern updates - * - * @dc: [in] dc reference - * @context: [in] hardware context in use - */ -void hwss_wait_for_all_blank_complete(struct dc *dc, - struct dc_state *context) +void hwss_add_hubp_setup2(struct block_sequence_state *seq_state, + struct hubp *hubp, + struct dml2_dchub_per_pipe_register_set *hubp_regs, + union dml2_global_sync_programming *global_sync, + struct dc_crtc_timing *timing) { - struct pipe_ctx *opp_head; - struct dce_hwseq *hws = dc->hwseq; - int i; + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_SETUP2; + seq_state->steps[*seq_state->num_steps].params.hubp_setup2_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_setup2_params.hubp_regs = hubp_regs; + seq_state->steps[*seq_state->num_steps].params.hubp_setup2_params.global_sync = global_sync; + seq_state->steps[*seq_state->num_steps].params.hubp_setup2_params.timing = timing; + (*seq_state->num_steps)++; + } +} - if (!hws->funcs.wait_for_blank_complete) - return; +void hwss_add_hubp_setup(struct block_sequence_state 
*seq_state, + struct hubp *hubp, + struct _vcs_dpi_display_dlg_regs_st *dlg_regs, + struct _vcs_dpi_display_ttu_regs_st *ttu_regs, + struct _vcs_dpi_display_rq_regs_st *rq_regs, + struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_SETUP; + seq_state->steps[*seq_state->num_steps].params.hubp_setup_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_setup_params.dlg_regs = dlg_regs; + seq_state->steps[*seq_state->num_steps].params.hubp_setup_params.ttu_regs = ttu_regs; + seq_state->steps[*seq_state->num_steps].params.hubp_setup_params.rq_regs = rq_regs; + seq_state->steps[*seq_state->num_steps].params.hubp_setup_params.pipe_dest = pipe_dest; + (*seq_state->num_steps)++; + } +} - for (i = 0; i < MAX_PIPES; i++) { - opp_head = &context->res_ctx.pipe_ctx[i]; +void hwss_add_hubp_set_unbounded_requesting(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool unbounded_req) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_SET_UNBOUNDED_REQUESTING; + seq_state->steps[*seq_state->num_steps].params.hubp_set_unbounded_requesting_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_set_unbounded_requesting_params.unbounded_req = unbounded_req; + (*seq_state->num_steps)++; + } +} - if (!resource_is_pipe_type(opp_head, OPP_HEAD) || - dc_state_get_pipe_subvp_type(context, opp_head) == SUBVP_PHANTOM) - continue; +void hwss_add_hubp_setup_interdependent2(struct block_sequence_state *seq_state, + struct hubp *hubp, + struct dml2_dchub_per_pipe_register_set *hubp_regs) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_SETUP_INTERDEPENDENT2; + seq_state->steps[*seq_state->num_steps].params.hubp_setup_interdependent2_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_setup_interdependent2_params.hubp_regs = hubp_regs; + (*seq_state->num_steps)++; + } +} - hws->funcs.wait_for_blank_complete(opp_head->stream_res.opp); +void hwss_add_hubp_setup_interdependent(struct block_sequence_state *seq_state, + struct hubp *hubp, + struct _vcs_dpi_display_dlg_regs_st *dlg_regs, + struct _vcs_dpi_display_ttu_regs_st *ttu_regs) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_SETUP_INTERDEPENDENT; + seq_state->steps[*seq_state->num_steps].params.hubp_setup_interdependent_params.hubp = hubp; + seq_state->steps[*seq_state->num_steps].params.hubp_setup_interdependent_params.dlg_regs = dlg_regs; + seq_state->steps[*seq_state->num_steps].params.hubp_setup_interdependent_params.ttu_regs = ttu_regs; + (*seq_state->num_steps)++; } } -void hwss_wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *context) +void hwss_add_hubp_program_surface_config(struct block_sequence_state *seq_state, + struct hubp *hubp, + enum surface_pixel_format format, + struct dc_tiling_info *tiling_info, + struct plane_size plane_size, + enum dc_rotation_angle rotation, + struct dc_plane_dcc_param *dcc, + bool horizontal_mirror, + int compat_level) { - struct pipe_ctx *otg_master; - struct timing_generator *tg; - int i; + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_PROGRAM_SURFACE_CONFIG; + seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.hubp 
= hubp; + seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.format = format; + seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.tiling_info = tiling_info; + seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.plane_size = plane_size; + seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.rotation = rotation; + seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.dcc = dcc; + seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.horizontal_mirror = horizontal_mirror; + seq_state->steps[*seq_state->num_steps].params.program_surface_config_params.compat_level = compat_level; + (*seq_state->num_steps)++; + } +} - for (i = 0; i < MAX_PIPES; i++) { - otg_master = &context->res_ctx.pipe_ctx[i]; - if (!resource_is_pipe_type(otg_master, OTG_MASTER) || - dc_state_get_pipe_subvp_type(context, otg_master) == SUBVP_PHANTOM) - continue; - tg = otg_master->stream_res.tg; - if (tg->funcs->wait_odm_doublebuffer_pending_clear) - tg->funcs->wait_odm_doublebuffer_pending_clear(tg); - if (tg->funcs->wait_otg_disable) - tg->funcs->wait_otg_disable(tg); +void hwss_add_dpp_setup_dpp(struct block_sequence_state *seq_state, + struct pipe_ctx *pipe_ctx) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DPP_SETUP_DPP; + seq_state->steps[*seq_state->num_steps].params.setup_dpp_params.pipe_ctx = pipe_ctx; + (*seq_state->num_steps)++; } +} - /* ODM update may require to reprogram blank pattern for each OPP */ - hwss_wait_for_all_blank_complete(dc, context); +void hwss_add_dpp_set_cursor_matrix(struct block_sequence_state *seq_state, + struct dpp *dpp, + enum dc_color_space color_space, + struct dc_csc_transform *cursor_csc_color_matrix) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DPP_SET_CURSOR_MATRIX; + seq_state->steps[*seq_state->num_steps].params.dpp_set_cursor_matrix_params.dpp = dpp; + seq_state->steps[*seq_state->num_steps].params.dpp_set_cursor_matrix_params.color_space = color_space; + seq_state->steps[*seq_state->num_steps].params.dpp_set_cursor_matrix_params.cursor_csc_color_matrix = cursor_csc_color_matrix; + (*seq_state->num_steps)++; + } } -void hwss_wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) +void hwss_add_dpp_set_scaler(struct block_sequence_state *seq_state, + struct dpp *dpp, + const struct scaler_data *scl_data) { - int i; - for (i = 0; i < MAX_PIPES; i++) { - int count = 0; - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = DPP_SET_SCALER; + seq_state->steps[*seq_state->num_steps].params.dpp_set_scaler_params.dpp = dpp; + seq_state->steps[*seq_state->num_steps].params.dpp_set_scaler_params.scl_data = scl_data; + (*seq_state->num_steps)++; + } +} - if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) - continue; +void hwss_add_hubp_mem_program_viewport(struct block_sequence_state *seq_state, + struct hubp *hubp, + const struct rect *viewport, + const struct rect *viewport_c) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBP_MEM_PROGRAM_VIEWPORT; + seq_state->steps[*seq_state->num_steps].params.hubp_mem_program_viewport_params.hubp = hubp; + 
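+	/*
+	 * The viewport rects are stored by pointer, so they must remain valid
+	 * until the block sequence is executed.
+	 */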
seq_state->steps[*seq_state->num_steps].params.hubp_mem_program_viewport_params.viewport = viewport; + seq_state->steps[*seq_state->num_steps].params.hubp_mem_program_viewport_params.viewport_c = viewport_c; + (*seq_state->num_steps)++; + } +} - /* Timeout 100 ms */ - while (count < 100000) { - /* Must set to false to start with, due to OR in update function */ - pipe->plane_state->status.is_flip_pending = false; - dc->hwss.update_pending_status(pipe); - if (!pipe->plane_state->status.is_flip_pending) - break; - udelay(1); - count++; - } - ASSERT(!pipe->plane_state->status.is_flip_pending); +void hwss_add_set_cursor_attribute(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = SET_CURSOR_ATTRIBUTE; + seq_state->steps[*seq_state->num_steps].params.set_cursor_attribute_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.set_cursor_attribute_params.pipe_ctx = pipe_ctx; + (*seq_state->num_steps)++; } } -void hwss_wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) +void hwss_add_set_cursor_position(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx) { -/* - * This function calls HWSS to wait for any potentially double buffered - * operations to complete. It should be invoked as a pre-amble prior - * to full update programming before asserting any HW locks. - */ - int pipe_idx; - int opp_inst; - int opp_count = dc->res_pool->res_cap->num_opp; - struct hubp *hubp; - int mpcc_inst; - const struct pipe_ctx *pipe_ctx; + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = SET_CURSOR_POSITION; + seq_state->steps[*seq_state->num_steps].params.set_cursor_position_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.set_cursor_position_params.pipe_ctx = pipe_ctx; + (*seq_state->num_steps)++; + } +} - for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) { - pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx]; +void hwss_add_set_cursor_sdr_white_level(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = SET_CURSOR_SDR_WHITE_LEVEL; + seq_state->steps[*seq_state->num_steps].params.set_cursor_sdr_white_level_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.set_cursor_sdr_white_level_params.pipe_ctx = pipe_ctx; + (*seq_state->num_steps)++; + } +} - if (!pipe_ctx->stream) - continue; +void hwss_add_program_output_csc(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx, + enum dc_color_space colorspace, + uint16_t *matrix, + int opp_id) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = PROGRAM_OUTPUT_CSC; + seq_state->steps[*seq_state->num_steps].params.program_output_csc_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.program_output_csc_params.pipe_ctx = pipe_ctx; + seq_state->steps[*seq_state->num_steps].params.program_output_csc_params.colorspace = colorspace; + seq_state->steps[*seq_state->num_steps].params.program_output_csc_params.matrix = matrix; + seq_state->steps[*seq_state->num_steps].params.program_output_csc_params.opp_id = opp_id; + (*seq_state->num_steps)++; + } +} - /* For full update we must wait for all double buffer updates, 
not just DRR updates. This - * is particularly important for minimal transitions. Only check for OTG_MASTER pipes, - * as non-OTG Master pipes share the same OTG as - */ - if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && dc->hwss.wait_for_all_pending_updates) { - dc->hwss.wait_for_all_pending_updates(pipe_ctx); - } +void hwss_add_phantom_hubp_post_enable(struct block_sequence_state *seq_state, + struct hubp *hubp) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = PHANTOM_HUBP_POST_ENABLE; + seq_state->steps[*seq_state->num_steps].params.phantom_hubp_post_enable_params.hubp = hubp; + (*seq_state->num_steps)++; + } +} - hubp = pipe_ctx->plane_res.hubp; - if (!hubp) - continue; +void hwss_add_update_force_pstate(struct block_sequence_state *seq_state, + struct dc *dc, + struct dc_state *context) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = UPDATE_FORCE_PSTATE; + seq_state->steps[*seq_state->num_steps].params.update_force_pstate_params.dc = dc; + seq_state->steps[*seq_state->num_steps].params.update_force_pstate_params.context = context; + (*seq_state->num_steps)++; + } +} - mpcc_inst = hubp->inst; - // MPCC inst is equal to pipe index in practice - for (opp_inst = 0; opp_inst < opp_count; opp_inst++) { - if ((dc->res_pool->opps[opp_inst] != NULL) && - (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst])) { - dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst); - dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false; - break; - } - } +void hwss_add_hubbub_apply_dedcn21_147_wa(struct block_sequence_state *seq_state, + struct hubbub *hubbub) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBBUB_APPLY_DEDCN21_147_WA; + seq_state->steps[*seq_state->num_steps].params.hubbub_apply_dedcn21_147_wa_params.hubbub = hubbub; + (*seq_state->num_steps)++; } - hwss_wait_for_odm_update_pending_complete(dc, dc_context); } -void hwss_process_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) +void hwss_add_hubbub_allow_self_refresh_control(struct block_sequence_state *seq_state, + struct hubbub *hubbub, + bool allow, + bool *disallow_self_refresh_applied) { - /* wait for outstanding updates */ - hwss_wait_for_outstanding_hw_updates(dc, dc_context); + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = HUBBUB_ALLOW_SELF_REFRESH_CONTROL; + seq_state->steps[*seq_state->num_steps].params.hubbub_allow_self_refresh_control_params.hubbub = hubbub; + seq_state->steps[*seq_state->num_steps].params.hubbub_allow_self_refresh_control_params.allow = allow; + seq_state->steps[*seq_state->num_steps].params.hubbub_allow_self_refresh_control_params.disallow_self_refresh_applied = disallow_self_refresh_applied; + (*seq_state->num_steps)++; + } +} - /* perform outstanding post update programming */ - if (dc->hwss.program_outstanding_updates) - dc->hwss.program_outstanding_updates(dc, dc_context); +void hwss_add_tg_get_frame_count(struct block_sequence_state *seq_state, + struct timing_generator *tg, + unsigned int *frame_count) +{ + if (*seq_state->num_steps < MAX_HWSS_BLOCK_SEQUENCE_SIZE) { + seq_state->steps[*seq_state->num_steps].func = TG_GET_FRAME_COUNT; + seq_state->steps[*seq_state->num_steps].params.tg_get_frame_count_params.tg = tg; + 
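+	/*
+	 * frame_count is an out parameter: hwss_tg_get_frame_count() writes the
+	 * current OTG frame count through it when this step executes.
+	 */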
seq_state->steps[*seq_state->num_steps].params.tg_get_frame_count_params.frame_count = frame_count; + (*seq_state->num_steps)++; + } } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 770e36048b5f..8b9a78ff234c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1165,6 +1165,7 @@ struct dc_debug_options { unsigned int auxless_alpm_lfps_t1t2_us; short auxless_alpm_lfps_t1t2_offset_us; bool disable_stutter_for_wm_program; + bool enable_block_sequence_programming; }; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 41699ff7256f..e6d3ff8598f5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -26,9 +26,11 @@ #include "clk_mgr.h" #include "dsc.h" #include "link_service.h" +#include "custom_float.h" #include "dce/dmub_hw_lock_mgr.h" #include "dcn10/dcn10_cm_common.h" +#include "dcn10/dcn10_hubbub.h" #include "dcn20/dcn20_optc.h" #include "dcn30/dcn30_cm_common.h" #include "dcn32/dcn32_hwseq.h" @@ -36,6 +38,7 @@ #include "dcn401/dcn401_resource.h" #include "dc_state_priv.h" #include "link_enc_cfg.h" +#include "../hw_sequencer.h" #define DC_LOGGER_INIT(logger) @@ -1601,6 +1604,143 @@ void dcn401_update_odm(struct dc *dc, struct dc_state *context, dc->hwseq->funcs.blank_pixel_data(dc, otg_master, true); } +static void dcn401_add_dsc_sequence_for_odm_change(struct dc *dc, struct dc_state *context, + struct pipe_ctx *otg_master, struct block_sequence_state *seq_state) +{ + struct pipe_ctx *old_pipe; + struct pipe_ctx *new_pipe; + struct pipe_ctx *old_opp_heads[MAX_PIPES]; + struct pipe_ctx *old_otg_master; + int old_opp_head_count = 0; + int i; + + old_otg_master = &dc->current_state->res_ctx.pipe_ctx[otg_master->pipe_idx]; + + if (resource_is_pipe_type(old_otg_master, OTG_MASTER)) { + old_opp_head_count = resource_get_opp_heads_for_otg_master(old_otg_master, + &dc->current_state->res_ctx, + old_opp_heads); + } else { + old_otg_master = NULL; + } + + /* Process new DSC configuration if DSC is enabled */ + if (otg_master->stream_res.dsc && otg_master->stream->timing.flags.DSC) { + struct dc_stream_state *stream = otg_master->stream; + struct pipe_ctx *odm_pipe; + int opp_cnt = 1; + int last_dsc_calc = 0; + bool should_use_dto_dscclk = (dc->res_pool->dccg->funcs->set_dto_dscclk != NULL) && + stream->timing.pix_clk_100hz > 480000; + + /* Count ODM pipes */ + for (odm_pipe = otg_master->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) + opp_cnt++; + + int num_slices_h = stream->timing.dsc_cfg.num_slices_h / opp_cnt; + + /* Step 1: Set DTO DSCCLK for main DSC if needed */ + if (should_use_dto_dscclk) { + hwss_add_dccg_set_dto_dscclk(seq_state, dc->res_pool->dccg, + otg_master->stream_res.dsc->inst, num_slices_h); + } + + /* Step 2: Calculate and set DSC config for main DSC */ + last_dsc_calc = *seq_state->num_steps; + hwss_add_dsc_calculate_and_set_config(seq_state, otg_master, true, opp_cnt); + + /* Step 3: Enable main DSC block */ + hwss_add_dsc_enable_with_opp(seq_state, otg_master); + + /* Step 4: Configure and enable ODM DSC blocks */ + for (odm_pipe = otg_master->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { + if (!odm_pipe->stream_res.dsc) + continue; + + /* Set DTO DSCCLK for ODM DSC if needed */ + if (should_use_dto_dscclk) { + hwss_add_dccg_set_dto_dscclk(seq_state, dc->res_pool->dccg, + 
odm_pipe->stream_res.dsc->inst, num_slices_h); + } + + /* Calculate and set DSC config for ODM DSC */ + last_dsc_calc = *seq_state->num_steps; + hwss_add_dsc_calculate_and_set_config(seq_state, odm_pipe, true, opp_cnt); + + /* Enable ODM DSC block */ + hwss_add_dsc_enable_with_opp(seq_state, odm_pipe); + } + + /* Step 5: Configure DSC in timing generator */ + hwss_add_tg_set_dsc_config(seq_state, otg_master->stream_res.tg, + &seq_state->steps[last_dsc_calc].params.dsc_calculate_and_set_config_params.dsc_optc_cfg, true); + } else if (otg_master->stream_res.dsc && !otg_master->stream->timing.flags.DSC) { + /* Disable DSC in OPTC */ + hwss_add_tg_set_dsc_config(seq_state, otg_master->stream_res.tg, NULL, false); + + hwss_add_dsc_disconnect(seq_state, otg_master->stream_res.dsc); + } + + /* Disable DSC for old pipes that no longer need it */ + if (old_otg_master && old_otg_master->stream_res.dsc) { + for (i = 0; i < old_opp_head_count; i++) { + old_pipe = old_opp_heads[i]; + new_pipe = &context->res_ctx.pipe_ctx[old_pipe->pipe_idx]; + + /* If old pipe had DSC but new pipe doesn't, disable the old DSC */ + if (old_pipe->stream_res.dsc && !new_pipe->stream_res.dsc) { + /* Then disconnect DSC block */ + hwss_add_dsc_disconnect(seq_state, old_pipe->stream_res.dsc); + } + } + } +} + +void dcn401_update_odm_sequence(struct dc *dc, struct dc_state *context, + struct pipe_ctx *otg_master, struct block_sequence_state *seq_state) +{ + struct pipe_ctx *opp_heads[MAX_PIPES]; + int opp_inst[MAX_PIPES] = {0}; + int opp_head_count; + int odm_slice_width = resource_get_odm_slice_dst_width(otg_master, false); + int last_odm_slice_width = resource_get_odm_slice_dst_width(otg_master, true); + int i; + + opp_head_count = resource_get_opp_heads_for_otg_master( + otg_master, &context->res_ctx, opp_heads); + + for (i = 0; i < opp_head_count; i++) + opp_inst[i] = opp_heads[i]->stream_res.opp->inst; + + /* Add ODM combine/bypass operation to sequence */ + if (opp_head_count > 1) { + hwss_add_optc_set_odm_combine(seq_state, otg_master->stream_res.tg, opp_inst, + opp_head_count, odm_slice_width, last_odm_slice_width); + } else { + hwss_add_optc_set_odm_bypass(seq_state, otg_master->stream_res.tg, &otg_master->stream->timing); + } + + /* Add OPP operations to sequence */ + for (i = 0; i < opp_head_count; i++) { + /* Add OPP pipe clock control operation */ + hwss_add_opp_pipe_clock_control(seq_state, opp_heads[i]->stream_res.opp, true); + + /* Add OPP program left edge extra pixel operation */ + hwss_add_opp_program_left_edge_extra_pixel(seq_state, opp_heads[i]->stream_res.opp, + opp_heads[i]->stream->timing.pixel_encoding, resource_is_pipe_type(opp_heads[i], OTG_MASTER)); + } + + /* Add DSC update operations to sequence */ + dcn401_add_dsc_sequence_for_odm_change(dc, context, otg_master, seq_state); + + /* Add blank pixel data operation if needed */ + if (!resource_is_pipe_type(otg_master, DPP_PIPE)) { + if (dc->hwseq->funcs.blank_pixel_data_sequence) + dc->hwseq->funcs.blank_pixel_data_sequence( + dc, otg_master, true, seq_state); + } +} + void dcn401_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings) { @@ -2089,6 +2229,148 @@ void dcn401_program_pipe( } } +/* + * dcn401_program_pipe_sequence - Sequence-based version of dcn401_program_pipe + * + * This function creates a sequence-based version of the original dcn401_program_pipe + * function. 
Instead of directly calling hardware programming functions, it appends + * sequence steps to the provided block_sequence array that can later be executed + * as part of hwss_execute_sequence. + * + */ +void dcn401_program_pipe_sequence( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct block_sequence_state *seq_state) +{ + struct dce_hwseq *hws = dc->hwseq; + + /* Only need to unblank on top pipe */ + if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) { + if (pipe_ctx->update_flags.bits.enable || + pipe_ctx->update_flags.bits.odm || + pipe_ctx->stream->update_flags.bits.abm_level) { + if (dc->hwseq->funcs.blank_pixel_data_sequence) + dc->hwseq->funcs.blank_pixel_data_sequence(dc, pipe_ctx, + !pipe_ctx->plane_state || !pipe_ctx->plane_state->visible, + seq_state); + } + } + + /* Only update TG on top pipe */ + if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe + && !pipe_ctx->prev_odm_pipe) { + + /* Step 1: Program global sync */ + hwss_add_tg_program_global_sync(seq_state, pipe_ctx->stream_res.tg, + dcn401_calculate_vready_offset_for_group(pipe_ctx), + (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels, + (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines); + + /* Step 2: Wait for VACTIVE state (if not phantom pipe) */ + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { + hwss_add_tg_wait_for_state(seq_state, pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); + } + + /* Step 3: Set VTG params */ + hwss_add_tg_set_vtg_params(seq_state, pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); + + /* Step 4: Setup vupdate interrupt (if available) */ + if (hws->funcs.setup_vupdate_interrupt) { + dcn401_setup_vupdate_interrupt_sequence(dc, pipe_ctx, seq_state); + } + } + + if (pipe_ctx->update_flags.bits.odm) { + if (hws->funcs.update_odm_sequence) + hws->funcs.update_odm_sequence(dc, context, pipe_ctx, seq_state); + } + + if (pipe_ctx->update_flags.bits.enable) { + if (dc->hwss.enable_plane_sequence) + dc->hwss.enable_plane_sequence(dc, pipe_ctx, context, seq_state); + } + + if (pipe_ctx->update_flags.bits.det_size) { + if (dc->res_pool->hubbub->funcs->program_det_size) { + hwss_add_hubp_program_det_size(seq_state, dc->res_pool->hubbub, + pipe_ctx->plane_res.hubp->inst, pipe_ctx->det_buffer_size_kb); + } + + if (dc->res_pool->hubbub->funcs->program_det_segments) { + hwss_add_hubp_program_det_segments(seq_state, dc->res_pool->hubbub, + pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); + } + } + + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw || + pipe_ctx->plane_state->update_flags.raw || + pipe_ctx->stream->update_flags.raw)) { + + if (dc->hwss.update_dchubp_dpp_sequence) + dc->hwss.update_dchubp_dpp_sequence(dc, pipe_ctx, context, seq_state); + } + + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || + pipe_ctx->plane_state->update_flags.bits.hdr_mult)) { + + hws->funcs.set_hdr_multiplier_sequence(pipe_ctx, seq_state); + } + + if (pipe_ctx->plane_state && + (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + pipe_ctx->plane_state->update_flags.bits.gamma_change || + pipe_ctx->plane_state->update_flags.bits.lut_3d || + pipe_ctx->update_flags.bits.enable)) { + + hwss_add_dpp_set_input_transfer_func(seq_state, dc, pipe_ctx, pipe_ctx->plane_state); + } + + /* dcn10_translate_regamma_to_hw_format takes 750us to 
finish,
+	 * so only do gamma programming when powering on; an internal memcmp avoids
+	 * updating on slave planes
+	 */
+	if (pipe_ctx->update_flags.bits.enable ||
+			pipe_ctx->update_flags.bits.plane_changed ||
+			pipe_ctx->stream->update_flags.bits.out_tf) {
+		hwss_add_dpp_set_output_transfer_func(seq_state, dc, pipe_ctx, pipe_ctx->stream);
+	}
+
+	/* If the pipe has been enabled or has a different opp, we
+	 * should reprogram the fmt. This deals with cases where
+	 * interaction between mpc and odm combine on different streams
+	 * causes a different pipe to be chosen to odm combine with.
+	 */
+	if (pipe_ctx->update_flags.bits.enable
+			|| pipe_ctx->update_flags.bits.opp_changed) {
+
+		hwss_add_opp_set_dyn_expansion(seq_state, pipe_ctx->stream_res.opp, COLOR_SPACE_YCBCR601, pipe_ctx->stream->timing.display_color_depth, pipe_ctx->stream->signal);
+
+		hwss_add_opp_program_fmt(seq_state, pipe_ctx->stream_res.opp, &pipe_ctx->stream->bit_depth_params, &pipe_ctx->stream->clamping);
+	}
+
+	/* Set ABM pipe after the other pipe configurations are done */
+	if ((pipe_ctx->plane_state && pipe_ctx->plane_state->visible)) {
+		if (pipe_ctx->stream_res.abm) {
+			hwss_add_abm_set_pipe(seq_state, dc, pipe_ctx);
+
+			hwss_add_abm_set_level(seq_state, pipe_ctx->stream_res.abm, pipe_ctx->stream->abm_level);
+		}
+	}
+
+	if (pipe_ctx->update_flags.bits.test_pattern_changed) {
+		struct output_pixel_processor *odm_opp = pipe_ctx->stream_res.opp;
+
+		hwss_add_opp_program_bit_depth_reduction(seq_state, odm_opp, true, pipe_ctx);
+
+		hwss_add_opp_set_disp_pattern_generator(seq_state, odm_opp, pipe_ctx->stream_res.test_pattern_params.test_pattern, pipe_ctx->stream_res.test_pattern_params.color_space, pipe_ctx->stream_res.test_pattern_params.color_depth, (struct tg_color){0}, false, pipe_ctx->stream_res.test_pattern_params.width, pipe_ctx->stream_res.test_pattern_params.height, pipe_ctx->stream_res.test_pattern_params.offset);
+	}
+
+}
+
 void dcn401_program_front_end_for_ctx(
 	struct dc *dc,
 	struct dc_state *context)
@@ -2166,7 +2448,6 @@ void dcn401_program_front_end_for_ctx(
 				&& context->res_ctx.pipe_ctx[i].stream)
 			hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true);
-
 	/* Disconnect mpcc */
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
 		if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
@@ -2245,11 +2526,11 @@ void dcn401_program_front_end_for_ctx(
 		/* Avoid underflow by check of pipe line read when adding 2nd plane. */
 		if (hws->wa.wait_hubpret_read_start_during_mpo_transition &&
-				!pipe->top_pipe &&
-				pipe->stream &&
-				pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start &&
-				dc->current_state->stream_status[0].plane_count == 1 &&
-				context->stream_status[0].plane_count > 1) {
+		    !pipe->top_pipe &&
+		    pipe->stream &&
+		    pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start &&
+		    dc->current_state->stream_status[0].plane_count == 1 &&
+		    context->stream_status[0].plane_count > 1) {
 			pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start(pipe->plane_res.hubp);
 		}
 	}
@@ -2361,7 +2642,6 @@ void dcn401_post_unlock_program_front_end(
 	 */
 	if (hwseq->funcs.update_force_pstate)
 		dc->hwseq->funcs.update_force_pstate(dc, context);
-
 	/* Only program the MALL registers after all the main and phantom pipes
 	 * are done programming.
*/ @@ -2731,3 +3011,1061 @@ void dcn401_update_cursor_offload_pipe(struct dc *dc, const struct pipe_ctx *pip cs->offload_streams[stream_idx].payloads[payload_idx].pipe_mask |= (1u << pipe->pipe_idx); } + +void dcn401_plane_atomic_power_down_sequence(struct dc *dc, + struct dpp *dpp, + struct hubp *hubp, + struct block_sequence_state *seq_state) +{ + struct dce_hwseq *hws = dc->hwseq; + uint32_t org_ip_request_cntl = 0; + + DC_LOGGER_INIT(dc->ctx->logger); + + /* Check and set DC_IP_REQUEST_CNTL if needed */ + if (REG(DC_IP_REQUEST_CNTL)) { + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + if (org_ip_request_cntl == 0) { + hwss_add_dc_ip_request_cntl(seq_state, dc, true); + } + } + + /* DPP power gating control */ + hwss_add_dpp_pg_control(seq_state, hws, dpp->inst, false); + + /* HUBP power gating control */ + hwss_add_hubp_pg_control(seq_state, hws, hubp->inst, false); + + /* HUBP reset */ + hwss_add_hubp_reset(seq_state, hubp); + + /* DPP reset */ + hwss_add_dpp_reset(seq_state, dpp); + + /* Restore DC_IP_REQUEST_CNTL if it was originally 0 */ + if (org_ip_request_cntl == 0 && REG(DC_IP_REQUEST_CNTL)) { + hwss_add_dc_ip_request_cntl(seq_state, dc, false); + } + + DC_LOG_DEBUG("Power gated front end %d\n", hubp->inst); + + /* DPP root clock control */ + hwss_add_dpp_root_clock_control(seq_state, hws, dpp->inst, false); +} + +/* trigger HW to start disconnect plane from stream on the next vsync using block sequence */ +void dcn401_plane_atomic_disconnect_sequence(struct dc *dc, + struct dc_state *state, + struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state) +{ + struct hubp *hubp = pipe_ctx->plane_res.hubp; + int dpp_id = pipe_ctx->plane_res.dpp->inst; + struct mpc *mpc = dc->res_pool->mpc; + struct mpc_tree *mpc_tree_params; + struct mpcc *mpcc_to_remove = NULL; + struct output_pixel_processor *opp = pipe_ctx->stream_res.opp; + + mpc_tree_params = &(opp->mpc_tree_params); + mpcc_to_remove = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, dpp_id); + + /*Already reset*/ + if (mpcc_to_remove == NULL) + return; + + /* Step 1: Remove MPCC from MPC tree */ + hwss_add_mpc_remove_mpcc(seq_state, mpc, mpc_tree_params, mpcc_to_remove); + + // Phantom pipes have OTG disabled by default, so MPCC_STATUS will never assert idle, + // so don't wait for MPCC_IDLE in the programming sequence + if (dc_state_get_pipe_subvp_type(state, pipe_ctx) != SUBVP_PHANTOM) { + /* Step 2: Set MPCC disconnect pending flag */ + hwss_add_opp_set_mpcc_disconnect_pending(seq_state, opp, pipe_ctx->plane_res.mpcc_inst, true); + } + + /* Step 3: Set optimized required flag */ + hwss_add_dc_set_optimized_required(seq_state, dc, true); + + /* Step 4: Disconnect HUBP if function exists */ + if (hubp->funcs->hubp_disconnect) { + hwss_add_hubp_disconnect(seq_state, hubp); + } + + /* Step 5: Verify pstate change high if debug sanity checks are enabled */ + if (dc->debug.sanity_checks) { + dc->hwseq->funcs.verify_allow_pstate_change_high_sequence(dc, seq_state); + } +} + +void dcn401_blank_pixel_data_sequence( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + bool blank, + struct block_sequence_state *seq_state) +{ + struct tg_color black_color = {0}; + struct stream_resource *stream_res = &pipe_ctx->stream_res; + struct dc_stream_state *stream = pipe_ctx->stream; + enum dc_color_space color_space = stream->output_color_space; + enum controller_dp_test_pattern test_pattern = CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR; + enum controller_dp_color_space test_pattern_color_space = 
CONTROLLER_DP_COLOR_SPACE_UDEFINED; + struct pipe_ctx *odm_pipe; + struct rect odm_slice_src; + + if (stream->link->test_pattern_enabled) + return; + + /* get opp dpg blank color */ + color_space_to_black_color(dc, color_space, &black_color); + + if (blank) { + /* Set ABM immediate disable */ + hwss_add_abm_set_immediate_disable(seq_state, dc, pipe_ctx); + + if (dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE) { + test_pattern = CONTROLLER_DP_TEST_PATTERN_COLORSQUARES; + test_pattern_color_space = CONTROLLER_DP_COLOR_SPACE_RGB; + } + } else { + test_pattern = CONTROLLER_DP_TEST_PATTERN_VIDEOMODE; + } + + odm_pipe = pipe_ctx; + + /* Set display pattern generator for all ODM pipes */ + while (odm_pipe->next_odm_pipe) { + odm_slice_src = resource_get_odm_slice_src_rect(odm_pipe); + + hwss_add_opp_set_disp_pattern_generator(seq_state, + odm_pipe->stream_res.opp, + test_pattern, + test_pattern_color_space, + stream->timing.display_color_depth, + black_color, + true, + odm_slice_src.width, + odm_slice_src.height, + odm_slice_src.x); + + odm_pipe = odm_pipe->next_odm_pipe; + } + + /* Set display pattern generator for final ODM pipe */ + odm_slice_src = resource_get_odm_slice_src_rect(odm_pipe); + + hwss_add_opp_set_disp_pattern_generator(seq_state, + odm_pipe->stream_res.opp, + test_pattern, + test_pattern_color_space, + stream->timing.display_color_depth, + black_color, + true, + odm_slice_src.width, + odm_slice_src.height, + odm_slice_src.x); + + /* Handle ABM level setting when not blanking */ + if (!blank) { + if (stream_res->abm) { + /* Set pipe for ABM */ + hwss_add_abm_set_pipe(seq_state, dc, pipe_ctx); + + /* Set ABM level */ + hwss_add_abm_set_level(seq_state, stream_res->abm, stream->abm_level); + } + } +} + +void dcn401_program_all_writeback_pipes_in_tree_sequence( + struct dc *dc, + const struct dc_stream_state *stream, + struct dc_state *context, + struct block_sequence_state *seq_state) +{ + struct dwbc *dwb; + int i_wb, i_pipe; + + if (!stream || stream->num_wb_info > dc->res_pool->res_cap->num_dwb) + return; + + /* For each writeback pipe */ + for (i_wb = 0; i_wb < stream->num_wb_info; i_wb++) { + /* Get direct pointer to writeback info */ + struct dc_writeback_info *wb_info = (struct dc_writeback_info *)&stream->writeback_info[i_wb]; + int mpcc_inst = -1; + + if (wb_info->wb_enabled) { + /* Get the MPCC instance for writeback_source_plane */ + for (i_pipe = 0; i_pipe < dc->res_pool->pipe_count; i_pipe++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i_pipe]; + + if (!pipe_ctx->plane_state) + continue; + + if (pipe_ctx->plane_state == wb_info->writeback_source_plane) { + mpcc_inst = pipe_ctx->plane_res.mpcc_inst; + break; + } + } + + if (mpcc_inst == -1) { + /* Disable writeback pipe and disconnect from MPCC + * if source plane has been removed + */ + dcn401_disable_writeback_sequence(dc, wb_info, seq_state); + continue; + } + + ASSERT(wb_info->dwb_pipe_inst < dc->res_pool->res_cap->num_dwb); + dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst]; + + if (dwb->funcs->is_enabled(dwb)) { + /* Writeback pipe already enabled, only need to update */ + dcn401_update_writeback_sequence(dc, wb_info, context, seq_state); + } else { + /* Enable writeback pipe and connect to MPCC */ + dcn401_enable_writeback_sequence(dc, wb_info, context, mpcc_inst, seq_state); + } + } else { + /* Disable writeback pipe and disconnect from MPCC */ + dcn401_disable_writeback_sequence(dc, wb_info, seq_state); + } + } +} + +void dcn401_enable_writeback_sequence( + struct dc *dc, + struct 
dc_writeback_info *wb_info, + struct dc_state *context, + int mpcc_inst, + struct block_sequence_state *seq_state) +{ + struct dwbc *dwb; + struct mcif_wb *mcif_wb; + + if (!wb_info->wb_enabled || wb_info->dwb_pipe_inst >= dc->res_pool->res_cap->num_dwb) + return; + + dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst]; + mcif_wb = dc->res_pool->mcif_wb[wb_info->dwb_pipe_inst]; + + /* Update DWBC with new parameters */ + hwss_add_dwbc_update(seq_state, dwb, &wb_info->dwb_params); + + /* Configure MCIF_WB buffer settings */ + hwss_add_mcif_wb_config_buf(seq_state, mcif_wb, &wb_info->mcif_buf_params, wb_info->dwb_params.dest_height); + + /* Configure MCIF_WB arbitration */ + hwss_add_mcif_wb_config_arb(seq_state, mcif_wb, &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[wb_info->dwb_pipe_inst]); + + /* Enable MCIF_WB */ + hwss_add_mcif_wb_enable(seq_state, mcif_wb); + + /* Set DWB MUX to connect writeback to MPCC */ + hwss_add_mpc_set_dwb_mux(seq_state, dc->res_pool->mpc, wb_info->dwb_pipe_inst, mpcc_inst); + + /* Enable DWBC */ + hwss_add_dwbc_enable(seq_state, dwb, &wb_info->dwb_params); +} + +void dcn401_disable_writeback_sequence( + struct dc *dc, + struct dc_writeback_info *wb_info, + struct block_sequence_state *seq_state) +{ + struct dwbc *dwb; + struct mcif_wb *mcif_wb; + + if (wb_info->dwb_pipe_inst >= dc->res_pool->res_cap->num_dwb) + return; + + dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst]; + mcif_wb = dc->res_pool->mcif_wb[wb_info->dwb_pipe_inst]; + + /* Disable DWBC */ + hwss_add_dwbc_disable(seq_state, dwb); + + /* Disable DWB MUX */ + hwss_add_mpc_disable_dwb_mux(seq_state, dc->res_pool->mpc, wb_info->dwb_pipe_inst); + + /* Disable MCIF_WB */ + hwss_add_mcif_wb_disable(seq_state, mcif_wb); +} + +void dcn401_update_writeback_sequence( + struct dc *dc, + struct dc_writeback_info *wb_info, + struct dc_state *context, + struct block_sequence_state *seq_state) +{ + struct dwbc *dwb; + struct mcif_wb *mcif_wb; + + if (!wb_info->wb_enabled || wb_info->dwb_pipe_inst >= dc->res_pool->res_cap->num_dwb) + return; + + dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst]; + mcif_wb = dc->res_pool->mcif_wb[wb_info->dwb_pipe_inst]; + + /* Update writeback pipe */ + hwss_add_dwbc_update(seq_state, dwb, &wb_info->dwb_params); + + /* Update MCIF_WB buffer settings if needed */ + hwss_add_mcif_wb_config_buf(seq_state, mcif_wb, &wb_info->mcif_buf_params, wb_info->dwb_params.dest_height); +} + +static int find_free_gsl_group(const struct dc *dc) +{ + if (dc->res_pool->gsl_groups.gsl_0 == 0) + return 1; + if (dc->res_pool->gsl_groups.gsl_1 == 0) + return 2; + if (dc->res_pool->gsl_groups.gsl_2 == 0) + return 3; + + return 0; +} + +void dcn401_setup_gsl_group_as_lock_sequence( + const struct dc *dc, + struct pipe_ctx *pipe_ctx, + bool enable, + struct block_sequence_state *seq_state) +{ + struct gsl_params gsl; + int group_idx; + + memset(&gsl, 0, sizeof(struct gsl_params)); + + if (enable) { + /* return if group already assigned since GSL was set up + * for vsync flip, we would unassign so it can't be "left over" + */ + if (pipe_ctx->stream_res.gsl_group > 0) + return; + + group_idx = find_free_gsl_group(dc); + ASSERT(group_idx != 0); + pipe_ctx->stream_res.gsl_group = group_idx; + + /* set gsl group reg field and mark resource used */ + switch (group_idx) { + case 1: + gsl.gsl0_en = 1; + dc->res_pool->gsl_groups.gsl_0 = 1; + break; + case 2: + gsl.gsl1_en = 1; + dc->res_pool->gsl_groups.gsl_1 = 1; + break; + case 3: + gsl.gsl2_en = 1; + dc->res_pool->gsl_groups.gsl_2 = 1; + break; + default: + 
BREAK_TO_DEBUGGER(); + return; // invalid case + } + gsl.gsl_master_en = 1; + } else { + group_idx = pipe_ctx->stream_res.gsl_group; + if (group_idx == 0) + return; // if not in use, just return + + pipe_ctx->stream_res.gsl_group = 0; + + /* unset gsl group reg field and mark resource free */ + switch (group_idx) { + case 1: + gsl.gsl0_en = 0; + dc->res_pool->gsl_groups.gsl_0 = 0; + break; + case 2: + gsl.gsl1_en = 0; + dc->res_pool->gsl_groups.gsl_1 = 0; + break; + case 3: + gsl.gsl2_en = 0; + dc->res_pool->gsl_groups.gsl_2 = 0; + break; + default: + BREAK_TO_DEBUGGER(); + return; + } + gsl.gsl_master_en = 0; + } + + hwss_add_tg_set_gsl(seq_state, pipe_ctx->stream_res.tg, gsl); + hwss_add_tg_set_gsl_source_select(seq_state, pipe_ctx->stream_res.tg, group_idx, enable ? 4 : 0); +} + +void dcn401_disable_plane_sequence( + struct dc *dc, + struct dc_state *state, + struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state) +{ + bool is_phantom = dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM; + struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL; + + if (!pipe_ctx->plane_res.hubp || pipe_ctx->plane_res.hubp->power_gated) + return; + + /* Wait for MPCC disconnect */ + if (dc->hwss.wait_for_mpcc_disconnect_sequence) + dc->hwss.wait_for_mpcc_disconnect_sequence(dc, dc->res_pool, pipe_ctx, seq_state); + + /* In flip immediate with pipe splitting case GSL is used for synchronization + * so we must disable it when the plane is disabled. + */ + if (pipe_ctx->stream_res.gsl_group != 0) { + dcn401_setup_gsl_group_as_lock_sequence(dc, pipe_ctx, false, seq_state); + } + + /* Update HUBP mall sel */ + if (pipe_ctx->plane_res.hubp && pipe_ctx->plane_res.hubp->funcs->hubp_update_mall_sel) { + hwss_add_hubp_update_mall_sel(seq_state, pipe_ctx->plane_res.hubp, 0, false); + } + + /* Set flip control GSL */ + hwss_add_hubp_set_flip_control_gsl(seq_state, pipe_ctx->plane_res.hubp, false); + + /* HUBP clock control */ + hwss_add_hubp_clk_cntl(seq_state, pipe_ctx->plane_res.hubp, false); + + /* DPP clock control */ + hwss_add_dpp_dppclk_control(seq_state, pipe_ctx->plane_res.dpp, false, false); + + /* Plane atomic power down */ + if (dc->hwseq->funcs.plane_atomic_power_down_sequence) + dc->hwseq->funcs.plane_atomic_power_down_sequence(dc, pipe_ctx->plane_res.dpp, + pipe_ctx->plane_res.hubp, seq_state); + + pipe_ctx->stream = NULL; + memset(&pipe_ctx->stream_res, 0, sizeof(pipe_ctx->stream_res)); + memset(&pipe_ctx->plane_res, 0, sizeof(pipe_ctx->plane_res)); + pipe_ctx->top_pipe = NULL; + pipe_ctx->bottom_pipe = NULL; + pipe_ctx->prev_odm_pipe = NULL; + pipe_ctx->next_odm_pipe = NULL; + pipe_ctx->plane_state = NULL; + + /* Turn back off the phantom OTG after the phantom plane is fully disabled */ + if (is_phantom && tg && tg->funcs->disable_phantom_crtc) { + hwss_add_disable_phantom_crtc(seq_state, tg); + } +} + +void dcn401_post_unlock_reset_opp_sequence( + struct dc *dc, + struct pipe_ctx *opp_head, + struct block_sequence_state *seq_state) +{ + struct display_stream_compressor *dsc = opp_head->stream_res.dsc; + struct dccg *dccg = dc->res_pool->dccg; + + /* Wait for all DPP pipes in current mpc blending tree completes double + * buffered disconnection before resetting OPP + */ + if (dc->hwss.wait_for_mpcc_disconnect_sequence) + dc->hwss.wait_for_mpcc_disconnect_sequence(dc, dc->res_pool, opp_head, seq_state); + + if (dsc) { + bool *is_ungated = NULL; + /* Check DSC power gate status */ + if (dc->hwseq && dc->hwseq->funcs.dsc_pg_status) { + 
hwss_add_dsc_pg_status(seq_state, dc->hwseq, dsc->inst, false); + } + + /* Seamless update specific where we will postpone non + * double buffered DSCCLK disable logic in post unlock + * sequence after DSC is disconnected from OPP but not + * yet power gated. + */ + + /* DSC wait disconnect pending clear */ + hwss_add_dsc_wait_disconnect_pending_clear(seq_state, dsc, is_ungated); + + /* DSC disable */ + hwss_add_dsc_disable(seq_state, dsc, is_ungated); + + /* Set reference DSCCLK */ + if (dccg && dccg->funcs->set_ref_dscclk) { + hwss_add_dccg_set_ref_dscclk(seq_state, dccg, dsc->inst, 0); + } + } +} + +void dcn401_dc_ip_request_cntl(struct dc *dc, bool enable) +{ + struct dce_hwseq *hws = dc->hwseq; + + if (REG(DC_IP_REQUEST_CNTL)) { + REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, enable ? 1 : 0); + } +} + +void dcn401_enable_plane_sequence(struct dc *dc, struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct block_sequence_state *seq_state) +{ + struct dce_hwseq *hws = dc->hwseq; + uint32_t org_ip_request_cntl = 0; + + if (!pipe_ctx->plane_res.dpp || !pipe_ctx->plane_res.hubp || !pipe_ctx->stream_res.opp) { + return; + } + + if (REG(DC_IP_REQUEST_CNTL)) + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + + /* Step 1: DPP root clock control - enable clock */ + if (hws->funcs.dpp_root_clock_control) { + hwss_add_dpp_root_clock_control(seq_state, hws, pipe_ctx->plane_res.dpp->inst, true); + } + + /* Step 2: Enable DC IP request (if needed) */ + if (hws->funcs.dc_ip_request_cntl) { + hwss_add_dc_ip_request_cntl(seq_state, dc, true); + } + + /* Step 3: DPP power gating control - power on */ + if (REG(DC_IP_REQUEST_CNTL) && hws->funcs.dpp_pg_control) { + hwss_add_dpp_pg_control(seq_state, hws, pipe_ctx->plane_res.dpp->inst, true); + } + + /* Step 4: HUBP power gating control - power on */ + if (REG(DC_IP_REQUEST_CNTL) && hws->funcs.hubp_pg_control) { + hwss_add_hubp_pg_control(seq_state, hws, pipe_ctx->plane_res.hubp->inst, true); + } + + /* Step 5: Disable DC IP request (restore state) */ + if (org_ip_request_cntl == 0 && hws->funcs.dc_ip_request_cntl) { + hwss_add_dc_ip_request_cntl(seq_state, dc, false); + } + + /* Step 6: HUBP clock control - enable DCFCLK */ + if (pipe_ctx->plane_res.hubp->funcs->hubp_clk_cntl) { + hwss_add_hubp_clk_cntl(seq_state, pipe_ctx->plane_res.hubp, true); + } + + /* Step 7: HUBP initialization */ + if (pipe_ctx->plane_res.hubp->funcs->hubp_init) { + hwss_add_hubp_init(seq_state, pipe_ctx->plane_res.hubp); + } + + /* Step 8: OPP pipe clock control - enable */ + if (pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control) { + hwss_add_opp_pipe_clock_control(seq_state, pipe_ctx->stream_res.opp, true); + } + + /* Step 9: VM system aperture settings */ + if (dc->vm_pa_config.valid && pipe_ctx->plane_res.hubp->funcs->hubp_set_vm_system_aperture_settings) { + hwss_add_hubp_set_vm_system_aperture_settings(seq_state, pipe_ctx->plane_res.hubp, 0, + dc->vm_pa_config.system_aperture.start_addr, dc->vm_pa_config.system_aperture.end_addr); + } + + /* Step 10: Flip interrupt setup */ + if (!pipe_ctx->top_pipe + && pipe_ctx->plane_state + && pipe_ctx->plane_state->flip_int_enabled + && pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int) { + hwss_add_hubp_set_flip_int(seq_state, pipe_ctx->plane_res.hubp); + } +} + +void dcn401_update_dchubp_dpp_sequence(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct block_sequence_state *seq_state) +{ + struct dce_hwseq *hws = dc->hwseq; + struct hubp *hubp = pipe_ctx->plane_res.hubp; + 
struct dpp *dpp = pipe_ctx->plane_res.dpp; + struct dc_plane_state *plane_state = pipe_ctx->plane_state; + struct dccg *dccg = dc->res_pool->dccg; + bool viewport_changed = false; + enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe_ctx); + + if (!hubp || !dpp || !plane_state) { + return; + } + + /* Step 1: DPP DPPCLK control */ + if (pipe_ctx->update_flags.bits.dppclk) { + hwss_add_dpp_dppclk_control(seq_state, dpp, false, true); + } + + /* Step 2: DCCG update DPP DTO */ + if (pipe_ctx->update_flags.bits.enable) { + hwss_add_dccg_update_dpp_dto(seq_state, dccg, dpp->inst, pipe_ctx->plane_res.bw.dppclk_khz); + } + + /* Step 3: HUBP VTG selection */ + if (pipe_ctx->update_flags.bits.hubp_rq_dlg_ttu) { + hwss_add_hubp_vtg_sel(seq_state, hubp, pipe_ctx->stream_res.tg->inst); + + /* Step 4: HUBP setup (choose setup2 or setup) */ + if (hubp->funcs->hubp_setup2) { + hwss_add_hubp_setup2(seq_state, hubp, &pipe_ctx->hubp_regs, + &pipe_ctx->global_sync, &pipe_ctx->stream->timing); + } else if (hubp->funcs->hubp_setup) { + hwss_add_hubp_setup(seq_state, hubp, &pipe_ctx->dlg_regs, + &pipe_ctx->ttu_regs, &pipe_ctx->rq_regs, &pipe_ctx->pipe_dlg_param); + } + } + + /* Step 5: Set unbounded requesting */ + if (pipe_ctx->update_flags.bits.unbounded_req && hubp->funcs->set_unbounded_requesting) { + hwss_add_hubp_set_unbounded_requesting(seq_state, hubp, pipe_ctx->unbounded_req); + } + + /* Step 6: HUBP interdependent setup */ + if (pipe_ctx->update_flags.bits.hubp_interdependent) { + if (hubp->funcs->hubp_setup_interdependent2) { + hwss_add_hubp_setup_interdependent2(seq_state, hubp, &pipe_ctx->hubp_regs); + } else if (hubp->funcs->hubp_setup_interdependent) { + hwss_add_hubp_setup_interdependent(seq_state, hubp, &pipe_ctx->dlg_regs, &pipe_ctx->ttu_regs); + } + } + + /* Step 7: DPP setup - input CSC and format setup */ + if (pipe_ctx->update_flags.bits.enable || + pipe_ctx->update_flags.bits.plane_changed || + plane_state->update_flags.bits.bpp_change || + plane_state->update_flags.bits.input_csc_change || + plane_state->update_flags.bits.color_space_change || + plane_state->update_flags.bits.coeff_reduction_change) { + hwss_add_dpp_setup_dpp(seq_state, pipe_ctx); + + /* Step 8: DPP cursor matrix setup */ + if (dpp->funcs->set_cursor_matrix) { + hwss_add_dpp_set_cursor_matrix(seq_state, dpp, plane_state->color_space, + &plane_state->cursor_csc_color_matrix); + } + + /* Step 9: DPP program bias and scale */ + if (dpp->funcs->dpp_program_bias_and_scale) { + hwss_add_dpp_program_bias_and_scale(seq_state, pipe_ctx); + } + } + + /* Step 10: MPCC updates */ + if (pipe_ctx->update_flags.bits.mpcc || + pipe_ctx->update_flags.bits.plane_changed || + plane_state->update_flags.bits.global_alpha_change || + plane_state->update_flags.bits.per_pixel_alpha_change) { + + /* Check if update_mpcc_sequence is implemented and prefer it over single MPC_UPDATE_MPCC step */ + if (hws->funcs.update_mpcc_sequence) + hws->funcs.update_mpcc_sequence(dc, pipe_ctx, seq_state); + } + + /* Step 11: DPP scaler setup */ + if (pipe_ctx->update_flags.bits.scaler || + plane_state->update_flags.bits.scaling_change || + plane_state->update_flags.bits.position_change || + plane_state->update_flags.bits.per_pixel_alpha_change || + pipe_ctx->stream->update_flags.bits.scaling) { + pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->plane_state->per_pixel_alpha; + ASSERT(pipe_ctx->plane_res.scl_data.lb_params.depth == LB_PIXEL_DEPTH_36BPP); + hwss_add_dpp_set_scaler(seq_state, pipe_ctx->plane_res.dpp, 
&pipe_ctx->plane_res.scl_data); + } + + /* Step 12: HUBP viewport programming */ + if (pipe_ctx->update_flags.bits.viewport || + (context == dc->current_state && plane_state->update_flags.bits.position_change) || + (context == dc->current_state && plane_state->update_flags.bits.scaling_change) || + (context == dc->current_state && pipe_ctx->stream->update_flags.bits.scaling)) { + hwss_add_hubp_mem_program_viewport(seq_state, hubp, + &pipe_ctx->plane_res.scl_data.viewport, &pipe_ctx->plane_res.scl_data.viewport_c); + viewport_changed = true; + } + + /* Step 13: HUBP program mcache if available */ + if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate) { + hwss_add_hubp_program_mcache_id(seq_state, hubp, &pipe_ctx->mcache_regs); + } + + /* Step 14: Cursor attribute setup */ + if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed || + pipe_ctx->update_flags.bits.scaler || viewport_changed == true) && + pipe_ctx->stream->cursor_attributes.address.quad_part != 0) { + + hwss_add_set_cursor_attribute(seq_state, dc, pipe_ctx); + + /* Step 15: Cursor position setup */ + hwss_add_set_cursor_position(seq_state, dc, pipe_ctx); + + /* Step 16: Cursor SDR white level */ + if (dc->hwss.set_cursor_sdr_white_level) { + hwss_add_set_cursor_sdr_white_level(seq_state, dc, pipe_ctx); + } + } + + /* Step 17: Gamut remap and output CSC */ + if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed || + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.gamut_remap || + plane_state->update_flags.bits.gamut_remap_change || + pipe_ctx->stream->update_flags.bits.out_csc) { + + /* Gamut remap */ + hwss_add_dpp_program_gamut_remap(seq_state, pipe_ctx); + + /* Output CSC */ + hwss_add_program_output_csc(seq_state, dc, pipe_ctx, pipe_ctx->stream->output_color_space, + pipe_ctx->stream->csc_color_matrix.matrix, hubp->opp_id); + } + + /* Step 18: HUBP surface configuration */ + if (pipe_ctx->update_flags.bits.enable || + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->update_flags.bits.opp_changed || + plane_state->update_flags.bits.pixel_format_change || + plane_state->update_flags.bits.horizontal_mirror_change || + plane_state->update_flags.bits.rotation_change || + plane_state->update_flags.bits.swizzle_change || + plane_state->update_flags.bits.dcc_change || + plane_state->update_flags.bits.bpp_change || + plane_state->update_flags.bits.scaling_change || + plane_state->update_flags.bits.plane_size_change) { + struct plane_size size = plane_state->plane_size; + + size.surface_size = pipe_ctx->plane_res.scl_data.viewport; + hwss_add_hubp_program_surface_config(seq_state, hubp, + plane_state->format, &plane_state->tiling_info, size, + plane_state->rotation, &plane_state->dcc, + plane_state->horizontal_mirror, 0); + hubp->power_gated = false; + } + + /* Step 19: Update plane address (with SubVP support) */ + if (pipe_ctx->update_flags.bits.enable || + pipe_ctx->update_flags.bits.plane_changed || + plane_state->update_flags.bits.addr_update) { + + /* SubVP save surface address if needed */ + if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && pipe_mall_type == SUBVP_MAIN) { + hwss_add_dmub_subvp_save_surf_addr(seq_state, dc->ctx->dmub_srv, &pipe_ctx->plane_state->address, pipe_ctx->subvp_index); + } + + /* Update plane address */ + hwss_add_hubp_update_plane_addr(seq_state, dc, pipe_ctx); + } + + /* Step 20: HUBP set blank - enable plane */ + if (pipe_ctx->update_flags.bits.enable) { + hwss_add_hubp_set_blank(seq_state, 
hubp, false); + } + + /* Step 21: Phantom HUBP post enable */ + if (pipe_mall_type == SUBVP_PHANTOM && hubp->funcs->phantom_hubp_post_enable) { + hwss_add_phantom_hubp_post_enable(seq_state, hubp); + } +} + +void dcn401_update_mpcc_sequence(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state) +{ + struct hubp *hubp = pipe_ctx->plane_res.hubp; + struct mpcc_blnd_cfg blnd_cfg = {0}; + bool per_pixel_alpha; + int mpcc_id; + struct mpcc *new_mpcc; + struct mpc *mpc = dc->res_pool->mpc; + struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); + + if (!hubp || !pipe_ctx->plane_state) { + return; + } + + per_pixel_alpha = pipe_ctx->plane_state->per_pixel_alpha; + + /* Initialize blend configuration */ + blnd_cfg.overlap_only = false; + blnd_cfg.global_gain = 0xff; + + if (per_pixel_alpha) { + blnd_cfg.pre_multiplied_alpha = pipe_ctx->plane_state->pre_multiplied_alpha; + if (pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } + } else { + blnd_cfg.pre_multiplied_alpha = false; + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + + if (pipe_ctx->plane_state->global_alpha) + blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; + else + blnd_cfg.global_alpha = 0xff; + + blnd_cfg.background_color_bpc = 4; + blnd_cfg.bottom_gain_mode = 0; + blnd_cfg.top_gain = 0x1f000; + blnd_cfg.bottom_inside_gain = 0x1f000; + blnd_cfg.bottom_outside_gain = 0x1f000; + + if (pipe_ctx->plane_state->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA) + blnd_cfg.pre_multiplied_alpha = false; + + /* MPCC instance is equal to HUBP instance */ + mpcc_id = hubp->inst; + + /* Step 1: Update blending if no full update needed */ + if (!pipe_ctx->plane_state->update_flags.bits.full_update && + !pipe_ctx->update_flags.bits.mpcc) { + + /* Update blending configuration */ + hwss_add_mpc_update_blending(seq_state, mpc, blnd_cfg, mpcc_id); + + /* Update visual confirm color */ + hwss_add_mpc_update_visual_confirm(seq_state, dc, pipe_ctx, mpcc_id); + return; + } + + /* Step 2: Get existing MPCC for DPP */ + new_mpcc = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, mpcc_id); + + /* Step 3: Remove MPCC if being used */ + if (new_mpcc != NULL) { + hwss_add_mpc_remove_mpcc(seq_state, mpc, mpc_tree_params, new_mpcc); + } else { + /* Step 4: Assert MPCC idle (debug only) */ + if (dc->debug.sanity_checks) { + hwss_add_mpc_assert_idle_mpcc(seq_state, mpc, mpcc_id); + } + } + + /* Step 5: Insert new plane into MPC tree */ + hwss_add_mpc_insert_plane(seq_state, mpc, mpc_tree_params, blnd_cfg, NULL, NULL, hubp->inst, mpcc_id); + + /* Step 6: Update visual confirm color */ + hwss_add_mpc_update_visual_confirm(seq_state, dc, pipe_ctx, mpcc_id); + + /* Step 7: Set HUBP OPP and MPCC IDs */ + hubp->opp_id = pipe_ctx->stream_res.opp->inst; + hubp->mpcc_id = mpcc_id; +} + +static struct hubp *get_hubp_by_inst(struct resource_pool *res_pool, int mpcc_inst) +{ + int i; + + for (i = 0; i < res_pool->pipe_count; i++) { + if (res_pool->hubps[i]->inst == mpcc_inst) + return res_pool->hubps[i]; + } + ASSERT(false); + return NULL; +} + +void dcn401_wait_for_mpcc_disconnect_sequence( + struct dc *dc, + struct resource_pool *res_pool, + struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state) +{ + int mpcc_inst; + + if (dc->debug.sanity_checks) { + 
dc->hwseq->funcs.verify_allow_pstate_change_high_sequence(dc, seq_state); + } + + if (!pipe_ctx->stream_res.opp) + return; + + for (mpcc_inst = 0; mpcc_inst < MAX_PIPES; mpcc_inst++) { + if (pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst]) { + struct hubp *hubp = get_hubp_by_inst(res_pool, mpcc_inst); + + if (pipe_ctx->stream_res.tg && + pipe_ctx->stream_res.tg->funcs->is_tg_enabled(pipe_ctx->stream_res.tg)) { + hwss_add_mpc_assert_idle_mpcc(seq_state, res_pool->mpc, mpcc_inst); + } + pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false; + if (hubp) { + hwss_add_hubp_set_blank(seq_state, hubp, true); + } + } + } + + if (dc->debug.sanity_checks) { + dc->hwseq->funcs.verify_allow_pstate_change_high_sequence(dc, seq_state); + } +} + +void dcn401_setup_vupdate_interrupt_sequence(struct dc *dc, struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state) +{ + struct timing_generator *tg = pipe_ctx->stream_res.tg; + int start_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx); + + if (start_line < 0) + start_line = 0; + + if (tg->funcs->setup_vertical_interrupt2) { + hwss_add_tg_setup_vertical_interrupt2(seq_state, tg, start_line); + } +} + +void dcn401_set_hdr_multiplier_sequence(struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state) +{ + struct fixed31_32 multiplier = pipe_ctx->plane_state->hdr_mult; + uint32_t hw_mult = 0x1f000; // 1.0 default multiplier + struct custom_float_format fmt; + + fmt.exponenta_bits = 6; + fmt.mantissa_bits = 12; + fmt.sign = true; + + if (!dc_fixpt_eq(multiplier, dc_fixpt_from_int(0))) // check != 0 + convert_to_custom_float_format(multiplier, &fmt, &hw_mult); + + hwss_add_dpp_set_hdr_multiplier(seq_state, pipe_ctx->plane_res.dpp, hw_mult); +} + +void dcn401_program_mall_pipe_config_sequence(struct dc *dc, struct dc_state *context, + struct block_sequence_state *seq_state) +{ + int i; + unsigned int num_ways = dcn401_calculate_cab_allocation(dc, context); + bool cache_cursor = false; + + // Don't force p-state disallow -- can't block dummy p-state + + // Update MALL_SEL register for each pipe (break down update_mall_sel call) + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct hubp *hubp = pipe->plane_res.hubp; + + if (pipe->stream && pipe->plane_state && hubp && hubp->funcs->hubp_update_mall_sel) { + int cursor_size = hubp->curs_attr.pitch * hubp->curs_attr.height; + + switch (hubp->curs_attr.color_format) { + case CURSOR_MODE_MONO: + cursor_size /= 2; + break; + case CURSOR_MODE_COLOR_1BIT_AND: + case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA: + case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA: + cursor_size *= 4; + break; + + case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED: + case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED: + default: + cursor_size *= 8; + break; + } + + if (cursor_size > 16384) + cache_cursor = true; + + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { + hwss_add_hubp_update_mall_sel(seq_state, hubp, 1, false); + } else { + // MALL not supported with Stereo3D + uint32_t mall_sel = (num_ways <= dc->caps.cache_num_ways && + pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED && + pipe->plane_state->address.type != PLN_ADDR_TYPE_GRPH_STEREO && + !pipe->plane_state->address.tmz_surface) ? 
2 : 0; + hwss_add_hubp_update_mall_sel(seq_state, hubp, mall_sel, cache_cursor); + } + } + } + + // Program FORCE_ONE_ROW_FOR_FRAME and CURSOR_REQ_MODE for main subvp pipes + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct hubp *hubp = pipe->plane_res.hubp; + + if (pipe->stream && hubp && hubp->funcs->hubp_prepare_subvp_buffering) { + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + hwss_add_hubp_prepare_subvp_buffering(seq_state, hubp, true); + } + } + } +} + +void dcn401_verify_allow_pstate_change_high_sequence(struct dc *dc, + struct block_sequence_state *seq_state) +{ + struct hubbub *hubbub = dc->res_pool->hubbub; + + if (!hubbub->funcs->verify_allow_pstate_change_high) + return; + + if (!hubbub->funcs->verify_allow_pstate_change_high(hubbub)) { + /* Attempt hardware workaround force recovery */ + dcn401_hw_wa_force_recovery_sequence(dc, seq_state); + } +} + +bool dcn401_hw_wa_force_recovery_sequence(struct dc *dc, + struct block_sequence_state *seq_state) +{ + struct hubp *hubp; + unsigned int i; + + if (!dc->debug.recovery_enabled) + return false; + + /* Step 1: Set HUBP_BLANK_EN=1 for all active pipes */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { + hubp = pipe_ctx->plane_res.hubp; + if (hubp != NULL && hubp->funcs->set_hubp_blank_en) { + hwss_add_hubp_set_blank_en(seq_state, hubp, true); + } + } + } + + /* Step 2: DCHUBBUB_GLOBAL_SOFT_RESET=1 */ + hwss_add_hubbub_soft_reset(seq_state, dc->res_pool->hubbub, hubbub1_soft_reset, true); + + /* Step 3: Set HUBP_DISABLE=1 for all active pipes */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { + hubp = pipe_ctx->plane_res.hubp; + if (hubp != NULL && hubp->funcs->hubp_disable_control) { + hwss_add_hubp_disable_control(seq_state, hubp, true); + } + } + } + + /* Step 4: Set HUBP_DISABLE=0 for all active pipes */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { + hubp = pipe_ctx->plane_res.hubp; + if (hubp != NULL && hubp->funcs->hubp_disable_control) { + hwss_add_hubp_disable_control(seq_state, hubp, false); + } + } + } + + /* Step 5: DCHUBBUB_GLOBAL_SOFT_RESET=0 */ + hwss_add_hubbub_soft_reset(seq_state, dc->res_pool->hubbub, hubbub1_soft_reset, false); + + /* Step 6: Set HUBP_BLANK_EN=0 for all active pipes */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { + hubp = pipe_ctx->plane_res.hubp; + if (hubp != NULL && hubp->funcs->set_hubp_blank_en) { + hwss_add_hubp_set_blank_en(seq_state, hubp, false); + } + } + } + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index f489bb7a4c26..f78162ab859b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -9,6 +9,7 @@ #include "dc.h" #include "dc_stream.h" #include "hw_sequencer_private.h" +#include "hwss/hw_sequencer.h" #include "dcn401/dcn401_dccg.h" struct dc; @@ -82,6 +83,8 @@ void dcn401_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *l void dcn401_hardware_release(struct dc *dc); void dcn401_update_odm(struct dc 
*dc, struct dc_state *context, struct pipe_ctx *otg_master); +void dcn401_update_odm_sequence(struct dc *dc, struct dc_state *context, + struct pipe_ctx *otg_master, struct block_sequence_state *seq_state); void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct dc_cursor_position *pos_cpy); void dcn401_wait_for_det_buffer_update_under_otg_master(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); void dcn401_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); @@ -97,6 +100,11 @@ void dcn401_program_pipe( struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context); +void dcn401_program_pipe_sequence( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct block_sequence_state *seq_state); void dcn401_perform_3dlut_wa_unlock(struct pipe_ctx *pipe_ctx); void dcn401_program_front_end_for_ctx(struct dc *dc, struct dc_state *context); void dcn401_post_unlock_program_front_end(struct dc *dc, struct dc_state *context); @@ -109,7 +117,97 @@ void dcn401_detect_pipe_changes( void dcn401_plane_atomic_power_down(struct dc *dc, struct dpp *dpp, struct hubp *hubp); +void dcn401_plane_atomic_power_down_sequence(struct dc *dc, + struct dpp *dpp, + struct hubp *hubp, + struct block_sequence_state *seq_state); +void dcn401_plane_atomic_disconnect_sequence(struct dc *dc, + struct dc_state *state, + struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state); +void dcn401_blank_pixel_data_sequence( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + bool blank, + struct block_sequence_state *seq_state); void dcn401_initialize_min_clocks(struct dc *dc); void dcn401_update_cursor_offload_pipe(struct dc *dc, const struct pipe_ctx *pipe); +void dcn401_program_all_writeback_pipes_in_tree_sequence( + struct dc *dc, + const struct dc_stream_state *stream, + struct dc_state *context, + struct block_sequence_state *seq_state); + +void dcn401_enable_writeback_sequence( + struct dc *dc, + struct dc_writeback_info *wb_info, + struct dc_state *context, + int mpcc_inst, + struct block_sequence_state *seq_state); + +void dcn401_disable_writeback_sequence( + struct dc *dc, + struct dc_writeback_info *wb_info, + struct block_sequence_state *seq_state); + +void dcn401_update_writeback_sequence( + struct dc *dc, + struct dc_writeback_info *wb_info, + struct dc_state *context, + struct block_sequence_state *seq_state); + +void dcn401_setup_gsl_group_as_lock_sequence( + const struct dc *dc, + struct pipe_ctx *pipe_ctx, + bool enable, + struct block_sequence_state *seq_state); + +void dcn401_disable_plane_sequence( + struct dc *dc, + struct dc_state *state, + struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state); + +void dcn401_post_unlock_reset_opp_sequence( + struct dc *dc, + struct pipe_ctx *opp_head, + struct block_sequence_state *seq_state); + +void dcn401_dc_ip_request_cntl(struct dc *dc, bool enable); + +void dcn401_enable_plane_sequence(struct dc *dc, struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct block_sequence_state *seq_state); + +void dcn401_update_dchubp_dpp_sequence(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct block_sequence_state *seq_state); + +void dcn401_update_mpcc_sequence(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state); + +void dcn401_wait_for_mpcc_disconnect_sequence( + struct dc *dc, + struct resource_pool *res_pool, + struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state); + +void 
dcn401_setup_vupdate_interrupt_sequence(struct dc *dc, struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state); + +void dcn401_set_hdr_multiplier_sequence(struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state); + +void dcn401_program_mall_pipe_config_sequence(struct dc *dc, struct dc_state *context, + struct block_sequence_state *seq_state); + +void dcn401_verify_allow_pstate_change_high_sequence(struct dc *dc, + struct block_sequence_state *seq_state); + +bool dcn401_hw_wa_force_recovery_sequence(struct dc *dc, + struct block_sequence_state *seq_state); + #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 1c736b7e3300..162096ce0bdf 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -39,6 +39,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .enable_audio_stream = dce110_enable_audio_stream, .disable_audio_stream = dce110_disable_audio_stream, .disable_plane = dcn20_disable_plane, + .disable_plane_sequence = dcn401_disable_plane_sequence, .pipe_control_lock = dcn20_pipe_control_lock, .interdependent_update_lock = dcn401_interdependent_update_lock, .cursor_lock = dcn10_cursor_lock, @@ -54,6 +55,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .get_hw_state = dcn10_get_hw_state, .clear_status_bits = dcn10_clear_status_bits, .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .wait_for_mpcc_disconnect_sequence = dcn401_wait_for_mpcc_disconnect_sequence, .edp_backlight_control = dce110_edp_backlight_control, .edp_power_control = dce110_edp_power_control, .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, @@ -109,48 +111,63 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, .detect_pipe_changes = dcn401_detect_pipe_changes, .enable_plane = dcn20_enable_plane, + .enable_plane_sequence = dcn401_enable_plane_sequence, .update_dchubp_dpp = dcn20_update_dchubp_dpp, + .update_dchubp_dpp_sequence = dcn401_update_dchubp_dpp_sequence, .post_unlock_reset_opp = dcn20_post_unlock_reset_opp, + .post_unlock_reset_opp_sequence = dcn401_post_unlock_reset_opp_sequence, .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn401_private_funcs = { .init_pipes = dcn10_init_pipes, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .plane_atomic_disconnect_sequence = dcn401_plane_atomic_disconnect_sequence, .update_mpcc = dcn20_update_mpcc, + .update_mpcc_sequence = dcn401_update_mpcc_sequence, .set_input_transfer_func = dcn32_set_input_transfer_func, .set_output_transfer_func = dcn401_set_output_transfer_func, .power_down = dce110_power_down, .enable_display_power_gating = dcn10_dummy_display_power_gating, .blank_pixel_data = dcn20_blank_pixel_data, + .blank_pixel_data_sequence = dcn401_blank_pixel_data_sequence, .reset_hw_ctx_wrap = dcn401_reset_hw_ctx_wrap, .enable_stream_timing = dcn401_enable_stream_timing, .edp_backlight_control = dce110_edp_backlight_control, .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .setup_vupdate_interrupt_sequence = dcn401_setup_vupdate_interrupt_sequence, .did_underflow_occur = dcn10_did_underflow_occur, .init_blank = dcn32_init_blank, .disable_vga = dcn20_disable_vga, .bios_golden_init = dcn10_bios_golden_init, .plane_atomic_disable = dcn20_plane_atomic_disable, 
.plane_atomic_power_down = dcn401_plane_atomic_power_down, + .plane_atomic_power_down_sequence = dcn401_plane_atomic_power_down_sequence, .enable_power_gating_plane = dcn32_enable_power_gating_plane, .hubp_pg_control = dcn32_hubp_pg_control, .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, + .program_all_writeback_pipes_in_tree_sequence = dcn401_program_all_writeback_pipes_in_tree_sequence, .update_odm = dcn401_update_odm, + .update_odm_sequence = dcn401_update_odm_sequence, .dsc_pg_control = dcn32_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .set_hdr_multiplier_sequence = dcn401_set_hdr_multiplier_sequence, .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .verify_allow_pstate_change_high_sequence = dcn401_verify_allow_pstate_change_high_sequence, .wait_for_blank_complete = dcn20_wait_for_blank_complete, .dccg_init = dcn20_dccg_init, .set_mcm_luts = dcn401_set_mcm_luts, .program_mall_pipe_config = dcn32_program_mall_pipe_config, + .program_mall_pipe_config_sequence = dcn401_program_mall_pipe_config_sequence, .update_mall_sel = dcn32_update_mall_sel, .calculate_dccg_k1_k2_values = NULL, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, .reset_back_end_for_pipe = dcn401_reset_back_end_for_pipe, .populate_mcm_luts = NULL, .perform_3dlut_wa_unlock = dcn401_perform_3dlut_wa_unlock, + .program_pipe_sequence = dcn401_program_pipe_sequence, + .dc_ip_request_cntl = dcn401_dc_ip_request_cntl, }; void dcn401_hw_sequencer_init_functions(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index a937a2b2135e..3772b4aa11cc 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -31,6 +31,8 @@ #include "inc/hw/opp.h" #include "inc/hw/link_encoder.h" #include "inc/core_status.h" +#include "inc/hw/hw_shared.h" +#include "dsc/dsc.h" struct pipe_ctx; struct dc_state; @@ -48,6 +50,8 @@ struct dc_dmub_cmd; struct pg_block_update; struct drr_params; struct dc_underflow_debug_data; +struct dsc_optc_config; +struct vm_system_aperture_param; struct subvp_pipe_control_lock_fast_params { struct dc *dc; @@ -62,7 +66,7 @@ struct pipe_control_lock_params { }; struct set_flip_control_gsl_params { - struct pipe_ctx *pipe_ctx; + struct hubp *hubp; bool flip_immediate; }; @@ -154,11 +158,576 @@ struct dmub_hw_control_lock_fast_params { bool lock; }; +struct program_surface_config_params { + struct hubp *hubp; + enum surface_pixel_format format; + struct dc_tiling_info *tiling_info; + struct plane_size plane_size; + enum dc_rotation_angle rotation; + struct dc_plane_dcc_param *dcc; + bool horizontal_mirror; + int compat_level; +}; + +struct program_mcache_id_and_split_coordinate { + struct hubp *hubp; + struct dml2_hubp_pipe_mcache_regs *mcache_regs; +}; + struct program_cursor_update_now_params { struct dc *dc; struct pipe_ctx *pipe_ctx; }; +struct hubp_wait_pipe_read_start_params { + struct hubp *hubp; +}; + +struct apply_update_flags_for_phantom_params { + struct pipe_ctx *pipe_ctx; +}; + +struct update_phantom_vp_position_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; + struct dc_state *context; +}; + +struct set_odm_combine_params { + struct timing_generator *tg; + int opp_inst[MAX_PIPES]; + int opp_head_count; + int odm_slice_width; + int last_odm_slice_width; +}; + +struct set_odm_bypass_params { + struct 
timing_generator *tg; + const struct dc_crtc_timing *timing; +}; + +struct opp_pipe_clock_control_params { + struct output_pixel_processor *opp; + bool enable; +}; + +struct opp_program_left_edge_extra_pixel_params { + struct output_pixel_processor *opp; + enum dc_pixel_encoding pixel_encoding; + bool is_otg_master; +}; + +struct dccg_set_dto_dscclk_params { + struct dccg *dccg; + int inst; + int num_slices_h; +}; + +struct dsc_set_config_params { + struct display_stream_compressor *dsc; + struct dsc_config *dsc_cfg; + struct dsc_optc_config *dsc_optc_cfg; +}; + +struct dsc_enable_params { + struct display_stream_compressor *dsc; + int opp_inst; +}; + +struct tg_set_dsc_config_params { + struct timing_generator *tg; + struct dsc_optc_config *dsc_optc_cfg; + bool enable; +}; + +struct dsc_disconnect_params { + struct display_stream_compressor *dsc; +}; + +struct dsc_read_state_params { + struct display_stream_compressor *dsc; + struct dcn_dsc_state *dsc_state; +}; + +struct dsc_calculate_and_set_config_params { + struct pipe_ctx *pipe_ctx; + struct dsc_optc_config dsc_optc_cfg; + bool enable; + int opp_cnt; +}; + +struct dsc_enable_with_opp_params { + struct pipe_ctx *pipe_ctx; +}; + +struct program_tg_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; + struct dc_state *context; +}; + +struct tg_program_global_sync_params { + struct timing_generator *tg; + int vready_offset; + unsigned int vstartup_lines; + unsigned int vupdate_offset_pixels; + unsigned int vupdate_vupdate_width_pixels; + unsigned int pstate_keepout_start_lines; +}; + +struct tg_wait_for_state_params { + struct timing_generator *tg; + enum crtc_state state; +}; + +struct tg_set_vtg_params_params { + struct timing_generator *tg; + struct dc_crtc_timing *timing; + bool program_fp2; +}; + +struct tg_set_gsl_params { + struct timing_generator *tg; + struct gsl_params gsl; +}; + +struct tg_set_gsl_source_select_params { + struct timing_generator *tg; + int group_idx; + uint32_t gsl_ready_signal; +}; + +struct setup_vupdate_interrupt_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; +}; + +struct tg_setup_vertical_interrupt2_params { + struct timing_generator *tg; + int start_line; +}; + +struct dpp_set_hdr_multiplier_params { + struct dpp *dpp; + uint32_t hw_mult; +}; + +struct program_det_size_params { + struct hubbub *hubbub; + unsigned int hubp_inst; + unsigned int det_buffer_size_kb; +}; + +struct program_det_segments_params { + struct hubbub *hubbub; + unsigned int hubp_inst; + unsigned int det_size; +}; + +struct update_dchubp_dpp_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; + struct dc_state *context; +}; + +struct opp_set_dyn_expansion_params { + struct output_pixel_processor *opp; + enum dc_color_space color_space; + enum dc_color_depth color_depth; + enum signal_type signal; +}; + +struct opp_program_fmt_params { + struct output_pixel_processor *opp; + struct bit_depth_reduction_params *fmt_bit_depth; + struct clamping_and_pixel_encoding_params *clamping; +}; + +struct opp_program_bit_depth_reduction_params { + struct output_pixel_processor *opp; + bool use_default_params; + struct pipe_ctx *pipe_ctx; +}; + +struct opp_set_disp_pattern_generator_params { + struct output_pixel_processor *opp; + enum controller_dp_test_pattern test_pattern; + enum controller_dp_color_space color_space; + enum dc_color_depth color_depth; + struct tg_color solid_color; + bool use_solid_color; + int width; + int height; + int offset; +}; + +struct set_abm_pipe_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; +}; + 
+struct set_abm_level_params { + struct abm *abm; + unsigned int abm_level; +}; + +struct set_abm_immediate_disable_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; +}; + +struct set_disp_pattern_generator_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; + enum controller_dp_test_pattern test_pattern; + enum controller_dp_color_space color_space; + enum dc_color_depth color_depth; + const struct tg_color *solid_color; + int width; + int height; + int offset; +}; + +struct mpc_update_blending_params { + struct mpc *mpc; + struct mpcc_blnd_cfg blnd_cfg; + int mpcc_id; +}; + +struct mpc_assert_idle_mpcc_params { + struct mpc *mpc; + int mpcc_id; +}; + +struct mpc_insert_plane_params { + struct mpc *mpc; + struct mpc_tree *mpc_tree_params; + struct mpcc_blnd_cfg blnd_cfg; + struct mpcc_sm_cfg *sm_cfg; + struct mpcc *insert_above_mpcc; + int dpp_id; + int mpcc_id; +}; + +struct mpc_remove_mpcc_params { + struct mpc *mpc; + struct mpc_tree *mpc_tree_params; + struct mpcc *mpcc_to_remove; +}; + +struct opp_set_mpcc_disconnect_pending_params { + struct output_pixel_processor *opp; + int mpcc_inst; + bool pending; +}; + +struct dc_set_optimized_required_params { + struct dc *dc; + bool optimized_required; +}; + +struct hubp_disconnect_params { + struct hubp *hubp; +}; + +struct hubbub_force_pstate_change_control_params { + struct hubbub *hubbub; + bool enable; + bool wait; +}; + +struct tg_enable_crtc_params { + struct timing_generator *tg; +}; + +struct hubp_wait_flip_pending_params { + struct hubp *hubp; + unsigned int timeout_us; + unsigned int polling_interval_us; +}; + +struct tg_wait_double_buffer_pending_params { + struct timing_generator *tg; + unsigned int timeout_us; + unsigned int polling_interval_us; +}; + +struct update_force_pstate_params { + struct dc *dc; + struct dc_state *context; +}; + +struct hubbub_apply_dedcn21_147_wa_params { + struct hubbub *hubbub; +}; + +struct hubbub_allow_self_refresh_control_params { + struct hubbub *hubbub; + bool allow; + bool *disallow_self_refresh_applied; +}; + +struct tg_get_frame_count_params { + struct timing_generator *tg; + unsigned int *frame_count; +}; + +struct mpc_set_dwb_mux_params { + struct mpc *mpc; + int dwb_id; + int mpcc_id; +}; + +struct mpc_disable_dwb_mux_params { + struct mpc *mpc; + unsigned int dwb_id; +}; + +struct mcif_wb_config_buf_params { + struct mcif_wb *mcif_wb; + struct mcif_buf_params *mcif_buf_params; + unsigned int dest_height; +}; + +struct mcif_wb_config_arb_params { + struct mcif_wb *mcif_wb; + struct mcif_arb_params *mcif_arb_params; +}; + +struct mcif_wb_enable_params { + struct mcif_wb *mcif_wb; +}; + +struct mcif_wb_disable_params { + struct mcif_wb *mcif_wb; +}; + +struct dwbc_enable_params { + struct dwbc *dwb; + struct dc_dwb_params *dwb_params; +}; + +struct dwbc_disable_params { + struct dwbc *dwb; +}; + +struct dwbc_update_params { + struct dwbc *dwb; + struct dc_dwb_params *dwb_params; +}; + +struct hubp_update_mall_sel_params { + struct hubp *hubp; + uint32_t mall_sel; + bool cache_cursor; +}; + +struct hubp_prepare_subvp_buffering_params { + struct hubp *hubp; + bool enable; +}; + +struct hubp_set_blank_en_params { + struct hubp *hubp; + bool enable; +}; + +struct hubp_disable_control_params { + struct hubp *hubp; + bool disable; +}; + +struct hubbub_soft_reset_params { + struct hubbub *hubbub; + void (*hubbub_soft_reset)(struct hubbub *hubbub, bool reset); + bool reset; +}; + +struct hubp_clk_cntl_params { + struct hubp *hubp; + bool enable; +}; + +struct hubp_init_params { + struct 
hubp *hubp; +}; + +struct hubp_set_vm_system_aperture_settings_params { + struct hubp *hubp; + //struct vm_system_aperture_param apt; + PHYSICAL_ADDRESS_LOC sys_default; + PHYSICAL_ADDRESS_LOC sys_low; + PHYSICAL_ADDRESS_LOC sys_high; +}; + +struct hubp_set_flip_int_params { + struct hubp *hubp; +}; + +struct dpp_dppclk_control_params { + struct dpp *dpp; + bool dppclk_div; + bool enable; +}; + +struct disable_phantom_crtc_params { + struct timing_generator *tg; +}; + +struct dpp_pg_control_params { + struct dce_hwseq *hws; + unsigned int dpp_inst; + bool power_on; +}; + +struct hubp_pg_control_params { + struct dce_hwseq *hws; + unsigned int hubp_inst; + bool power_on; +}; + +struct hubp_reset_params { + struct hubp *hubp; +}; + +struct dpp_reset_params { + struct dpp *dpp; +}; + +struct dpp_root_clock_control_params { + struct dce_hwseq *hws; + unsigned int dpp_inst; + bool clock_on; +}; + +struct dc_ip_request_cntl_params { + struct dc *dc; + bool enable; +}; + +struct dsc_pg_status_params { + struct dce_hwseq *hws; + int dsc_inst; + bool is_ungated; +}; + +struct dsc_wait_disconnect_pending_clear_params { + struct display_stream_compressor *dsc; + bool *is_ungated; +}; + +struct dsc_disable_params { + struct display_stream_compressor *dsc; + bool *is_ungated; +}; + +struct dccg_set_ref_dscclk_params { + struct dccg *dccg; + int dsc_inst; + bool *is_ungated; +}; + +struct dccg_update_dpp_dto_params { + struct dccg *dccg; + int dpp_inst; + int dppclk_khz; +}; + +struct hubp_vtg_sel_params { + struct hubp *hubp; + uint32_t otg_inst; +}; + +struct hubp_setup2_params { + struct hubp *hubp; + struct dml2_dchub_per_pipe_register_set *hubp_regs; + union dml2_global_sync_programming *global_sync; + struct dc_crtc_timing *timing; +}; + +struct hubp_setup_params { + struct hubp *hubp; + struct _vcs_dpi_display_dlg_regs_st *dlg_regs; + struct _vcs_dpi_display_ttu_regs_st *ttu_regs; + struct _vcs_dpi_display_rq_regs_st *rq_regs; + struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest; +}; + +struct hubp_set_unbounded_requesting_params { + struct hubp *hubp; + bool unbounded_req; +}; + +struct hubp_setup_interdependent2_params { + struct hubp *hubp; + struct dml2_dchub_per_pipe_register_set *hubp_regs; +}; + +struct hubp_setup_interdependent_params { + struct hubp *hubp; + struct _vcs_dpi_display_dlg_regs_st *dlg_regs; + struct _vcs_dpi_display_ttu_regs_st *ttu_regs; +}; + +struct dpp_set_cursor_matrix_params { + struct dpp *dpp; + enum dc_color_space color_space; + struct dc_csc_transform *cursor_csc_color_matrix; +}; + +struct mpc_update_mpcc_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; +}; + +struct dpp_set_scaler_params { + struct dpp *dpp; + const struct scaler_data *scl_data; +}; + +struct hubp_mem_program_viewport_params { + struct hubp *hubp; + const struct rect *viewport; + const struct rect *viewport_c; +}; + +struct hubp_program_mcache_id_and_split_coordinate_params { + struct hubp *hubp; + struct mcache_regs_struct *mcache_regs; +}; + +struct set_cursor_attribute_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; +}; + +struct set_cursor_position_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; +}; + +struct set_cursor_sdr_white_level_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; +}; + +struct program_output_csc_params { + struct dc *dc; + struct pipe_ctx *pipe_ctx; + enum dc_color_space colorspace; + uint16_t *matrix; + int opp_id; +}; + +struct hubp_set_blank_params { + struct hubp *hubp; + bool blank; +}; + +struct phantom_hubp_post_enable_params { + 
struct hubp *hubp; +}; + union block_sequence_params { struct update_plane_addr_params update_plane_addr_params; struct subvp_pipe_control_lock_fast_params subvp_pipe_control_lock_fast_params; @@ -179,7 +748,106 @@ union block_sequence_params { struct subvp_save_surf_addr subvp_save_surf_addr; struct wait_for_dcc_meta_propagation_params wait_for_dcc_meta_propagation_params; struct dmub_hw_control_lock_fast_params dmub_hw_control_lock_fast_params; + struct program_surface_config_params program_surface_config_params; + struct program_mcache_id_and_split_coordinate program_mcache_id_and_split_coordinate; struct program_cursor_update_now_params program_cursor_update_now_params; + struct hubp_wait_pipe_read_start_params hubp_wait_pipe_read_start_params; + struct apply_update_flags_for_phantom_params apply_update_flags_for_phantom_params; + struct update_phantom_vp_position_params update_phantom_vp_position_params; + struct set_odm_combine_params set_odm_combine_params; + struct set_odm_bypass_params set_odm_bypass_params; + struct opp_pipe_clock_control_params opp_pipe_clock_control_params; + struct opp_program_left_edge_extra_pixel_params opp_program_left_edge_extra_pixel_params; + struct dccg_set_dto_dscclk_params dccg_set_dto_dscclk_params; + struct dsc_set_config_params dsc_set_config_params; + struct dsc_enable_params dsc_enable_params; + struct tg_set_dsc_config_params tg_set_dsc_config_params; + struct dsc_disconnect_params dsc_disconnect_params; + struct dsc_read_state_params dsc_read_state_params; + struct dsc_calculate_and_set_config_params dsc_calculate_and_set_config_params; + struct dsc_enable_with_opp_params dsc_enable_with_opp_params; + struct program_tg_params program_tg_params; + struct tg_program_global_sync_params tg_program_global_sync_params; + struct tg_wait_for_state_params tg_wait_for_state_params; + struct tg_set_vtg_params_params tg_set_vtg_params_params; + struct tg_setup_vertical_interrupt2_params tg_setup_vertical_interrupt2_params; + struct dpp_set_hdr_multiplier_params dpp_set_hdr_multiplier_params; + struct tg_set_gsl_params tg_set_gsl_params; + struct tg_set_gsl_source_select_params tg_set_gsl_source_select_params; + struct setup_vupdate_interrupt_params setup_vupdate_interrupt_params; + struct program_det_size_params program_det_size_params; + struct program_det_segments_params program_det_segments_params; + struct update_dchubp_dpp_params update_dchubp_dpp_params; + struct opp_set_dyn_expansion_params opp_set_dyn_expansion_params; + struct opp_program_fmt_params opp_program_fmt_params; + struct opp_program_bit_depth_reduction_params opp_program_bit_depth_reduction_params; + struct opp_set_disp_pattern_generator_params opp_set_disp_pattern_generator_params; + struct set_abm_pipe_params set_abm_pipe_params; + struct set_abm_level_params set_abm_level_params; + struct set_abm_immediate_disable_params set_abm_immediate_disable_params; + struct set_disp_pattern_generator_params set_disp_pattern_generator_params; + struct mpc_remove_mpcc_params mpc_remove_mpcc_params; + struct opp_set_mpcc_disconnect_pending_params opp_set_mpcc_disconnect_pending_params; + struct dc_set_optimized_required_params dc_set_optimized_required_params; + struct hubp_disconnect_params hubp_disconnect_params; + struct hubbub_force_pstate_change_control_params hubbub_force_pstate_change_control_params; + struct tg_enable_crtc_params tg_enable_crtc_params; + struct hubp_wait_flip_pending_params hubp_wait_flip_pending_params; + struct tg_wait_double_buffer_pending_params 
tg_wait_double_buffer_pending_params; + struct update_force_pstate_params update_force_pstate_params; + struct hubbub_apply_dedcn21_147_wa_params hubbub_apply_dedcn21_147_wa_params; + struct hubbub_allow_self_refresh_control_params hubbub_allow_self_refresh_control_params; + struct tg_get_frame_count_params tg_get_frame_count_params; + struct mpc_set_dwb_mux_params mpc_set_dwb_mux_params; + struct mpc_disable_dwb_mux_params mpc_disable_dwb_mux_params; + struct mcif_wb_config_buf_params mcif_wb_config_buf_params; + struct mcif_wb_config_arb_params mcif_wb_config_arb_params; + struct mcif_wb_enable_params mcif_wb_enable_params; + struct mcif_wb_disable_params mcif_wb_disable_params; + struct dwbc_enable_params dwbc_enable_params; + struct dwbc_disable_params dwbc_disable_params; + struct dwbc_update_params dwbc_update_params; + struct hubp_update_mall_sel_params hubp_update_mall_sel_params; + struct hubp_prepare_subvp_buffering_params hubp_prepare_subvp_buffering_params; + struct hubp_set_blank_en_params hubp_set_blank_en_params; + struct hubp_disable_control_params hubp_disable_control_params; + struct hubbub_soft_reset_params hubbub_soft_reset_params; + struct hubp_clk_cntl_params hubp_clk_cntl_params; + struct hubp_init_params hubp_init_params; + struct hubp_set_vm_system_aperture_settings_params hubp_set_vm_system_aperture_settings_params; + struct hubp_set_flip_int_params hubp_set_flip_int_params; + struct dpp_dppclk_control_params dpp_dppclk_control_params; + struct disable_phantom_crtc_params disable_phantom_crtc_params; + struct dpp_pg_control_params dpp_pg_control_params; + struct hubp_pg_control_params hubp_pg_control_params; + struct hubp_reset_params hubp_reset_params; + struct dpp_reset_params dpp_reset_params; + struct dpp_root_clock_control_params dpp_root_clock_control_params; + struct dc_ip_request_cntl_params dc_ip_request_cntl_params; + struct dsc_pg_status_params dsc_pg_status_params; + struct dsc_wait_disconnect_pending_clear_params dsc_wait_disconnect_pending_clear_params; + struct dsc_disable_params dsc_disable_params; + struct dccg_set_ref_dscclk_params dccg_set_ref_dscclk_params; + struct dccg_update_dpp_dto_params dccg_update_dpp_dto_params; + struct hubp_vtg_sel_params hubp_vtg_sel_params; + struct hubp_setup2_params hubp_setup2_params; + struct hubp_setup_params hubp_setup_params; + struct hubp_set_unbounded_requesting_params hubp_set_unbounded_requesting_params; + struct hubp_setup_interdependent2_params hubp_setup_interdependent2_params; + struct hubp_setup_interdependent_params hubp_setup_interdependent_params; + struct dpp_set_cursor_matrix_params dpp_set_cursor_matrix_params; + struct mpc_update_mpcc_params mpc_update_mpcc_params; + struct mpc_update_blending_params mpc_update_blending_params; + struct mpc_assert_idle_mpcc_params mpc_assert_idle_mpcc_params; + struct mpc_insert_plane_params mpc_insert_plane_params; + struct dpp_set_scaler_params dpp_set_scaler_params; + struct hubp_mem_program_viewport_params hubp_mem_program_viewport_params; + struct set_cursor_attribute_params set_cursor_attribute_params; + struct set_cursor_position_params set_cursor_position_params; + struct set_cursor_sdr_white_level_params set_cursor_sdr_white_level_params; + struct program_output_csc_params program_output_csc_params; + struct hubp_set_blank_params hubp_set_blank_params; + struct phantom_hubp_post_enable_params phantom_hubp_post_enable_params; }; enum block_sequence_func { @@ -195,6 +863,7 @@ enum block_sequence_func { DPP_SETUP_DPP, DPP_PROGRAM_BIAS_AND_SCALE, 
DPP_SET_OUTPUT_TRANSFER_FUNC, + DPP_SET_HDR_MULTIPLIER, MPC_UPDATE_VISUAL_CONFIRM, MPC_POWER_ON_MPC_MEM_PWR, MPC_SET_OUTPUT_CSC, @@ -202,7 +871,102 @@ enum block_sequence_func { DMUB_SUBVP_SAVE_SURF_ADDR, HUBP_WAIT_FOR_DCC_META_PROP, DMUB_HW_CONTROL_LOCK_FAST, + HUBP_PROGRAM_SURFACE_CONFIG, + HUBP_PROGRAM_MCACHE_ID, PROGRAM_CURSOR_UPDATE_NOW, + HUBP_WAIT_PIPE_READ_START, + HWS_APPLY_UPDATE_FLAGS_FOR_PHANTOM, + HWS_UPDATE_PHANTOM_VP_POSITION, + OPTC_SET_ODM_COMBINE, + OPTC_SET_ODM_BYPASS, + OPP_PIPE_CLOCK_CONTROL, + OPP_PROGRAM_LEFT_EDGE_EXTRA_PIXEL, + DCCG_SET_DTO_DSCCLK, + DSC_SET_CONFIG, + DSC_ENABLE, + TG_SET_DSC_CONFIG, + DSC_DISCONNECT, + DSC_READ_STATE, + DSC_CALCULATE_AND_SET_CONFIG, + DSC_ENABLE_WITH_OPP, + TG_PROGRAM_GLOBAL_SYNC, + TG_WAIT_FOR_STATE, + TG_SET_VTG_PARAMS, + TG_SETUP_VERTICAL_INTERRUPT2, + HUBP_PROGRAM_DET_SIZE, + HUBP_PROGRAM_DET_SEGMENTS, + OPP_SET_DYN_EXPANSION, + OPP_PROGRAM_FMT, + OPP_PROGRAM_BIT_DEPTH_REDUCTION, + OPP_SET_DISP_PATTERN_GENERATOR, + ABM_SET_PIPE, + ABM_SET_LEVEL, + ABM_SET_IMMEDIATE_DISABLE, + MPC_REMOVE_MPCC, + OPP_SET_MPCC_DISCONNECT_PENDING, + DC_SET_OPTIMIZED_REQUIRED, + HUBP_DISCONNECT, + HUBBUB_FORCE_PSTATE_CHANGE_CONTROL, + TG_ENABLE_CRTC, + TG_SET_GSL, + TG_SET_GSL_SOURCE_SELECT, + HUBP_WAIT_FLIP_PENDING, + TG_WAIT_DOUBLE_BUFFER_PENDING, + UPDATE_FORCE_PSTATE, + PROGRAM_MALL_PIPE_CONFIG, + HUBBUB_APPLY_DEDCN21_147_WA, + HUBBUB_ALLOW_SELF_REFRESH_CONTROL, + TG_GET_FRAME_COUNT, + MPC_SET_DWB_MUX, + MPC_DISABLE_DWB_MUX, + MCIF_WB_CONFIG_BUF, + MCIF_WB_CONFIG_ARB, + MCIF_WB_ENABLE, + MCIF_WB_DISABLE, + DWBC_ENABLE, + DWBC_DISABLE, + DWBC_UPDATE, + HUBP_UPDATE_MALL_SEL, + HUBP_PREPARE_SUBVP_BUFFERING, + HUBP_SET_BLANK_EN, + HUBP_DISABLE_CONTROL, + HUBBUB_SOFT_RESET, + HUBP_CLK_CNTL, + HUBP_INIT, + HUBP_SET_VM_SYSTEM_APERTURE_SETTINGS, + HUBP_SET_FLIP_INT, + DPP_DPPCLK_CONTROL, + DISABLE_PHANTOM_CRTC, + DSC_PG_STATUS, + DSC_WAIT_DISCONNECT_PENDING_CLEAR, + DSC_DISABLE, + DCCG_SET_REF_DSCCLK, + DPP_PG_CONTROL, + HUBP_PG_CONTROL, + HUBP_RESET, + DPP_RESET, + DPP_ROOT_CLOCK_CONTROL, + DC_IP_REQUEST_CNTL, + DCCG_UPDATE_DPP_DTO, + HUBP_VTG_SEL, + HUBP_SETUP2, + HUBP_SETUP, + HUBP_SET_UNBOUNDED_REQUESTING, + HUBP_SETUP_INTERDEPENDENT2, + HUBP_SETUP_INTERDEPENDENT, + DPP_SET_CURSOR_MATRIX, + MPC_UPDATE_BLENDING, + MPC_ASSERT_IDLE_MPCC, + MPC_INSERT_PLANE, + DPP_SET_SCALER, + HUBP_MEM_PROGRAM_VIEWPORT, + SET_CURSOR_ATTRIBUTE, + SET_CURSOR_POSITION, + SET_CURSOR_SDR_WHITE_LEVEL, + PROGRAM_OUTPUT_CSC, + HUBP_SET_LEGACY_TILING_COMPAT_LEVEL, + HUBP_SET_BLANK, + PHANTOM_HUBP_POST_ENABLE, /* This must be the last value in this enum, add new ones above */ HWSS_BLOCK_SEQUENCE_FUNC_COUNT }; @@ -212,6 +976,11 @@ struct block_sequence { enum block_sequence_func func; }; +struct block_sequence_state { + struct block_sequence *steps; + unsigned int *num_steps; +}; + #define MAX_HWSS_BLOCK_SEQUENCE_SIZE (HWSS_BLOCK_SEQUENCE_FUNC_COUNT * MAX_PIPES) struct hw_sequencer_funcs { @@ -229,6 +998,8 @@ struct hw_sequencer_funcs { enum dc_status (*apply_ctx_to_hw)(struct dc *dc, struct dc_state *context); void (*disable_plane)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); + void (*disable_plane_sequence)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state); void (*disable_pixel_data)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool blank); void (*apply_ctx_for_surface)(struct dc *dc, const struct dc_stream_state *stream, @@ -246,6 +1017,10 @@ struct hw_sequencer_funcs { void 
(*wait_for_mpcc_disconnect)(struct dc *dc, struct resource_pool *res_pool, struct pipe_ctx *pipe_ctx); + void (*wait_for_mpcc_disconnect_sequence)(struct dc *dc, + struct resource_pool *res_pool, + struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state); void (*edp_backlight_control)( struct dc_link *link, bool enable); @@ -485,11 +1260,23 @@ struct hw_sequencer_funcs { void (*enable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context); + void (*enable_plane_sequence)(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct block_sequence_state *seq_state); void (*update_dchubp_dpp)(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context); + void (*update_dchubp_dpp_sequence)(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct block_sequence_state *seq_state); void (*post_unlock_reset_opp)(struct dc *dc, struct pipe_ctx *opp_head); + void (*post_unlock_reset_opp_sequence)( + struct dc *dc, + struct pipe_ctx *opp_head, + struct block_sequence_state *seq_state); void (*get_underflow_debug_data)(const struct dc *dc, struct timing_generator *tg, struct dc_underflow_debug_data *out_data); @@ -602,4 +1389,624 @@ void hwss_set_ocsc_default(union block_sequence_params *params); void hwss_subvp_save_surf_addr(union block_sequence_params *params); +void hwss_program_surface_config(union block_sequence_params *params); + +void hwss_program_mcache_id_and_split_coordinate(union block_sequence_params *params); + +void hwss_set_odm_combine(union block_sequence_params *params); + +void hwss_set_odm_bypass(union block_sequence_params *params); + +void hwss_opp_pipe_clock_control(union block_sequence_params *params); + +void hwss_opp_program_left_edge_extra_pixel(union block_sequence_params *params); + +void hwss_blank_pixel_data(union block_sequence_params *params); + +void hwss_dccg_set_dto_dscclk(union block_sequence_params *params); + +void hwss_dsc_set_config(union block_sequence_params *params); + +void hwss_dsc_enable(union block_sequence_params *params); + +void hwss_tg_set_dsc_config(union block_sequence_params *params); + +void hwss_dsc_disconnect(union block_sequence_params *params); + +void hwss_dsc_read_state(union block_sequence_params *params); + +void hwss_dsc_calculate_and_set_config(union block_sequence_params *params); + +void hwss_dsc_enable_with_opp(union block_sequence_params *params); + +void hwss_program_tg(union block_sequence_params *params); + +void hwss_tg_program_global_sync(union block_sequence_params *params); + +void hwss_tg_wait_for_state(union block_sequence_params *params); + +void hwss_tg_set_vtg_params(union block_sequence_params *params); + +void hwss_tg_setup_vertical_interrupt2(union block_sequence_params *params); + +void hwss_dpp_set_hdr_multiplier(union block_sequence_params *params); + +void hwss_program_det_size(union block_sequence_params *params); + +void hwss_program_det_segments(union block_sequence_params *params); + +void hwss_opp_set_dyn_expansion(union block_sequence_params *params); + +void hwss_opp_program_fmt(union block_sequence_params *params); + +void hwss_opp_program_bit_depth_reduction(union block_sequence_params *params); + +void hwss_opp_set_disp_pattern_generator(union block_sequence_params *params); + +void hwss_set_abm_pipe(union block_sequence_params *params); + +void hwss_set_abm_level(union block_sequence_params *params); + +void hwss_set_abm_immediate_disable(union block_sequence_params *params); + +void hwss_mpc_remove_mpcc(union 
block_sequence_params *params); + +void hwss_opp_set_mpcc_disconnect_pending(union block_sequence_params *params); + +void hwss_dc_set_optimized_required(union block_sequence_params *params); + +void hwss_hubp_disconnect(union block_sequence_params *params); + +void hwss_hubbub_force_pstate_change_control(union block_sequence_params *params); + +void hwss_tg_enable_crtc(union block_sequence_params *params); + +void hwss_tg_set_gsl(union block_sequence_params *params); + +void hwss_tg_set_gsl_source_select(union block_sequence_params *params); + +void hwss_hubp_wait_flip_pending(union block_sequence_params *params); + +void hwss_tg_wait_double_buffer_pending(union block_sequence_params *params); + +void hwss_update_force_pstate(union block_sequence_params *params); + +void hwss_hubbub_apply_dedcn21_147_wa(union block_sequence_params *params); + +void hwss_hubbub_allow_self_refresh_control(union block_sequence_params *params); + +void hwss_tg_get_frame_count(union block_sequence_params *params); + +void hwss_mpc_set_dwb_mux(union block_sequence_params *params); + +void hwss_mpc_disable_dwb_mux(union block_sequence_params *params); + +void hwss_mcif_wb_config_buf(union block_sequence_params *params); + +void hwss_mcif_wb_config_arb(union block_sequence_params *params); + +void hwss_mcif_wb_enable(union block_sequence_params *params); + +void hwss_mcif_wb_disable(union block_sequence_params *params); + +void hwss_dwbc_enable(union block_sequence_params *params); + +void hwss_dwbc_disable(union block_sequence_params *params); + +void hwss_dwbc_update(union block_sequence_params *params); + +void hwss_hubp_update_mall_sel(union block_sequence_params *params); + +void hwss_hubp_prepare_subvp_buffering(union block_sequence_params *params); + +void hwss_hubp_set_blank_en(union block_sequence_params *params); + +void hwss_hubp_disable_control(union block_sequence_params *params); + +void hwss_hubbub_soft_reset(union block_sequence_params *params); + +void hwss_hubp_clk_cntl(union block_sequence_params *params); + +void hwss_hubp_init(union block_sequence_params *params); + +void hwss_hubp_set_vm_system_aperture_settings(union block_sequence_params *params); + +void hwss_hubp_set_flip_int(union block_sequence_params *params); + +void hwss_dpp_dppclk_control(union block_sequence_params *params); + +void hwss_disable_phantom_crtc(union block_sequence_params *params); + +void hwss_dsc_pg_status(union block_sequence_params *params); + +void hwss_dsc_wait_disconnect_pending_clear(union block_sequence_params *params); + +void hwss_dsc_disable(union block_sequence_params *params); + +void hwss_dccg_set_ref_dscclk(union block_sequence_params *params); + +void hwss_dpp_pg_control(union block_sequence_params *params); + +void hwss_hubp_pg_control(union block_sequence_params *params); + +void hwss_hubp_reset(union block_sequence_params *params); + +void hwss_dpp_reset(union block_sequence_params *params); + +void hwss_dpp_root_clock_control(union block_sequence_params *params); + +void hwss_dc_ip_request_cntl(union block_sequence_params *params); + +void hwss_dccg_update_dpp_dto(union block_sequence_params *params); + +void hwss_hubp_vtg_sel(union block_sequence_params *params); + +void hwss_hubp_setup2(union block_sequence_params *params); + +void hwss_hubp_setup(union block_sequence_params *params); + +void hwss_hubp_set_unbounded_requesting(union block_sequence_params *params); + +void hwss_hubp_setup_interdependent2(union block_sequence_params *params); + +void hwss_hubp_setup_interdependent(union 
block_sequence_params *params); + +void hwss_dpp_set_cursor_matrix(union block_sequence_params *params); + +void hwss_mpc_update_mpcc(union block_sequence_params *params); + +void hwss_mpc_update_blending(union block_sequence_params *params); + +void hwss_mpc_assert_idle_mpcc(union block_sequence_params *params); + +void hwss_mpc_insert_plane(union block_sequence_params *params); + +void hwss_dpp_set_scaler(union block_sequence_params *params); + +void hwss_hubp_mem_program_viewport(union block_sequence_params *params); + +void hwss_set_cursor_attribute(union block_sequence_params *params); + +void hwss_set_cursor_position(union block_sequence_params *params); + +void hwss_set_cursor_sdr_white_level(union block_sequence_params *params); + +void hwss_program_output_csc(union block_sequence_params *params); + +void hwss_hubp_set_legacy_tiling_compat_level(union block_sequence_params *params); + +void hwss_hubp_set_blank(union block_sequence_params *params); + +void hwss_phantom_hubp_post_enable(union block_sequence_params *params); + +void hwss_add_optc_pipe_control_lock(struct block_sequence_state *seq_state, + struct dc *dc, struct pipe_ctx *pipe_ctx, bool lock); + +void hwss_add_hubp_set_flip_control_gsl(struct block_sequence_state *seq_state, + struct hubp *hubp, bool flip_immediate); + +void hwss_add_hubp_program_triplebuffer(struct block_sequence_state *seq_state, + struct dc *dc, struct pipe_ctx *pipe_ctx, bool enableTripleBuffer); + +void hwss_add_hubp_update_plane_addr(struct block_sequence_state *seq_state, + struct dc *dc, struct pipe_ctx *pipe_ctx); + +void hwss_add_dpp_set_input_transfer_func(struct block_sequence_state *seq_state, + struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_plane_state *plane_state); + +void hwss_add_dpp_program_gamut_remap(struct block_sequence_state *seq_state, + struct pipe_ctx *pipe_ctx); + +void hwss_add_dpp_program_bias_and_scale(struct block_sequence_state *seq_state, + struct pipe_ctx *pipe_ctx); + +void hwss_add_optc_program_manual_trigger(struct block_sequence_state *seq_state, + struct pipe_ctx *pipe_ctx); + +void hwss_add_dpp_set_output_transfer_func(struct block_sequence_state *seq_state, + struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_stream_state *stream); + +void hwss_add_mpc_update_visual_confirm(struct block_sequence_state *seq_state, + struct dc *dc, struct pipe_ctx *pipe_ctx, int mpcc_id); + +void hwss_add_mpc_power_on_mpc_mem_pwr(struct block_sequence_state *seq_state, + struct mpc *mpc, int mpcc_id, bool power_on); + +void hwss_add_mpc_set_output_csc(struct block_sequence_state *seq_state, + struct mpc *mpc, int opp_id, const uint16_t *regval, enum mpc_output_csc_mode ocsc_mode); + +void hwss_add_mpc_set_ocsc_default(struct block_sequence_state *seq_state, + struct mpc *mpc, int opp_id, enum dc_color_space colorspace, enum mpc_output_csc_mode ocsc_mode); + +void hwss_add_dmub_send_dmcub_cmd(struct block_sequence_state *seq_state, + struct dc_context *ctx, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type); + +void hwss_add_dmub_subvp_save_surf_addr(struct block_sequence_state *seq_state, + struct dc_dmub_srv *dc_dmub_srv, struct dc_plane_address *addr, uint8_t subvp_index); + +void hwss_add_hubp_wait_for_dcc_meta_prop(struct block_sequence_state *seq_state, + struct dc *dc, struct pipe_ctx *top_pipe_to_program); + +void hwss_add_hubp_wait_pipe_read_start(struct block_sequence_state *seq_state, + struct hubp *hubp); + +void hwss_add_hws_apply_update_flags_for_phantom(struct block_sequence_state *seq_state, + struct 
pipe_ctx *pipe_ctx); + +void hwss_add_hws_update_phantom_vp_position(struct block_sequence_state *seq_state, + struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx); + +void hwss_add_optc_set_odm_combine(struct block_sequence_state *seq_state, + struct timing_generator *tg, int opp_inst[MAX_PIPES], int opp_head_count, + int odm_slice_width, int last_odm_slice_width); + +void hwss_add_optc_set_odm_bypass(struct block_sequence_state *seq_state, + struct timing_generator *optc, struct dc_crtc_timing *timing); + +void hwss_add_tg_program_global_sync(struct block_sequence_state *seq_state, + struct timing_generator *tg, + int vready_offset, + unsigned int vstartup_lines, + unsigned int vupdate_offset_pixels, + unsigned int vupdate_vupdate_width_pixels, + unsigned int pstate_keepout_start_lines); + +void hwss_add_tg_wait_for_state(struct block_sequence_state *seq_state, + struct timing_generator *tg, enum crtc_state state); + +void hwss_add_tg_set_vtg_params(struct block_sequence_state *seq_state, + struct timing_generator *tg, struct dc_crtc_timing *dc_crtc_timing, bool program_fp2); + +void hwss_add_tg_setup_vertical_interrupt2(struct block_sequence_state *seq_state, + struct timing_generator *tg, int start_line); + +void hwss_add_dpp_set_hdr_multiplier(struct block_sequence_state *seq_state, + struct dpp *dpp, uint32_t hw_mult); + +void hwss_add_hubp_program_det_size(struct block_sequence_state *seq_state, + struct hubbub *hubbub, unsigned int hubp_inst, unsigned int det_buffer_size_kb); + +void hwss_add_hubp_program_mcache_id(struct block_sequence_state *seq_state, + struct hubp *hubp, struct dml2_hubp_pipe_mcache_regs *mcache_regs); + +void hwss_add_hubbub_force_pstate_change_control(struct block_sequence_state *seq_state, + struct hubbub *hubbub, bool enable, bool wait); + +void hwss_add_hubp_program_det_segments(struct block_sequence_state *seq_state, + struct hubbub *hubbub, unsigned int hubp_inst, unsigned int det_size); + +void hwss_add_opp_set_dyn_expansion(struct block_sequence_state *seq_state, + struct output_pixel_processor *opp, enum dc_color_space color_sp, + enum dc_color_depth color_dpth, enum signal_type signal); + +void hwss_add_opp_program_fmt(struct block_sequence_state *seq_state, + struct output_pixel_processor *opp, struct bit_depth_reduction_params *fmt_bit_depth, + struct clamping_and_pixel_encoding_params *clamping); + +void hwss_add_abm_set_pipe(struct block_sequence_state *seq_state, + struct dc *dc, struct pipe_ctx *pipe_ctx); + +void hwss_add_abm_set_level(struct block_sequence_state *seq_state, + struct abm *abm, uint32_t abm_level); + +void hwss_add_tg_enable_crtc(struct block_sequence_state *seq_state, + struct timing_generator *tg); + +void hwss_add_hubp_wait_flip_pending(struct block_sequence_state *seq_state, + struct hubp *hubp, unsigned int timeout_us, unsigned int polling_interval_us); + +void hwss_add_tg_wait_double_buffer_pending(struct block_sequence_state *seq_state, + struct timing_generator *tg, unsigned int timeout_us, unsigned int polling_interval_us); + +void hwss_add_dccg_set_dto_dscclk(struct block_sequence_state *seq_state, + struct dccg *dccg, int inst, int num_slices_h); + +void hwss_add_dsc_calculate_and_set_config(struct block_sequence_state *seq_state, + struct pipe_ctx *pipe_ctx, bool enable, int opp_cnt); + +void hwss_add_mpc_remove_mpcc(struct block_sequence_state *seq_state, + struct mpc *mpc, struct mpc_tree *mpc_tree_params, struct mpcc *mpcc_to_remove); + +void hwss_add_opp_set_mpcc_disconnect_pending(struct 
block_sequence_state *seq_state, + struct output_pixel_processor *opp, int mpcc_inst, bool pending); + +void hwss_add_hubp_disconnect(struct block_sequence_state *seq_state, + struct hubp *hubp); + +void hwss_add_dsc_enable_with_opp(struct block_sequence_state *seq_state, + struct pipe_ctx *pipe_ctx); + +void hwss_add_dsc_disconnect(struct block_sequence_state *seq_state, + struct display_stream_compressor *dsc); + +void hwss_add_dc_set_optimized_required(struct block_sequence_state *seq_state, + struct dc *dc, bool optimized_required); + +void hwss_add_abm_set_immediate_disable(struct block_sequence_state *seq_state, + struct dc *dc, struct pipe_ctx *pipe_ctx); + +void hwss_add_opp_set_disp_pattern_generator(struct block_sequence_state *seq_state, + struct output_pixel_processor *opp, + enum controller_dp_test_pattern test_pattern, + enum controller_dp_color_space color_space, + enum dc_color_depth color_depth, + struct tg_color solid_color, + bool use_solid_color, + int width, + int height, + int offset); + +void hwss_add_opp_program_bit_depth_reduction(struct block_sequence_state *seq_state, + struct output_pixel_processor *opp, + bool use_default_params, + struct pipe_ctx *pipe_ctx); + +void hwss_add_dc_ip_request_cntl(struct block_sequence_state *seq_state, + struct dc *dc, + bool enable); + +void hwss_add_dwbc_update(struct block_sequence_state *seq_state, + struct dwbc *dwb, + struct dc_dwb_params *dwb_params); + +void hwss_add_mcif_wb_config_buf(struct block_sequence_state *seq_state, + struct mcif_wb *mcif_wb, + struct mcif_buf_params *mcif_buf_params, + unsigned int dest_height); + +void hwss_add_mcif_wb_config_arb(struct block_sequence_state *seq_state, + struct mcif_wb *mcif_wb, + struct mcif_arb_params *mcif_arb_params); + +void hwss_add_mcif_wb_enable(struct block_sequence_state *seq_state, + struct mcif_wb *mcif_wb); + +void hwss_add_mcif_wb_disable(struct block_sequence_state *seq_state, + struct mcif_wb *mcif_wb); + +void hwss_add_mpc_set_dwb_mux(struct block_sequence_state *seq_state, + struct mpc *mpc, + int dwb_id, + int mpcc_id); + +void hwss_add_mpc_disable_dwb_mux(struct block_sequence_state *seq_state, + struct mpc *mpc, + unsigned int dwb_id); + +void hwss_add_dwbc_enable(struct block_sequence_state *seq_state, + struct dwbc *dwb, + struct dc_dwb_params *dwb_params); + +void hwss_add_dwbc_disable(struct block_sequence_state *seq_state, + struct dwbc *dwb); + +void hwss_add_tg_set_gsl(struct block_sequence_state *seq_state, + struct timing_generator *tg, + struct gsl_params gsl); + +void hwss_add_tg_set_gsl_source_select(struct block_sequence_state *seq_state, + struct timing_generator *tg, + int group_idx, + uint32_t gsl_ready_signal); + +void hwss_add_hubp_update_mall_sel(struct block_sequence_state *seq_state, + struct hubp *hubp, + uint32_t mall_sel, + bool cache_cursor); + +void hwss_add_hubp_prepare_subvp_buffering(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool enable); + +void hwss_add_hubp_set_blank_en(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool enable); + +void hwss_add_hubp_disable_control(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool disable); + +void hwss_add_hubbub_soft_reset(struct block_sequence_state *seq_state, + struct hubbub *hubbub, + void (*hubbub_soft_reset)(struct hubbub *hubbub, bool reset), + bool reset); + +void hwss_add_hubp_clk_cntl(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool enable); + +void hwss_add_dpp_dppclk_control(struct block_sequence_state 
*seq_state, + struct dpp *dpp, + bool dppclk_div, + bool enable); + +void hwss_add_disable_phantom_crtc(struct block_sequence_state *seq_state, + struct timing_generator *tg); + +void hwss_add_dsc_pg_status(struct block_sequence_state *seq_state, + struct dce_hwseq *hws, + int dsc_inst, + bool is_ungated); + +void hwss_add_dsc_wait_disconnect_pending_clear(struct block_sequence_state *seq_state, + struct display_stream_compressor *dsc, + bool *is_ungated); + +void hwss_add_dsc_disable(struct block_sequence_state *seq_state, + struct display_stream_compressor *dsc, + bool *is_ungated); + +void hwss_add_dccg_set_ref_dscclk(struct block_sequence_state *seq_state, + struct dccg *dccg, + int dsc_inst, + bool *is_ungated); + +void hwss_add_dpp_root_clock_control(struct block_sequence_state *seq_state, + struct dce_hwseq *hws, + unsigned int dpp_inst, + bool clock_on); + +void hwss_add_dpp_pg_control(struct block_sequence_state *seq_state, + struct dce_hwseq *hws, + unsigned int dpp_inst, + bool power_on); + +void hwss_add_hubp_pg_control(struct block_sequence_state *seq_state, + struct dce_hwseq *hws, + unsigned int hubp_inst, + bool power_on); + +void hwss_add_hubp_set_blank(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool blank); + +void hwss_add_hubp_init(struct block_sequence_state *seq_state, + struct hubp *hubp); + +void hwss_add_hubp_reset(struct block_sequence_state *seq_state, + struct hubp *hubp); + +void hwss_add_dpp_reset(struct block_sequence_state *seq_state, + struct dpp *dpp); + +void hwss_add_opp_pipe_clock_control(struct block_sequence_state *seq_state, + struct output_pixel_processor *opp, + bool enable); + +void hwss_add_hubp_set_vm_system_aperture_settings(struct block_sequence_state *seq_state, + struct hubp *hubp, + uint64_t sys_default, + uint64_t sys_low, + uint64_t sys_high); + +void hwss_add_hubp_set_flip_int(struct block_sequence_state *seq_state, + struct hubp *hubp); + +void hwss_add_dccg_update_dpp_dto(struct block_sequence_state *seq_state, + struct dccg *dccg, + int dpp_inst, + int dppclk_khz); + +void hwss_add_hubp_vtg_sel(struct block_sequence_state *seq_state, + struct hubp *hubp, + uint32_t otg_inst); + +void hwss_add_hubp_setup2(struct block_sequence_state *seq_state, + struct hubp *hubp, + struct dml2_dchub_per_pipe_register_set *hubp_regs, + union dml2_global_sync_programming *global_sync, + struct dc_crtc_timing *timing); + +void hwss_add_hubp_setup(struct block_sequence_state *seq_state, + struct hubp *hubp, + struct _vcs_dpi_display_dlg_regs_st *dlg_regs, + struct _vcs_dpi_display_ttu_regs_st *ttu_regs, + struct _vcs_dpi_display_rq_regs_st *rq_regs, + struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); + +void hwss_add_hubp_set_unbounded_requesting(struct block_sequence_state *seq_state, + struct hubp *hubp, + bool unbounded_req); + +void hwss_add_hubp_setup_interdependent2(struct block_sequence_state *seq_state, + struct hubp *hubp, + struct dml2_dchub_per_pipe_register_set *hubp_regs); + +void hwss_add_hubp_setup_interdependent(struct block_sequence_state *seq_state, + struct hubp *hubp, + struct _vcs_dpi_display_dlg_regs_st *dlg_regs, + struct _vcs_dpi_display_ttu_regs_st *ttu_regs); +void hwss_add_hubp_program_surface_config(struct block_sequence_state *seq_state, + struct hubp *hubp, + enum surface_pixel_format format, + struct dc_tiling_info *tiling_info, + struct plane_size plane_size, + enum dc_rotation_angle rotation, + struct dc_plane_dcc_param *dcc, + bool horizontal_mirror, + int compat_level); + +void 
hwss_add_dpp_setup_dpp(struct block_sequence_state *seq_state, + struct pipe_ctx *pipe_ctx); + +void hwss_add_dpp_set_cursor_matrix(struct block_sequence_state *seq_state, + struct dpp *dpp, + enum dc_color_space color_space, + struct dc_csc_transform *cursor_csc_color_matrix); + +void hwss_add_mpc_update_blending(struct block_sequence_state *seq_state, + struct mpc *mpc, + struct mpcc_blnd_cfg blnd_cfg, + int mpcc_id); + +void hwss_add_mpc_assert_idle_mpcc(struct block_sequence_state *seq_state, + struct mpc *mpc, + int mpcc_id); + +void hwss_add_mpc_insert_plane(struct block_sequence_state *seq_state, + struct mpc *mpc, + struct mpc_tree *mpc_tree_params, + struct mpcc_blnd_cfg blnd_cfg, + struct mpcc_sm_cfg *sm_cfg, + struct mpcc *insert_above_mpcc, + int dpp_id, + int mpcc_id); + +void hwss_add_dpp_set_scaler(struct block_sequence_state *seq_state, + struct dpp *dpp, + const struct scaler_data *scl_data); + +void hwss_add_hubp_mem_program_viewport(struct block_sequence_state *seq_state, + struct hubp *hubp, + const struct rect *viewport, + const struct rect *viewport_c); + +void hwss_add_set_cursor_attribute(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx); + +void hwss_add_set_cursor_position(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx); + +void hwss_add_set_cursor_sdr_white_level(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx); + +void hwss_add_program_output_csc(struct block_sequence_state *seq_state, + struct dc *dc, + struct pipe_ctx *pipe_ctx, + enum dc_color_space colorspace, + uint16_t *matrix, + int opp_id); + +void hwss_add_phantom_hubp_post_enable(struct block_sequence_state *seq_state, + struct hubp *hubp); + +void hwss_add_update_force_pstate(struct block_sequence_state *seq_state, + struct dc *dc, + struct dc_state *context); + +void hwss_add_hubbub_apply_dedcn21_147_wa(struct block_sequence_state *seq_state, + struct hubbub *hubbub); + +void hwss_add_hubbub_allow_self_refresh_control(struct block_sequence_state *seq_state, + struct hubbub *hubbub, + bool allow, + bool *disallow_self_refresh_applied); + +void hwss_add_tg_get_frame_count(struct block_sequence_state *seq_state, + struct timing_generator *tg, + unsigned int *frame_count); + +void hwss_add_tg_set_dsc_config(struct block_sequence_state *seq_state, + struct timing_generator *tg, + struct dsc_optc_config *dsc_optc_cfg, + bool enable); + +void hwss_add_opp_program_left_edge_extra_pixel(struct block_sequence_state *seq_state, + struct output_pixel_processor *opp, + enum dc_pixel_encoding pixel_encoding, + bool is_otg_master); + #endif /* __DC_HW_SEQUENCER_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 1e2d247fbbac..406db231bc72 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -27,6 +27,7 @@ #define __DC_HW_SEQUENCER_PRIVATE_H__ #include "dc_types.h" +#include "hw_sequencer.h" enum pipe_gating_control { PIPE_GATING_CONTROL_DISABLE = 0, @@ -80,7 +81,13 @@ struct hwseq_private_funcs { void (*plane_atomic_disconnect)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); + void (*plane_atomic_disconnect_sequence)(struct dc *dc, + struct dc_state *state, + struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state); void (*update_mpcc)(struct dc *dc, struct pipe_ctx *pipe_ctx); + void 
(*update_mpcc_sequence)(struct dc *dc, struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state); bool (*set_input_transfer_func)(struct dc *dc, struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state); @@ -97,6 +104,10 @@ struct hwseq_private_funcs { void (*blank_pixel_data)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool blank); + void (*blank_pixel_data_sequence)(struct dc *dc, + struct pipe_ctx *pipe_ctx, + bool blank, + struct block_sequence_state *seq_state); enum dc_status (*enable_stream_timing)( struct pipe_ctx *pipe_ctx, struct dc_state *context, @@ -105,6 +116,8 @@ struct hwseq_private_funcs { bool enable); void (*setup_vupdate_interrupt)(struct dc *dc, struct pipe_ctx *pipe_ctx); + void (*setup_vupdate_interrupt_sequence)(struct dc *dc, struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state); bool (*did_underflow_occur)(struct dc *dc, struct pipe_ctx *pipe_ctx); void (*init_blank)(struct dc *dc, struct timing_generator *tg); void (*disable_vga)(struct dce_hwseq *hws); @@ -112,6 +125,10 @@ struct hwseq_private_funcs { void (*plane_atomic_power_down)(struct dc *dc, struct dpp *dpp, struct hubp *hubp); + void (*plane_atomic_power_down_sequence)(struct dc *dc, + struct dpp *dpp, + struct hubp *hubp, + struct block_sequence_state *seq_state); void (*plane_atomic_disable)(struct dc *dc, struct pipe_ctx *pipe_ctx); void (*enable_power_gating_plane)(struct dce_hwseq *hws, bool enable); @@ -140,15 +157,31 @@ struct hwseq_private_funcs { unsigned int dsc_inst); void (*update_odm)(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx); + void (*update_odm_sequence)(struct dc *dc, struct dc_state *context, + struct pipe_ctx *pipe_ctx, struct block_sequence_state *seq_state); void (*program_all_writeback_pipes_in_tree)(struct dc *dc, const struct dc_stream_state *stream, struct dc_state *context); + void (*program_all_writeback_pipes_in_tree_sequence)( + struct dc *dc, + const struct dc_stream_state *stream, + struct dc_state *context, + struct block_sequence_state *seq_state); bool (*s0i3_golden_init_wa)(struct dc *dc); void (*set_hdr_multiplier)(struct pipe_ctx *pipe_ctx); + void (*set_hdr_multiplier_sequence)(struct pipe_ctx *pipe_ctx, + struct block_sequence_state *seq_state); void (*verify_allow_pstate_change_high)(struct dc *dc); + void (*verify_allow_pstate_change_high_sequence)(struct dc *dc, + struct block_sequence_state *seq_state); void (*program_pipe)(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context); + void (*program_pipe_sequence)( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct block_sequence_state *seq_state); bool (*wait_for_blank_complete)(struct output_pixel_processor *opp); void (*dccg_init)(struct dce_hwseq *hws); bool (*set_blend_lut)(struct pipe_ctx *pipe_ctx, @@ -163,6 +196,8 @@ struct hwseq_private_funcs { void (*enable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context); void (*program_mall_pipe_config)(struct dc *dc, struct dc_state *context); + void (*program_mall_pipe_config_sequence)(struct dc *dc, struct dc_state *context, + struct block_sequence_state *seq_state); void (*update_force_pstate)(struct dc *dc, struct dc_state *context); void (*update_mall_sel)(struct dc *dc, struct dc_state *context); unsigned int (*calculate_dccg_k1_k2_values)(struct pipe_ctx *pipe_ctx, @@ -186,6 +221,7 @@ struct hwseq_private_funcs { void (*perform_3dlut_wa_unlock)(struct pipe_ctx *pipe_ctx); void (*wait_for_pipe_update_if_needed)(struct dc *dc, struct 
pipe_ctx *pipe_ctx, bool is_surface_update_only); void (*set_wait_for_update_needed_for_pipe)(struct dc *dc, struct pipe_ctx *pipe_ctx); + void (*dc_ip_request_cntl)(struct dc *dc, bool enable); }; struct dce_hwseq { -- cgit v1.2.3 From 0c07085e5cf63536214975a9a5a2a3f91a4f568c Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 3 Oct 2025 16:19:27 -0400 Subject: drm/amd/display: [FW Promotion] Release 0.1.31.0 Release highlights: DCN35/351/36: * fix video lag with replay * DPP DTO programming sequence fix * IPS exit programming sequence fix DCN 3.1.5: * fix video lag with replay Signed-off-by: Taimur Hassan Signed-off-by: Aurabindo Pillai Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index be6151d529d8..b2d5154d1402 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -973,7 +973,8 @@ union dmub_fw_boot_options { uint32_t disable_sldo_opt: 1; /**< 1 to disable SLDO optimizations */ uint32_t lower_hbr3_phy_ssc: 1; /**< 1 to lower hbr3 phy ssc to 0.125 percent */ uint32_t override_hbr3_pll_vco: 1; /**< 1 to override the hbr3 pll vco to 0 */ - uint32_t reserved : 5; /**< reserved */ + uint32_t disable_dpia_bw_allocation: 1; /**< 1 to disable the USB4 DPIA BW allocation */ + uint32_t reserved : 4; /**< reserved */ } bits; /**< boot bits */ uint32_t all; /**< 32-bit access to bits */ }; -- cgit v1.2.3 From ccdc171b34a0f33f1abc015872bf7f3d65e67607 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 3 Oct 2025 18:31:30 -0500 Subject: drm/amd/display: Promote DC to 3.2.354 Display Core v3.2.354 release highlights: * DCN35 dispclk, dppclk & other fixes * DCN401 cursor offload fix * Add new block sequence-building/executing functions * null ptr fixes * DPIA hpd fix * debug improvements * Fix performance regression from full updates * Firmware Release 0.1.31.0 Signed-off-by: Taimur Hassan Signed-off-by: Aurabindo Pillai Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 8b9a78ff234c..ddd431ec4d52 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.353" +#define DC_VER "3.2.354" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC -- cgit v1.2.3 From 8c62f75cb7e9016678c28a10638107515f8b2ad2 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 11 Sep 2025 12:41:40 +0100 Subject: drm/amdgpu: Use memset32 for IB padding Use memset32 instead of open coding it, just because it is that bit nicer. 
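For illustration, the same alignment arithmetic as a standalone sketch (helper name and calling convention hypothetical; only memset32() and the align_mask math come from the patch):

    /* Pad a dword buffer up to a power-of-two alignment with NOP dwords. */
    static void pad_to_align(u32 *ptr, u32 *length_dw, u32 align_mask, u32 nop)
    {
            u32 count = *length_dw & align_mask;

            if (count) {
                    count = align_mask + 1 - count;
                    /* One memset32() call instead of a dword-at-a-time loop. */
                    memset32(&ptr[*length_dw], nop, count);
                    *length_dw += count;
            }
    }
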
Signed-off-by: Tvrtko Ursulin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 5ec5c3ff22bb..43f769fed810 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -159,8 +159,16 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) */ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - while (ib->length_dw & ring->funcs->align_mask) - ib->ptr[ib->length_dw++] = ring->funcs->nop; + u32 align_mask = ring->funcs->align_mask; + u32 count = ib->length_dw & align_mask; + + if (count) { + count = align_mask + 1 - count; + + memset32(&ib->ptr[ib->length_dw], ring->funcs->nop, count); + + ib->length_dw += count; + } } /** -- cgit v1.2.3 From cdfdec6f1608b4830eeb1ae6c89d3eec91673807 Mon Sep 17 00:00:00 2001 From: Ilya Zlobintsev Date: Mon, 13 Oct 2025 19:30:42 +0300 Subject: drm/amd/pm: Avoid writing nulls into `pp_od_clk_voltage` Calling `smu_cmn_get_sysfs_buf` aligns the offset used by `sysfs_emit_at` to the current page boundary, which was previously directly returned from the various `print_clk_levels` implementations to be added to the buffer position. Instead, only the relative offset showing how much was written to the buffer should be returned, regardless of how it was changed for alignment purposes. Reviewed-by: Lijo Lazar Signed-off-by: Ilya Zlobintsev Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c | 5 +++-- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 15 ++++++++------- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 5 +++-- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 10 ++++++---- drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 7 ++++--- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 7 ++++--- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 5 +++-- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c | 5 +++-- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 7 ++++--- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 7 ++++--- drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 5 +++-- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c | 5 +++-- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 7 ++++--- 13 files changed, 52 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c index 9548bd3c624b..55401e6b2b0b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c @@ -291,11 +291,12 @@ static int cyan_skillfish_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { - int ret = 0, size = 0; + int ret = 0, size = 0, start_offset = 0; uint32_t cur_value = 0; int i; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; switch (clk_type) { case SMU_OD_SCLK: @@ -353,7 +354,7 @@ static int cyan_skillfish_print_clk_levels(struct smu_context *smu, return ret; } - return size; + return size - start_offset; } static bool cyan_skillfish_is_dpm_running(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 0028f10ead42..bbf09aec9152 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1469,7 +1469,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { uint16_t *curve_settings; - int i, levels, size = 0, ret = 0; + int i, levels, size = 0, ret = 0, start_offset = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t freq_values[3] = {0}; uint32_t mark_index = 0; @@ -1484,6 +1484,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, uint32_t min_value, max_value; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; switch (clk_type) { case SMU_GFXCLK: @@ -1497,11 +1498,11 @@ static int navi10_print_clk_levels(struct smu_context *smu, case SMU_DCEFCLK: ret = navi10_get_current_clk_freq_by_table(smu, clk_type, &cur_value); if (ret) - return size; + return size - start_offset; ret = smu_v11_0_get_dpm_level_count(smu, clk_type, &count); if (ret) - return size; + return size - start_offset; ret = navi10_is_support_fine_grained_dpm(smu, clk_type); if (ret < 0) @@ -1511,7 +1512,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, for (i = 0; i < count; i++) { ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, i, &value); if (ret) - return size; + return size - start_offset; size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, value, cur_value == value ? "*" : ""); @@ -1519,10 +1520,10 @@ static int navi10_print_clk_levels(struct smu_context *smu, } else { ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, &freq_values[0]); if (ret) - return size; + return size - start_offset; ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_values[2]); if (ret) - return size; + return size - start_offset; freq_values[1] = cur_value; mark_index = cur_value == freq_values[0] ? 0 : @@ -1653,7 +1654,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, break; } - return size; + return size - start_offset; } static int navi10_force_clk_levels(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 31c2c0386b1f..774283ac7827 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1281,7 +1281,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, struct smu_11_0_7_overdrive_table *od_settings = smu->od_settings; OverDriveTable_t *od_table = (OverDriveTable_t *)table_context->overdrive_table; - int i, size = 0, ret = 0; + int i, size = 0, ret = 0, start_offset = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t freq_values[3] = {0}; uint32_t mark_index = 0; @@ -1289,6 +1289,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, uint32_t min_value, max_value; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; switch (clk_type) { case SMU_GFXCLK: @@ -1434,7 +1435,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, } print_clk_out: - return size; + return size - start_offset; } static int sienna_cichlid_force_clk_levels(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 1dcbf250f486..53579208cffb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -565,7 +565,7 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, DpmClocks_t *clk_table = smu->smu_table.clocks_table; SmuMetrics_legacy_t metrics; struct 
smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); - int i, idx, size = 0, ret = 0; + int i, idx, size = 0, ret = 0, start_offset = 0; uint32_t cur_value = 0, value = 0, count = 0; bool cur_value_match_level = false; @@ -576,6 +576,7 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, return ret; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; switch (clk_type) { case SMU_OD_SCLK: @@ -658,7 +659,7 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, break; } - return size; + return size - start_offset; } static int vangogh_print_clk_levels(struct smu_context *smu, @@ -666,7 +667,7 @@ static int vangogh_print_clk_levels(struct smu_context *smu, { DpmClocks_t *clk_table = smu->smu_table.clocks_table; SmuMetrics_t metrics; - int i, idx, size = 0, ret = 0; + int i, idx, size = 0, ret = 0, start_offset = 0; uint32_t cur_value = 0, value = 0, count = 0; bool cur_value_match_level = false; uint32_t min, max; @@ -678,6 +679,7 @@ static int vangogh_print_clk_levels(struct smu_context *smu, return ret; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; switch (clk_type) { case SMU_OD_SCLK: @@ -779,7 +781,7 @@ static int vangogh_print_clk_levels(struct smu_context *smu, break; } - return size; + return size - start_offset; } static int vangogh_common_print_clk_levels(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 3baf20f4c373..eaa9ea162f16 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -494,7 +494,7 @@ static int renoir_set_fine_grain_gfx_freq_parameters(struct smu_context *smu) static int renoir_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { - int i, idx, size = 0, ret = 0; + int i, idx, size = 0, ret = 0, start_offset = 0; uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0; SmuMetrics_t metrics; bool cur_value_match_level = false; @@ -506,6 +506,7 @@ static int renoir_print_clk_levels(struct smu_context *smu, return ret; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; switch (clk_type) { case SMU_OD_RANGE: @@ -550,7 +551,7 @@ static int renoir_print_clk_levels(struct smu_context *smu, size += sysfs_emit_at(buf, size, "2: %uMhz %s\n", max, i == 2 ? 
"*" : ""); } - return size; + return size - start_offset; case SMU_SOCCLK: count = NUM_SOCCLK_DPM_LEVELS; cur_value = metrics.ClockFrequency[CLOCK_SOCCLK]; @@ -607,7 +608,7 @@ static int renoir_print_clk_levels(struct smu_context *smu, break; } - return size; + return size - start_offset; } static enum amd_pm_state_type renoir_get_current_power_state(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index c1062e5f0393..677781060246 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1195,15 +1195,16 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, struct smu_13_0_dpm_table *single_dpm_table; struct smu_13_0_pcie_table *pcie_table; uint32_t gen_speed, lane_width; - int i, curr_freq, size = 0; + int i, curr_freq, size = 0, start_offset = 0; int32_t min_value, max_value; int ret = 0; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; if (amdgpu_ras_intr_triggered()) { size += sysfs_emit_at(buf, size, "unavailable\n"); - return size; + return size - start_offset; } switch (clk_type) { @@ -1534,7 +1535,7 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, break; } - return size; + return size - start_offset; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index b081ae3e8f43..6908f9930f16 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -497,11 +497,12 @@ static int smu_v13_0_4_get_dpm_level_count(struct smu_context *smu, static int smu_v13_0_4_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { - int i, idx, size = 0, ret = 0; + int i, idx, size = 0, ret = 0, start_offset = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t min, max; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; switch (clk_type) { case SMU_OD_SCLK: @@ -565,7 +566,7 @@ static int smu_v13_0_4_print_clk_levels(struct smu_context *smu, break; } - return size; + return size - start_offset; } static int smu_v13_0_4_read_sensor(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c index f5db181ef489..4576bf008b22 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c @@ -861,11 +861,12 @@ out: static int smu_v13_0_5_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { - int i, idx, size = 0, ret = 0; + int i, idx, size = 0, ret = 0, start_offset = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t min = 0, max = 0; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; switch (clk_type) { case SMU_OD_SCLK: @@ -928,7 +929,7 @@ static int smu_v13_0_5_print_clk_levels(struct smu_context *smu, } print_clk_out: - return size; + return size - start_offset; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 5d4315c4aa27..cc7a0e061ba1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -1428,7 +1428,7 @@ static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf, int size, static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, enum smu_clk_type type, char 
*buf) { - int now, size = 0; + int now, size = 0, start_offset = 0; int ret = 0; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; struct smu_13_0_dpm_table *single_dpm_table; @@ -1437,10 +1437,11 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, uint32_t min_clk, max_clk; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; if (amdgpu_ras_intr_triggered()) { size += sysfs_emit_at(buf, size, "unavailable\n"); - return size; + return size - start_offset; } dpm_context = smu_dpm->dpm_context; @@ -1575,7 +1576,7 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, break; } - return size; + return size - start_offset; } static int smu_v13_0_6_upload_dpm_level(struct smu_context *smu, bool max, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index c96fa5e49ed6..a3fc35b9011e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1184,15 +1184,16 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, struct smu_13_0_dpm_table *single_dpm_table; struct smu_13_0_pcie_table *pcie_table; uint32_t gen_speed, lane_width; - int i, curr_freq, size = 0; + int i, curr_freq, size = 0, start_offset = 0; int32_t min_value, max_value; int ret = 0; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; if (amdgpu_ras_intr_triggered()) { size += sysfs_emit_at(buf, size, "unavailable\n"); - return size; + return size - start_offset; } switch (clk_type) { @@ -1523,7 +1524,7 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, break; } - return size; + return size - start_offset; } static int smu_v13_0_7_od_restore_table_single(struct smu_context *smu, long input) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index 73b4506ef5a8..5d7e671fa3c3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -1041,12 +1041,13 @@ static uint32_t yellow_carp_get_umd_pstate_clk_default(struct smu_context *smu, static int yellow_carp_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { - int i, idx, size = 0, ret = 0; + int i, idx, size = 0, ret = 0, start_offset = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t min, max; uint32_t clk_limit = 0; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; switch (clk_type) { case SMU_OD_SCLK: @@ -1111,7 +1112,7 @@ static int yellow_carp_print_clk_levels(struct smu_context *smu, } print_clk_out: - return size; + return size - start_offset; } static int yellow_carp_force_clk_levels(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index fe00c84b1cc6..b1bd946d8e30 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -1132,11 +1132,12 @@ static int smu_v14_0_common_get_dpm_level_count(struct smu_context *smu, static int smu_v14_0_0_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { - int i, idx, ret = 0, size = 0; + int i, idx, ret = 0, size = 0, start_offset = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t min, max; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; switch (clk_type) { case SMU_OD_SCLK: @@ -1202,7 +1203,7 @@ static int 
smu_v14_0_0_print_clk_levels(struct smu_context *smu, break; } - return size; + return size - start_offset; } static int smu_v14_0_0_set_soft_freq_limited_range(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 086501cc5213..2cea688c604f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1056,15 +1056,16 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, struct smu_14_0_dpm_table *single_dpm_table; struct smu_14_0_pcie_table *pcie_table; uint32_t gen_speed, lane_width; - int i, curr_freq, size = 0; + int i, curr_freq, size = 0, start_offset = 0; int32_t min_value, max_value; int ret = 0; smu_cmn_get_sysfs_buf(&buf, &size); + start_offset = size; if (amdgpu_ras_intr_triggered()) { size += sysfs_emit_at(buf, size, "unavailable\n"); - return size; + return size - start_offset; } switch (clk_type) { @@ -1374,7 +1375,7 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, break; } - return size; + return size - start_offset; } static int smu_v14_0_2_force_clk_levels(struct smu_context *smu, -- cgit v1.2.3 From bd07b3f08a0c9c91a33143215eda8a57f575dc2a Mon Sep 17 00:00:00 2001 From: Alysa Liu Date: Fri, 10 Oct 2025 17:18:09 -0400 Subject: drm/amdgpu: Fix vram_usage underflow vram_usage was subtracting non-vram memory size, which caused it to become negative. Signed-off-by: Alysa Liu Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index dbdf33bc03b8..96ccd5ade031 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1961,9 +1961,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( */ if (size) { if (!is_imported && - (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM || - (adev->apu_prefer_gtt && - mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT))) + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) *size = bo_size; else *size = 0; -- cgit v1.2.3 From 152dca4ea77e575b21c5ee96c582fc1083a25f8a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 14 Oct 2025 14:30:35 -0500 Subject: drm/amd: Add a helper to tell whether an IP block HW is enabled There is already a helper for telling whether a block is valid, but if IP handling wants to check whether its HW is enabled, no such helper exists.
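For illustration only (example_touch_gfx() is a hypothetical caller, not part of the patch), the two helpers complement each other like this:

static int example_touch_gfx(struct amdgpu_device *adev)
{
	/* The block was never discovered for this ASIC. */
	if (!amdgpu_device_ip_is_valid(adev, AMD_IP_BLOCK_TYPE_GFX))
		return -ENODEV;

	/* The block exists, but its hardware has not been brought up. */
	if (!amdgpu_device_ip_is_hw(adev, AMD_IP_BLOCK_TYPE_GFX))
		return -EAGAIN;

	/* Safe to touch GFX hardware state from here on. */
	return 0;
}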
Reviewed-by: Harry Wentland Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 23 ++++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d9bd1d71552e..84c9ff86b495 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -372,6 +372,8 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, u64 *flags); int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, enum amd_ip_block_type block_type); +bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev, + enum amd_ip_block_type block_type); bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev, enum amd_ip_block_type block_type); int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0d5585bc3b04..038804d48341 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2387,7 +2387,7 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, } /** - * amdgpu_device_ip_is_valid - is the hardware IP enabled + * amdgpu_device_ip_is_hw - is the hardware IP enabled * * @adev: amdgpu_device pointer * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) @@ -2395,6 +2395,27 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, * Check if the hardware IP is enable or not. * Returns true if it the IP is enable, false if not. */ +bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev, + enum amd_ip_block_type block_type) +{ + int i; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (adev->ip_blocks[i].version->type == block_type) + return adev->ip_blocks[i].status.hw; + } + return false; +} + +/** + * amdgpu_device_ip_is_valid - is the hardware IP valid + * + * @adev: amdgpu_device pointer + * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) + * + * Check if the hardware IP is valid or not. + * Returns true if it the IP is valid, false if not. 
+ */ bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev, enum amd_ip_block_type block_type) { -- cgit v1.2.3 From e71ca1efd306dd671bd5752ff1c0c5de9b2bd03f Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Thu, 9 Oct 2025 10:38:28 +0800 Subject: drm/amdgpu: Add kiq hdp flush callbacks Add kiq hdp flush callbacks for gfx ips to support gpu hdp flush when no ring presents Reviewed-by: Lijo Lazar Signed-off-by: Victor Zhao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 + 6 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 8841d7213de4..751732f3e883 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9951,6 +9951,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, + .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, }; static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d61eb9f187c6..252517ce5d5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -2438,7 +2438,7 @@ static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) if (version_minor == 3) gfx_v11_0_load_rlcp_rlcv_microcode(adev); } - + return 0; } @@ -3886,7 +3886,7 @@ static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) } memcpy(fw, fw_data, fw_size); - + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); @@ -7318,6 +7318,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, + .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, }; static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 93fde0f9af87..35d5a7e99a7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -5595,6 +5595,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = { .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, + .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush, }; static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 0856ff65288c..d3d0a4b0380c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6939,6 +6939,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { .pad_ib = amdgpu_ring_generic_pad_ib, .emit_rreg = gfx_v8_0_ring_emit_rreg, .emit_wreg = gfx_v8_0_ring_emit_wreg, + .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, }; static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index dd19a97436db..f1a2efc2a8d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7586,6 +7586,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, + .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, }; static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index fb5585ab52be..e0b50c690f8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4792,6 +4792,7 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { .emit_wreg = gfx_v9_4_3_ring_emit_wreg, .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait, + .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush, }; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev) -- cgit v1.2.3 From 6169b555db1392e79159e114fff105987231e4ce Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Thu, 9 Oct 2025 10:42:48 +0800 Subject: drm/amdgpu: use GPU_HDP_FLUSH for sriov Currently SRIOV runtime will use kiq to write HDP_MEM_FLUSH_CNTL for hdp flush. This register needs to be written from the CPU for the NBIF to take notice, otherwise it will not work. Implement amdgpu_kiq_hdp_flush and use kiq to do gpu hdp flush during sriov runtime. v2: - fallback to amdgpu_asic_flush_hdp when amdgpu_kiq_hdp_flush failed - add function amdgpu_mes_hdp_flush v3: - changed returned error Reviewed-by: Lijo Lazar Signed-off-by: Victor Zhao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 71 ++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 12 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 + 5 files changed, 95 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 038804d48341..a99185ed0642 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -7315,10 +7315,17 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev, if (adev->gmc.xgmi.connected_to_cpu) return; - if (ring && ring->funcs->emit_hdp_flush) + if (ring && ring->funcs->emit_hdp_flush) { amdgpu_ring_emit_hdp_flush(ring); - else - amdgpu_asic_flush_hdp(adev, ring); + return; + } + + if (!ring && amdgpu_sriov_runtime(adev)) { + if (!amdgpu_kiq_hdp_flush(adev)) + return; + } + + amdgpu_asic_flush_hdp(adev, ring); } void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ebe2b4c68b0f..29c927f4d6df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -33,6 +33,7 @@ #include "amdgpu_reset.h" #include "amdgpu_xcp.h" #include "amdgpu_xgmi.h" +#include "amdgpu_mes.h" #include "nvd.h" /* delay 0.1 second to enable gfx off feature */ @@ -1194,6 +1195,75 @@ failed_kiq_write: dev_err(adev->dev, "failed to write reg:%x\n", reg); } +int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *ring =
&kiq->ring; + + if (amdgpu_device_skip_hw_access(adev)) + return 0; + + if (adev->enable_mes_kiq && adev->mes.ring[0].sched.ready) + return amdgpu_mes_hdp_flush(adev); + + if (!ring->funcs->emit_hdp_flush) { + return -EOPNOTSUPP; + } + + spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_ring_alloc(ring, 32); + if (r) + goto failed_unlock; + + amdgpu_ring_emit_hdp_flush(ring); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which cause + * gpu_recover() hang there. + * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) + goto failed_kiq_hdp_flush; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + if (amdgpu_in_reset(adev)) + goto failed_kiq_hdp_flush; + + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) { + dev_err(adev->dev, "failed to flush HDP via KIQ timeout\n"); + return -ETIMEDOUT; + } + + return 0; + +failed_undo: + amdgpu_ring_undo(ring); +failed_unlock: + spin_unlock_irqrestore(&kiq->ring_lock, flags); +failed_kiq_hdp_flush: + dev_err(adev->dev, "failed to flush HDP via KIQ\n"); + return r < 0 ? r : -EIO; +} + int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev) { if (amdgpu_num_kcq == -1) { @@ -2485,3 +2555,4 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev) &amdgpu_debugfs_compute_sched_mask_fops); #endif } + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index fb5f7a0ee029..efd61a1ccc66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -615,6 +615,7 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id); void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); +int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev); int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 45bf8309cb37..9c182ce501af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -528,6 +528,18 @@ error: return r; } +int amdgpu_mes_hdp_flush(struct amdgpu_device *adev) +{ + uint32_t hdp_flush_req_offset, hdp_flush_done_offset, ref_and_mask; + + hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev); + hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev); + ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0; + + return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset, + ref_and_mask, ref_and_mask); +} + int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, uint64_t process_context_addr, uint32_t spi_gdbg_per_vmid_cntl, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 
9c27a68cb82f..e989225b354b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -429,6 +429,7 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev, int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask); +int amdgpu_mes_hdp_flush(struct amdgpu_device *adev); int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, uint64_t process_context_addr, uint32_t spi_gdbg_per_vmid_cntl, -- cgit v1.2.3 From d5a62b7aa9323fe10bcbf5e6c22b4522a80c07bb Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 14 Oct 2025 13:11:02 +0530 Subject: drm/amdgpu: add the kernel docs for alloc/free/valid range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add kernel docs for the functions related to hmm_range. Documents added for functions: amdgpu_hmm_range_valid amdgpu_hmm_range_alloc amdgpu_hmm_range_free Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 04f02e0c8bb3..d6f903a2d573 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -227,6 +227,19 @@ out_free_range: return r; } +/** + * amdgpu_hmm_range_valid - check if an HMM range is still valid + * @range: pointer to the &struct amdgpu_hmm_range to validate + * + * Determines whether the given HMM range @range is still valid by + * checking for invalidations via the MMU notifier sequence. This is + * typically used to verify that the range has not been invalidated + * by concurrent address space updates before it is accessed. + * + * Return: + * * true if @range is valid and can be used safely + * * false if @range is NULL or has been invalidated + */ bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range) { if (!range) @@ -236,6 +249,17 @@ bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range) range->hmm_range.notifier_seq); } +/** + * amdgpu_hmm_range_alloc - allocate and initialize an AMDGPU HMM range + * @bo: optional buffer object to associate with this HMM range + * + * Allocates memory for amdgpu_hmm_range and associates it with the @bo passed. + * The reference count of the @bo is incremented. + * + * Return: + * Pointer to a newly allocated struct amdgpu_hmm_range on success, + * or NULL if memory allocation fails. + */ struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo) { struct amdgpu_hmm_range *range; @@ -248,6 +272,15 @@ struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo) return range; } +/** + * amdgpu_hmm_range_free - release an AMDGPU HMM range + * @range: pointer to the range object to free + * + * Releases all resources held by @range, including the associated + * hmm_pfns and the dropping reference of associated bo if any. + * + * Return: void + */ void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range) { if (!range) -- cgit v1.2.3 From 036f18d0a257204d1df7fbbaf3778061a27e2f2e Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 15 Oct 2025 16:19:07 +0800 Subject: drm/amdgpu: check save count before RAS bad page saving It's possible that unit_num is larger than 0 but save_count is zero, since we do get a bad page address but the address is invalid. Check unit_num and save_count together.
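For illustration only (the example_* names are hypothetical; just unit_num and save_count come from the patch), the situation the new check guards against can be sketched as:

/* A stand-in for the driver's real address validation. */
static bool example_addr_is_valid(u64 addr)
{
	return addr != (u64)-1;
}

/* Fetched records (unit_num) and saveable records (save_count) are
 * counted separately: an address can be fetched yet fail validation,
 * so save_count may stay 0 even while unit_num > 0.
 */
static int example_filter_bad_pages(const u64 *addrs, int unit_num, u64 *out)
{
	int save_count = 0, i;

	for (i = 0; i < unit_num; i++) {
		if (!example_addr_is_valid(addrs[i]))
			continue;
		out[save_count++] = addrs[i];
	}
	return save_count;
}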
Signed-off-by: Tao Zhou Reviewed-by: Candice Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index bc3f34c2c2d6..60b42c652e7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3142,7 +3142,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, *new_cnt = unit_num; /* only new entries are saved */ - if (unit_num > 0) { + if (unit_num && save_count) { /*old asics only save pa to eeprom like before*/ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) { if (amdgpu_ras_eeprom_append(control, -- cgit v1.2.3 From 7169e706c82d7bcb3ba4c8943faa32e26daf3523 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 31 Mar 2025 11:12:58 +0800 Subject: drm/amdgpu: Add ras module ip block to amdgpu discovery Add ras module ip block to amdgpu discovery. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 20 ++++++++++++++++++++ drivers/gpu/drm/amd/include/amd_shared.h | 1 + drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c | 8 ++++++++ drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h | 2 ++ 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 84c9ff86b495..a5574e84694b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -380,7 +380,7 @@ int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block); int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block); -#define AMDGPU_MAX_IP_NUM 16 +#define AMDGPU_MAX_IP_NUM AMD_IP_BLOCK_TYPE_NUM struct amdgpu_ip_block_status { bool valid; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 4e75334f3b3a..3097b2946682 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -107,6 +107,7 @@ #include "vcn_v5_0_1.h" #include "jpeg_v5_0_0.h" #include "jpeg_v5_0_1.h" +#include "amdgpu_ras_mgr.h" #include "amdgpu_vpe.h" #if defined(CONFIG_DRM_AMD_ISP) @@ -2393,6 +2394,21 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) amdgpu_ip_version(adev, SDMA0_HWIP, 0)); return -EINVAL; } + + return 0; +} + +static int amdgpu_discovery_set_ras_ip_blocks(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { + case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): + case IP_VERSION(13, 0, 14): + amdgpu_device_ip_block_add(adev, &ras_v1_0_ip_block); + break; + default: + break; + } return 0; } @@ -3173,6 +3189,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) if (r) return r; + r = amdgpu_discovery_set_ras_ip_blocks(adev); + if (r) + return r; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && !amdgpu_sriov_vf(adev)) || (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) { diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 75efda2969cf..17945094a138 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -109,6 +109,7 @@ enum amd_ip_block_type { AMD_IP_BLOCK_TYPE_VPE, AMD_IP_BLOCK_TYPE_UMSCH_MM, AMD_IP_BLOCK_TYPE_ISP, + AMD_IP_BLOCK_TYPE_RAS, 
AMD_IP_BLOCK_TYPE_NUM, }; diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c index a8d02bd42f90..3ae843d078d8 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c @@ -381,6 +381,14 @@ static const struct amd_ip_funcs __maybe_unused ras_v1_0_ip_funcs = { .hw_fini = amdgpu_ras_mgr_hw_fini, }; +const struct amdgpu_ip_block_version ras_v1_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_RAS, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &ras_v1_0_ip_funcs, +}; + int amdgpu_enable_uniras(struct amdgpu_device *adev, bool enable) { struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h index 8d6a1873b666..814b65ef1c62 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h @@ -54,6 +54,8 @@ struct amdgpu_ras_mgr { bool ras_is_ready; }; +extern const struct amdgpu_ip_block_version ras_v1_0_ip_block; + struct amdgpu_ras_mgr *amdgpu_ras_mgr_get_context( struct amdgpu_device *adev); int amdgpu_enable_uniras(struct amdgpu_device *adev, bool enable); -- cgit v1.2.3 From 46791d147d3ab3262298478106ef2a52fc7192e2 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 16 Oct 2025 14:51:17 +0800 Subject: drm/amd: Fix set but not used warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are many set but not used warnings under drivers/gpu/drm/amd when compiling with the latest upstream mainline GCC: drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c:305:18: warning: variable ‘p’ set but not used [-Wunused-but-set-variable=] drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h:103:26: warning: variable ‘internal_reg_offset’ set but not used [-Wunused-but-set-variable=] ... drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h:164:26: warning: variable ‘internal_reg_offset’ set but not used [-Wunused-but-set-variable=] ... drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:445:13: warning: variable ‘pipe_idx’ set but not used [-Wunused-but-set-variable=] drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:875:21: warning: variable ‘pipe_idx’ set but not used [-Wunused-but-set-variable=] Remove the variables actually not used or add __maybe_unused attribute for the variables actually used to fix them, compile tested only. Signed-off-by: Tiezhu Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 4 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 6 ++++-- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 9 +++------ 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index b2033f8352f5..83f3b94ed975 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -302,7 +302,6 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages) { unsigned t; - unsigned p; int i, j; u64 page_base; /* Starting from VEGA10, system bit must be 0 to mean invalid. 
*/ @@ -316,8 +315,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, return; t = offset / AMDGPU_GPU_PAGE_SIZE; - p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE; - for (i = 0; i < pages; i++, p++) { + for (i = 0; i < pages; i++) { page_base = adev->dummy_page_addr; if (!adev->gart.ptr) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index dc8a17bcc3c8..82624b44e661 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -100,7 +100,8 @@ #define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) \ ({ \ - uint32_t internal_reg_offset, addr; \ + /* To avoid a -Wunused-but-set-variable warning. */ \ + uint32_t internal_reg_offset __maybe_unused, addr; \ bool video_range, video1_range, aon_range, aon1_range; \ \ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ @@ -161,7 +162,8 @@ #define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \ ({ \ - uint32_t internal_reg_offset, addr; \ + /* To avoid a -Wunused-but-set-variable warning. */ \ + uint32_t internal_reg_offset __maybe_unused, addr; \ bool video_range, video1_range, aon_range, aon1_range; \ \ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index c75663aefcf3..4b20c01bf646 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -442,7 +442,6 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru int i = 0, k = 0; int ramp_up_num_steps = 1; // TODO: Ramp is currently disabled. Reenable it. uint8_t visual_confirm_enabled; - int pipe_idx = 0; struct dc_stream_status *stream_status = NULL; if (dc == NULL) @@ -457,7 +456,7 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru cmd.fw_assisted_mclk_switch.config_data.visual_confirm_enabled = visual_confirm_enabled; if (should_manage_pstate) { - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (!pipe->stream) @@ -472,7 +471,6 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru cmd.fw_assisted_mclk_switch.config_data.vactive_stretch_margin_us = dc->debug.fpo_vactive_margin_us; break; } - pipe_idx++; } } @@ -872,7 +870,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, bool enable) { uint8_t cmd_pipe_index = 0; - uint32_t i, pipe_idx; + uint32_t i; uint8_t subvp_count = 0; union dmub_rb_cmd cmd; struct pipe_ctx *subvp_pipes[2]; @@ -899,7 +897,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, if (enable) { // For each pipe that is a "main" SUBVP pipe, fill in pipe data for DMUB SUBVP cmd - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); @@ -922,7 +920,6 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, populate_subvp_cmd_vblank_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++); } - pipe_idx++; } if (subvp_count == 2) { update_subvp_prefetch_end_to_mall_start(dc, context, &cmd, subvp_pipes); -- cgit v1.2.3 From 6588766d08e35b692d839194af581f06cb7b77c6 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Thu, 20 Mar 2025 17:04:14 +0800 Subject: drm/amdgpu: Enable ras module Enable ras module, 
disabled by default. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c index 3ae843d078d8..13c207c8a843 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c @@ -335,6 +335,11 @@ static int amdgpu_ras_mgr_hw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); int ret; + /* Currently only debug mode can enable the ras module + */ + if (!adev->debug_enable_ras_aca) + return 0; + if (!ras_mgr || !ras_mgr->ras_core) return -EINVAL; @@ -346,6 +351,8 @@ static int amdgpu_ras_mgr_hw_init(struct amdgpu_ip_block *ip_block) ras_mgr->ras_is_ready = true; + amdgpu_enable_uniras(adev, true); + RAS_DEV_INFO(adev, "AMDGPU RAS Is Ready.\n"); return 0; } @@ -355,6 +362,11 @@ static int amdgpu_ras_mgr_hw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + /* Currently only debug mode can enable the ras module + */ + if (!adev->debug_enable_ras_aca) + return 0; + if (!ras_mgr || !ras_mgr->ras_core) return -EINVAL; -- cgit v1.2.3 From 919c835027e2530e6fad3957d7291ffb3bba1b00 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 3 Oct 2025 16:06:53 -0400 Subject: drm/amd/display: Fix GFP_ATOMIC abuse There are a lot of GFP_ATOMIC allocations which are not in interrupt context. Change them to use GFP_KERNEL instead. Reviewed-by: Leo Li Signed-off-by: Aurabindo Pillai Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- .../gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.c | 2 +- drivers/gpu/drm/amd/display/dc/dce/dce_abm.c | 2 +- drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c | 6 ++--- .../amd/display/dc/resource/dcn20/dcn20_resource.c | 28 ++++++++++---------- .../display/dc/resource/dcn201/dcn201_resource.c | 30 ++++++++++------------ 6 files changed, 33 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3ea17a3d307d..e3be2620f959 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10592,7 +10592,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) * Here we create an empty update on each plane. * To fix this, DC should permit updating only stream properties.
*/ - dummy_updates = kzalloc(sizeof(struct dc_surface_update) * MAX_SURFACES, GFP_ATOMIC); + dummy_updates = kzalloc(sizeof(struct dc_surface_update) * MAX_SURFACES, GFP_KERNEL); if (!dummy_updates) { drm_err(adev_to_drm(adev), "Failed to allocate memory for dummy_updates.\n"); continue; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.c index 5999b2da3a01..33d8bd91cb01 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.c @@ -148,7 +148,7 @@ struct dccg *dccg2_create( const struct dccg_shift *dccg_shift, const struct dccg_mask *dccg_mask) { - struct dcn_dccg *dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_ATOMIC); + struct dcn_dccg *dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_KERNEL); struct dccg *base; if (dccg_dcn == NULL) { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index a6006776333d..2dcf394edf22 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -283,7 +283,7 @@ struct abm *dce_abm_create( const struct dce_abm_shift *abm_shift, const struct dce_abm_mask *abm_mask) { - struct dce_abm *abm_dce = kzalloc(sizeof(*abm_dce), GFP_ATOMIC); + struct dce_abm *abm_dce = kzalloc(sizeof(*abm_dce), GFP_KERNEL); if (abm_dce == NULL) { BREAK_TO_DEBUGGER(); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c index a8e79104b684..5f8fba45d98d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c @@ -1126,7 +1126,7 @@ struct dmcu *dcn10_dmcu_create( const struct dce_dmcu_shift *dmcu_shift, const struct dce_dmcu_mask *dmcu_mask) { - struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_ATOMIC); + struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_KERNEL); if (dmcu_dce == NULL) { BREAK_TO_DEBUGGER(); @@ -1147,7 +1147,7 @@ struct dmcu *dcn20_dmcu_create( const struct dce_dmcu_shift *dmcu_shift, const struct dce_dmcu_mask *dmcu_mask) { - struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_ATOMIC); + struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_KERNEL); if (dmcu_dce == NULL) { BREAK_TO_DEBUGGER(); @@ -1168,7 +1168,7 @@ struct dmcu *dcn21_dmcu_create( const struct dce_dmcu_shift *dmcu_shift, const struct dce_dmcu_mask *dmcu_mask) { - struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_ATOMIC); + struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_KERNEL); if (dmcu_dce == NULL) { BREAK_TO_DEBUGGER(); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index f4d3ff79717f..30f155dc54e5 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -733,7 +733,7 @@ struct dpp *dcn20_dpp_create( uint32_t inst) { struct dcn20_dpp *dpp = - kzalloc(sizeof(struct dcn20_dpp), GFP_ATOMIC); + kzalloc(sizeof(struct dcn20_dpp), GFP_KERNEL); if (!dpp) return NULL; @@ -751,7 +751,7 @@ struct input_pixel_processor *dcn20_ipp_create( struct dc_context *ctx, uint32_t inst) { struct dcn10_ipp *ipp = - kzalloc(sizeof(struct dcn10_ipp), GFP_ATOMIC); + kzalloc(sizeof(struct dcn10_ipp), GFP_KERNEL); if (!ipp) { BREAK_TO_DEBUGGER(); @@ -768,7 +768,7 @@ struct output_pixel_processor *dcn20_opp_create( struct dc_context *ctx, uint32_t inst) { 
struct dcn20_opp *opp = - kzalloc(sizeof(struct dcn20_opp), GFP_ATOMIC); + kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); if (!opp) { BREAK_TO_DEBUGGER(); @@ -785,7 +785,7 @@ struct dce_aux *dcn20_aux_engine_create( uint32_t inst) { struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_ATOMIC); + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); if (!aux_engine) return NULL; @@ -823,7 +823,7 @@ struct dce_i2c_hw *dcn20_i2c_hw_create( uint32_t inst) { struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_ATOMIC); + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); if (!dce_i2c_hw) return NULL; @@ -835,8 +835,7 @@ struct dce_i2c_hw *dcn20_i2c_hw_create( } struct mpc *dcn20_mpc_create(struct dc_context *ctx) { - struct dcn20_mpc *mpc20 = kzalloc(sizeof(struct dcn20_mpc), - GFP_ATOMIC); + struct dcn20_mpc *mpc20 = kzalloc(sizeof(struct dcn20_mpc), GFP_KERNEL); if (!mpc20) return NULL; @@ -853,8 +852,7 @@ struct mpc *dcn20_mpc_create(struct dc_context *ctx) struct hubbub *dcn20_hubbub_create(struct dc_context *ctx) { int i; - struct dcn20_hubbub *hubbub = kzalloc(sizeof(struct dcn20_hubbub), - GFP_ATOMIC); + struct dcn20_hubbub *hubbub = kzalloc(sizeof(struct dcn20_hubbub), GFP_KERNEL); if (!hubbub) return NULL; @@ -882,7 +880,7 @@ struct timing_generator *dcn20_timing_generator_create( uint32_t instance) { struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_ATOMIC); + kzalloc(sizeof(struct optc), GFP_KERNEL); if (!tgn10) return NULL; @@ -962,7 +960,7 @@ static struct clock_source *dcn20_clock_source_create( bool dp_clk_src) { struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_ATOMIC); + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); if (!clk_src) return NULL; @@ -1061,7 +1059,7 @@ struct display_stream_compressor *dcn20_dsc_create( struct dc_context *ctx, uint32_t inst) { struct dcn20_dsc *dsc = - kzalloc(sizeof(struct dcn20_dsc), GFP_ATOMIC); + kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); if (!dsc) { BREAK_TO_DEBUGGER(); @@ -1198,7 +1196,7 @@ struct hubp *dcn20_hubp_create( uint32_t inst) { struct dcn20_hubp *hubp2 = - kzalloc(sizeof(struct dcn20_hubp), GFP_ATOMIC); + kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); if (!hubp2) return NULL; @@ -2287,7 +2285,7 @@ bool dcn20_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) static struct pp_smu_funcs *dcn20_pp_smu_create(struct dc_context *ctx) { - struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_ATOMIC); + struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_KERNEL); if (!pp_smu) return pp_smu; @@ -2766,7 +2764,7 @@ struct resource_pool *dcn20_create_resource_pool( struct dc *dc) { struct dcn20_resource_pool *pool = - kzalloc(sizeof(struct dcn20_resource_pool), GFP_ATOMIC); + kzalloc(sizeof(struct dcn20_resource_pool), GFP_KERNEL); if (!pool) return NULL; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index e4a1338d21e0..9a80bebcee48 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -629,7 +629,7 @@ static struct dpp *dcn201_dpp_create( uint32_t inst) { struct dcn201_dpp *dpp = - kzalloc(sizeof(struct dcn201_dpp), GFP_ATOMIC); + kzalloc(sizeof(struct dcn201_dpp), GFP_KERNEL); if (!dpp) return NULL; @@ -646,7 +646,7 @@ static struct input_pixel_processor *dcn201_ipp_create( struct dc_context *ctx, uint32_t inst) { struct 
dcn10_ipp *ipp = - kzalloc(sizeof(struct dcn10_ipp), GFP_ATOMIC); + kzalloc(sizeof(struct dcn10_ipp), GFP_KERNEL); if (!ipp) { return NULL; @@ -662,7 +662,7 @@ static struct output_pixel_processor *dcn201_opp_create( struct dc_context *ctx, uint32_t inst) { struct dcn201_opp *opp = - kzalloc(sizeof(struct dcn201_opp), GFP_ATOMIC); + kzalloc(sizeof(struct dcn201_opp), GFP_KERNEL); if (!opp) { return NULL; @@ -677,7 +677,7 @@ static struct dce_aux *dcn201_aux_engine_create(struct dc_context *ctx, uint32_t inst) { struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_ATOMIC); + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); if (!aux_engine) return NULL; @@ -710,7 +710,7 @@ static struct dce_i2c_hw *dcn201_i2c_hw_create(struct dc_context *ctx, uint32_t inst) { struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_ATOMIC); + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); if (!dce_i2c_hw) return NULL; @@ -723,8 +723,7 @@ static struct dce_i2c_hw *dcn201_i2c_hw_create(struct dc_context *ctx, static struct mpc *dcn201_mpc_create(struct dc_context *ctx, uint32_t num_mpcc) { - struct dcn201_mpc *mpc201 = kzalloc(sizeof(struct dcn201_mpc), - GFP_ATOMIC); + struct dcn201_mpc *mpc201 = kzalloc(sizeof(struct dcn201_mpc), GFP_KERNEL); if (!mpc201) return NULL; @@ -740,8 +739,7 @@ static struct mpc *dcn201_mpc_create(struct dc_context *ctx, uint32_t num_mpcc) static struct hubbub *dcn201_hubbub_create(struct dc_context *ctx) { - struct dcn20_hubbub *hubbub = kzalloc(sizeof(struct dcn20_hubbub), - GFP_ATOMIC); + struct dcn20_hubbub *hubbub = kzalloc(sizeof(struct dcn20_hubbub), GFP_KERNEL); if (!hubbub) return NULL; @@ -759,7 +757,7 @@ static struct timing_generator *dcn201_timing_generator_create( uint32_t instance) { struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_ATOMIC); + kzalloc(sizeof(struct optc), GFP_KERNEL); if (!tgn10) return NULL; @@ -793,7 +791,7 @@ static struct link_encoder *dcn201_link_encoder_create( const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = - kzalloc(sizeof(struct dcn20_link_encoder), GFP_ATOMIC); + kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); struct dcn10_link_encoder *enc10; if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) @@ -821,7 +819,7 @@ static struct clock_source *dcn201_clock_source_create( bool dp_clk_src) { struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_ATOMIC); + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); if (!clk_src) return NULL; @@ -856,7 +854,7 @@ static struct stream_encoder *dcn201_stream_encoder_create( struct dc_context *ctx) { struct dcn10_stream_encoder *enc1 = - kzalloc(sizeof(struct dcn10_stream_encoder), GFP_ATOMIC); + kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); if (!enc1) return NULL; @@ -883,7 +881,7 @@ static const struct dce_hwseq_mask hwseq_mask = { static struct dce_hwseq *dcn201_hwseq_create( struct dc_context *ctx) { - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_ATOMIC); + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); if (hws) { hws->ctx = ctx; @@ -983,7 +981,7 @@ static struct hubp *dcn201_hubp_create( uint32_t inst) { struct dcn201_hubp *hubp201 = - kzalloc(sizeof(struct dcn201_hubp), GFP_ATOMIC); + kzalloc(sizeof(struct dcn201_hubp), GFP_KERNEL); if (!hubp201) return NULL; @@ -1303,7 +1301,7 @@ struct resource_pool *dcn201_create_resource_pool( struct dc *dc) { struct dcn201_resource_pool *pool = - 
kzalloc(sizeof(struct dcn201_resource_pool), GFP_ATOMIC); + kzalloc(sizeof(struct dcn201_resource_pool), GFP_KERNEL); if (!pool) return NULL; -- cgit v1.2.3 From e0550a1e30e4c6c03dbb5d38f4833fea1d170809 Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Mon, 6 Oct 2025 22:02:31 -0400 Subject: drm/amd/display: Check disable_fec flag before enabling fec. [Why] dc debug option disable_fec was not working. [How] Check dc debug option disable_fec flag before enabling fec in dp_should_enable_fec(). Reviewed-by: Wenjing Liu Signed-off-by: Meenakshikumar Somasundaram Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index c33a8c17c38a..701afd2d4ab1 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -357,7 +357,9 @@ bool dp_should_enable_fec(const struct dc_link *link) { bool force_disable = false; - if (link->fec_state == dc_link_fec_enabled) + if (link->dc->debug.disable_fec) + force_disable = true; + else if (link->fec_state == dc_link_fec_enabled) force_disable = false; else if (link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT_MST && link->local_sink && -- cgit v1.2.3 From 0a013785a26d0a001d8930ecd789d382ce77ca99 Mon Sep 17 00:00:00 2001 From: Nicholas Carbones Date: Fri, 3 Oct 2025 18:36:18 -0400 Subject: drm/amd/display: Set DCN32 to use update planes and stream version 3 [Why] Old minimal transition does not always wait for updates to complete before proceeding, which can lead to corruption in multi-display scenarios for DCN32. [How] Set DCN32 to use update_planes_and_stream_v3 for better pipe transition handling. Reviewed-by: Dillon Varone Signed-off-by: Nicholas Carbones Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 36b046611d02..4c964cf28f68 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5376,7 +5376,8 @@ bool dc_update_planes_and_stream(struct dc *dc, * specially handle compatibility problems with transitions among those * features as they are now transparent to the new sequence. */ - if (dc->ctx->dce_version >= DCN_VERSION_4_01) + if (dc->ctx->dce_version >= DCN_VERSION_4_01 || dc->ctx->dce_version == DCN_VERSION_3_2 || + dc->ctx->dce_version == DCN_VERSION_3_21) ret = update_planes_and_stream_v3(dc, srf_updates, surface_count, stream, stream_update); else -- cgit v1.2.3 From d8ed14f1dc679c39253183d41b577a2c13756a1b Mon Sep 17 00:00:00 2001 From: Lewis Huang Date: Tue, 7 Oct 2025 16:46:59 +0800 Subject: drm/amd/display: Change clean dsc blocks condition in accelerated mode [Why] On system resume from S4 with the lid closed, DSC was not cleared because DPMS was already off. [How] In accelerated mode, clean up the DSC blocks if dpms_off is true for every eDP stream, to align the DSC and DPMS state when we are not in fast boot or seamless boot.
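For illustration only, the loop added in the hunk below is equivalent to the following predicate (a sketch assuming the edp_streams[]/edp_stream_num locals of dce110_enable_accelerated_mode(); example_all_edp_dpms_off() is a made-up name):

/* DSC blocks may only be cleaned when no eDP stream is still active;
 * with zero eDP streams this is trivially true, matching the
 * should_clean_dsc_block default in the patch.
 */
static bool example_all_edp_dpms_off(struct dc_stream_state *edp_streams[],
				     int edp_stream_num)
{
	int i;

	for (i = 0; i < edp_stream_num; i++) {
		if (!edp_streams[i]->dpms_off)
			return false;
	}
	return true;
}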
Reviewed-by: Wenjing Liu Signed-off-by: Lewis Huang Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 24184b4eb352..6b0566baa2f2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1913,6 +1913,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) bool can_apply_edp_fast_boot = false; bool can_apply_seamless_boot = false; bool keep_edp_vdd_on = false; + bool should_clean_dsc_block = true; struct dc_bios *dcb = dc->ctx->dc_bios; DC_LOGGER_INIT(); @@ -2005,9 +2006,15 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) power_down_all_hw_blocks(dc); /* DSC could be enabled on eDP during VBIOS post. - * To clean up dsc blocks if eDP is in link but not active. + * To clean up dsc blocks if all eDP dpms_off is true. */ - if (edp_link_with_sink && (edp_stream_num == 0)) + for (i = 0; i < edp_stream_num; i++) { + if (!edp_streams[i]->dpms_off) { + should_clean_dsc_block = false; + } + } + + if (should_clean_dsc_block) clean_up_dsc_blocks(dc); disable_vga_and_power_gate_all_controllers(dc); -- cgit v1.2.3 From d021bd48ee62ed5385ba940ba7e596ee0308c47b Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Tue, 7 Oct 2025 16:34:09 -0400 Subject: drm/amd/display: Fix misc. checkpatch issues [Why/How] Addresses various checkpatch issues related to the HWSS block sequence function change. Reviewed-by: Aurabindo Pillai Signed-off-by: Ilya Bakoulin Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 57 +++----- .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 160 +++++++++------------ 2 files changed, 86 insertions(+), 131 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 16d916986fed..f95cb0cf4b8a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -1962,9 +1962,8 @@ void hwss_program_bias_and_scale(union block_sequence_params *params) struct dc_bias_and_scale bns_params = plane_state->bias_and_scale; //TODO :for CNVC set scale and bias registers if necessary - if (dpp->funcs->dpp_program_bias_and_scale) { + if (dpp->funcs->dpp_program_bias_and_scale) dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); - } } void hwss_power_on_mpc_mem_pwr(union block_sequence_params *params) @@ -2121,6 +2120,7 @@ void hwss_wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *c void hwss_wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) { int i; + for (i = 0; i < MAX_PIPES; i++) { int count = 0; struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -2277,6 +2277,7 @@ void hwss_tg_set_dsc_config(union block_sequence_params *params) if (params->tg_set_dsc_config_params.enable) { struct dsc_optc_config *dsc_optc_cfg = params->tg_set_dsc_config_params.dsc_optc_cfg; + if (dsc_optc_cfg) { bytes_per_pixel = dsc_optc_cfg->bytes_per_pixel; slice_width = dsc_optc_cfg->slice_width; @@ -2370,9 +2371,8 @@ void hwss_tg_wait_for_state(union block_sequence_params *params) struct timing_generator *tg = 
params->tg_wait_for_state_params.tg; enum crtc_state state = params->tg_wait_for_state_params.state; - if (tg->funcs->wait_for_state) { + if (tg->funcs->wait_for_state) tg->funcs->wait_for_state(tg, state); - } } void hwss_tg_set_vtg_params(union block_sequence_params *params) @@ -2381,9 +2381,8 @@ void hwss_tg_set_vtg_params(union block_sequence_params *params) struct dc_crtc_timing *timing = params->tg_set_vtg_params_params.timing; bool program_fp2 = params->tg_set_vtg_params_params.program_fp2; - if (tg->funcs->set_vtg_params) { + if (tg->funcs->set_vtg_params) tg->funcs->set_vtg_params(tg, timing, program_fp2); - } } void hwss_tg_setup_vertical_interrupt2(union block_sequence_params *params) @@ -2391,9 +2390,8 @@ void hwss_tg_setup_vertical_interrupt2(union block_sequence_params *params) struct timing_generator *tg = params->tg_setup_vertical_interrupt2_params.tg; int start_line = params->tg_setup_vertical_interrupt2_params.start_line; - if (tg->funcs->setup_vertical_interrupt2) { + if (tg->funcs->setup_vertical_interrupt2) tg->funcs->setup_vertical_interrupt2(tg, start_line); - } } void hwss_dpp_set_hdr_multiplier(union block_sequence_params *params) @@ -2401,9 +2399,8 @@ void hwss_dpp_set_hdr_multiplier(union block_sequence_params *params) struct dpp *dpp = params->dpp_set_hdr_multiplier_params.dpp; uint32_t hw_mult = params->dpp_set_hdr_multiplier_params.hw_mult; - if (dpp->funcs->dpp_set_hdr_multiplier) { + if (dpp->funcs->dpp_set_hdr_multiplier) dpp->funcs->dpp_set_hdr_multiplier(dpp, hw_mult); - } } void hwss_program_det_size(union block_sequence_params *params) @@ -2412,9 +2409,8 @@ void hwss_program_det_size(union block_sequence_params *params) unsigned int hubp_inst = params->program_det_size_params.hubp_inst; unsigned int det_buffer_size_kb = params->program_det_size_params.det_buffer_size_kb; - if (hubbub->funcs->program_det_size) { + if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp_inst, det_buffer_size_kb); - } } void hwss_program_det_segments(union block_sequence_params *params) @@ -2423,9 +2419,8 @@ void hwss_program_det_segments(union block_sequence_params *params) unsigned int hubp_inst = params->program_det_segments_params.hubp_inst; unsigned int det_size = params->program_det_segments_params.det_size; - if (hubbub->funcs->program_det_segments) { + if (hubbub->funcs->program_det_segments) hubbub->funcs->program_det_segments(hubbub, hubp_inst, det_size); - } } void hwss_opp_set_dyn_expansion(union block_sequence_params *params) @@ -2435,9 +2430,8 @@ void hwss_opp_set_dyn_expansion(union block_sequence_params *params) enum dc_color_depth color_depth = params->opp_set_dyn_expansion_params.color_depth; enum signal_type signal = params->opp_set_dyn_expansion_params.signal; - if (opp->funcs->opp_set_dyn_expansion) { + if (opp->funcs->opp_set_dyn_expansion) opp->funcs->opp_set_dyn_expansion(opp, color_space, color_depth, signal); - } } void hwss_opp_program_fmt(union block_sequence_params *params) @@ -2446,9 +2440,8 @@ void hwss_opp_program_fmt(union block_sequence_params *params) struct bit_depth_reduction_params *fmt_bit_depth = params->opp_program_fmt_params.fmt_bit_depth; struct clamping_and_pixel_encoding_params *clamping = params->opp_program_fmt_params.clamping; - if (opp->funcs->opp_program_fmt) { + if (opp->funcs->opp_program_fmt) opp->funcs->opp_program_fmt(opp, fmt_bit_depth, clamping); - } } void hwss_opp_program_bit_depth_reduction(union block_sequence_params *params) @@ -2458,15 +2451,13 @@ void 
hwss_opp_program_bit_depth_reduction(union block_sequence_params *params) struct pipe_ctx *pipe_ctx = params->opp_program_bit_depth_reduction_params.pipe_ctx; struct bit_depth_reduction_params bit_depth_params; - if (use_default_params) { + if (use_default_params) memset(&bit_depth_params, 0, sizeof(bit_depth_params)); - } else { + else resource_build_bit_depth_reduction_params(pipe_ctx->stream, &bit_depth_params); - } - if (opp->funcs->opp_program_bit_depth_reduction) { + if (opp->funcs->opp_program_bit_depth_reduction) opp->funcs->opp_program_bit_depth_reduction(opp, &bit_depth_params); - } } void hwss_opp_set_disp_pattern_generator(union block_sequence_params *params) @@ -2500,9 +2491,8 @@ void hwss_set_abm_level(union block_sequence_params *params) struct abm *abm = params->set_abm_level_params.abm; unsigned int abm_level = params->set_abm_level_params.abm_level; - if (abm->funcs->set_abm_level) { + if (abm->funcs->set_abm_level) abm->funcs->set_abm_level(abm, abm_level); - } } void hwss_set_abm_immediate_disable(union block_sequence_params *params) @@ -2510,9 +2500,8 @@ void hwss_set_abm_immediate_disable(union block_sequence_params *params) struct dc *dc = params->set_abm_immediate_disable_params.dc; struct pipe_ctx *pipe_ctx = params->set_abm_immediate_disable_params.pipe_ctx; - if (dc && dc->hwss.set_abm_immediate_disable) { + if (dc && dc->hwss.set_abm_immediate_disable) dc->hwss.set_abm_immediate_disable(pipe_ctx); - } } void hwss_mpc_remove_mpcc(union block_sequence_params *params) @@ -2793,34 +2782,29 @@ void hwss_hubp_init(union block_sequence_params *params) { struct hubp *hubp = params->hubp_init_params.hubp; - if (hubp && hubp->funcs->hubp_init) { + if (hubp && hubp->funcs->hubp_init) hubp->funcs->hubp_init(hubp); - } } void hwss_hubp_set_vm_system_aperture_settings(union block_sequence_params *params) { struct hubp *hubp = params->hubp_set_vm_system_aperture_settings_params.hubp; - //struct vm_system_aperture_param *apt = ¶ms->hubp_set_vm_system_aperture_settings_params.apt; struct vm_system_aperture_param apt; apt.sys_default = params->hubp_set_vm_system_aperture_settings_params.sys_default; apt.sys_high = params->hubp_set_vm_system_aperture_settings_params.sys_high; apt.sys_low = params->hubp_set_vm_system_aperture_settings_params.sys_low; - if (hubp && hubp->funcs->hubp_set_vm_system_aperture_settings) { - //hubp->funcs->hubp_set_vm_system_aperture_settings(hubp, apt); + if (hubp && hubp->funcs->hubp_set_vm_system_aperture_settings) hubp->funcs->hubp_set_vm_system_aperture_settings(hubp, &apt); - } } void hwss_hubp_set_flip_int(union block_sequence_params *params) { struct hubp *hubp = params->hubp_set_flip_int_params.hubp; - if (hubp && hubp->funcs->hubp_set_flip_int) { + if (hubp && hubp->funcs->hubp_set_flip_int) hubp->funcs->hubp_set_flip_int(hubp); - } } void hwss_dpp_dppclk_control(union block_sequence_params *params) @@ -3049,13 +3033,10 @@ void hwss_mpc_update_blending(union block_sequence_params *params) void hwss_mpc_assert_idle_mpcc(union block_sequence_params *params) { struct mpc *mpc = params->mpc_assert_idle_mpcc_params.mpc; - //struct pipe_ctx *pipe_ctx = params->mpc_assert_idle_mpcc_params.pipe_ctx; int mpcc_id = params->mpc_assert_idle_mpcc_params.mpcc_id; if (mpc && mpc->funcs->wait_for_idle) mpc->funcs->wait_for_idle(mpc, mpcc_id); - - //pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_id] = false; } void hwss_mpc_insert_plane(union block_sequence_params *params) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index e6d3ff8598f5..23ecab4bcbba 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -2271,17 +2271,15 @@ void dcn401_program_pipe_sequence( (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines); /* Step 2: Wait for VACTIVE state (if not phantom pipe) */ - if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) hwss_add_tg_wait_for_state(seq_state, pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); - } /* Step 3: Set VTG params */ hwss_add_tg_set_vtg_params(seq_state, pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); /* Step 4: Setup vupdate interrupt (if available) */ - if (hws->funcs.setup_vupdate_interrupt) { + if (hws->funcs.setup_vupdate_interrupt) dcn401_setup_vupdate_interrupt_sequence(dc, pipe_ctx, seq_state); - } } if (pipe_ctx->update_flags.bits.odm) { @@ -2347,9 +2345,11 @@ void dcn401_program_pipe_sequence( if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed) { - hwss_add_opp_set_dyn_expansion(seq_state, pipe_ctx->stream_res.opp, COLOR_SPACE_YCBCR601, pipe_ctx->stream->timing.display_color_depth, pipe_ctx->stream->signal); + hwss_add_opp_set_dyn_expansion(seq_state, pipe_ctx->stream_res.opp, COLOR_SPACE_YCBCR601, + pipe_ctx->stream->timing.display_color_depth, pipe_ctx->stream->signal); - hwss_add_opp_program_fmt(seq_state, pipe_ctx->stream_res.opp, &pipe_ctx->stream->bit_depth_params, &pipe_ctx->stream->clamping); + hwss_add_opp_program_fmt(seq_state, pipe_ctx->stream_res.opp, + &pipe_ctx->stream->bit_depth_params, &pipe_ctx->stream->clamping); } /* Set ABM pipe after other pipe configurations done */ @@ -2366,7 +2366,16 @@ void dcn401_program_pipe_sequence( hwss_add_opp_program_bit_depth_reduction(seq_state, odm_opp, true, pipe_ctx); - hwss_add_opp_set_disp_pattern_generator(seq_state, odm_opp, pipe_ctx->stream_res.test_pattern_params.test_pattern, pipe_ctx->stream_res.test_pattern_params.color_space, pipe_ctx->stream_res.test_pattern_params.color_depth, (struct tg_color){0}, false, pipe_ctx->stream_res.test_pattern_params.width, pipe_ctx->stream_res.test_pattern_params.height, pipe_ctx->stream_res.test_pattern_params.offset); + hwss_add_opp_set_disp_pattern_generator(seq_state, + odm_opp, + pipe_ctx->stream_res.test_pattern_params.test_pattern, + pipe_ctx->stream_res.test_pattern_params.color_space, + pipe_ctx->stream_res.test_pattern_params.color_depth, + (struct tg_color){0}, + false, + pipe_ctx->stream_res.test_pattern_params.width, + pipe_ctx->stream_res.test_pattern_params.height, + pipe_ctx->stream_res.test_pattern_params.offset); } } @@ -3025,9 +3034,8 @@ void dcn401_plane_atomic_power_down_sequence(struct dc *dc, /* Check and set DC_IP_REQUEST_CNTL if needed */ if (REG(DC_IP_REQUEST_CNTL)) { REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); - if (org_ip_request_cntl == 0) { + if (org_ip_request_cntl == 0) hwss_add_dc_ip_request_cntl(seq_state, dc, true); - } } /* DPP power gating control */ @@ -3043,9 +3051,8 @@ void dcn401_plane_atomic_power_down_sequence(struct dc *dc, hwss_add_dpp_reset(seq_state, dpp); /* Restore DC_IP_REQUEST_CNTL if it was originally 0 */ - if (org_ip_request_cntl == 0 && REG(DC_IP_REQUEST_CNTL)) { + if (org_ip_request_cntl == 0 && REG(DC_IP_REQUEST_CNTL)) hwss_add_dc_ip_request_cntl(seq_state, dc, false); - } DC_LOG_DEBUG("Power gated front end 
%d\n", hubp->inst); @@ -3087,14 +3094,12 @@ void dcn401_plane_atomic_disconnect_sequence(struct dc *dc, hwss_add_dc_set_optimized_required(seq_state, dc, true); /* Step 4: Disconnect HUBP if function exists */ - if (hubp->funcs->hubp_disconnect) { + if (hubp->funcs->hubp_disconnect) hwss_add_hubp_disconnect(seq_state, hubp); - } /* Step 5: Verify pstate change high if debug sanity checks are enabled */ - if (dc->debug.sanity_checks) { + if (dc->debug.sanity_checks) dc->hwseq->funcs.verify_allow_pstate_change_high_sequence(dc, seq_state); - } } void dcn401_blank_pixel_data_sequence( @@ -3418,14 +3423,12 @@ void dcn401_disable_plane_sequence( /* In flip immediate with pipe splitting case GSL is used for synchronization * so we must disable it when the plane is disabled. */ - if (pipe_ctx->stream_res.gsl_group != 0) { + if (pipe_ctx->stream_res.gsl_group != 0) dcn401_setup_gsl_group_as_lock_sequence(dc, pipe_ctx, false, seq_state); - } /* Update HUBP mall sel */ - if (pipe_ctx->plane_res.hubp && pipe_ctx->plane_res.hubp->funcs->hubp_update_mall_sel) { + if (pipe_ctx->plane_res.hubp && pipe_ctx->plane_res.hubp->funcs->hubp_update_mall_sel) hwss_add_hubp_update_mall_sel(seq_state, pipe_ctx->plane_res.hubp, 0, false); - } /* Set flip control GSL */ hwss_add_hubp_set_flip_control_gsl(seq_state, pipe_ctx->plane_res.hubp, false); @@ -3451,9 +3454,8 @@ void dcn401_disable_plane_sequence( pipe_ctx->plane_state = NULL; /* Turn back off the phantom OTG after the phantom plane is fully disabled */ - if (is_phantom && tg && tg->funcs->disable_phantom_crtc) { + if (is_phantom && tg && tg->funcs->disable_phantom_crtc) hwss_add_disable_phantom_crtc(seq_state, tg); - } } void dcn401_post_unlock_reset_opp_sequence( @@ -3473,15 +3475,14 @@ void dcn401_post_unlock_reset_opp_sequence( if (dsc) { bool *is_ungated = NULL; /* Check DSC power gate status */ - if (dc->hwseq && dc->hwseq->funcs.dsc_pg_status) { + if (dc->hwseq && dc->hwseq->funcs.dsc_pg_status) hwss_add_dsc_pg_status(seq_state, dc->hwseq, dsc->inst, false); - } /* Seamless update specific where we will postpone non - * double buffered DSCCLK disable logic in post unlock - * sequence after DSC is disconnected from OPP but not - * yet power gated. - */ + * double buffered DSCCLK disable logic in post unlock + * sequence after DSC is disconnected from OPP but not + * yet power gated. + */ /* DSC wait disconnect pending clear */ hwss_add_dsc_wait_disconnect_pending_clear(seq_state, dsc, is_ungated); @@ -3490,9 +3491,8 @@ void dcn401_post_unlock_reset_opp_sequence( hwss_add_dsc_disable(seq_state, dsc, is_ungated); /* Set reference DSCCLK */ - if (dccg && dccg->funcs->set_ref_dscclk) { + if (dccg && dccg->funcs->set_ref_dscclk) hwss_add_dccg_set_ref_dscclk(seq_state, dccg, dsc->inst, 0); - } } } @@ -3500,9 +3500,8 @@ void dcn401_dc_ip_request_cntl(struct dc *dc, bool enable) { struct dce_hwseq *hws = dc->hwseq; - if (REG(DC_IP_REQUEST_CNTL)) { + if (REG(DC_IP_REQUEST_CNTL)) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, enable ? 
1 : 0); - } } void dcn401_enable_plane_sequence(struct dc *dc, struct pipe_ctx *pipe_ctx, @@ -3512,52 +3511,43 @@ void dcn401_enable_plane_sequence(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dce_hwseq *hws = dc->hwseq; uint32_t org_ip_request_cntl = 0; - if (!pipe_ctx->plane_res.dpp || !pipe_ctx->plane_res.hubp || !pipe_ctx->stream_res.opp) { + if (!pipe_ctx->plane_res.dpp || !pipe_ctx->plane_res.hubp || !pipe_ctx->stream_res.opp) return; - } if (REG(DC_IP_REQUEST_CNTL)) REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); /* Step 1: DPP root clock control - enable clock */ - if (hws->funcs.dpp_root_clock_control) { + if (hws->funcs.dpp_root_clock_control) hwss_add_dpp_root_clock_control(seq_state, hws, pipe_ctx->plane_res.dpp->inst, true); - } /* Step 2: Enable DC IP request (if needed) */ - if (hws->funcs.dc_ip_request_cntl) { + if (hws->funcs.dc_ip_request_cntl) hwss_add_dc_ip_request_cntl(seq_state, dc, true); - } /* Step 3: DPP power gating control - power on */ - if (REG(DC_IP_REQUEST_CNTL) && hws->funcs.dpp_pg_control) { + if (REG(DC_IP_REQUEST_CNTL) && hws->funcs.dpp_pg_control) hwss_add_dpp_pg_control(seq_state, hws, pipe_ctx->plane_res.dpp->inst, true); - } /* Step 4: HUBP power gating control - power on */ - if (REG(DC_IP_REQUEST_CNTL) && hws->funcs.hubp_pg_control) { + if (REG(DC_IP_REQUEST_CNTL) && hws->funcs.hubp_pg_control) hwss_add_hubp_pg_control(seq_state, hws, pipe_ctx->plane_res.hubp->inst, true); - } /* Step 5: Disable DC IP request (restore state) */ - if (org_ip_request_cntl == 0 && hws->funcs.dc_ip_request_cntl) { + if (org_ip_request_cntl == 0 && hws->funcs.dc_ip_request_cntl) hwss_add_dc_ip_request_cntl(seq_state, dc, false); - } /* Step 6: HUBP clock control - enable DCFCLK */ - if (pipe_ctx->plane_res.hubp->funcs->hubp_clk_cntl) { + if (pipe_ctx->plane_res.hubp->funcs->hubp_clk_cntl) hwss_add_hubp_clk_cntl(seq_state, pipe_ctx->plane_res.hubp, true); - } /* Step 7: HUBP initialization */ - if (pipe_ctx->plane_res.hubp->funcs->hubp_init) { + if (pipe_ctx->plane_res.hubp->funcs->hubp_init) hwss_add_hubp_init(seq_state, pipe_ctx->plane_res.hubp); - } /* Step 8: OPP pipe clock control - enable */ - if (pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control) { + if (pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control) hwss_add_opp_pipe_clock_control(seq_state, pipe_ctx->stream_res.opp, true); - } /* Step 9: VM system aperture settings */ if (dc->vm_pa_config.valid && pipe_ctx->plane_res.hubp->funcs->hubp_set_vm_system_aperture_settings) { @@ -3587,19 +3577,16 @@ void dcn401_update_dchubp_dpp_sequence(struct dc *dc, bool viewport_changed = false; enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe_ctx); - if (!hubp || !dpp || !plane_state) { + if (!hubp || !dpp || !plane_state) return; - } /* Step 1: DPP DPPCLK control */ - if (pipe_ctx->update_flags.bits.dppclk) { + if (pipe_ctx->update_flags.bits.dppclk) hwss_add_dpp_dppclk_control(seq_state, dpp, false, true); - } /* Step 2: DCCG update DPP DTO */ - if (pipe_ctx->update_flags.bits.enable) { + if (pipe_ctx->update_flags.bits.enable) hwss_add_dccg_update_dpp_dto(seq_state, dccg, dpp->inst, pipe_ctx->plane_res.bw.dppclk_khz); - } /* Step 3: HUBP VTG selection */ if (pipe_ctx->update_flags.bits.hubp_rq_dlg_ttu) { @@ -3616,17 +3603,15 @@ void dcn401_update_dchubp_dpp_sequence(struct dc *dc, } /* Step 5: Set unbounded requesting */ - if (pipe_ctx->update_flags.bits.unbounded_req && hubp->funcs->set_unbounded_requesting) { + if (pipe_ctx->update_flags.bits.unbounded_req 
&& hubp->funcs->set_unbounded_requesting) hwss_add_hubp_set_unbounded_requesting(seq_state, hubp, pipe_ctx->unbounded_req); - } /* Step 6: HUBP interdependent setup */ if (pipe_ctx->update_flags.bits.hubp_interdependent) { - if (hubp->funcs->hubp_setup_interdependent2) { + if (hubp->funcs->hubp_setup_interdependent2) hwss_add_hubp_setup_interdependent2(seq_state, hubp, &pipe_ctx->hubp_regs); - } else if (hubp->funcs->hubp_setup_interdependent) { + else if (hubp->funcs->hubp_setup_interdependent) hwss_add_hubp_setup_interdependent(seq_state, hubp, &pipe_ctx->dlg_regs, &pipe_ctx->ttu_regs); - } } /* Step 7: DPP setup - input CSC and format setup */ @@ -3645,9 +3630,8 @@ void dcn401_update_dchubp_dpp_sequence(struct dc *dc, } /* Step 9: DPP program bias and scale */ - if (dpp->funcs->dpp_program_bias_and_scale) { + if (dpp->funcs->dpp_program_bias_and_scale) hwss_add_dpp_program_bias_and_scale(seq_state, pipe_ctx); - } } /* Step 10: MPCC updates */ @@ -3683,9 +3667,8 @@ void dcn401_update_dchubp_dpp_sequence(struct dc *dc, } /* Step 13: HUBP program mcache if available */ - if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate) { + if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate) hwss_add_hubp_program_mcache_id(seq_state, hubp, &pipe_ctx->mcache_regs); - } /* Step 14: Cursor attribute setup */ if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed || @@ -3698,9 +3681,8 @@ void dcn401_update_dchubp_dpp_sequence(struct dc *dc, hwss_add_set_cursor_position(seq_state, dc, pipe_ctx); /* Step 16: Cursor SDR white level */ - if (dc->hwss.set_cursor_sdr_white_level) { + if (dc->hwss.set_cursor_sdr_white_level) hwss_add_set_cursor_sdr_white_level(seq_state, dc, pipe_ctx); - } } /* Step 17: Gamut remap and output CSC */ @@ -3747,7 +3729,8 @@ void dcn401_update_dchubp_dpp_sequence(struct dc *dc, /* SubVP save surface address if needed */ if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && pipe_mall_type == SUBVP_MAIN) { - hwss_add_dmub_subvp_save_surf_addr(seq_state, dc->ctx->dmub_srv, &pipe_ctx->plane_state->address, pipe_ctx->subvp_index); + hwss_add_dmub_subvp_save_surf_addr(seq_state, dc->ctx->dmub_srv, + &pipe_ctx->plane_state->address, pipe_ctx->subvp_index); } /* Update plane address */ @@ -3755,14 +3738,12 @@ void dcn401_update_dchubp_dpp_sequence(struct dc *dc, } /* Step 20: HUBP set blank - enable plane */ - if (pipe_ctx->update_flags.bits.enable) { + if (pipe_ctx->update_flags.bits.enable) hwss_add_hubp_set_blank(seq_state, hubp, false); - } /* Step 21: Phantom HUBP post enable */ - if (pipe_mall_type == SUBVP_PHANTOM && hubp->funcs->phantom_hubp_post_enable) { + if (pipe_mall_type == SUBVP_PHANTOM && hubp->funcs->phantom_hubp_post_enable) hwss_add_phantom_hubp_post_enable(seq_state, hubp); - } } void dcn401_update_mpcc_sequence(struct dc *dc, @@ -3777,9 +3758,8 @@ void dcn401_update_mpcc_sequence(struct dc *dc, struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); - if (!hubp || !pipe_ctx->plane_state) { + if (!hubp || !pipe_ctx->plane_state) return; - } per_pixel_alpha = pipe_ctx->plane_state->per_pixel_alpha; @@ -3837,9 +3817,8 @@ void dcn401_update_mpcc_sequence(struct dc *dc, hwss_add_mpc_remove_mpcc(seq_state, mpc, mpc_tree_params, new_mpcc); } else { /* Step 4: Assert MPCC idle (debug only) */ - if (dc->debug.sanity_checks) { + if (dc->debug.sanity_checks) hwss_add_mpc_assert_idle_mpcc(seq_state, mpc, mpcc_id); - } } /* Step 5: Insert new plane into MPC tree */ @@ -3873,9 
+3852,8 @@ void dcn401_wait_for_mpcc_disconnect_sequence( { int mpcc_inst; - if (dc->debug.sanity_checks) { + if (dc->debug.sanity_checks) dc->hwseq->funcs.verify_allow_pstate_change_high_sequence(dc, seq_state); - } if (!pipe_ctx->stream_res.opp) return; @@ -3889,15 +3867,13 @@ void dcn401_wait_for_mpcc_disconnect_sequence( hwss_add_mpc_assert_idle_mpcc(seq_state, res_pool->mpc, mpcc_inst); } pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false; - if (hubp) { + if (hubp) hwss_add_hubp_set_blank(seq_state, hubp, true); - } } } - if (dc->debug.sanity_checks) { + if (dc->debug.sanity_checks) dc->hwseq->funcs.verify_allow_pstate_change_high_sequence(dc, seq_state); - } } void dcn401_setup_vupdate_interrupt_sequence(struct dc *dc, struct pipe_ctx *pipe_ctx, @@ -3909,9 +3885,8 @@ void dcn401_setup_vupdate_interrupt_sequence(struct dc *dc, struct pipe_ctx *pip if (start_line < 0) start_line = 0; - if (tg->funcs->setup_vertical_interrupt2) { + if (tg->funcs->setup_vertical_interrupt2) hwss_add_tg_setup_vertical_interrupt2(seq_state, tg, start_line); - } } void dcn401_set_hdr_multiplier_sequence(struct pipe_ctx *pipe_ctx, @@ -3987,9 +3962,8 @@ void dcn401_program_mall_pipe_config_sequence(struct dc *dc, struct dc_state *co struct hubp *hubp = pipe->plane_res.hubp; if (pipe->stream && hubp && hubp->funcs->hubp_prepare_subvp_buffering) { - if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) hwss_add_hubp_prepare_subvp_buffering(seq_state, hubp, true); - } } } } @@ -4020,11 +3994,11 @@ bool dcn401_hw_wa_force_recovery_sequence(struct dc *dc, /* Step 1: Set HUBP_BLANK_EN=1 for all active pipes */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { hubp = pipe_ctx->plane_res.hubp; - if (hubp != NULL && hubp->funcs->set_hubp_blank_en) { + if (hubp != NULL && hubp->funcs->set_hubp_blank_en) hwss_add_hubp_set_blank_en(seq_state, hubp, true); - } } } @@ -4034,22 +4008,22 @@ bool dcn401_hw_wa_force_recovery_sequence(struct dc *dc, /* Step 3: Set HUBP_DISABLE=1 for all active pipes */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { hubp = pipe_ctx->plane_res.hubp; - if (hubp != NULL && hubp->funcs->hubp_disable_control) { + if (hubp != NULL && hubp->funcs->hubp_disable_control) hwss_add_hubp_disable_control(seq_state, hubp, true); - } } } /* Step 4: Set HUBP_DISABLE=0 for all active pipes */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { hubp = pipe_ctx->plane_res.hubp; - if (hubp != NULL && hubp->funcs->hubp_disable_control) { + if (hubp != NULL && hubp->funcs->hubp_disable_control) hwss_add_hubp_disable_control(seq_state, hubp, false); - } } } @@ -4059,11 +4033,11 @@ bool dcn401_hw_wa_force_recovery_sequence(struct dc *dc, /* Step 6: Set HUBP_BLANK_EN=0 for all active pipes */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { hubp = pipe_ctx->plane_res.hubp; - if (hubp != NULL && hubp->funcs->set_hubp_blank_en) { + if (hubp != NULL && hubp->funcs->set_hubp_blank_en) hwss_add_hubp_set_blank_en(seq_state, hubp, false); - } } } -- cgit v1.2.3 From 1319fb80b31cd56d31cb08a334f7c6033f9609ac Mon Sep 17 00:00:00 2001 From: Cruise Hung Date: Wed, 8 
Oct 2025 14:44:29 +0800 Subject: drm/amd/display: Control BW allocation on the FW side [Why] The BW allocation feature should be controlled on the FW side. [How] Pass the control bit to the FW boot options. Reviewed-by: Meenakshikumar Somasundaram Reviewed-by: Nicholas Kazlauskas Signed-off-by: Cruise Hung Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 3 +-- drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c | 5 ----- drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 1 + drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 1 + drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c | 1 + 5 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index ddd431ec4d52..089ff96b44c1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -850,8 +850,7 @@ union dpia_debug_options { uint32_t enable_force_tbt3_work_around:1; /* bit 4 */ uint32_t disable_usb4_pm_support:1; /* bit 5 */ uint32_t enable_usb4_bw_zero_alloc_patch:1; /* bit 6 */ - uint32_t enable_bw_allocation_mode:1; /* bit 7 */ - uint32_t reserved:24; + uint32_t reserved:25; } bits; uint32_t raw; }; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index b16eb97ae11c..c958d3f600c8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -225,11 +225,6 @@ bool link_dpia_enable_usb4_dp_bw_alloc_mode(struct dc_link *link) bool ret = false; uint8_t val; - if (link->dc->debug.dpia_debug.bits.enable_bw_allocation_mode == false) { - DC_LOG_DEBUG("%s: link[%d] DPTX BW allocation mode disabled", __func__, link->link_index); - return false; - } - val = DPTX_BW_ALLOC_MODE_ENABLE | DPTX_BW_ALLOC_UNMASK_IRQ; if (core_link_write_dpcd(link, DPTX_BW_ALLOCATION_MODE_CONTROL, &val, sizeof(uint8_t)) == DC_OK) { diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 9012a7ba1602..f25c2fc2f98f 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -318,6 +318,7 @@ struct dmub_srv_hw_params { bool enable_non_transparent_setconfig; bool lower_hbr3_phy_ssc; bool override_hbr3_pll_vco; + bool disable_dpia_bw_allocation; }; /** diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 4777c7203b2c..cd04d7c756c3 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -380,6 +380,7 @@ void dmub_dcn31_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu boot_options.bits.override_hbr3_pll_vco = params->override_hbr3_pll_vco; boot_options.bits.sel_mux_phy_c_d_phy_f_g = (dmub->asic == DMUB_ASIC_DCN31B) ?
1 : 0; + boot_options.bits.disable_dpia_bw_allocation = params->disable_dpia_bw_allocation; REG_WRITE(DMCUB_SCRATCH14, boot_options.all); } diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 834e5434ccb8..b40482dbd6ad 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -418,6 +418,7 @@ void dmub_dcn35_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu boot_options.bits.disable_sldo_opt = params->disable_sldo_opt; boot_options.bits.enable_non_transparent_setconfig = params->enable_non_transparent_setconfig; boot_options.bits.lower_hbr3_phy_ssc = params->lower_hbr3_phy_ssc; + boot_options.bits.disable_dpia_bw_allocation = params->disable_dpia_bw_allocation; REG_WRITE(DMCUB_SCRATCH14, boot_options.all); } -- cgit v1.2.3 From a1362c405228c7aebe1430d84b76b34f09b262ed Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Wed, 8 Oct 2025 13:35:07 -0400 Subject: drm/amd/display: write segment pointer with mot enabled for mst [Why] Some MST branches NAK segment pointer writes with MOT disabled, so the reset of the segment pointer to 0 should be performed with MOT enabled. [How] Write the segment pointer of MST branch devices with MOT enabled. Reviewed-by: Cruise Hung Signed-off-by: Meenakshikumar Somasundaram Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/link_factory.c | 3 --- drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c | 6 ++++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 31a73867cd4c..f06af98d46ee 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -817,9 +817,6 @@ static bool construct_dpia(struct dc_link *link, link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; - /* Some docks seem to NAK I2C writes to segment pointer with mot=0. */ - link->wa_flags.dp_mot_reset_segment = true; - return true; ddc_create_fail: diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 701afd2d4ab1..750147c52c8a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1867,6 +1867,12 @@ static bool retrieve_link_cap(struct dc_link *link) link->dpcd_caps.is_mst_capable = read_is_mst_supported(link); DC_LOG_DC("%s: MST_Support: %s\n", __func__, str_yes_no(link->dpcd_caps.is_mst_capable)); + /* Some MST docks seem to NAK I2C writes to segment pointer with mot=0.
*/ + if (link->dpcd_caps.is_mst_capable) + link->wa_flags.dp_mot_reset_segment = true; + else + link->wa_flags.dp_mot_reset_segment = false; + get_active_converter_info(ds_port.byte, link); dp_wa_power_up_0010FA(link, dpcd_data, sizeof(dpcd_data)); -- cgit v1.2.3 From 7ce9d9c1904a2a49936434be01f706c014511de9 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 10 Oct 2025 19:21:13 -0500 Subject: drm/amd/display: Promote DC to 3.2.355 This version brings along the following updates: -Fix GFP_ATOMIC abuse -Fix several checkpatch issues -Set DCN32 to use update planes and stream version 3 -Write segment pointer with mot enabled for MST -Control BW allocation on the FW side -Change clean dsc blocks condition in accelerated mode -Check disable_fec flag before enabling FEC Acked-by: Wayne Lin Signed-off-by: Taimur Hassan Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 089ff96b44c1..66d9da5426f1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.354" +#define DC_VER "3.2.355" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC -- cgit v1.2.3 From 4c74635afdceffc1f7be609afcff11dd6c94b836 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Thu, 25 Sep 2025 16:12:28 +0800 Subject: drm/amd/ras: Update ras command context structure name According to the actual usage of this structure, it is more appropriate to call it a context; the "ioctl" in the structure name is misleading. V2: Update commit message content.
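With the rename, every command handler keeps the same shape: it parses its request out of input_buff_raw, writes its response into the flexible output_buff_raw area, and records the response size. A minimal sketch of that pattern (ras_cmd_example_handler and the example req/rsp types are hypothetical, for illustration only):

static int ras_cmd_example_handler(struct ras_core_context *ras_core,
				   struct ras_cmd_ctx *cmd, void *data)
{
	struct ras_cmd_example_req *req =
		(struct ras_cmd_example_req *)cmd->input_buff_raw;
	struct ras_cmd_example_rsp *rsp =
		(struct ras_cmd_example_rsp *)cmd->output_buff_raw;

	/* Validate the caller-supplied input size before touching req. */
	if (cmd->input_size != sizeof(*req))
		return RAS_CMD__ERROR_INVALID_INPUT_SIZE;

	rsp->status = 0;	/* "status" is a hypothetical response field */
	cmd->output_size = sizeof(*rsp);

	return 0;
}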
Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c | 14 +++++++------- drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h | 2 +- drivers/gpu/drm/amd/ras/rascore/ras_cmd.c | 20 ++++++++++---------- drivers/gpu/drm/amd/ras/rascore/ras_cmd.h | 8 ++++---- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c index 4706e737969a..6a281ad8e255 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c @@ -37,7 +37,7 @@ #define AMDGPU_RAS_TYPE_VF 0x3 static int amdgpu_ras_query_interface_info(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd) + struct ras_cmd_ctx *cmd) { struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; struct ras_query_interface_info_rsp *output_data = @@ -77,7 +77,7 @@ static struct ras_core_context *ras_cmd_get_ras_core(uint64_t dev_handle) } static int amdgpu_ras_get_devices_info(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd) + struct ras_cmd_ctx *cmd) { struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; struct ras_cmd_devices_info_rsp *output_data = @@ -146,7 +146,7 @@ static uint64_t local_addr_to_xgmi_global_addr(struct ras_core_context *ras_core } static int amdgpu_ras_inject_error(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data) + struct ras_cmd_ctx *cmd, void *data) { struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; struct ras_cmd_inject_error_req *req = @@ -189,7 +189,7 @@ static int amdgpu_ras_inject_error(struct ras_core_context *ras_core, } static int amdgpu_ras_get_ras_safe_fb_addr_ranges(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data) + struct ras_cmd_ctx *cmd, void *data) { struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; struct ras_cmd_dev_handle *input_data = @@ -259,7 +259,7 @@ static int ras_translate_fb_address(struct ras_core_context *ras_core, } static int amdgpu_ras_translate_fb_address(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data) + struct ras_cmd_ctx *cmd, void *data) { struct ras_cmd_translate_fb_address_req *req_buff = (struct ras_cmd_translate_fb_address_req *)cmd->input_buff_raw; @@ -291,7 +291,7 @@ static struct ras_cmd_func_map amdgpu_ras_cmd_maps[] = { {RAS_CMD__TRANSLATE_FB_ADDRESS, amdgpu_ras_translate_fb_address}, }; -int amdgpu_ras_handle_cmd(struct ras_core_context *ras_core, struct ras_cmd_ioctl *cmd, void *data) +int amdgpu_ras_handle_cmd(struct ras_core_context *ras_core, struct ras_cmd_ctx *cmd, void *data) { struct ras_cmd_func_map *ras_cmd = NULL; int i, res; @@ -314,7 +314,7 @@ int amdgpu_ras_handle_cmd(struct ras_core_context *ras_core, struct ras_cmd_ioct int amdgpu_ras_cmd_ioctl_handler(struct ras_core_context *ras_core, uint8_t *cmd_buf, uint32_t buf_size) { - struct ras_cmd_ioctl *cmd = (struct ras_cmd_ioctl *)cmd_buf; + struct ras_cmd_ctx *cmd = (struct ras_cmd_ctx *)cmd_buf; struct ras_core_context *cmd_core = NULL; struct ras_cmd_dev_handle *cmd_handle = NULL; int timeout = 60; diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h index 7017198f1bac..73832c28cb55 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h @@ -48,7 +48,7 @@ struct 
ras_cmd_translate_memory_fd_rsp { }; int amdgpu_ras_handle_cmd(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data); + struct ras_cmd_ctx *cmd, void *data); int amdgpu_ras_cmd_ioctl_handler(struct ras_core_context *ras_core, uint8_t *cmd_buf, uint32_t buf_size); diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c index 6fe3b115986c..94e6d7420d94 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c @@ -43,7 +43,7 @@ static int ras_cmd_remove_device(struct ras_core_context *ras_core) } static int ras_get_block_ecc_info(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data) + struct ras_cmd_ctx *cmd, void *data) { struct ras_cmd_block_ecc_info_req *input_data = (struct ras_cmd_block_ecc_info_req *)cmd->input_buff_raw; @@ -119,7 +119,7 @@ static int ras_cmd_get_group_bad_pages(struct ras_core_context *ras_core, } static int ras_cmd_get_bad_pages(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data) + struct ras_cmd_ctx *cmd, void *data) { struct ras_cmd_bad_pages_info_req *input_data = (struct ras_cmd_bad_pages_info_req *)cmd->input_buff_raw; @@ -141,7 +141,7 @@ static int ras_cmd_get_bad_pages(struct ras_core_context *ras_core, } static int ras_cmd_clear_bad_page_info(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data) + struct ras_cmd_ctx *cmd, void *data) { if (cmd->input_size != sizeof(struct ras_cmd_dev_handle)) return RAS_CMD__ERROR_INVALID_INPUT_SIZE; @@ -156,7 +156,7 @@ static int ras_cmd_clear_bad_page_info(struct ras_core_context *ras_core, } static int ras_cmd_reset_all_error_counts(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data) + struct ras_cmd_ctx *cmd, void *data) { if (cmd->input_size != sizeof(struct ras_cmd_dev_handle)) return RAS_CMD__ERROR_INVALID_INPUT_SIZE; @@ -171,7 +171,7 @@ static int ras_cmd_reset_all_error_counts(struct ras_core_context *ras_core, } static int ras_cmd_get_cper_snapshot(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data) + struct ras_cmd_ctx *cmd, void *data) { struct ras_cmd_cper_snapshot_rsp *output_data = (struct ras_cmd_cper_snapshot_rsp *)cmd->output_buff_raw; @@ -193,7 +193,7 @@ static int ras_cmd_get_cper_snapshot(struct ras_core_context *ras_core, } static int ras_cmd_get_cper_records(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data) + struct ras_cmd_ctx *cmd, void *data) { struct ras_cmd_cper_record_req *req = (struct ras_cmd_cper_record_req *)cmd->input_buff_raw; @@ -253,7 +253,7 @@ static int ras_cmd_get_cper_records(struct ras_core_context *ras_core, } static int ras_cmd_get_batch_trace_snapshot(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data) + struct ras_cmd_ctx *cmd, void *data) { struct ras_cmd_batch_trace_snapshot_rsp *rsp = (struct ras_cmd_batch_trace_snapshot_rsp *)cmd->output_buff_raw; @@ -275,7 +275,7 @@ static int ras_cmd_get_batch_trace_snapshot(struct ras_core_context *ras_core, } static int ras_cmd_get_batch_trace_records(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data) + struct ras_cmd_ctx *cmd, void *data) { struct ras_cmd_batch_trace_record_req *input_data = (struct ras_cmd_batch_trace_record_req *)cmd->input_buff_raw; @@ -400,7 +400,7 @@ static enum ras_ta_error_type __get_ras_ta_err_type(enum ras_ecc_err_type error) } static int ras_cmd_inject_error(struct ras_core_context *ras_core, - struct ras_cmd_ioctl 
*cmd, void *data) { struct ras_cmd_inject_error_req *req = (struct ras_cmd_inject_error_req *)cmd->input_buff_raw; @@ -441,7 +441,7 @@ static struct ras_cmd_func_map ras_cmd_maps[] = { }; int rascore_handle_cmd(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data) + struct ras_cmd_ctx *cmd, void *data) { struct ras_cmd_func_map *ras_cmd = NULL; int i; diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_cmd.h b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.h index 6df8c70f5ad8..751ed50b9584 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_cmd.h +++ b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.h @@ -101,7 +101,7 @@ enum ras_error_type { }; struct ras_core_context; -struct ras_cmd_ioctl; +struct ras_cmd_ctx; struct ras_cmd_mgr { struct list_head head; @@ -112,7 +112,7 @@ struct ras_cmd_mgr { struct ras_cmd_func_map { uint32_t cmd_id; int (*func)(struct ras_core_context *ras_core, - struct ras_cmd_ioctl *cmd, void *data); + struct ras_cmd_ctx *cmd, void *data); }; struct ras_device_bdf { @@ -133,7 +133,7 @@ struct ras_cmd_param { }; #pragma pack(push, 8) -struct ras_cmd_ioctl { +struct ras_cmd_ctx { uint32_t magic; union { struct { @@ -414,7 +414,7 @@ struct ras_cmd_batch_trace_record_rsp { int ras_cmd_init(struct ras_core_context *ras_core); int ras_cmd_fini(struct ras_core_context *ras_core); -int rascore_handle_cmd(struct ras_core_context *ras_core, struct ras_cmd_ioctl *cmd, void *data); +int rascore_handle_cmd(struct ras_core_context *ras_core, struct ras_cmd_ctx *cmd, void *data); uint64_t ras_cmd_get_dev_handle(struct ras_core_context *ras_core); int ras_cmd_query_interface_info(struct ras_core_context *ras_core, struct ras_query_interface_info_rsp *rsp); -- cgit v1.2.3 From 25c1e7414b2cabec5a9d539bc897a09774865f60 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 17 Oct 2025 15:27:56 +0800 Subject: drm/amd/ras: Update function and remove redundant code Update functions and remove redundant code: 1. Update the command submission function to prepare it for internal use. 2. Remove unused code that was previously prepared for the ioctl path. V2: Update commit message content.
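The new internal entry point introduced below, amdgpu_ras_mgr_handle_ras_cmd(), hides the ras_cmd_ctx allocation from callers, which only pass raw input/output buffers. A hedged usage sketch, reusing the inject-error request/response types that appear later in this series (block_id and address are placeholder values from the caller):

/* Sketch: submit an error-injection command through the new helper. */
struct ras_cmd_inject_error_req req = {0};
struct ras_cmd_inject_error_rsp rsp = {0};
int ret;

req.block_id = block_id;	/* placeholder caller-provided values */
req.address = address;

ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__INJECT_ERROR,
				    &req, sizeof(req), &rsp, sizeof(rsp));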
Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c | 110 ++++------------------- drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h | 3 +- drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c | 31 +++++++ drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h | 3 + drivers/gpu/drm/amd/ras/rascore/ras_cmd.h | 3 +- drivers/gpu/drm/amd/ras/rascore/ras_core.c | 3 +- drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c | 29 ------ 7 files changed, 55 insertions(+), 127 deletions(-) diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c index 6a281ad8e255..78419b7f7729 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c @@ -36,67 +36,6 @@ #define AMDGPU_RAS_TYPE_AMDGPU 0x2 #define AMDGPU_RAS_TYPE_VF 0x3 -static int amdgpu_ras_query_interface_info(struct ras_core_context *ras_core, - struct ras_cmd_ctx *cmd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; - struct ras_query_interface_info_rsp *output_data = - (struct ras_query_interface_info_rsp *)cmd->output_buff_raw; - int ret; - - if (cmd->input_size != sizeof(struct ras_query_interface_info_req)) - return RAS_CMD__ERROR_INVALID_INPUT_SIZE; - - ret = ras_cmd_query_interface_info(ras_core, output_data); - if (!ret) { - output_data->plat_major_ver = 0; - output_data->plat_minor_ver = 0; - - output_data->interface_type = amdgpu_sriov_vf(adev) ? - RAS_CMD_INTERFACE_TYPE_VF : RAS_CMD_INTERFACE_TYPE_AMDGPU; - - cmd->output_size = sizeof(struct ras_query_interface_info_rsp); - } - - return ret; -} - -static struct ras_core_context *ras_cmd_get_ras_core(uint64_t dev_handle) -{ - struct ras_core_context *ras_core; - - if (!dev_handle || (dev_handle == RAS_CMD_DEV_HANDLE_MAGIC)) - return NULL; - - ras_core = (struct ras_core_context *)(uintptr_t)(dev_handle ^ RAS_CMD_DEV_HANDLE_MAGIC); - - if (ras_cmd_get_dev_handle(ras_core) == dev_handle) - return ras_core; - - return NULL; -} - -static int amdgpu_ras_get_devices_info(struct ras_core_context *ras_core, - struct ras_cmd_ctx *cmd) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; - struct ras_cmd_devices_info_rsp *output_data = - (struct ras_cmd_devices_info_rsp *)cmd->output_buff_raw; - struct ras_cmd_dev_info *dev_info; - - dev_info = &output_data->devs[0]; - dev_info->dev_handle = ras_cmd_get_dev_handle(ras_core); - dev_info->oam_id = adev->smuio.funcs->get_socket_id(adev); - dev_info->ecc_enabled = 1; - dev_info->ecc_supported = 1; - - output_data->dev_num = 1; - output_data->version = 0; - cmd->output_size = sizeof(struct ras_cmd_devices_info_rsp); - - return 0; -} - static int amdgpu_ras_trigger_error_prepare(struct ras_core_context *ras_core, struct ras_cmd_inject_error_req *block_info) { @@ -311,51 +250,34 @@ int amdgpu_ras_handle_cmd(struct ras_core_context *ras_core, struct ras_cmd_ctx return res; } -int amdgpu_ras_cmd_ioctl_handler(struct ras_core_context *ras_core, - uint8_t *cmd_buf, uint32_t buf_size) +int amdgpu_ras_submit_cmd(struct ras_core_context *ras_core, struct ras_cmd_ctx *cmd) { - struct ras_cmd_ctx *cmd = (struct ras_cmd_ctx *)cmd_buf; - struct ras_core_context *cmd_core = NULL; - struct ras_cmd_dev_handle *cmd_handle = NULL; + struct ras_core_context *cmd_core = ras_core; int timeout = 60; int res; cmd->cmd_res = RAS_CMD__ERROR_INVALID_CMD; cmd->output_size = 0; - if (!ras_core_is_enabled(ras_core)) + if 
(!ras_core_is_enabled(cmd_core)) return RAS_CMD__ERROR_ACCESS_DENIED; - if (cmd->cmd_id == RAS_CMD__QUERY_INTERFACE_INFO) { - cmd->cmd_res = amdgpu_ras_query_interface_info(ras_core, cmd); - } else if (cmd->cmd_id == RAS_CMD__GET_DEVICES_INFO) { - cmd->cmd_res = amdgpu_ras_get_devices_info(ras_core, cmd); - } else { - cmd_handle = (struct ras_cmd_dev_handle *)cmd->input_buff_raw; - cmd_core = ras_cmd_get_ras_core(cmd_handle->dev_handle); - if (!cmd_core) - return RAS_CMD__ERROR_INVALID_INPUT_DATA; - - while (ras_core_gpu_in_reset(cmd_core)) { - msleep(1000); - if (!timeout--) - return RAS_CMD__ERROR_TIMEOUT; - } - - - if (!ras_core_is_enabled(cmd_core)) - return RAS_CMD__ERROR_ACCESS_DENIED; + while (ras_core_gpu_in_reset(cmd_core)) { + msleep(1000); + if (!timeout--) + return RAS_CMD__ERROR_TIMEOUT; + } - res = amdgpu_ras_handle_cmd(cmd_core, cmd, NULL); - if (res == RAS_CMD__ERROR_UKNOWN_CMD) - res = rascore_handle_cmd(cmd_core, cmd, NULL); + res = amdgpu_ras_handle_cmd(cmd_core, cmd, NULL); + if (res == RAS_CMD__ERROR_UKNOWN_CMD) + res = rascore_handle_cmd(cmd_core, cmd, NULL); - cmd->cmd_res = res; - } + cmd->cmd_res = res; - if ((cmd->cmd_res == RAS_CMD__SUCCESS) && - ((cmd->output_size + sizeof(*cmd)) > buf_size)) { - RAS_INFO("Insufficient command buffer size 0x%x!\n", buf_size); + if (cmd->output_size > cmd->output_buf_size) { + RAS_DEV_ERR(cmd_core->dev, + "Output size 0x%x exceeds output buffer size 0x%x!\n", + cmd->output_size, cmd->output_buf_size); return RAS_CMD__SUCCESS_EXEED_BUFFER; } diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h index 73832c28cb55..5973b156cc85 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h @@ -49,7 +49,6 @@ struct ras_cmd_translate_memory_fd_rsp { int amdgpu_ras_handle_cmd(struct ras_core_context *ras_core, struct ras_cmd_ctx *cmd, void *data); -int amdgpu_ras_cmd_ioctl_handler(struct ras_core_context *ras_core, - uint8_t *cmd_buf, uint32_t buf_size); +int amdgpu_ras_submit_cmd(struct ras_core_context *ras_core, struct ras_cmd_ctx *cmd); #endif diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c index 13c207c8a843..8007e49951d8 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c @@ -578,3 +578,34 @@ bool amdgpu_ras_mgr_is_rma(struct amdgpu_device *adev) return ras_core_gpu_is_rma(ras_mgr->ras_core); } + +int amdgpu_ras_mgr_handle_ras_cmd(struct amdgpu_device *adev, + uint32_t cmd_id, void *input, uint32_t input_size, + void *output, uint32_t out_size) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + struct ras_cmd_ctx *cmd_ctx; + uint32_t ctx_buf_size = PAGE_SIZE; + int ret; + + if (!amdgpu_ras_mgr_is_ready(adev)) + return -EPERM; + + cmd_ctx = kzalloc(ctx_buf_size, GFP_KERNEL); + if (!cmd_ctx) + return -ENOMEM; + + cmd_ctx->cmd_id = cmd_id; + + memcpy(cmd_ctx->input_buff_raw, input, input_size); + cmd_ctx->input_size = input_size; + cmd_ctx->output_buf_size = ctx_buf_size - sizeof(*cmd_ctx); + + ret = amdgpu_ras_submit_cmd(ras_mgr->ras_core, cmd_ctx); + if (!ret && !cmd_ctx->cmd_res && output && (out_size == cmd_ctx->output_size)) + memcpy(output, cmd_ctx->output_buff_raw, cmd_ctx->output_size); + + kfree(cmd_ctx); + + return ret; +} diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h index 
814b65ef1c62..42f190a8feb9 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h @@ -72,4 +72,7 @@ int amdgpu_ras_mgr_get_curr_nps_mode(struct amdgpu_device *adev, uint32_t *nps_m bool amdgpu_ras_mgr_check_retired_addr(struct amdgpu_device *adev, uint64_t addr); bool amdgpu_ras_mgr_is_rma(struct amdgpu_device *adev); +int amdgpu_ras_mgr_handle_ras_cmd(struct amdgpu_device *adev, + uint32_t cmd_id, void *input, uint32_t input_size, + void *output, uint32_t out_size); #endif diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_cmd.h b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.h index 751ed50b9584..48a0715eb821 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_cmd.h +++ b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.h @@ -153,7 +153,8 @@ struct ras_cmd_ctx { uint32_t cmd_res; uint32_t input_size; uint32_t output_size; - uint32_t reserved[6]; + uint32_t output_buf_size; + uint32_t reserved[5]; uint8_t input_buff_raw[RAS_CMD_MAX_IN_SIZE]; uint8_t output_buff_raw[]; }; diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c b/drivers/gpu/drm/amd/ras/rascore/ras_core.c index 45fc0608043f..01122b55c98a 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c @@ -62,7 +62,8 @@ int ras_core_convert_timestamp_to_time(struct ras_core_context *ras_core, uint64_t timestamp, struct ras_time *tm) { int days_in_month[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; - uint64_t year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0; + uint64_t month = 0, day = 0, hour = 0, minute = 0, second = 0; + uint32_t year = 0; int seconds_per_day = 24 * 60 * 60; int seconds_per_hour = 60 * 60; int seconds_per_minute = 60; diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c index 9e0a4f605db0..cd6b057bdaf3 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c @@ -199,17 +199,6 @@ static int __ras_eeprom_xfer(struct ras_core_context *ras_core, u32 eeprom_addr, return -EINVAL; } - -/** - * __eeprom_xfer -- Read/write from/to an I2C EEPROM device - * @i2c_adap: pointer to the I2C adapter to use - * @eeprom_addr: EEPROM address from which to read/write - * @eeprom_buf: pointer to data buffer to read into/write from - * @buf_size: the size of @eeprom_buf - * @read: True if reading from the EEPROM, false if writing - * - * Returns the number of bytes read/written; -errno on error. - */ static int __eeprom_xfer(struct ras_core_context *ras_core, u32 eeprom_addr, u8 *eeprom_buf, u32 buf_size, bool read) { @@ -454,13 +443,6 @@ static void ras_set_eeprom_table_version(struct ras_eeprom_control *control) hdr->version = RAS_TABLE_VER_V3; } -/** - * ras_eeprom_reset_table -- Reset the RAS EEPROM table - * @control: pointer to control structure - * - * Reset the contents of the header of the RAS EEPROM table. - * Return 0 on success, -errno on error. - */ int ras_eeprom_reset_table(struct ras_core_context *ras_core) { struct ras_eeprom_control *control = &ras_core->ras_eeprom; @@ -928,17 +910,6 @@ static int __ras_eeprom_read(struct ras_eeprom_control *control, return res; } -/** - * ras_eeprom_read -- read EEPROM - * @control: pointer to control structure - * @record: array of records to read into - * @num: number of records in @record - * - * Reads num records from the RAS table in EEPROM and - * writes the data into @record array. - * - * Returns 0 on success, -errno on error. 
- */ int ras_eeprom_read(struct ras_core_context *ras_core, struct eeprom_umc_record *record, const u32 num) { -- cgit v1.2.3 From 62902b88ffcb4094cc0814520e08035de74582c4 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 14 Oct 2025 15:30:58 +0800 Subject: drm/amdgpu: ras module supports error injection The ras module now supports error injection. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 60b42c652e7e..22398a1f1ab7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1597,6 +1597,27 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, return 0; } +static int amdgpu_uniras_error_inject(struct amdgpu_device *adev, + struct ras_inject_if *info) +{ + struct ras_cmd_inject_error_req inject_req; + struct ras_cmd_inject_error_rsp rsp; + + if (!info) + return -EINVAL; + + memset(&inject_req, 0, sizeof(inject_req)); + inject_req.block_id = info->head.block; + inject_req.subblock_id = info->head.sub_block_index; + inject_req.address = info->address; + inject_req.error_type = info->head.type; + inject_req.instance_mask = info->instance_mask; + inject_req.value = info->value; + + return amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__INJECT_ERROR, + &inject_req, sizeof(inject_req), &rsp, sizeof(rsp)); +} + /* wrapper of psp_ras_trigger_error */ int amdgpu_ras_error_inject(struct amdgpu_device *adev, struct ras_inject_if *info) @@ -1614,6 +1635,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, info->head.block, info->head.sub_block_index); + if (amdgpu_uniras_enabled(adev)) + return amdgpu_uniras_error_inject(adev, info); + /* inject on guest isn't allowed, return success directly */ if (amdgpu_sriov_vf(adev)) return 0; -- cgit v1.2.3 From a6b5a7a0337e255b5ba6852e0f7d6b0b2cd7b018 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Sat, 11 Oct 2025 10:49:55 +0800 Subject: drm/amdgpu: query bad page info of ras module Query bad page info from the ras module. V2: Update code to reuse bad page output code.
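The read path added below pages through the command interface in fixed-size groups of records; the group arithmetic is the heart of it. A minimal sketch of that mapping (RAS_CMD_MAX_BAD_PAGES_PER_GROUP comes from the patch; the group size of 64 in the comment is only an illustrative assumption, and start/count are caller-supplied):

/* Map a (start, count) window of bad-page records onto groups.
 * Example (assuming a group size of 64): start = 100, count = 30
 * covers records 100..129, i.e. in-group offsets 36..63 of group 1
 * plus offsets 0..1 of group 2, so start_group = 1 and end_group = 3
 * (exclusive).
 */
uint32_t start_group = start / RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
uint32_t end_group = (start + count + RAS_CMD_MAX_BAD_PAGES_PER_GROUP - 1) /
		     RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
uint32_t pos_in_group = start - start_group * RAS_CMD_MAX_BAD_PAGES_PER_GROUP;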
Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 142 ++++++++++++++++++++++---------- 1 file changed, 98 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 22398a1f1ab7..81f72da5b2f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1782,7 +1782,9 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, /* sysfs begin */ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, - struct ras_badpage **bps, unsigned int *count); + struct ras_badpage *bps, uint32_t count, uint32_t start); +static int amdgpu_uniras_badpages_read(struct amdgpu_device *adev, + struct ras_badpage *bps, uint32_t count, uint32_t start); static char *amdgpu_ras_badpage_flags_str(unsigned int flags) { @@ -1840,19 +1842,50 @@ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, unsigned int end = div64_ul(ppos + count - 1, element_size); ssize_t s = 0; struct ras_badpage *bps = NULL; - unsigned int bps_count = 0; + int bps_count = 0, i, status; + uint64_t address; memset(buf, 0, count); - if (amdgpu_ras_badpages_read(adev, &bps, &bps_count)) + bps_count = end - start; + bps = kmalloc_array(bps_count, sizeof(*bps), GFP_KERNEL); + if (!bps) + return 0; + + memset(bps, 0, sizeof(*bps) * bps_count); + + if (amdgpu_uniras_enabled(adev)) + bps_count = amdgpu_uniras_badpages_read(adev, bps, bps_count, start); + else + bps_count = amdgpu_ras_badpages_read(adev, bps, bps_count, start); + + if (bps_count <= 0) { + kfree(bps); return 0; + } + + for (i = 0; i < bps_count; i++) { + address = ((uint64_t)bps[i].bp) << AMDGPU_GPU_PAGE_SHIFT; + if (amdgpu_ras_check_critical_address(adev, address)) + continue; + + bps[i].size = AMDGPU_GPU_PAGE_SIZE; + + status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr, + address); + if (status == -EBUSY) + bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; + else if (status == -ENOENT) + bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT; + else + bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED; - for (; start < end && start < bps_count; start++) s += scnprintf(&buf[s], element_size + 1, "0x%08x : 0x%08x : %1s\n", - bps[start].bp, - bps[start].size, - amdgpu_ras_badpage_flags_str(bps[start].flags)); + bps[i].bp, + bps[i].size, + amdgpu_ras_badpage_flags_str(bps[i].flags)); + } kfree(bps); @@ -2645,62 +2678,83 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev) } } -/* recovery begin */ - -/* return 0 on success. - * caller need free bps. 
- */ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, - struct ras_badpage **bps, unsigned int *count) + struct ras_badpage *bps, uint32_t count, uint32_t start) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; - int i = 0; - int ret = 0, status; + int r = 0; + uint32_t i; if (!con || !con->eh_data || !bps || !count) return -EINVAL; mutex_lock(&con->recovery_lock); data = con->eh_data; - if (!data || data->count == 0) { - *bps = NULL; - ret = -EINVAL; - goto out; + if (start < data->count) { + for (i = start; i < data->count; i++) { + if (!data->bps[i].ts) + continue; + + bps[r].bp = data->bps[i].retired_page; + r++; + if (r >= count) + break; + } } + mutex_unlock(&con->recovery_lock); - *bps = kmalloc_array(data->count, sizeof(struct ras_badpage), GFP_KERNEL); - if (!*bps) { - ret = -ENOMEM; - goto out; - } + return r; +} - for (; i < data->count; i++) { - if (!data->bps[i].ts) - continue; +static int amdgpu_uniras_badpages_read(struct amdgpu_device *adev, + struct ras_badpage *bps, uint32_t count, uint32_t start) +{ + struct ras_cmd_bad_pages_info_req cmd_input; + struct ras_cmd_bad_pages_info_rsp *output; + uint32_t group, start_group, end_group; + uint32_t pos, pos_in_group; + int r = 0, i; - (*bps)[i] = (struct ras_badpage){ - .bp = data->bps[i].retired_page, - .size = AMDGPU_GPU_PAGE_SIZE, - .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, - }; + if (!bps || !count) + return -EINVAL; - if (amdgpu_ras_check_critical_address(adev, - data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) - continue; + output = kmalloc(sizeof(*output), GFP_KERNEL); + if (!output) + return -ENOMEM; - status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr, - data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT); - if (status == -EBUSY) - (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; - else if (status == -ENOENT) - (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT; + memset(&cmd_input, 0, sizeof(cmd_input)); + + start_group = start / RAS_CMD_MAX_BAD_PAGES_PER_GROUP; + end_group = (start + count + RAS_CMD_MAX_BAD_PAGES_PER_GROUP - 1) / + RAS_CMD_MAX_BAD_PAGES_PER_GROUP; + + pos = start; + for (group = start_group; group < end_group; group++) { + memset(output, 0, sizeof(*output)); + cmd_input.group_index = group; + if (amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__GET_BAD_PAGES, + &cmd_input, sizeof(cmd_input), output, sizeof(*output))) + goto out; + + if (pos >= output->bp_total_cnt) + goto out; + + pos_in_group = pos - group * RAS_CMD_MAX_BAD_PAGES_PER_GROUP; + for (i = pos_in_group; i < output->bp_in_group; i++, pos++) { + if (!output->records[i].ts) + continue; + + bps[r].bp = output->records[i].retired_page; + r++; + if (r >= count) + goto out; + } } - *count = con->bad_page_num; out: - mutex_unlock(&con->recovery_lock); - return ret; + kfree(output); + return r; } static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev, -- cgit v1.2.3 From d88c8bec18c0ee7c65a7ecc160613118202754e2 Mon Sep 17 00:00:00 2001 From: Ellen Pan Date: Mon, 6 Oct 2025 15:47:45 -0500 Subject: drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix - This change prepares for the later patches that introduce the _v2 suffix for SRIOV critical regions Signed-off-by: Ellen Pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 20 ++++++------ drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 50 +++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 24 deletions(-) diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index f96beb96c75c..8cd02eb605c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -686,7 +686,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/ adev->virt.fw_reserve.p_pf2vf = (struct amd_sriov_msg_pf2vf_info_header *) - (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); + (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10)); amdgpu_virt_read_pf2vf_data(adev); } @@ -703,21 +703,21 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev) if (adev->mman.fw_vram_usage_va) { adev->virt.fw_reserve.p_pf2vf = (struct amd_sriov_msg_pf2vf_info_header *) - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10)); adev->virt.fw_reserve.p_vf2pf = (struct amd_sriov_msg_vf2pf_info_header *) - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10)); adev->virt.fw_reserve.ras_telemetry = - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10)); + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10)); } else if (adev->mman.drv_vram_usage_va) { adev->virt.fw_reserve.p_pf2vf = (struct amd_sriov_msg_pf2vf_info_header *) - (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); + (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10)); adev->virt.fw_reserve.p_vf2pf = (struct amd_sriov_msg_vf2pf_info_header *) - (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); + (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10)); adev->virt.fw_reserve.ras_telemetry = - (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10)); + (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10)); } amdgpu_virt_read_pf2vf_data(adev); @@ -1304,7 +1304,7 @@ static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev, checksum = host_telemetry->header.checksum; used_size = host_telemetry->header.used_size; - if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) + if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10)) return 0; tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL); @@ -1383,7 +1383,7 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev, checksum = host_telemetry->header.checksum; used_size = host_telemetry->header.used_size; - if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) + if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10)) return -EINVAL; cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL); @@ -1515,7 +1515,7 @@ static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev, checksum = host_telemetry->header.checksum; used_size = host_telemetry->header.used_size; - if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) + if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10)) return 0; tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 3a79ed7d8031..7509756b9ac5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ 
-23,26 +23,48 @@ #ifndef AMDGV_SRIOV_MSG__H_ #define AMDGV_SRIOV_MSG__H_ -/* unit in kilobytes */ -#define AMD_SRIOV_MSG_VBIOS_OFFSET 0 -#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64 -#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB -#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4 -#define AMD_SRIOV_MSG_TMR_OFFSET_KB 2048 -#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB 2 -#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB 64 +#define AMD_SRIOV_MSG_SIZE_KB 1 + /* - * layout + * layout v1 * 0 64KB 65KB 66KB 68KB 132KB * | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ... * | 64KB | 1KB | 1KB | 2KB | 64KB | ... */ -#define AMD_SRIOV_MSG_SIZE_KB 1 -#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB -#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB) -#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB) -#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB) +/* + * layout v2 (offsets are dynamically allocated and the offsets below are examples) + * 0 1KB 64KB 65KB 66KB 68KB 132KB + * | INITD_H | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ... + * | 1KB | 64KB | 1KB | 1KB | 2KB | 64KB | ... + * + * Note: PF2VF + VF2PF + Bad Page = DataExchange region (allocated contiguously) + */ + +/* v1 layout sizes */ +#define AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 64 +#define AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 1 +#define AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 1 +#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1 2 +#define AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 64 +#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 \ + (AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 + AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 + \ + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1) + +/* v1 offsets */ +#define AMD_SRIOV_MSG_VBIOS_OFFSET_V1 0 +#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1 AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 +#define AMD_SRIOV_MSG_TMR_OFFSET_KB 2048 +#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1 +#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 \ + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB) +#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 \ + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB) +#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 \ + (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1) +#define AMD_SRIOV_MSG_INIT_DATA_TOT_SIZE_KB_V1 \ + (AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \ + AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1) /* * PF2VF history log: -- cgit v1.2.3 From 6d2191d226553c86f8ea98f5e6cba13d9ef13280 Mon Sep 17 00:00:00 2001 From: Ellen Pan Date: Tue, 7 Oct 2025 09:46:16 -0500 Subject: drm/amdgpu: Add SRIOV crit_region_version support 1. Added enum amd_sriov_crit_region_version to support multi versions 2. 
Added logic in SRIOV mailbox to recognize crit_region version during req_gpu_init_data Signed-off-by: Ellen Pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 5 +++++ drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 23 ++++++++++++++++------- 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 8cd02eb605c5..56573fb27f63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -150,9 +150,10 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev) virt->ops->req_init_data(adev); if (adev->virt.req_init_data_ver > 0) - DRM_INFO("host supports REQ_INIT_DATA handshake\n"); + dev_info(adev->dev, "host supports REQ_INIT_DATA handshake of critical_region_version %d\n", + adev->virt.req_init_data_ver); else - DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n"); + dev_warn(adev->dev, "host doesn't support REQ_INIT_DATA handshake\n"); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index d1172c8e58c4..36247a160aa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -262,6 +262,11 @@ struct amdgpu_virt_ras { DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST); +struct amdgpu_virt_region { + uint32_t offset; + uint32_t size_kb; +}; + /* GPU virtualization */ struct amdgpu_virt { uint32_t caps; @@ -289,6 +294,9 @@ struct amdgpu_virt { bool ras_init_done; uint32_t reg_access; + /* dynamic(v2) critical regions */ + struct amdgpu_virt_region init_data_header; + /* vf2pf message */ struct delayed_work vf2pf_work; uint32_t vf2pf_update_interval_ms; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 7509756b9ac5..9228fd2c6dfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -66,6 +66,11 @@ (AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \ AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1) +enum amd_sriov_crit_region_version { + GPU_CRIT_REGION_V1 = 1, + GPU_CRIT_REGION_V2 = 2, +}; + /* * PF2VF history log: * v1 defined in amdgim diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index e5282a5d05d9..cd5b2f07edb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -222,12 +222,20 @@ send_request: adev->virt.req_init_data_ver = 0; } else { if (req == IDH_REQ_GPU_INIT_DATA) { - adev->virt.req_init_data_ver = - RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1); - - /* assume V1 in case host doesn't set version number */ - if (adev->virt.req_init_data_ver < 1) - adev->virt.req_init_data_ver = 1; + switch (RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1)) { + case GPU_CRIT_REGION_V2: + adev->virt.req_init_data_ver = GPU_CRIT_REGION_V2; + adev->virt.init_data_header.offset = + RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW2); + adev->virt.init_data_header.size_kb = + RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW3); + break; + default: + adev->virt.req_init_data_ver = GPU_CRIT_REGION_V1; + adev->virt.init_data_header.offset = -1; + adev->virt.init_data_header.size_kb = 0; + break; + } } } @@ -285,7 +293,8 @@ static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev, static int
xgpu_nv_request_init_data(struct amdgpu_device *adev) { - return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA); + return xgpu_nv_send_access_requests_with_param(adev, IDH_REQ_GPU_INIT_DATA, + 0, GPU_CRIT_REGION_V2, 0); } static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev, -- cgit v1.2.3 From 07009df6494dc9272b233eeddc10d26f545ff09a Mon Sep 17 00:00:00 2001 From: Ellen Pan Date: Tue, 7 Oct 2025 11:00:16 -0500 Subject: drm/amdgpu: Introduce SRIOV critical regions v2 during VF init 1. Introduced amdgpu_virt_init_critical_region during VF init. - VFs use init_data_header_offset and init_data_header_size_kb transmitted via PF2VF mailbox to fetch the offset of critical regions' offsets/sizes in VRAM and save to adev->virt.crit_region_offsets and adev->virt.crit_region_sizes_kb. Signed-off-by: Ellen Pan Reviewed-by: Alex Deucher Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 174 ++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 11 ++ drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 31 +++++ 4 files changed, 220 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a99185ed0642..3ffb9bb1ec0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2782,6 +2782,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) r = amdgpu_virt_request_full_gpu(adev, true); if (r) return r; + + r = amdgpu_virt_init_critical_region(adev); + if (r) + return r; } switch (adev->asic_type) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 56573fb27f63..1779b1ac30d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -44,6 +44,18 @@ vf2pf_info->ucode_info[ucode].version = ver; \ } while (0) +#define mmRCC_CONFIG_MEMSIZE 0xde3 + +const char *amdgpu_virt_dynamic_crit_table_name[] = { + "IP DISCOVERY", + "VBIOS IMG", + "RAS TELEMETRY", + "DATA EXCHANGE", + "BAD PAGE INFO", + "INIT HEADER", + "LAST", +}; + bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) { /* By now all MMIO pages except mailbox are blocked */ @@ -843,6 +855,168 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev) adev->virt.ras.cper_rptr = 0; } +static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end) +{ + uint32_t sum = 0; + + if (buf_start >= buf_end) + return 0; + + for (; buf_start < buf_end; buf_start++) + sum += buf_start[0]; + + return 0xffffffff - sum; +} + +int amdgpu_virt_init_critical_region(struct amdgpu_device *adev) +{ + struct amd_sriov_msg_init_data_header *init_data_hdr = NULL; + uint32_t init_hdr_offset = adev->virt.init_data_header.offset; + uint32_t init_hdr_size = adev->virt.init_data_header.size_kb << 10; + uint64_t vram_size; + int r = 0; + uint8_t checksum = 0; + + /* Skip below init if critical region version != v2 */ + if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2) + return 0; + + if (init_hdr_offset < 0) { + dev_err(adev->dev, "Invalid init header offset\n"); + return -EINVAL; + } + + vram_size = RREG32(mmRCC_CONFIG_MEMSIZE); + if (!vram_size || vram_size == U32_MAX) + return -EINVAL; + vram_size <<= 20; + + if ((init_hdr_offset + init_hdr_size) > vram_size) { + dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n"); + return -EINVAL; + } + + /* Allocate for init_data_hdr 
*/ + init_data_hdr = kzalloc(sizeof(struct amd_sriov_msg_init_data_header), GFP_KERNEL); + if (!init_data_hdr) + return -ENOMEM; + + amdgpu_device_vram_access(adev, (uint64_t)init_hdr_offset, (uint32_t *)init_data_hdr, + sizeof(struct amd_sriov_msg_init_data_header), false); + + /* Table validation */ + if (strncmp(init_data_hdr->signature, + AMDGPU_SRIOV_CRIT_DATA_SIGNATURE, + AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) { + dev_err(adev->dev, "Invalid init data signature: %.4s\n", + init_data_hdr->signature); + r = -EINVAL; + goto out; + } + + checksum = amdgpu_virt_crit_region_calc_checksum( + (uint8_t *)&init_data_hdr->initdata_offset, + (uint8_t *)init_data_hdr + + sizeof(struct amd_sriov_msg_init_data_header)); + if (checksum != init_data_hdr->checksum) { + dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n", + checksum, init_data_hdr->checksum); + r = -EINVAL; + goto out; + } + + memset(&adev->virt.crit_regn, 0, sizeof(adev->virt.crit_regn)); + memset(adev->virt.crit_regn_tbl, 0, sizeof(adev->virt.crit_regn_tbl)); + + adev->virt.crit_regn.offset = init_data_hdr->initdata_offset; + adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb; + + /* Validation and initialization for each table entry */ + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_IPD_TABLE_ID)) { + if (!init_data_hdr->ip_discovery_size_in_kb || + init_data_hdr->ip_discovery_size_in_kb > DISCOVERY_TMR_SIZE) { + dev_err(adev->dev, "Invalid %s size: 0x%x\n", + amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_IPD_TABLE_ID], + init_data_hdr->ip_discovery_size_in_kb); + r = -EINVAL; + goto out; + } + + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset = + init_data_hdr->ip_discovery_offset; + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb = + init_data_hdr->ip_discovery_size_in_kb; + } + + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID)) { + if (!init_data_hdr->vbios_img_size_in_kb) { + dev_err(adev->dev, "Invalid %s size: 0x%x\n", + amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID], + init_data_hdr->vbios_img_size_in_kb); + r = -EINVAL; + goto out; + } + + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset = + init_data_hdr->vbios_img_offset; + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb = + init_data_hdr->vbios_img_size_in_kb; + } + + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID)) { + if (!init_data_hdr->ras_tele_info_size_in_kb) { + dev_err(adev->dev, "Invalid %s size: 0x%x\n", + amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID], + init_data_hdr->ras_tele_info_size_in_kb); + r = -EINVAL; + goto out; + } + + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset = + init_data_hdr->ras_tele_info_offset; + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb = + init_data_hdr->ras_tele_info_size_in_kb; + } + + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID)) { + if (!init_data_hdr->dataexchange_size_in_kb) { + dev_err(adev->dev, "Invalid %s size: 0x%x\n", + amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID], + init_data_hdr->dataexchange_size_in_kb); + r = -EINVAL; + goto out; + } + + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset = + init_data_hdr->dataexchange_offset; + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb = + 
init_data_hdr->dataexchange_size_in_kb; + } + + if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID)) { + if (!init_data_hdr->bad_page_size_in_kb) { + dev_err(adev->dev, "Invalid %s size: 0x%x\n", + amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID], + init_data_hdr->bad_page_size_in_kb); + r = -EINVAL; + goto out; + } + + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset = + init_data_hdr->bad_page_info_offset; + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb = + init_data_hdr->bad_page_size_in_kb; + } + + adev->virt.is_dynamic_crit_regn_enabled = true; + +out: + kfree(init_data_hdr); + init_data_hdr = NULL; + + return r; +} + void amdgpu_virt_init(struct amdgpu_device *adev) { bool is_sriov = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 36247a160aa6..8d03a8620de9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -54,6 +54,12 @@ #define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2 +/* Signature used to validate the SR-IOV dynamic critical region init data header ("INDA") */ +#define AMDGPU_SRIOV_CRIT_DATA_SIGNATURE "INDA" +#define AMDGPU_SRIOV_CRIT_DATA_SIG_LEN 4 + +#define IS_SRIOV_CRIT_REGN_ENTRY_VALID(hdr, id) ((hdr)->valid_tables & (1 << (id))) + enum amdgpu_sriov_vf_mode { SRIOV_VF_MODE_BARE_METAL = 0, SRIOV_VF_MODE_ONE_VF, @@ -296,6 +302,9 @@ struct amdgpu_virt { /* dynamic(v2) critical regions */ struct amdgpu_virt_region init_data_header; + struct amdgpu_virt_region crit_regn; + struct amdgpu_virt_region crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID]; + bool is_dynamic_crit_regn_enabled; /* vf2pf message */ struct delayed_work vf2pf_work; @@ -432,6 +441,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev); void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev); void amdgpu_virt_init(struct amdgpu_device *adev); +int amdgpu_virt_init_critical_region(struct amdgpu_device *adev); + bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 9228fd2c6dfd..1cee083fb6bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -71,6 +71,37 @@ enum amd_sriov_crit_region_version { GPU_CRIT_REGION_V2 = 2, }; +/* v2 layout offset enum (in order of allocation) */ +enum amd_sriov_msg_table_id_enum { + AMD_SRIOV_MSG_IPD_TABLE_ID = 0, + AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, + AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID, + AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID, + AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID, + AMD_SRIOV_MSG_INITD_H_TABLE_ID, + AMD_SRIOV_MSG_MAX_TABLE_ID, +}; + +struct amd_sriov_msg_init_data_header { + char signature[4]; /* "INDA" */ + uint32_t version; + uint32_t checksum; + uint32_t initdata_offset; /* 0 */ + uint32_t initdata_size_in_kb; /* 5MB */ + uint32_t valid_tables; + uint32_t vbios_img_offset; + uint32_t vbios_img_size_in_kb; + uint32_t dataexchange_offset; + uint32_t dataexchange_size_in_kb; + uint32_t ras_tele_info_offset; + uint32_t ras_tele_info_size_in_kb; + uint32_t ip_discovery_offset; + uint32_t ip_discovery_size_in_kb; + uint32_t bad_page_info_offset; + uint32_t bad_page_size_in_kb; + uint32_t reserved[8]; +}; + /* * PF2VF history log: * v1 defined in amdgim 
-- cgit v1.2.3 From 13ccaa84430efa78f9eedb3a04cac2f9af254d77 Mon Sep 17 00:00:00 2001 From: Ellen Pan Date: Wed, 8 Oct 2025 15:01:10 -0500 Subject: drm/amdgpu: Reuse fw_vram_usage_* for dynamic critical region in SRIOV - During guest driver init, as VFs receive the PF msg to init the dynamic critical region (v2), VFs reuse fw_vram_usage_* from ttm to store critical region tables in a 5MB chunk. Signed-off-by: Ellen Pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 29 +++++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 9 ++++++++ 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index c7d32fb216e4..636385c80f64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -181,19 +181,22 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) u8 frev, crev; int usage_bytes = 0; - if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) { - if (frev == 2 && crev == 1) { - fw_usage_v2_1 = - (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); - amdgpu_atomfirmware_allocate_fb_v2_1(adev, - fw_usage_v2_1, - &usage_bytes); - } else if (frev >= 2 && crev >= 2) { - fw_usage_v2_2 = - (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset); - amdgpu_atomfirmware_allocate_fb_v2_2(adev, - fw_usage_v2_2, - &usage_bytes); + /* Skip atomfirmware allocation for SRIOV VFs when dynamic crit regn is enabled */ + if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) { + if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) { + if (frev == 2 && crev == 1) { + fw_usage_v2_1 = + (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); + amdgpu_atomfirmware_allocate_fb_v2_1(adev, + fw_usage_v2_1, + &usage_bytes); + } else if (frev >= 2 && crev >= 2) { + fw_usage_v2_2 = + (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset); + amdgpu_atomfirmware_allocate_fb_v2_2(adev, + fw_usage_v2_2, + &usage_bytes); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 2f2a88979644..326476089db3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1941,17 +1941,17 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; /* - *The reserved vram for driver must be pinned to the specified - *place on the VRAM, so reserve it early. + * The reserved VRAM for the driver must be pinned to a specific + * location in VRAM, so reserve it early. */ r = amdgpu_ttm_drv_reserve_vram_init(adev); if (r) return r; /* - * only NAVI10 and onwards ASIC support for IP discovery. - * If IP discovery enabled, a block of memory should be - * reserved for IP discovey. + * only NAVI10 and later ASICs support IP discovery. + * If IP discovery is enabled, a block of memory should be + * reserved for it.
*/ if (adev->discovery.reserve_tmr) { r = amdgpu_ttm_reserve_tmr(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 1779b1ac30d2..7cabcbdb50e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1008,6 +1008,15 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev) init_data_hdr->bad_page_size_in_kb; } + /* reserved memory starts from crit region base offset with the size of 5MB */ + adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset; + adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10; + dev_info(adev->dev, + "critical region v%d requested to reserve memory start at %08llx with %llu KB.\n", + init_data_hdr->version, + adev->mman.fw_vram_usage_start_offset, + adev->mman.fw_vram_usage_size >> 10); + adev->virt.is_dynamic_crit_regn_enabled = true; out: -- cgit v1.2.3 From b4a8fcc7826ad2bc95f05c8481d122ba23535a65 Mon Sep 17 00:00:00 2001 From: Ellen Pan Date: Tue, 7 Oct 2025 11:12:39 -0500 Subject: drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets 1. Added VF logic in amdgpu_virt to init IP discovery using the offsets from dynamic(v2) critical regions; 2. Added VF logic in amdgpu_virt to init bios image using the offsets from dynamic(v2) critical regions; Signed-off-by: Ellen Pan Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 34 +++++++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 26 +++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 37 +++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 ++ 4 files changed, 85 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 00e96419fcda..db705bf723f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -96,11 +96,12 @@ void amdgpu_bios_release(struct amdgpu_device *adev) * part of the system bios. On boot, the system bios puts a * copy of the igp rom at the start of vram if a discrete card is * present. - * For SR-IOV, the vbios image is also put in VRAM in the VF. + * For SR-IOV, if dynamic critical region is not enabled, + * the vbios image is also put at the start of VRAM in the VF. */ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev) { - uint8_t __iomem *bios; + uint8_t __iomem *bios = NULL; resource_size_t vram_base; resource_size_t size = 256 * 1024; /* ??? */ @@ -114,18 +115,33 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev) adev->bios = NULL; vram_base = pci_resource_start(adev->pdev, 0); - bios = ioremap_wc(vram_base, size); - if (!bios) - return false; adev->bios = kmalloc(size, GFP_KERNEL); - if (!adev->bios) { - iounmap(bios); + if (!adev->bios) return false; + + /* For SRIOV with dynamic critical region is enabled, + * the vbios image is put at a dynamic offset of VRAM in the VF. + * If dynamic critical region is disabled, follow the existing logic as on baremetal. 
+ */ + if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) { + if (amdgpu_virt_get_dynamic_data_info(adev, + AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, adev->bios, (uint64_t *)&size)) { + amdgpu_bios_release(adev); + return false; + } + } else { + bios = ioremap_wc(vram_base, size); + if (!bios) { + amdgpu_bios_release(adev); + return false; + } + + memcpy_fromio(adev->bios, bios, size); + iounmap(bios); } + adev->bios_size = size; - memcpy_fromio(adev->bios, bios, size); - iounmap(bios); if (!check_atom_bios(adev, size)) { amdgpu_bios_release(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 3097b2946682..a7cb4665f485 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -304,10 +304,26 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, * then it is not required to be reserved. */ if (sz_valid) { - uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; - amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, - adev->discovery.size, false); - adev->discovery.reserve_tmr = true; + if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) { + /* For SRIOV VFs with dynamic critical region enabled, + * we will get the IPD binary via below call. + * If dynamic critical is disabled, fall through to normal seq. + */ + if (amdgpu_virt_get_dynamic_data_info(adev, + AMD_SRIOV_MSG_IPD_TABLE_ID, binary, + (uint64_t *)&adev->discovery.size)) { + dev_err(adev->dev, + "failed to read discovery info from dynamic critical region."); + ret = -EINVAL; + goto exit; + } + } else { + uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; + + amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, + adev->discovery.size, false); + adev->discovery.reserve_tmr = true; + } } else { ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary); } @@ -316,7 +332,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, dev_err(adev->dev, "failed to read discovery info from memory, vram size read: %llx", vram_size); - +exit: return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 7cabcbdb50e1..bf0fd95919e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1008,6 +1008,14 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev) init_data_hdr->bad_page_size_in_kb; } + /* Validation for critical region info */ + if (adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb > DISCOVERY_TMR_SIZE) { + dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n", + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb); + r = -EINVAL; + goto out; + } + /* reserved memory starts from crit region base offset with the size of 5MB */ adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset; adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10; @@ -1026,6 +1034,35 @@ out: return r; } +int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev, + int data_id, uint8_t *binary, uint64_t *size) +{ + uint32_t data_offset = 0; + uint32_t data_size = 0; + enum amd_sriov_msg_table_id_enum data_table_id = data_id; + + if (data_table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID) + return -EINVAL; + + data_offset = adev->virt.crit_regn_tbl[data_table_id].offset; + data_size = adev->virt.crit_regn_tbl[data_table_id].size_kb << 10; + + /* Validate on input params */ + if (!binary || !size || *size < 
(uint64_t)data_size) + return -EINVAL; + + /* Proceed to copy the dynamic content */ + amdgpu_device_vram_access(adev, + (uint64_t)data_offset, (uint32_t *)binary, data_size, false); + *size = (uint64_t)data_size; + + dev_dbg(adev->dev, + "Got %s info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n", + amdgpu_virt_dynamic_crit_table_name[data_id], data_offset, data_size); + + return 0; +} + void amdgpu_virt_init(struct amdgpu_device *adev) { bool is_sriov = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 8d03a8620de9..2a13cc892a13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -442,6 +442,8 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev); void amdgpu_virt_init(struct amdgpu_device *adev); int amdgpu_virt_init_critical_region(struct amdgpu_device *adev); +int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev, + int data_id, uint8_t *binary, uint64_t *size); bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); -- cgit v1.2.3 From 91da59131056ff5492655be6e96f77601e1cd43f Mon Sep 17 00:00:00 2001 From: Ellen Pan Date: Wed, 8 Oct 2025 15:36:50 -0500 Subject: drm/amdgpu: Add logic for VF data exchange region to init from dynamic crit_region offsets 1. Added VF logic to init data exchange region using the offsets from dynamic(v2) critical regions; Signed-off-by: Ellen Pan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 104 +++++++++++++++++++++++++------ 1 file changed, 85 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index bf0fd95919e6..66e9cd103597 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -218,12 +218,12 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev) &adev->virt.mm_table.gpu_addr, (void *)&adev->virt.mm_table.cpu_addr); if (r) { - DRM_ERROR("failed to alloc mm table and error = %d.\n", r); + dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r); return r; } memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE); - DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n", + dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n", adev->virt.mm_table.gpu_addr, adev->virt.mm_table.cpu_addr); return 0; @@ -403,7 +403,9 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, AMDGPU_GPU_PAGE_SIZE, &bo, NULL)) - DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp); + dev_dbg(adev->dev, + "RAS WARN: reserve vram for retired page %llx fail\n", + bp); data->bps_bo[i] = bo; } data->last_reserved = i + 1; @@ -671,10 +673,34 @@ out: schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms); } +static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data) +{ + uint32_t dataexchange_offset = + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset; + uint32_t dataexchange_size = + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10; + uint64_t pos = 0; + + dev_info(adev->dev, + "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n", + dataexchange_offset, dataexchange_size); + + if 
(!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) { + dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n"); + return -EINVAL; + } + + pos = (uint64_t)dataexchange_offset; + amdgpu_device_vram_access(adev, pos, pfvf_data, + dataexchange_size, false); + + return 0; +} + void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev) { if (adev->virt.vf2pf_update_interval_ms != 0) { - DRM_INFO("clean up the vf2pf work item\n"); + dev_info(adev->dev, "clean up the vf2pf work item\n"); cancel_delayed_work_sync(&adev->virt.vf2pf_work); adev->virt.vf2pf_update_interval_ms = 0; } @@ -682,13 +708,15 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev) void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) { + uint32_t *pfvf_data = NULL; + adev->virt.fw_reserve.p_pf2vf = NULL; adev->virt.fw_reserve.p_vf2pf = NULL; adev->virt.vf2pf_update_interval_ms = 0; adev->virt.vf2pf_update_retry_cnt = 0; if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) { - DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!"); + dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!"); } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) { /* go through this logic in ip_init and reset to init workqueue*/ amdgpu_virt_exchange_data(adev); @@ -697,11 +725,34 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms)); } else if (adev->bios != NULL) { /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/ - adev->virt.fw_reserve.p_pf2vf = - (struct amd_sriov_msg_pf2vf_info_header *) - (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10)); + if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) { + pfvf_data = + kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10, + GFP_KERNEL); + if (!pfvf_data) { + dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n"); + return; + } - amdgpu_virt_read_pf2vf_data(adev); + if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data)) + goto free_pfvf_data; + + adev->virt.fw_reserve.p_pf2vf = + (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data; + + amdgpu_virt_read_pf2vf_data(adev); + +free_pfvf_data: + kfree(pfvf_data); + pfvf_data = NULL; + adev->virt.fw_reserve.p_pf2vf = NULL; + } else { + adev->virt.fw_reserve.p_pf2vf = + (struct amd_sriov_msg_pf2vf_info_header *) + (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10)); + + amdgpu_virt_read_pf2vf_data(adev); + } } } @@ -714,14 +765,29 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev) if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) { if (adev->mman.fw_vram_usage_va) { - adev->virt.fw_reserve.p_pf2vf = - (struct amd_sriov_msg_pf2vf_info_header *) - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10)); - adev->virt.fw_reserve.p_vf2pf = - (struct amd_sriov_msg_vf2pf_info_header *) - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10)); - adev->virt.fw_reserve.ras_telemetry = - (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10)); + if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) { + adev->virt.fw_reserve.p_pf2vf = + (struct amd_sriov_msg_pf2vf_info_header *) + (adev->mman.fw_vram_usage_va + + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset); + 
adev->virt.fw_reserve.p_vf2pf = + (struct amd_sriov_msg_vf2pf_info_header *) + (adev->mman.fw_vram_usage_va + + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset + + (AMD_SRIOV_MSG_SIZE_KB << 10)); + adev->virt.fw_reserve.ras_telemetry = + (adev->mman.fw_vram_usage_va + + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset); + } else { + adev->virt.fw_reserve.p_pf2vf = + (struct amd_sriov_msg_pf2vf_info_header *) + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10)); + adev->virt.fw_reserve.p_vf2pf = + (struct amd_sriov_msg_vf2pf_info_header *) + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10)); + adev->virt.fw_reserve.ras_telemetry = + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10)); + } } else if (adev->mman.drv_vram_usage_va) { adev->virt.fw_reserve.p_pf2vf = (struct amd_sriov_msg_pf2vf_info_header *) @@ -829,7 +895,7 @@ static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg) break; default: /* other chip doesn't support SRIOV */ is_sriov = false; - DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type); + dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type); break; } } @@ -1510,7 +1576,7 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block bloc case AMDGPU_RAS_BLOCK__MPIO: return RAS_TELEMETRY_GPU_BLOCK_MPIO; default: - DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n", + dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n", block); return RAS_TELEMETRY_GPU_BLOCK_COUNT; } -- cgit v1.2.3 From fe2ccc7b7b238989d453747b302cf780af8fdb2b Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Sat, 11 Oct 2025 16:52:17 +0800 Subject: drm/amdgpu: query block error count of ras module Query block error count of ras module. 
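For illustration, a caller-side sketch of the query path this patch extends (hypothetical usage; assumes a uniras-capable device, the count fields are assumed to be unsigned long, other names are taken from the hunk below):

	struct ras_query_if info = {
		.head.block = AMDGPU_RAS_BLOCK__UMC,
		.head.sub_block_index = 0,
	};

	/* routed to amdgpu_uniras_query_block_ecc() when uniras is enabled */
	if (!amdgpu_ras_query_error_status(adev, &info))
		dev_info(adev->dev, "ce %lu ue %lu de %lu\n",
			 info.ce_count, info.ue_count, info.de_count);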
Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 81f72da5b2f0..ff34e1c0d9e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1543,9 +1543,36 @@ out_fini_err_data: return ret; } +static int amdgpu_uniras_query_block_ecc(struct amdgpu_device *adev, + struct ras_query_if *info) +{ + struct ras_cmd_block_ecc_info_req req = {0}; + struct ras_cmd_block_ecc_info_rsp rsp = {0}; + int ret; + + if (!info) + return -EINVAL; + + req.block_id = info->head.block; + req.subblock_id = info->head.sub_block_index; + + ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__GET_BLOCK_ECC_STATUS, + &req, sizeof(req), &rsp, sizeof(rsp)); + if (!ret) { + info->ce_count = rsp.ce_count; + info->ue_count = rsp.ue_count; + info->de_count = rsp.de_count; + } + + return ret; +} + int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) { - return amdgpu_ras_query_error_status_with_event(adev, info, RAS_EVENT_TYPE_INVALID); + if (amdgpu_uniras_enabled(adev)) + return amdgpu_uniras_query_block_ecc(adev, info); + else + return amdgpu_ras_query_error_status_with_event(adev, info, RAS_EVENT_TYPE_INVALID); } int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, -- cgit v1.2.3 From 883687c30736c1b4d36c8cc6153bca5c2c76a5d8 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 16 Oct 2025 18:49:18 +0530 Subject: drm/amdgpu: Remove unused members in amdgpu_mman Discovery related members are now part of amdgpu_discovery_info. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 68e225efec3a..577ee04ce0bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -83,9 +83,6 @@ struct amdgpu_mman { uint64_t stolen_reserved_offset; uint64_t stolen_reserved_size; - /* discovery */ - uint8_t *discovery_bin; - uint32_t discovery_tmr_size; /* fw reserved memory */ struct amdgpu_bo *fw_reserved_memory; struct amdgpu_bo *fw_reserved_memory_extend; -- cgit v1.2.3 From 94edd6518613072c1bdc1ee69b63373d91cfca62 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Sun, 19 Oct 2025 19:05:46 -0700 Subject: drm/xe/xe3p_lpm: Configure MAIN_GAMCTRL_QUEUE_SELECT Starting from Xe3p, there are two different copies of some of the GAM registers: the traditional MCR variant at their old locations, and a new unicast copy known as "main_gamctrl." The Xe driver doesn't use these registers directly, but we need to instruct the GuC on which set it should use. Since the new, unicast registers are preferred (since they avoid the need for unnecessary MCR synchronization), set a new GuC feature flag, GUC_CTL_MAIN_GAMCTRL_QUEUES to convey this decision. A new helper function, xe_guc_using_main_gamctrl_queues(), is added for use in the 3 independent places that need to handle configuration of the new reporting queues. The mmio write to enable the main gamctl is only done during the general GuC upload. The gamctrl registers are not accessed by the GuC during hwconfig load. 
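Condensed, the selection added below reads roughly as follows (illustrative sketch only; in the driver the two halves live in separate functions, guc_ctl_feature_flags() and xe_guc_upload(), as the hunks show):

	if (xe_guc_using_main_gamctrl_queues(guc)) {
		/* GuC side: feature flag carried in the GuC ctl params */
		flags |= GUC_CTL_MAIN_GAMCTRL_QUEUES;
		/* hardware side: select main_gamctrl before firmware upload */
		xe_mmio_write32(&gt->mmio, MAIN_GAMCTRL_MODE,
				MAIN_GAMCTRL_QUEUE_SELECT);
	}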
Last, the ADS blob for communicating the queue addresses contains both a DPA and GGTT offset. The GuC documentation states that DPA is now MBZ when using the MAIN_GAMCTRL queues. Bspec: 76445, 73540 Signed-off-by: Brian Welty Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251019-xe3p-gamctrl-v1-1-ad66d3c1908f@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++ drivers/gpu/drm/xe/xe_guc.c | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc.h | 1 + drivers/gpu/drm/xe/xe_guc_ads.c | 6 +++++- drivers/gpu/drm/xe/xe_guc_fwif.h | 1 + 5 files changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 228de47c0f3f..1876f2957c6d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -544,6 +544,9 @@ #define SARB_CHICKEN1 XE_REG_MCR(0xe90c) #define COMP_CKN_IN REG_GENMASK(30, 29) +#define MAIN_GAMCTRL_MODE XE_REG(0xef00) +#define MAIN_GAMCTRL_QUEUE_SELECT REG_BIT(0) + #define RCU_MODE XE_REG(0x14800, XE_REG_OPTION_MASKED) #define RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1) #define RCU_MODE_CCS_ENABLE REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index d94490979adc..37e3735f34e6 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -91,6 +91,9 @@ static u32 guc_ctl_feature_flags(struct xe_guc *guc) if (xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev))) flags |= GUC_CTL_ENABLE_PSMI_LOGGING; + if (xe_guc_using_main_gamctrl_queues(guc)) + flags |= GUC_CTL_MAIN_GAMCTRL_QUEUES; + return flags; } @@ -1255,8 +1258,13 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc) int xe_guc_upload(struct xe_guc *guc) { + struct xe_gt *gt = guc_to_gt(guc); + xe_guc_ads_populate(&guc->ads); + if (xe_guc_using_main_gamctrl_queues(guc)) + xe_mmio_write32(&gt->mmio, MAIN_GAMCTRL_MODE, MAIN_GAMCTRL_QUEUE_SELECT); + return __xe_guc_upload(guc); } @@ -1657,6 +1665,25 @@ void xe_guc_declare_wedged(struct xe_guc *guc) xe_guc_submit_wedge(guc); } +/** + * xe_guc_using_main_gamctrl_queues() - Detect which reporting queues to use. + * @guc: The GuC object + * + * For Xe3p and beyond, we want to program the hardware to use the + * "Main GAMCTRL queue" rather than the legacy queue before we upload + * the GuC firmware. This will allow the GuC to use a new set of + * registers for pagefault handling and avoid some unnecessary + * complications with MCR register range handling. + * + * Return: true if can use new main gamctrl queues.
+ */ +bool xe_guc_using_main_gamctrl_queues(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + + return GT_VER(gt) >= 35; +} + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_guc_g2g_test.c" #endif diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 1cca05967e62..e2d4c5f44ae3 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -52,6 +52,7 @@ void xe_guc_stop_prepare(struct xe_guc *guc); void xe_guc_stop(struct xe_guc *guc); int xe_guc_start(struct xe_guc *guc); void xe_guc_declare_wedged(struct xe_guc *guc); +bool xe_guc_using_main_gamctrl_queues(struct xe_guc *guc); #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, u32 len); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 22ac2a8b74c8..bcb85a1bf26d 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -820,16 +820,20 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) static void guc_um_init_params(struct xe_guc_ads *ads) { u32 um_queue_offset = guc_ads_um_queues_offset(ads); + struct xe_guc *guc = ads_to_guc(ads); u64 base_dpa; u32 base_ggtt; + bool with_dpa; int i; + with_dpa = !xe_guc_using_main_gamctrl_queues(guc); + base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset; base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset; for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) { ads_blob_write(ads, um_init_params.queue_params[i].base_dpa, - base_dpa + (i * GUC_UM_QUEUE_SIZE)); + with_dpa ? (base_dpa + (i * GUC_UM_QUEUE_SIZE)) : 0); ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address, base_ggtt + (i * GUC_UM_QUEUE_SIZE)); ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes, diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 50c4c2406132..c90dd266e9cf 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -113,6 +113,7 @@ struct guc_update_exec_queue_policy { #define GUC_CTL_ENABLE_SLPC BIT(2) #define GUC_CTL_ENABLE_LITE_RESTORE BIT(4) #define GUC_CTL_ENABLE_PSMI_LOGGING BIT(7) +#define GUC_CTL_MAIN_GAMCTRL_QUEUES BIT(9) #define GUC_CTL_DISABLE_SCHEDULER BIT(14) #define GUC_CTL_DEBUG 3 -- cgit v1.2.3 From e681ddca301931dbea99fcb4bc5a5dacdb7fad06 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Sun, 19 Oct 2025 19:05:47 -0700 Subject: drm/xe/xe3p_lpm: Add special check in Media GT for Main GAMCTRL For Xe3p arch some subunits of an IP may be different. The GMD_ID register returns the Xe3p arch and dedicates the reserved field to mark possible subunit differences. Generally this is an under-the-hood implementation detail that drivers don't need to worry about, but the new Main_GAMCTRL may be enabled or not depending on those. Those reserved bits are described for Xe3p as: "If Zero, No special case to be handled. If Non-Zero, special case to be handled by Software agent.". That special case is defined per Arch. So if media version is 35, also check the additional reserved bits. To avoid confusion with the usual meaning of "reserved", define them as GMD_ID_SUBIP_FLAG_MASK. 
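As a worked example (hypothetical readout): a media GMD_ID value of 0x08c00041 decodes as arch (bits 31:22) = 35, release (bits 21:14) = 0, subip flags (bits 13:6) = 1 and revid (bits 5:0) = 1, i.e. an Xe3p media IP whose subunits are flagged as the special case, so the legacy queues must be used; a subip flags field of 0 would mean no special case and the Main GAMCTRL queues could be selected.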
Bspec: 74201 Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20251019-xe3p-gamctrl-v1-2-ad66d3c1908f@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 6 ++++++ drivers/gpu/drm/xe/xe_guc.c | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 1876f2957c6d..3545e0be06da 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -37,6 +37,12 @@ #define GMD_ID XE_REG(0xd8c) #define GMD_ID_ARCH_MASK REG_GENMASK(31, 22) #define GMD_ID_RELEASE_MASK REG_GENMASK(21, 14) +/* + * Spec defines these bits as "Reserved", but then make them assume some + * meaning that depends on the ARCH. To avoid any confusion, call them + * SUBIP_FLAG_MASK. + */ +#define GMD_ID_SUBIP_FLAG_MASK REG_GENMASK(13, 6) #define GMD_ID_REVID REG_GENMASK(5, 0) #define FORCEWAKE_ACK_GSC XE_REG(0xdf8) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 37e3735f34e6..ecc3e091b89e 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1681,6 +1681,25 @@ bool xe_guc_using_main_gamctrl_queues(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); + /* + * For Xe3p media gt (35), the GuC and the CS subunits may be still Xe3 + * that lacks the Main GAMCTRL support. Reserved bits from the GMD_ID + * inform the IP version of the subunits. + */ + if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) == 35) { + u32 val = xe_mmio_read32(&gt->mmio, GMD_ID); + u32 subip = REG_FIELD_GET(GMD_ID_SUBIP_FLAG_MASK, val); + + if (!subip) + return true; + + xe_gt_WARN(gt, subip != 1, + "GMD_ID has unknown value in the SUBIP_FLAG field - 0x%x\n", + subip); + + return false; + } + return GT_VER(gt) >= 35; } -- cgit v1.2.3 From 088267723a841a90a302ffb2556133ae2126ae00 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 16 Oct 2025 09:52:30 +0200 Subject: drm/xe: Extract xe_bo_is_visible_vram This will make it possible to call from xe_display code. Reviewed-by: Francois Dugast Link: https://lore.kernel.org/r/20251016075701.379023-2-jfalempe@redhat.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_bo.c | 19 ++++++++++++++++++- drivers/gpu/drm/xe/xe_bo.h | 1 + 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index a1302afb31b9..66ed888bba07 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -609,6 +609,23 @@ static bool xe_ttm_resource_visible(struct ttm_resource *mem) return vres->used_visible_size == mem->size; } +/** + * xe_bo_is_visible_vram - check if BO is placed entirely in visible VRAM. + * @bo: The BO + * + * This function checks whether a given BO resides entirely in memory visible from the CPU + * + * Returns: true if the BO is entirely visible, false otherwise.
+ * + */ +bool xe_bo_is_visible_vram(struct xe_bo *bo) +{ + if (drm_WARN_ON(bo->ttm.base.dev, !xe_bo_is_vram(bo))) + return false; + + return xe_ttm_resource_visible(bo->ttm.resource); +} + static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { @@ -1634,7 +1651,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, if (!mem_type_is_vram(ttm_bo->resource->mem_type)) return -EIO; - if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) { + if (!xe_bo_is_visible_vram(bo) || len >= SZ_16K) { struct xe_migrate *migrate = mem_type_to_migrate(xe, ttm_bo->resource->mem_type); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 353d607d301d..911d5b90461a 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -274,6 +274,7 @@ int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size); bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); +bool xe_bo_is_visible_vram(struct xe_bo *bo); bool xe_bo_is_stolen(struct xe_bo *bo); bool xe_bo_is_stolen_devmem(struct xe_bo *bo); bool xe_bo_is_vm_bound(struct xe_bo *bo); -- cgit v1.2.3 From beaae3c0a4fecbafca1562373cc070207ad10b76 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 16 Oct 2025 09:52:31 +0200 Subject: drm/xe/display: Make panic support work on vram. Add a special path for VRAM using xe_res iterators to ensure a panic screen is shown on VRAM as well. Acked-by: Jocelyn Falempe Reviewed-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20251016075701.379023-3-jfalempe@redhat.com Signed-off-by: Maarten Lankhorst Acked-by: Jani Nikula --- drivers/gpu/drm/xe/display/xe_panic.c | 50 +++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_panic.c b/drivers/gpu/drm/xe/display/xe_panic.c index f32b23338331..df663286092a 100644 --- a/drivers/gpu/drm/xe/display/xe_panic.c +++ b/drivers/gpu/drm/xe/display/xe_panic.c @@ -8,20 +8,23 @@ #include "intel_fb.h" #include "intel_panic.h" #include "xe_bo.h" +#include "xe_res_cursor.h" struct intel_panic { - struct page **pages; + struct xe_res_cursor res; + struct iosys_map vmap; + int page; - void *vaddr; }; static void xe_panic_kunmap(struct intel_panic *panic) { - if (panic->vaddr) { - drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); - kunmap_local(panic->vaddr); - panic->vaddr = NULL; + if (!panic->vmap.is_iomem && iosys_map_is_set(&panic->vmap)) { + drm_clflush_virt_range(panic->vmap.vaddr, PAGE_SIZE); + kunmap_local(panic->vmap.vaddr); } + iosys_map_clear(&panic->vmap); + panic->page = -1; } /* @@ -46,15 +49,29 @@ static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int new_page = offset >> PAGE_SHIFT; offset = offset % PAGE_SIZE; if (new_page != panic->page) { - xe_panic_kunmap(panic); + if (xe_bo_is_vram(bo)) { + /* Display is always mapped on root tile */ + struct xe_vram_region *vram = xe_bo_device(bo)->mem.vram; + + if (panic->page < 0 || new_page < panic->page) { + xe_res_first(bo->ttm.resource, new_page * PAGE_SIZE, + bo->ttm.base.size - new_page * PAGE_SIZE, &panic->res); + } else { + xe_res_next(&panic->res, PAGE_SIZE * (new_page - panic->page)); + } + iosys_map_set_vaddr_iomem(&panic->vmap, + vram->mapping + panic->res.start); + } else { + xe_panic_kunmap(panic); + iosys_map_set_vaddr(&panic->vmap, + ttm_bo_kmap_try_from_panic(&bo->ttm, + new_page)); + } panic->page = new_page; - panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm, - panic->page); - } - if (panic->vaddr) 
{ - u32 *pix = panic->vaddr + offset; *pix = color; } + + if (iosys_map_is_set(&panic->vmap)) + iosys_map_wr(&panic->vmap, offset, u32, color); } struct intel_panic *intel_panic_alloc(void) { @@ -68,6 +85,12 @@ struct intel_panic *intel_panic_alloc(void) int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb) { + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base)); + + if (xe_bo_is_vram(bo) && !xe_bo_is_visible_vram(bo)) + return -ENODEV; + panic->page = -1; sb->set_pixel = xe_panic_page_set_pixel; return 0; @@ -76,5 +99,4 @@ int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb) void intel_panic_finish(struct intel_panic *panic) { xe_panic_kunmap(panic); - panic->page = -1; } -- cgit v1.2.3 From 9a28e446514557fbaa24ba3d8fb7eb052a4f9fb0 Mon Sep 17 00:00:00 2001 From: Michał Grzelak Date: Thu, 2 Oct 2025 11:56:40 +0200 Subject: drm/i915/display: Add no_psr_reason to PSR debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Debugfs currently gives no reason why PSR has been disabled. Without this information, IGT tests cannot decide whether PSR has been disabled on purpose or whether it was abnormal behavior. Because of this, the status of the test cannot be decided correctly. Add no_psr_reason field into struct intel_psr. Add no_psr_reason into struct intel_crtc_state to prevent staying out of sync when _psr_compute_config is not using computed state. Write the reason, e.g. PSR setup timing not met, into the proper PSR debugfs file. Update the reason from new_crtc_state in intel_psr_pre_plane_update. Extend the format of the debugfs file to include the reason when it is non-NULL. Clear the reason when PSR is activated. Refactor intel_psr_post_plane_update to use no_psr_reason along with keep_disabled. Changelog: v10->v11 - update the reason from new_crtc_state instead of old_crtc_state [Jouni] v9->v10 - log reason into intel_crtc_state instead of intel_dp->psr [Jouni] - remove clearing no_psr_reason in intel_psr_compute_config [Jouni] - change update of no_psr_reason into more readable form [Jouni] v8->v9 - add no_psr_reason into struct intel_crtc_state [Jouni] - update the reason in intel_psr_pre_plane_update [Jouni] - elaborate on motivation of the feature in commit message [Sebastian] - copy changelog to commit message [Sebastian] v7->v8 - reset no_psr_reason at the begin of intel_psr_compute_config [Jouni] - restore keep_disabled [Jouni] - drop setting "Sink not reliable" [Jouni] - add WA number [Jouni] - if non-NULL, write no_psr_reason after PSR mode [Jouni] v6->v7 - rebase onto new drm-tip v5->v6 - move setting no_psr_reason to intel_psr_post_plane_update [Jouni] - remove setting no_psr_reason when disabling PSR is temporary [Jouni] v4->v5 - fix indentation errors from checkpatch v3->v4 - change format of logging workaround #1136 v2->v3 - change reason description to be more specific [Mika] - remove BSpecs number & WA number from being written into no_psr_reason - replace spaces with tabs v1->v2 - set other reasons than "PSR setup timing not met" - clear no_psr_reason when activating PSR.
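With this change, the PSR debugfs status would read roughly as follows when PSR stays disabled (hypothetical output; the exact mode/status strings depend on intel_psr_print_mode()):

	PSR mode: PSR1
		PSR setup timing not met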
Signed-off-by: Michał Grzelak Reviewed-by: Sebastian Brzezinka Reviewed-by: Jouni Högander Signed-off-by: Mika Kahola Link: https://lore.kernel.org/r/20251002095640.1347990-2-michal.grzelak@intel.com --- drivers/gpu/drm/i915/display/intel_display_types.h | 3 +++ drivers/gpu/drm/i915/display/intel_psr.c | 22 ++++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 1e0cd81c9474..5ae66b7444b6 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1144,6 +1144,7 @@ struct intel_crtc_state { u32 dc3co_exitline; u16 su_y_granularity; u8 active_non_psr_pipes; + const char *no_psr_reason; /* * Frequency the dpll for the port should run at. Differs from the @@ -1728,6 +1729,8 @@ struct intel_psr { bool pkg_c_latency_used; u8 active_non_psr_pipes; + + const char *no_psr_reason; }; struct intel_dp { diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 9c7364d6d69d..cfc8b04f98fa 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1708,6 +1708,7 @@ static bool _psr_compute_config(struct intel_dp *intel_dp, if (entry_setup_frames >= 0) { intel_dp->psr.entry_setup_frames = entry_setup_frames; } else { + crtc_state->no_psr_reason = "PSR setup timing not met"; drm_dbg_kms(display->drm, "PSR condition failed: PSR setup timing not met\n"); return false; @@ -1949,6 +1950,7 @@ static void intel_psr_activate(struct intel_dp *intel_dp) hsw_activate_psr1(intel_dp); intel_dp->psr.active = true; + intel_dp->psr.no_psr_reason = NULL; } /* @@ -3030,6 +3032,9 @@ void intel_psr_pre_plane_update(struct intel_atomic_state *state, mutex_lock(&psr->lock); + if (!new_crtc_state->has_psr) + psr->no_psr_reason = new_crtc_state->no_psr_reason; + if (psr->enabled) { /* * Reasons to disable: @@ -3094,12 +3099,19 @@ void intel_psr_post_plane_update(struct intel_atomic_state *state, drm_WARN_ON(display->drm, psr->enabled && !crtc_state->active_planes); - keep_disabled |= psr->sink_not_reliable; - keep_disabled |= !crtc_state->active_planes; + if (psr->sink_not_reliable) + keep_disabled = true; + + if (!crtc_state->active_planes) { + psr->no_psr_reason = "All planes inactive"; + keep_disabled = true; + } /* Display WA #1136: skl, bxt */ - keep_disabled |= DISPLAY_VER(display) < 11 && - crtc_state->wm_level_disabled; + if (DISPLAY_VER(display) < 11 && crtc_state->wm_level_disabled) { + psr->no_psr_reason = "Workaround #1136 for skl, bxt"; + keep_disabled = true; + } if (!psr->enabled && !keep_disabled) intel_psr_enable_locked(intel_dp, crtc_state); @@ -4148,6 +4160,8 @@ static void intel_psr_print_mode(struct intel_dp *intel_dp, region_et = ""; seq_printf(m, "PSR mode: %s%s%s\n", mode, status, region_et); + if (psr->no_psr_reason) + seq_printf(m, " %s\n", psr->no_psr_reason); } static int intel_psr_status(struct seq_file *m, struct intel_dp *intel_dp) -- cgit v1.2.3 From 2a407bc3aba6c57f3f4a88c5c2746411fad681bb Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 16 Oct 2025 22:55:07 +0000 Subject: drm/xe: Fix stolen size check to allow equal WOPCM size On some platforms without dedicated stolen memory, the calculated stolen size may be exactly equal to the WOPCM size. The current assertion incorrectly requires it to be strictly greater, causing a false failure. Relax the check to allow equality. 
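For illustration, a minimal sketch of the boundary case the old assert
rejected (the sizes below are hypothetical, chosen only to make the
arithmetic visible):

  #include <linux/sizes.h>

  static u64 stolen_after_wopcm_sketch(void)
  {
  	/* hypothetical layout on a platform without dedicated stolen memory */
  	u64 tile_size   = SZ_8G;
  	u64 stolen_base = SZ_8G - SZ_8M;	/* assumed: carve-out is all WOPCM */
  	u64 wopcm_size  = SZ_8M;
  	u64 stolen_size = tile_size - stolen_base;	/* == SZ_8M */

  	/* old: stolen_size >  wopcm_size -> false assert failure */
  	/* new: stolen_size >= wopcm_size -> passes               */
  	return stolen_size - wopcm_size;	/* 0 usable bytes, which is legal */
  }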
Fixes: 65369b8e2961 ("drm/xe: Change return type of detect_bar2_dgfx() from s64 to u64") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6359 Cc: Matthew Auld Cc: Rodrigo Vivi Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20251016225506.2256127-2-shuicheng.lin@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index e368b2a36bac..1bddecfb723a 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -106,7 +106,7 @@ static u64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) stolen_size = tile_size - mgr->stolen_base; - xe_assert(xe, stolen_size > wopcm_size); + xe_assert(xe, stolen_size >= wopcm_size); stolen_size -= wopcm_size; /* Verify usage fits in the actual resource available */ -- cgit v1.2.3 From 5823d37a79a243b15e9c46dca6ffb935d78edd2a Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Mon, 20 Oct 2025 20:45:56 -0300 Subject: drm/xe/tests/pci: Convert GT count check to general device check We already have check_graphics_ip() and check_media_ip() as general functions to check the IP descriptors. The check in check_platform_gt_count() is simple enough such that we can convert the function to a more general device check. In an upcoming change, we will also add some checks for other members of struct xe_device_desc. As such, rename check_platform_gt_count() to check_platform_desc(). While at it, use inline (unsigned int) casting of max_gt_per_tile to keep checks for each member localized; and use KUNIT_EXPECT_*() variants of the macros to allow multiple issues to be reported. 
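For context, the practical difference between the two macro families in a
minimal sketch (the local variables are made up for illustration):
KUNIT_ASSERT_*() aborts the test case at the first failure, while
KUNIT_EXPECT_*() records the failure and keeps running, so a single
parameterized run can report every bad descriptor member at once.

  #include <kunit/test.h>

  static void desc_sketch(struct kunit *test)
  {
  	/* hypothetical descriptor with two independent problems */
  	unsigned int max_gt_per_tile = 0, dma_mask_size = 0;

  	/* an ASSERT here would stop the test and hide the second issue */
  	KUNIT_EXPECT_GT(test, max_gt_per_tile, 0u);

  	/* with EXPECT, this failure is reported in the same run */
  	KUNIT_EXPECT_GT(test, dma_mask_size, 0u);
  }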
Reviewed-by: Michal Wajdeczko Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251020-xe-kunit-dma_mask_size-va_bits-vm_max_level-v2-1-27b03971bc7e@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/tests/xe_pci_test.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 37b344df2dc3..4ba2fb93c9de 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -44,21 +44,20 @@ static void check_media_ip(struct kunit *test) KUNIT_ASSERT_EQ(test, mask, 0); } -static void check_platform_gt_count(struct kunit *test) +static void check_platform_desc(struct kunit *test) { const struct pci_device_id *pci = test->param_value; const struct xe_device_desc *desc = (const struct xe_device_desc *)pci->driver_data; - int max_gt = desc->max_gt_per_tile; - KUNIT_ASSERT_GT(test, max_gt, 0); - KUNIT_ASSERT_LE(test, max_gt, XE_MAX_GT_PER_TILE); + KUNIT_EXPECT_GT(test, (unsigned int)desc->max_gt_per_tile, 0); + KUNIT_EXPECT_LE(test, (unsigned int)desc->max_gt_per_tile, XE_MAX_GT_PER_TILE); } static struct kunit_case xe_pci_tests[] = { KUNIT_CASE_PARAM(check_graphics_ip, xe_pci_graphics_ip_gen_param), KUNIT_CASE_PARAM(check_media_ip, xe_pci_media_ip_gen_param), - KUNIT_CASE_PARAM(check_platform_gt_count, xe_pci_id_gen_param), + KUNIT_CASE_PARAM(check_platform_desc, xe_pci_id_gen_param), {} }; -- cgit v1.2.3 From 9d26a9beaea7d14789a686cc25f84aa4f589c190 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Mon, 20 Oct 2025 20:45:57 -0300 Subject: drm/xe/tests/pci: Check dma_mask_size, va_bits and vm_max_level Members dma_mask_size, va_bits and vm_max_level of struct xe_device_desc are all expected to be non-zero. Add checks for that in check_platform_desc(). Suggested-by: Michal Wajdeczko Reviewed-by: Michal Wajdeczko Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251020-xe-kunit-dma_mask_size-va_bits-vm_max_level-v2-2-27b03971bc7e@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/tests/xe_pci_test.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 4ba2fb93c9de..4d10a7e2b570 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -50,8 +50,15 @@ static void check_platform_desc(struct kunit *test) const struct xe_device_desc *desc = (const struct xe_device_desc *)pci->driver_data; + KUNIT_EXPECT_GT(test, desc->dma_mask_size, 0); + KUNIT_EXPECT_GT(test, (unsigned int)desc->max_gt_per_tile, 0); KUNIT_EXPECT_LE(test, (unsigned int)desc->max_gt_per_tile, XE_MAX_GT_PER_TILE); + + KUNIT_EXPECT_GT(test, desc->va_bits, 0); + KUNIT_EXPECT_LE(test, desc->va_bits, 64); + + KUNIT_EXPECT_GT(test, desc->vm_max_level, 0); } static struct kunit_case xe_pci_tests[] = { -- cgit v1.2.3 From c94a7702d343f22eec958e82bebb930d9aeeb4a1 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Wed, 22 Oct 2025 00:48:14 +0200 Subject: drm/xe/vf: Revert logic of vf.migration.enabled Convert `enabled` property into `disabled`. 
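The payoff of the inversion, in a rough sketch with simplified names: a
kzalloc()'ed device structure starts out all zero, so a `disabled` flag
already means "supported" by default and only the failure paths need to
write it:

  struct vf_migration_sketch {
  	bool disabled;	/* zero-initialized storage -> migration supported */
  };

  static void init_early_sketch(struct vf_migration_sketch *m, bool prereqs_ok)
  {
  	/* only the failure paths touch the flag ... */
  	if (!prereqs_ok)
  		m->disabled = true;
  	/* ... the old "migration.enabled = true" success write disappears */
  }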
Reviewed-by: Michal Wajdeczko
Cc: Michal Wajdeczko
Signed-off-by: Tomasz Lis
Suggested-by: Michal Wajdeczko
Signed-off-by: Michal Wajdeczko
Link: https://lore.kernel.org/r/20251021224817.1593817-2-tomasz.lis@intel.com
---
 drivers/gpu/drm/xe/xe_sriov_vf.c       | 7 ++-----
 drivers/gpu/drm/xe/xe_sriov_vf_types.h | 6 +++---
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index 911d5720917b..0d8135f3927c 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -130,7 +130,7 @@ bool xe_sriov_vf_migration_supported(struct xe_device *xe)
 {
 	xe_assert(xe, IS_SRIOV_VF(xe));

-	return xe->sriov.vf.migration.enabled;
+	return !xe->sriov.vf.migration.disabled;
 }

 static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...)
@@ -146,7 +146,7 @@ static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...)
 	xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf);
 	va_end(va_args);

-	xe->sriov.vf.migration.enabled = false;
+	xe->sriov.vf.migration.disabled = true;
 }

 static void vf_migration_init_early(struct xe_device *xe)
@@ -172,9 +172,6 @@ static void vf_migration_init_early(struct xe_device *xe)
 			"CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
 			guc_version.major, guc_version.minor);
 	}
-
-	xe->sriov.vf.migration.enabled = true;
-	xe_sriov_dbg(xe, "migration support enabled\n");
 }

 /**
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
index 6a0fd0f5463e..d5f72d667817 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
@@ -34,10 +34,10 @@ struct xe_device_vf {
 	/** @migration: VF Migration state data */
 	struct {
 		/**
-		 * @migration.enabled: flag indicating if migration support
-		 * was enabled or not due to missing prerequisites
+		 * @migration.disabled: flag indicating if migration support
+		 * was turned off due to missing prerequisites
 		 */
-		bool enabled;
+		bool disabled;
 	} migration;

 	/** @ccs: VF CCS state data */
-- cgit v1.2.3


From c88634339757736b86ed45c08bae74b4f47ed9d9 Mon Sep 17 00:00:00 2001
From: Tomasz Lis
Date: Wed, 22 Oct 2025 00:48:15 +0200
Subject: drm/xe/vf: Fix GuC FW check for VF migration support

The check of whether the GuC ABI version meets the requirements must be
performed after said version has been received from GuC. Doing it in
the wrong order was triggering a warning:

  xe 0000:00:02.1: [drm] Assertion `gt->sriov.vf.guc_version.major` failed!

With this change, part of the VF migration support check is dislodged
and moved to after the GuC handshake.
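The intended ordering, sketched with abbreviated call sites (the exact
probe flow here is an assumption based on this series):

  /* hypothetical probe-time ordering */
  vf_migration_init_early(xe);	/* early: platform checks only, no GuC data yet */
  /* ... later: the GuC handshake fills in gt->sriov.vf.guc_version ... */
  xe_sriov_vf_ccs_init(xe);	/* now the ABI >= 1.23 check reads a valid version */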
Reviewed-by: Michal Wajdeczko Cc: Michal Wajdeczko Tested-by: Matthew Brost #v1 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6349 Fixes: ff1d2b5e3d28 ("drm/xe: Read VF GMD_ID with a specifically-allocated dummy GT") Signed-off-by: Tomasz Lis Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20251021224817.1593817-3-tomasz.lis@intel.com --- drivers/gpu/drm/xe/xe_sriov_vf.c | 33 ++++++++++---------------- drivers/gpu/drm/xe/xe_sriov_vf.h | 1 + drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 46 ++++++++++++++++++++++++++++++++++-- 3 files changed, 57 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index 0d8135f3927c..13d6c094ae8f 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -133,7 +133,12 @@ bool xe_sriov_vf_migration_supported(struct xe_device *xe) return !xe->sriov.vf.migration.disabled; } -static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...) +/** + * xe_sriov_vf_migration_disable - Turn off VF migration with given log message. + * @xe: the &xe_device instance. + * @fmt: format string for the log message, to be combined with following VAs. + */ +void xe_sriov_vf_migration_disable(struct xe_device *xe, const char *fmt, ...) { struct va_format vaf; va_list va_args; @@ -156,22 +161,13 @@ static void vf_migration_init_early(struct xe_device *xe) * supported at production quality. */ if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) - return vf_disable_migration(xe, - "experimental feature not available on production builds"); + return xe_sriov_vf_migration_disable(xe, + "experimental feature not available on production builds"); if (GRAPHICS_VER(xe) < 20) - return vf_disable_migration(xe, "requires gfx version >= 20, but only %u found", - GRAPHICS_VER(xe)); - - if (!IS_DGFX(xe)) { - struct xe_uc_fw_version guc_version; - - xe_gt_sriov_vf_guc_versions(xe_device_get_gt(xe, 0), NULL, &guc_version); - if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) - return vf_disable_migration(xe, - "CCS migration requires GuC ABI >= 1.23 but only %u.%u found", - guc_version.major, guc_version.minor); - } + return xe_sriov_vf_migration_disable(xe, + "requires gfx version >= 20, but only %u found", + GRAPHICS_VER(xe)); } /** @@ -193,12 +189,7 @@ void xe_sriov_vf_init_early(struct xe_device *xe) */ int xe_sriov_vf_init_late(struct xe_device *xe) { - int err = 0; - - if (xe_sriov_vf_migration_supported(xe)) - err = xe_sriov_vf_ccs_init(xe); - - return err; + return xe_sriov_vf_ccs_init(xe); } static int sa_info_vf_ccs(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h index 4df95266b261..e967d4166a43 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf.h @@ -14,6 +14,7 @@ struct xe_device; void xe_sriov_vf_init_early(struct xe_device *xe); int xe_sriov_vf_init_late(struct xe_device *xe); bool xe_sriov_vf_migration_supported(struct xe_device *xe); +void xe_sriov_vf_migration_disable(struct xe_device *xe, const char *fmt, ...); void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root); #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c index 790249801364..797a4b866226 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c @@ -10,6 +10,8 @@ #include "xe_device.h" #include "xe_exec_queue.h" #include "xe_exec_queue_types.h" +#include "xe_gt_sriov_vf.h" +#include "xe_guc.h" 
#include "xe_guc_submit.h" #include "xe_lrc.h" #include "xe_migrate.h" @@ -260,6 +262,45 @@ int xe_sriov_vf_ccs_register_context(struct xe_device *xe) return err; } +/* + * Whether GuC requires CCS copy BBs for VF migration. + * @xe: the &xe_device instance. + * + * Only selected platforms require VF KMD to maintain CCS copy BBs and linked LRCAs. + * + * Return: true if VF driver must participate in the CCS migration, false otherwise. + */ +static bool vf_migration_ccs_bb_needed(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_VF(xe)); + + return !IS_DGFX(xe) && xe_device_has_flat_ccs(xe); +} + +/* + * Check for disable migration due to no CCS BBs support in GuC FW. + * @xe: the &xe_device instance. + * + * Performs late disable of VF migration feature in case GuC FW cannot support it. + * + * Returns: True if VF migration with CCS BBs is supported, false otherwise. + */ +static bool vf_migration_ccs_bb_support_check(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_uc_fw_version guc_version; + + xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version); + if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) { + xe_sriov_vf_migration_disable(xe, + "CCS migration requires GuC ABI >= 1.23 but only %u.%u found", + guc_version.major, guc_version.minor); + return false; + } + + return true; +} + static void xe_sriov_vf_ccs_fini(void *arg) { struct xe_sriov_vf_ccs_ctx *ctx = arg; @@ -292,9 +333,10 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe) int err; xe_assert(xe, IS_SRIOV_VF(xe)); - xe_assert(xe, xe_sriov_vf_migration_supported(xe)); - if (IS_DGFX(xe) || !xe_device_has_flat_ccs(xe)) + if (!xe_sriov_vf_migration_supported(xe) || + !vf_migration_ccs_bb_needed(xe) || + !vf_migration_ccs_bb_support_check(xe)) return 0; for_each_ccs_rw_ctx(ctx_id) { -- cgit v1.2.3 From 9a940bb52dcb359f639536bed977d837e323a593 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Wed, 22 Oct 2025 00:48:16 +0200 Subject: drm/xe: Assert that VF will never use fixed placement of BOs Most BOs do not care at which offset they will be accessed within GGTT or PPGTT. The few which do care, should be only created on PF, and mapped within GGTT. On VFs, mapping at fixed offset is prohibited, as each VF is granted access to a range of GGTT address space. Since fixed addresses of GGTT mapping can only be used on PF, add an assert which makes sure no attempt of fixed placement will happen for a driver probed on a VF. 
Reviewed-by: Michal Wajdeczko
Cc: Michal Wajdeczko
Signed-off-by: Tomasz Lis
Signed-off-by: Michal Wajdeczko
Link: https://lore.kernel.org/r/20251021224817.1593817-4-tomasz.lis@intel.com
---
 drivers/gpu/drm/xe/xe_bo.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 66ed888bba07..ad0da473ee65 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -2275,6 +2275,12 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
 	struct ttm_place *place = bo->placements;
 	u32 vram_flag, vram_stolen_flags;

+	/*
+	 * to allow fixed placement in GGTT of a VF, post-migration fixups would have to
+	 * include selecting a new fixed offset and shifting the page ranges for it
+	 */
+	xe_assert(xe, !IS_SRIOV_VF(xe) || !(bo->flags & XE_BO_FLAG_GGTT));
+
 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
 		return -EINVAL;

-- cgit v1.2.3


From 13fb4b39e95d7ed62a598881896803d686fdb01d Mon Sep 17 00:00:00 2001
From: Tomasz Lis
Date: Wed, 22 Oct 2025 00:48:17 +0200
Subject: drm/xe/vf: Do not disable VF migration on ATS-M

Our current support for VF migration depends on the availability of
MEMIRQ rather than on graphics version 20 specifically. Relax our early
migration support checks to also allow the use of some older platforms
like ATS-M for experiments and testing.

Do not allow ADL, as supporting VF migration through MMIO interrupts
would require additional changes in order to achieve reliability.

Reviewed-by: Michal Wajdeczko
Cc: Michal Wajdeczko
Signed-off-by: Tomasz Lis
Signed-off-by: Michal Wajdeczko
Link: https://lore.kernel.org/r/20251021224817.1593817-5-tomasz.lis@intel.com
---
 drivers/gpu/drm/xe/xe_sriov_vf.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index 13d6c094ae8f..39c829daa97c 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -164,10 +164,9 @@ static void vf_migration_init_early(struct xe_device *xe)
 		return xe_sriov_vf_migration_disable(xe,
				"experimental feature not available on production builds");

-	if (GRAPHICS_VER(xe) < 20)
-		return xe_sriov_vf_migration_disable(xe,
-				"requires gfx version >= 20, but only %u found",
-				GRAPHICS_VER(xe));
+	if (!xe_device_has_memirq(xe))
+		return xe_sriov_vf_migration_disable(xe, "requires memory-based IRQ support");
+
 }

 /**
-- cgit v1.2.3


From 1f1314e8e71385bae319e43082b798c11f6648bc Mon Sep 17 00:00:00 2001
From: Matthew Brost
Date: Tue, 21 Oct 2025 17:55:36 -0700
Subject: drm/xe: Check return value of GGTT workqueue allocation

Workqueue allocation can fail, so check the return value of the GGTT
workqueue allocation and fail driver initialization if the allocation
fails.
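One detail worth spelling out, as a sketch rather than the exact driver
code: alloc_workqueue() signals failure by returning NULL, not an
ERR_PTR(), so the check and the error code look like this:

  struct workqueue_struct *wq;

  wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM);
  if (!wq)		/* NULL on failure, not ERR_PTR() */
  	return -ENOMEM;	/* propagate so the probe fails cleanly */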
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20251022005538.828980-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_ggtt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 40680f0c49a1..20d226d90c50 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -312,6 +312,9 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) ggtt->pt_ops = &xelp_pt_ops; ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); + if (!ggtt->wq) + return -ENOMEM; + __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe)); err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); -- cgit v1.2.3 From 480b358e7d8ef69fd8f1b0cad6e07c7d70a36ee4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 21 Oct 2025 17:55:37 -0700 Subject: drm/xe: Do not wake device during a GT reset Waking the device during a GT reset can lead to unintended memory allocation, which is not allowed since GT resets occur in the reclaim path. Prevent this by holding a PM reference while a reset is in flight. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20251022005538.828980-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index d8e94fb8b9bd..89808b33d0a8 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -818,17 +818,19 @@ static int gt_reset(struct xe_gt *gt) unsigned int fw_ref; int err; - if (xe_device_wedged(gt_to_xe(gt))) - return -ECANCELED; + if (xe_device_wedged(gt_to_xe(gt))) { + err = -ECANCELED; + goto err_pm_put; + } /* We only support GT resets with GuC submission */ - if (!xe_device_uc_enabled(gt_to_xe(gt))) - return -ENODEV; + if (!xe_device_uc_enabled(gt_to_xe(gt))) { + err = -ENODEV; + goto err_pm_put; + } xe_gt_info(gt, "reset started\n"); - xe_pm_runtime_get(gt_to_xe(gt)); - if (xe_fault_inject_gt_reset()) { err = -ECANCELED; goto err_fail; @@ -875,6 +877,7 @@ err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); xe_device_declare_wedged(gt_to_xe(gt)); +err_pm_put: xe_pm_runtime_put(gt_to_xe(gt)); return err; @@ -896,7 +899,9 @@ void xe_gt_reset_async(struct xe_gt *gt) return; xe_gt_info(gt, "reset queued\n"); - queue_work(gt->ordered_wq, >->reset.worker); + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + if (!queue_work(gt->ordered_wq, >->reset.worker)) + xe_pm_runtime_put(gt_to_xe(gt)); } void xe_gt_suspend_prepare(struct xe_gt *gt) -- cgit v1.2.3 From f6c1345a851bbe51d49fc7bc2653da92bb8896f5 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 21 Oct 2025 17:55:38 -0700 Subject: drm/xe: Avoid PM wake reference during VF migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Virtual Functions (VFs) do not use runtime PM. Avoid taking PM references during VF migration, as lockdep may get confused—VF migration occurs in the reclaim path, and waking a PM reference can trigger memory allocation warnings. 
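Both this patch and the GT reset fix above follow the same rule of
thumb: never take a waking runtime PM reference from a reclaim-tainted
path. Where a reference is still needed (the reset worker), it is taken
without a wakeup and handed to the worker; where it is not (this VF
recovery path), it is dropped entirely. A hedged sketch of the hand-over
variant:

  /* queue side: refcount only, never a wakeup (so no allocations) */
  xe_pm_runtime_get_noresume(gt_to_xe(gt));
  if (!queue_work(gt->ordered_wq, &gt->reset.worker))
  	xe_pm_runtime_put(gt_to_xe(gt));	/* already queued: drop our ref */

  /* worker side: every exit path ends in xe_pm_runtime_put() */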
Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20251022005538.828980-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 46518e629ba3..d0b102ab6ce8 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -31,7 +31,6 @@ #include "xe_lrc.h" #include "xe_memirq.h" #include "xe_mmio.h" -#include "xe_pm.h" #include "xe_sriov.h" #include "xe_sriov_vf.h" #include "xe_sriov_vf_ccs.h" @@ -1218,7 +1217,6 @@ static void vf_post_migration_recovery(struct xe_gt *gt) xe_gt_sriov_dbg(gt, "migration recovery in progress\n"); - xe_pm_runtime_get(xe); retry = vf_post_migration_shutdown(gt); if (retry) goto queue; @@ -1241,12 +1239,10 @@ static void vf_post_migration_recovery(struct xe_gt *gt) vf_post_migration_kickstart(gt); - xe_pm_runtime_put(xe); xe_gt_sriov_notice(gt, "migration recovery ended\n"); return; fail: vf_post_migration_abort(gt); - xe_pm_runtime_put(xe); xe_gt_sriov_err(gt, "migration recovery failed (%pe)\n", ERR_PTR(err)); xe_device_declare_wedged(xe); return; @@ -1254,7 +1250,6 @@ fail: queue: xe_gt_sriov_info(gt, "Re-queuing migration recovery\n"); queue_work(gt->ordered_wq, >->sriov.vf.migration.worker); - xe_pm_runtime_put(xe); } static void migration_worker_func(struct work_struct *w) -- cgit v1.2.3 From 3bd3763a749edee52eb1e0a633650794749fa82c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 20 Oct 2025 18:44:38 +0300 Subject: drm/i915/dp: Simplify intel_dp_needs_8b10b_fec() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intel_crtc_state::fec_enable check in intel_dp_needs_8b10b_fec() is redundant drop it: originally it ensured that the FEC enabled state for a CRTC other than the CRTC intel_dp_needs_8b10b_fec() called for is preserved, even if DSC is not enabled for the latter CRTC. The way FEC gets enabled for all the CRTCs on an 8b10b MST link is changed by commit 7c027070e98d ("drm/i915/dp_mst: Track DSC enabled status on the MST link") and commit 470b84af457e ("drm/i915/dp_mst: Recompute all MST link CRTCs if DSC gets enabled on the link") depending on intel_dsc_enabled_on_link() in intel_dp_needs_8b10b_fec() instead of the above fec_enable check. Drop the check. Suggested-by: Jouni Högander Reviewed-by: Jouni Högander Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20251020154438.416761-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index b0aeb6c2de86..475518b4048b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2377,9 +2377,6 @@ bool intel_dp_needs_8b10b_fec(const struct intel_crtc_state *crtc_state, if (intel_dp_is_uhbr(crtc_state)) return false; - if (crtc_state->fec_enable) - return true; - /* * Though eDP v1.5 supports FEC with DSC, unlike DP, it is optional. 
 * Since, FEC is a bandwidth overhead, continue to not enable it for
-- cgit v1.2.3


From 4df3b340ff6e9f499735d8b52b96a9257fde3918 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä
Date: Wed, 22 Oct 2025 13:07:16 +0300
Subject: drm/i915/dmc: Clear HRR EVT_CTL/HTP to zero on ADL-S
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On ADL-S the main DMC HRR event DMC_EVT_CTL/HTP are never restored to
their previous values during DC6 exit. This angers assert_dmc_loaded(),
and basically makes the HRR handler unusable because we don't rewrite
EVT_HTP when enabling DMC events.

Let's just clear the HRR EVT_CTL/HTP to zero from the beginning so that
the expected value matches the post-DC6 reality.

I suppose if we ever had actual use for HRR we'd have to both reject
HRR+PSR and reprogram EVT_HTP when enabling the event. But for now we
don't care about HRR so keeping both registers zeroed is fine.

Cc: stable@vger.kernel.org
Tested-by: Petr Vorel
Fixes: 43175c92d403 ("drm/i915/dmc: Assert DMC is loaded harder")
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15153
Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20251022100718.24803-2-ville.syrjala@linux.intel.com
Reviewed-by: Petr Vorel
Reviewed-by: Imre Deak
Tested-by: Imre Deak
---
 drivers/gpu/drm/i915/display/intel_dmc.c | 55 +++++++++++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
index be6d37ea1139..5ba531e3bc36 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -550,6 +550,36 @@ static bool is_event_handler(struct intel_display *display,
 	       REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == event_id;
 }

+static bool fixup_dmc_evt(struct intel_display *display,
+			  enum intel_dmc_id dmc_id,
+			  i915_reg_t reg_ctl, u32 *data_ctl,
+			  i915_reg_t reg_htp, u32 *data_htp)
+{
+	if (!is_dmc_evt_ctl_reg(display, dmc_id, reg_ctl))
+		return false;
+
+	if (!is_dmc_evt_htp_reg(display, dmc_id, reg_htp))
+		return false;
+
+	/* make sure reg_ctl and reg_htp are for the same event */
+	if (i915_mmio_reg_offset(reg_ctl) - i915_mmio_reg_offset(DMC_EVT_CTL(display, dmc_id, 0)) !=
+	    i915_mmio_reg_offset(reg_htp) - i915_mmio_reg_offset(DMC_EVT_HTP(display, dmc_id, 0)))
+		return false;
+
+	/*
+	 * On ADL-S the HRR event handler is not restored after DC6.
+	 * Clear it to zero from the beginning to avoid mismatches later.
+	 */
+	if (display->platform.alderlake_s && dmc_id == DMC_FW_MAIN &&
+	    is_event_handler(display, dmc_id, MAINDMC_EVENT_VBLANK_A, reg_ctl, *data_ctl)) {
+		*data_ctl = 0;
+		*data_htp = 0;
+		return true;
+	}
+
+	return false;
+}
+
 static bool disable_dmc_evt(struct intel_display *display,
 			    enum intel_dmc_id dmc_id,
 			    i915_reg_t reg, u32 data)
@@ -1068,9 +1098,32 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
 	for (i = 0; i < mmio_count; i++) {
 		dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]);
 		dmc_info->mmiodata[i] = mmiodata[i];
+	}
+
+	for (i = 0; i < mmio_count - 1; i++) {
+		u32 orig_mmiodata[2] = {
+			dmc_info->mmiodata[i],
+			dmc_info->mmiodata[i+1],
+		};
+
+		if (!fixup_dmc_evt(display, dmc_id,
+				   dmc_info->mmioaddr[i], &dmc_info->mmiodata[i],
+				   dmc_info->mmioaddr[i+1], &dmc_info->mmiodata[i+1]))
+			continue;
+
+		drm_dbg_kms(display->drm,
+			    " mmio[%d]: 0x%x = 0x%x->0x%x (EVT_CTL)\n",
+			    i, i915_mmio_reg_offset(dmc_info->mmioaddr[i]),
+			    orig_mmiodata[0], dmc_info->mmiodata[i]);
+		drm_dbg_kms(display->drm,
+			    " mmio[%d]: 0x%x = 0x%x->0x%x (EVT_HTP)\n",
+			    i+1, i915_mmio_reg_offset(dmc_info->mmioaddr[i+1]),
+			    orig_mmiodata[1], dmc_info->mmiodata[i+1]);
+	}
+
+	for (i = 0; i < mmio_count; i++) {
 		drm_dbg_kms(display->drm, " mmio[%d]: 0x%x = 0x%x%s%s\n",
-			    i, mmioaddr[i], mmiodata[i],
+			    i, i915_mmio_reg_offset(dmc_info->mmioaddr[i]), dmc_info->mmiodata[i],
 			    is_dmc_evt_ctl_reg(display, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_CTL)" :
 			    is_dmc_evt_htp_reg(display, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_HTP)" : "",
 			    disable_dmc_evt(display, dmc_id, dmc_info->mmioaddr[i],
-- cgit v1.2.3


From 9c2503beb84f58e87b3de4649dafc113296e06a6 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä
Date: Wed, 22 Oct 2025 13:07:17 +0300
Subject: drm/i915/dmc: Fixup TGL/ADL-S HRR event handler type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TGL/ADL-S DMC firmware incorrectly uses the undelayed vblank trigger
for the HRR event, when it should be using the delayed vblank trigger.
Fixed DMC firmware was never released and instead the Windows driver
just fixes this up by hand. Follow suit.

Not that we actually enable the HRR event currently. But let's fix up
the event ID, just in case someone ever needs to enable this.

Tested-by: Petr Vorel
Signed-off-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20251022100718.24803-3-ville.syrjala@linux.intel.com
Reviewed-by: Petr Vorel
Reviewed-by: Imre Deak
Tested-by: Imre Deak
---
 drivers/gpu/drm/i915/display/intel_dmc.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
index 5ba531e3bc36..02ef0a252220 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -577,6 +577,21 @@ static bool fixup_dmc_evt(struct intel_display *display,
 		return true;
 	}

+	/*
+	 * TGL/ADL-S DMC firmware incorrectly uses the undelayed vblank
+	 * event for the HRR handler, when it should be using the delayed
+	 * vblank event instead. Fixed firmware was never released
+	 * so the Windows driver just hacks around it by overriding
+	 * the event ID. Do the same.
+ */ + if ((display->platform.tigerlake || display->platform.alderlake_s) && + is_event_handler(display, dmc_id, MAINDMC_EVENT_VBLANK_A, reg_ctl, *data_ctl)) { + *data_ctl &= ~DMC_EVT_CTL_EVENT_ID_MASK; + *data_ctl |= REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK, + MAINDMC_EVENT_VBLANK_DELAYED_A); + return true; + } + return false; } @@ -598,7 +613,7 @@ static bool disable_dmc_evt(struct intel_display *display, /* also disable the HRR event on the main DMC on TGL/ADLS */ if ((display->platform.tigerlake || display->platform.alderlake_s) && - is_event_handler(display, dmc_id, MAINDMC_EVENT_VBLANK_A, reg, data)) + is_event_handler(display, dmc_id, MAINDMC_EVENT_VBLANK_DELAYED_A, reg, data)) return true; return false; -- cgit v1.2.3 From 6b1209d158e2c8a56e67f61d65e0f22d3754fb3a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 22 Oct 2025 13:07:18 +0300 Subject: drm/i915/dmc: Set DMC_EVT_CTL_ENABLE for disabled event handlers as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DMC_EVT_CTL_ENABLE cannot be cleared once set. So currently any event we never enable will have DMC_EVT_CTL_ENABLE cleared, whereas any event which has been enabled even once will have DMC_EVT_CTL_ENABLE set. For that reason assert_dmc_loaded() has a special case to ignore any mismatches in DMC_EVT_CTL_ENABLE. Eliminate the special case by always configuring DMC_EVT_CTL_ENABLE based on the original firmware event definition. Now all event handlers will have DMC_EVT_CTL_ENABLE set, whether or not the event has been enabled in the past. All disabled event handlers will still have the event type set to DMC_EVENT_FALSE so they will not actually trigger despite DMC_EVT_CTL_ENABLE being set. Tested-by: Petr Vorel Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251022100718.24803-4-ville.syrjala@linux.intel.com Reviewed-by: Petr Vorel Reviewed-by: Imre Deak Tested-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_dmc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 02ef0a252220..5ba17e66d90b 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -513,10 +513,16 @@ static u32 pipedmc_interrupt_mask(struct intel_display *display) PIPEDMC_ATS_FAULT; } -static u32 dmc_evt_ctl_disable(void) +static u32 dmc_evt_ctl_disable(u32 dmc_evt_ctl) { - return REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK, - DMC_EVT_CTL_TYPE_EDGE_0_1) | + /* + * DMC_EVT_CTL_ENABLE cannot be cleared once set. Always + * configure it based on the original event definition to + * avoid mismatches in assert_dmc_loaded(). 
+ */ + return (dmc_evt_ctl & DMC_EVT_CTL_ENABLE) | + REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK, + DMC_EVT_CTL_TYPE_EDGE_0_1) | REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK, DMC_EVENT_FALSE); } @@ -626,7 +632,7 @@ static u32 dmc_mmiodata(struct intel_display *display, if (disable_dmc_evt(display, dmc_id, dmc->dmc_info[dmc_id].mmioaddr[i], dmc->dmc_info[dmc_id].mmiodata[i])) - return dmc_evt_ctl_disable(); + return dmc_evt_ctl_disable(dmc->dmc_info[dmc_id].mmiodata[i]); else return dmc->dmc_info[dmc_id].mmiodata[i]; } @@ -685,12 +691,6 @@ static void assert_dmc_loaded(struct intel_display *display, found = intel_de_read(display, reg); expected = dmc_mmiodata(display, dmc, dmc_id, i); - /* once set DMC_EVT_CTL_ENABLE can't be cleared :/ */ - if (is_dmc_evt_ctl_reg(display, dmc_id, reg)) { - found &= ~DMC_EVT_CTL_ENABLE; - expected &= ~DMC_EVT_CTL_ENABLE; - } - drm_WARN(display->drm, found != expected, "DMC %d mmio[%d]/0x%x incorrect (expected 0x%x, current 0x%x)\n", dmc_id, i, i915_mmio_reg_offset(reg), expected, found); @@ -843,7 +843,7 @@ static void dmc_configure_event(struct intel_display *display, if (!is_event_handler(display, dmc_id, event_id, reg, data)) continue; - intel_de_write(display, reg, enable ? data : dmc_evt_ctl_disable()); + intel_de_write(display, reg, enable ? data : dmc_evt_ctl_disable(data)); num_handlers++; } -- cgit v1.2.3 From 998703342876b5daf084d9182387efab0005a07f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 22 Oct 2025 15:14:49 +0300 Subject: drm/i915/display: drop a few unnecessary i915_drv.h includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've stopped using struct drm_i915_private in intel_fb.c and skl_universal_plane.c, so we can drop the i915_drv.h includes. Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251022121450.452649-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fb.c | 1 - drivers/gpu/drm/i915/display/skl_universal_plane.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 3958628c73e9..6ade38198f39 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -10,7 +10,6 @@ #include #include -#include "i915_drv.h" #include "i915_utils.h" #include "intel_bo.h" #include "intel_display.h" diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 0319174adf95..765d288cce2b 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -7,9 +7,9 @@ #include #include #include +#include #include "pxp/intel_pxp.h" -#include "i915_drv.h" #include "i915_utils.h" #include "intel_bo.h" #include "intel_de.h" @@ -24,6 +24,7 @@ #include "intel_plane.h" #include "intel_psr.h" #include "intel_psr_regs.h" +#include "intel_step.h" #include "skl_scaler.h" #include "skl_universal_plane.h" #include "skl_universal_plane_regs.h" -- cgit v1.2.3 From 68aeace1b15ae5eaa37566fa6f09a2cbc6399a70 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 22 Oct 2025 15:14:50 +0300 Subject: drm/xe/compat: drop include xe_device.h from i915_drv.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xe_device.h and xe_device_has_flat_ccs() are no longer needed since commit 3a5c5c472c0e ("drm/i915/display: add HAS_AUX_CCS() feature check"). 
Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20251022121450.452649-2-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 7c657ea98a44..3e79a74ff7de 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -12,7 +12,6 @@ #include -#include "xe_device.h" /* for xe_device_has_flat_ccs() */ #include "xe_device_types.h" static inline struct drm_i915_private *to_i915(const struct drm_device *dev) -- cgit v1.2.3 From 3c767f762be973711421876d9e05e4dfd93f74ce Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 22 Oct 2025 17:38:30 +0100 Subject: drm/xe/migrate: fix offset and len check Restriction here is pitch of 4bytes to match pixel width (32b), and hw restriction where src and dst must be aligned to 64bytes. If any of that is not possible then we need a bounce buffer. Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251022163836.191405-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 3112c966c67d..ce2ad876586c 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1883,7 +1883,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, unsigned long i, j; bool use_pde = xe_migrate_vram_use_pde(sram_addr, len + sram_offset); - if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) || + if (drm_WARN_ON(&xe->drm, (!IS_ALIGNED(len, pitch)) || (sram_offset | vram_addr) & XE_CACHELINE_MASK)) return ERR_PTR(-EOPNOTSUPP); @@ -2103,8 +2103,9 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, xe_bo_assert_held(bo); /* Use bounce buffer for small access and unaligned access */ - if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) || - !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) { + if (!IS_ALIGNED(len, 4) || + !IS_ALIGNED(page_offset, XE_CACHELINE_BYTES) || + !IS_ALIGNED(offset, XE_CACHELINE_BYTES)) { int buf_offset = 0; void *bounce; int err; -- cgit v1.2.3 From aaeef7a9c8b9206039a588a23e4dc11dddbefe2d Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 22 Oct 2025 17:38:31 +0100 Subject: drm/xe/migrate: rework size restrictions for sram pte emit We allow the input size to not be aligned to PAGE_SIZE, which leads to various bugs in build_pt_update_batch_sram() for PAGE_SIZE > 4K systems. For example if ptes is exactly one gpu_page_size then the chunk size is rounded down to zero. The simplest fix looks to be forcing PAGE_SIZE aligned inputs. 
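Concretely, on a 64K-page kernel with 4K GPU pages (so
PAGE_SIZE / XE_PAGE_SIZE == 16), an input covering a single GPU page
runs into the rounding like this:

  u32 ptes  = DIV_ROUND_UP(SZ_4K, SZ_4K);		/* == 1 */
  u32 chunk = min(MAX_PTE_PER_SDI, ptes);		/* == 1 */

  chunk = ALIGN_DOWN(chunk, PAGE_SIZE / XE_PAGE_SIZE);	/* ALIGN_DOWN(1, 16) == 0 */
  /* result: a zero-length MI_STORE_DATA_IMM and no PTEs ever written */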
Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251022163836.191405-3-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index ce2ad876586c..451fae0106e5 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1798,6 +1798,8 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, u32 ptes; int i = 0; + xe_tile_assert(m->tile, PAGE_ALIGNED(size)); + ptes = DIV_ROUND_UP(size, gpu_page_size); while (ptes) { u32 chunk = min(MAX_PTE_PER_SDI, ptes); @@ -1811,12 +1813,13 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes -= chunk; while (chunk--) { - u64 addr = sram_addr[i].addr & ~(gpu_page_size - 1); - u64 pte, orig_addr = addr; + u64 addr = sram_addr[i].addr; + u64 pte; xe_tile_assert(m->tile, sram_addr[i].proto == DRM_INTERCONNECT_SYSTEM); xe_tile_assert(m->tile, addr); + xe_tile_assert(m->tile, PAGE_ALIGNED(addr)); again: pte = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, @@ -1827,7 +1830,7 @@ again: if (gpu_page_size < PAGE_SIZE) { addr += XE_PAGE_SIZE; - if (orig_addr + PAGE_SIZE != addr) { + if (!PAGE_ALIGNED(addr)) { chunk--; goto again; } @@ -1918,10 +1921,10 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, if (use_pde) build_pt_update_batch_sram(m, bb, m->large_page_copy_pdes, - sram_addr, len + sram_offset, 1); + sram_addr, npages << PAGE_SHIFT, 1); else build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, - sram_addr, len + sram_offset, 0); + sram_addr, npages << PAGE_SHIFT, 0); if (dir == XE_MIGRATE_COPY_TO_VRAM) { if (use_pde) -- cgit v1.2.3 From fb188d8b00fc221fcc744109dfa29b9945c91913 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 22 Oct 2025 17:38:32 +0100 Subject: drm/xe/migrate: fix chunk handling for 2M page emit On systems with PAGE_SIZE > 4K the chunk will likely be rounded down to zero, if say we have single 2M page, so one huge pte, since we also try to align the chunk to PAGE_SIZE / XE_PAGE_SIZE, which will be 16 on 64K systems. Make the ALIGN_DOWN conditional for 4K PTEs where we can encounter gpu_page_size < PAGE_SIZE. Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251022163836.191405-4-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 451fae0106e5..ce5543fa7a52 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1804,7 +1804,9 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, while (ptes) { u32 chunk = min(MAX_PTE_PER_SDI, ptes); - chunk = ALIGN_DOWN(chunk, PAGE_SIZE / XE_PAGE_SIZE); + if (!level) + chunk = ALIGN_DOWN(chunk, PAGE_SIZE / XE_PAGE_SIZE); + bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = pt_offset; bb->cs[bb->len++] = 0; -- cgit v1.2.3 From 1413329456aaf5d209ab9c73ae710fb88785ef78 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 22 Oct 2025 17:38:33 +0100 Subject: drm/xe/migrate: fix batch buffer sizing In xe_migrate_vram() the copy can straddle page boundaries, so the len might look like a single page, but actually accounting for the offset within the page we will need to emit more than one PTE. 
Otherwise in some cases the batch buffer will be undersized, leading to
warnings later. We already have npages, so use that instead.

Signed-off-by: Matthew Auld
Cc: Matthew Brost
Reviewed-by: Matthew Brost
Link: https://lore.kernel.org/r/20251022163836.191405-5-matthew.auld@intel.com
---
 drivers/gpu/drm/xe/xe_migrate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index ce5543fa7a52..fa87e0eddd09 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1894,7 +1894,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,

 	xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER);

-	batch_size += pte_update_cmd_size(len);
+	batch_size += pte_update_cmd_size(npages << PAGE_SHIFT);
 	batch_size += EMIT_COPY_DW;

 	bb = xe_bb_new(gt, batch_size, use_usm_batch);
-- cgit v1.2.3


From 0171dcce33fb0c5c26129d8334cd13e511b5522a Mon Sep 17 00:00:00 2001
From: Matthew Auld
Date: Wed, 22 Oct 2025 17:38:34 +0100
Subject: drm/xe/migrate: trim batch buffer sizing

We have an extra two dwords, but it looks like we should only need one
for the extra bb_end. Likely this is just leftover from back when the
arb handling was moved into the ring programming.

Signed-off-by: Matthew Auld
Cc: Matthew Brost
Reviewed-by: Matthew Brost
Link: https://lore.kernel.org/r/20251022163836.191405-6-matthew.auld@intel.com
---
 drivers/gpu/drm/xe/xe_migrate.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index fa87e0eddd09..95aefe2e71f5 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -847,7 +847,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 			    &ccs_it);

 	while (size) {
-		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+		u32 batch_size = 1; /* MI_BATCH_BUFFER_END */
 		struct xe_sched_job *job;
 		struct xe_bb *bb;
 		u32 flush_flags = 0;
@@ -1312,7 +1312,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,

 	/* Calculate final sizes and batch size.. */
 	pte_flags = clear_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
-	batch_size = 2 +
+	batch_size = 1 +
 		pte_update_size(m, pte_flags, src, &src_it,
 				&clear_L0, &clear_L0_ofs, &clear_L0_pt,
 				clear_bo_data ? emit_clear_cmd_len(gt) : 0, 0,
@@ -1876,7 +1876,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
 	struct xe_device *xe = gt_to_xe(gt);
 	bool use_usm_batch = xe->info.has_usm;
 	struct dma_fence *fence = NULL;
-	u32 batch_size = 2;
+	u32 batch_size = 1;
 	u64 src_L0_ofs, dst_L0_ofs;
 	struct xe_sched_job *job;
 	struct xe_bb *bb;
-- cgit v1.2.3


From 1e12dbae9d726b1e4ada1e5e101ccf6bb7a8c8aa Mon Sep 17 00:00:00 2001
From: Matthew Auld
Date: Wed, 22 Oct 2025 17:38:35 +0100
Subject: drm/xe/migrate: support MEM_COPY instruction

Make this the default on xe2+ when doing a copy. This has a few
advantages over the existing copy instruction:

1) It has a special PAGE_COPY mode that claims to be optimised for
page-in/page-out, which is the vast majority of current users.

2) It also has a simple BYTE_COPY mode that supports byte granularity
copying without any restrictions.

With 2) we can now easily skip the bounce buffer flow when copying
buffers with strange sizing/alignment, like for memory_access. But that
is left for the next patch.

v2 (Matt Brost):
- Use device info to check whether device should use the MEM_COPY path.
  This should fit better with making this a configfs tunable.
- And with that also keep old path still functional on xe2 for possible experimentation. - Add a define for PAGE_COPY page-size. v3 (Matt Brost): - Fallback to an actual linear copy for pitch=1. - Also update NVL. BSpec: 57561 Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251022163836.191405-7-matthew.auld@intel.com --- drivers/gpu/drm/xe/instructions/xe_gpu_commands.h | 6 +++ drivers/gpu/drm/xe/xe_device_types.h | 2 + drivers/gpu/drm/xe/xe_migrate.c | 61 +++++++++++++++++++++-- drivers/gpu/drm/xe/xe_pci.c | 5 ++ drivers/gpu/drm/xe/xe_pci_types.h | 1 + 5 files changed, 72 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h index 8cfcd3360896..5d41ca297447 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h @@ -31,6 +31,12 @@ #define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30) #define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20) +#define MEM_COPY_CMD (2 << 29 | 0x5a << 22 | 0x8) +#define MEM_COPY_PAGE_COPY_MODE REG_BIT(19) +#define MEM_COPY_MATRIX_COPY REG_BIT(17) +#define MEM_COPY_SRC_MOCS_INDEX_MASK GENMASK(31, 28) +#define MEM_COPY_DST_MOCS_INDEX_MASK GENMASK(6, 3) + #define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) #define PVC_MEM_SET_CMD_LEN_DW 7 #define PVC_MEM_SET_MATRIX REG_BIT(17) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9e3666a226da..8f62ee7a73ac 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -300,6 +300,8 @@ struct xe_device { * pcode mailbox commands. */ u8 has_mbx_power_limits:1; + /** @info.has_mem_copy_instr: Device supports MEM_COPY instruction */ + u8 has_mem_copy_instr:1; /** @info.has_pxp: Device has PXP support */ u8 has_pxp:1; /** @info.has_range_tlb_inval: Has range based TLB invalidations */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 95aefe2e71f5..1bbc7bca33ed 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -699,9 +699,9 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, } #define EMIT_COPY_DW 10 -static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, - u64 src_ofs, u64 dst_ofs, unsigned int size, - unsigned int pitch) +static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u64 dst_ofs, unsigned int size, + unsigned int pitch) { struct xe_device *xe = gt_to_xe(gt); u32 mocs = 0; @@ -730,6 +730,61 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, bb->cs[bb->len++] = upper_32_bits(src_ofs); } +#define PAGE_COPY_MODE_PS SZ_256 /* hw uses 256 bytes as the page-size */ +static void emit_mem_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u64 dst_ofs, unsigned int size, unsigned int pitch) +{ + u32 mode, copy_type, width; + + xe_gt_assert(gt, IS_ALIGNED(size, pitch)); + xe_gt_assert(gt, pitch <= U16_MAX); + xe_gt_assert(gt, pitch); + xe_gt_assert(gt, size); + + if (IS_ALIGNED(size, PAGE_COPY_MODE_PS) && + IS_ALIGNED(lower_32_bits(src_ofs), PAGE_COPY_MODE_PS) && + IS_ALIGNED(lower_32_bits(dst_ofs), PAGE_COPY_MODE_PS)) { + mode = MEM_COPY_PAGE_COPY_MODE; + copy_type = 0; /* linear copy */ + width = size / PAGE_COPY_MODE_PS; + } else if (pitch > 1) { + xe_gt_assert(gt, size / pitch <= U16_MAX); + mode = 0; /* BYTE_COPY */ + copy_type = MEM_COPY_MATRIX_COPY; + width = pitch; + } else { + mode = 0; /* BYTE_COPY */ + 
copy_type = 0; /* linear copy */ + width = size; + } + + xe_gt_assert(gt, width <= U16_MAX); + + bb->cs[bb->len++] = MEM_COPY_CMD | mode | copy_type; + bb->cs[bb->len++] = width - 1; + bb->cs[bb->len++] = size / pitch - 1; /* ignored by hw for page-copy/linear above */ + bb->cs[bb->len++] = pitch - 1; + bb->cs[bb->len++] = pitch - 1; + bb->cs[bb->len++] = lower_32_bits(src_ofs); + bb->cs[bb->len++] = upper_32_bits(src_ofs); + bb->cs[bb->len++] = lower_32_bits(dst_ofs); + bb->cs[bb->len++] = upper_32_bits(dst_ofs); + bb->cs[bb->len++] = FIELD_PREP(MEM_COPY_SRC_MOCS_INDEX_MASK, gt->mocs.uc_index) | + FIELD_PREP(MEM_COPY_DST_MOCS_INDEX_MASK, gt->mocs.uc_index); +} + +static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, + u64 src_ofs, u64 dst_ofs, unsigned int size, + unsigned int pitch) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe->info.has_mem_copy_instr) + emit_mem_copy(gt, bb, src_ofs, dst_ofs, size, pitch); + else + emit_xy_fast_copy(gt, bb, src_ofs, dst_ofs, size, pitch); +} + static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm) { return usm ? m->usm_batch_base_ofs : m->batch_base_ofs; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index c326430e75b5..ece45157fd31 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -342,6 +342,7 @@ static const struct xe_device_desc lnl_desc = { .has_display = true, .has_flat_ccs = 1, .has_pxp = true, + .has_mem_copy_instr = true, .max_gt_per_tile = 2, .needs_scratch = true, .va_bits = 48, @@ -362,6 +363,7 @@ static const struct xe_device_desc bmg_desc = { .has_heci_cscfi = 1, .has_late_bind = true, .has_sriov = true, + .has_mem_copy_instr = true, .max_gt_per_tile = 2, .needs_scratch = true, .subplatforms = (const struct xe_subplatform_desc[]) { @@ -378,6 +380,7 @@ static const struct xe_device_desc ptl_desc = { .has_display = true, .has_flat_ccs = 1, .has_sriov = true, + .has_mem_copy_instr = true, .max_gt_per_tile = 2, .needs_scratch = true, .needs_shared_vf_gt_wq = true, @@ -390,6 +393,7 @@ static const struct xe_device_desc nvls_desc = { .dma_mask_size = 46, .has_display = true, .has_flat_ccs = 1, + .has_mem_copy_instr = true, .max_gt_per_tile = 2, .require_force_probe = true, .va_bits = 48, @@ -655,6 +659,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.has_pxp = desc->has_pxp; xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) && desc->has_sriov; + xe->info.has_mem_copy_instr = desc->has_mem_copy_instr; xe->info.skip_guc_pc = desc->skip_guc_pc; xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index a4451bdc79fb..9892c063a9c5 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -46,6 +46,7 @@ struct xe_device_desc { u8 has_late_bind:1; u8 has_llc:1; u8 has_mbx_power_limits:1; + u8 has_mem_copy_instr:1; u8 has_pxp:1; u8 has_sriov:1; u8 needs_scratch:1; -- cgit v1.2.3 From f558630a7d4375ff030891d38daa6b97843c63c8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 22 Oct 2025 17:38:36 +0100 Subject: drm/xe/migrate: skip bounce buffer path on xe2 Now that we support MEM_COPY we should be able to use the PAGE_COPY mode, otherwise falling back to BYTE_COPY mode when we have odd sizing/alignment. v2: - Use info.has_mem_copy_instr - Rebase on latest changes. 
v3 (Matt Brost): - Allow various pitches including 1byte pitch for MEM_COPY Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251022163836.191405-8-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 43 ++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 1bbc7bca33ed..921c9c1ea41f 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1920,6 +1920,25 @@ enum xe_migrate_copy_dir { #define XE_CACHELINE_BYTES 64ull #define XE_CACHELINE_MASK (XE_CACHELINE_BYTES - 1) +static u32 xe_migrate_copy_pitch(struct xe_device *xe, u32 len) +{ + u32 pitch; + + if (IS_ALIGNED(len, PAGE_SIZE)) + pitch = PAGE_SIZE; + else if (IS_ALIGNED(len, SZ_4K)) + pitch = SZ_4K; + else if (IS_ALIGNED(len, SZ_256)) + pitch = SZ_256; + else if (IS_ALIGNED(len, 4)) + pitch = 4; + else + pitch = 1; + + xe_assert(xe, pitch > 1 || xe->info.has_mem_copy_instr); + return pitch; +} + static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, unsigned long len, unsigned long sram_offset, @@ -1937,14 +1956,14 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, struct xe_bb *bb; u32 update_idx, pt_slot = 0; unsigned long npages = DIV_ROUND_UP(len + sram_offset, PAGE_SIZE); - unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ? - PAGE_SIZE : 4; + unsigned int pitch = xe_migrate_copy_pitch(xe, len); int err; unsigned long i, j; bool use_pde = xe_migrate_vram_use_pde(sram_addr, len + sram_offset); - if (drm_WARN_ON(&xe->drm, (!IS_ALIGNED(len, pitch)) || - (sram_offset | vram_addr) & XE_CACHELINE_MASK)) + if (!xe->info.has_mem_copy_instr && + drm_WARN_ON(&xe->drm, + (!IS_ALIGNED(len, pitch)) || (sram_offset | vram_addr) & XE_CACHELINE_MASK)) return ERR_PTR(-EOPNOTSUPP); xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER); @@ -2163,9 +2182,10 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, xe_bo_assert_held(bo); /* Use bounce buffer for small access and unaligned access */ - if (!IS_ALIGNED(len, 4) || - !IS_ALIGNED(page_offset, XE_CACHELINE_BYTES) || - !IS_ALIGNED(offset, XE_CACHELINE_BYTES)) { + if (!xe->info.has_mem_copy_instr && + (!IS_ALIGNED(len, 4) || + !IS_ALIGNED(page_offset, XE_CACHELINE_BYTES) || + !IS_ALIGNED(offset, XE_CACHELINE_BYTES))) { int buf_offset = 0; void *bounce; int err; @@ -2227,6 +2247,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) + cursor.start; int current_bytes; + u32 pitch; if (cursor.size > MAX_PREEMPTDISABLE_TRANSFER) current_bytes = min_t(int, bytes_left, @@ -2234,13 +2255,13 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, else current_bytes = min_t(int, bytes_left, cursor.size); - if (current_bytes & ~PAGE_MASK) { - int pitch = 4; - + pitch = xe_migrate_copy_pitch(xe, current_bytes); + if (xe->info.has_mem_copy_instr) + current_bytes = min_t(int, current_bytes, U16_MAX * pitch); + else current_bytes = min_t(int, current_bytes, round_down(S16_MAX * pitch, XE_CACHELINE_BYTES)); - } __fence = xe_migrate_vram(m, current_bytes, (unsigned long)buf & ~PAGE_MASK, -- cgit v1.2.3 From 5e0de2dfbc1bddbd47fbcc9dabb4917000b0ca27 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Tue, 21 Oct 2025 22:17:33 -0700 Subject: drm/xe/cri: Add CRI platform definition Add platform definition and PCI IDs for Crescent Island. 
Other platforms use INTEL_VGA_DEVICE since they have a PCI_BASE_CLASS_DISPLAY class. This is not the case for CRI, so just match on devid, which should be sufficient. Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Shekhar Chauhan Link: https://lore.kernel.org/r/20251021-cri-v1-1-bf11e61d9f49@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 15 +++++++++++++++ drivers/gpu/drm/xe/xe_platform_types.h | 1 + include/drm/intel/pciids.h | 4 ++++ 3 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index ece45157fd31..6e59642e7820 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -400,6 +400,20 @@ static const struct xe_device_desc nvls_desc = { .vm_max_level = 4, }; +static const struct xe_device_desc cri_desc = { + DGFX_FEATURES, + PLATFORM(CRESCENTISLAND), + .dma_mask_size = 52, + .has_display = false, + .has_flat_ccs = false, + .has_mbx_power_limits = true, + .has_sriov = true, + .max_gt_per_tile = 2, + .require_force_probe = true, + .va_bits = 57, + .vm_max_level = 4, +}; + #undef PLATFORM __diag_pop(); @@ -427,6 +441,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc), INTEL_PTL_IDS(INTEL_VGA_DEVICE, &ptl_desc), INTEL_NVLS_IDS(INTEL_VGA_DEVICE, &nvls_desc), + INTEL_CRI_IDS(INTEL_PCI_DEVICE, &cri_desc), { } }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index 78286285c249..f516dbddfd88 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -25,6 +25,7 @@ enum xe_platform { XE_BATTLEMAGE, XE_PANTHERLAKE, XE_NOVALAKE_S, + XE_CRESCENTISLAND, }; enum xe_subplatform { diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 9f095a99d6c9..6e53fb4cdd37 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -893,4 +893,8 @@ MACRO__(0xD744, ## __VA_ARGS__), \ MACRO__(0xD745, ## __VA_ARGS__) +/* CRI */ +#define INTEL_CRI_IDS(MACRO__, ...) \ + MACRO__(0x674C, ## __VA_ARGS__) + #endif /* __PCIIDS_H__ */ -- cgit v1.2.3 From 402377bb25fe9b36c5ae3374c9b88105134c86a2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 21 Oct 2025 22:17:35 -0700 Subject: drm/xe/cri: Setup MOCS table CRI has a new MOCS table, but uses the same general ops as other Xe2/Xe3 platforms. Bspec: 71582 Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20251021-cri-v1-3-bf11e61d9f49@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mocs.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index e8ec4114302e..6613d3b48a84 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -568,6 +568,23 @@ static const struct xe_mocs_ops xe2_mocs_ops = { .dump = xe2_mocs_dump, }; +/* + * Note that the "L3" and "L4" register fields actually control the L2 and L3 + * caches respectively on this platform. 
+ */ +static const struct xe_mocs_entry xe3p_xpc_mocs_table[] = { + /* Defer to PAT */ + MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0), + /* UC */ + MOCS_ENTRY(1, IG_PAT | XE2_L3_3_UC | L4_3_UC, 0), + /* L2 */ + MOCS_ENTRY(2, IG_PAT | XE2_L3_0_WB | L4_3_UC, 0), + /* L3 */ + MOCS_ENTRY(3, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0), + /* L2 + L3 */ + MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0), +}; + static unsigned int get_mocs_settings(struct xe_device *xe, struct xe_mocs_info *info) { @@ -576,6 +593,15 @@ static unsigned int get_mocs_settings(struct xe_device *xe, memset(info, 0, sizeof(struct xe_mocs_info)); switch (xe->info.platform) { + case XE_CRESCENTISLAND: + info->ops = &xe2_mocs_ops; + info->table_size = ARRAY_SIZE(xe3p_xpc_mocs_table); + info->table = xe3p_xpc_mocs_table; + info->num_mocs_regs = XE2_NUM_MOCS_ENTRIES; + info->uc_index = 1; + info->wb_index = 4; + info->unused_entries_index = 4; + break; case XE_NOVALAKE_S: case XE_PANTHERLAKE: case XE_LUNARLAKE: -- cgit v1.2.3 From 9ea9b45701ab50049a722450abc28346d1121e6e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 22 Oct 2025 16:01:22 -0700 Subject: drm/xe: Use SVM range helpers in PT layer We have helpers SVM range start, end, and size. Use them in the PT layer rather than directly looking at the struct. Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251022230122.922382-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_pt.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index d22fd1ccc0ba..7c5bca78c8bf 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -715,7 +715,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .vm = vm, .tile = tile, .curs = &curs, - .va_curs_start = range ? range->base.itree.start : + .va_curs_start = range ? xe_svm_range_start(range) : xe_vma_start(vma), .vma = vma, .wupd.entries = entries, @@ -734,7 +734,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, } if (xe_svm_range_has_dma_mapping(range)) { xe_res_first_dma(range->base.pages.dma_addr, 0, - range->base.itree.last + 1 - range->base.itree.start, + xe_svm_range_size(range), &curs); xe_svm_range_debug(range, "BIND PREPARE - MIXED"); } else { @@ -778,8 +778,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, walk_pt: ret = xe_pt_walk_range(&pt->base, pt->level, - range ? range->base.itree.start : xe_vma_start(vma), - range ? range->base.itree.last + 1 : xe_vma_end(vma), + range ? xe_svm_range_start(range) : xe_vma_start(vma), + range ? xe_svm_range_end(range) : xe_vma_end(vma), &xe_walk.base); *num_entries = xe_walk.wupd.num_used_entries; @@ -975,8 +975,8 @@ bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm, if (!(pt_mask & BIT(tile->id))) return false; - (void)xe_pt_walk_shared(&pt->base, pt->level, range->base.itree.start, - range->base.itree.last + 1, &xe_walk.base); + (void)xe_pt_walk_shared(&pt->base, pt->level, xe_svm_range_start(range), + xe_svm_range_end(range), &xe_walk.base); return xe_walk.needs_invalidate; } @@ -1661,8 +1661,8 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_svm_range *range, struct xe_vm_pgtable_update *entries) { - u64 start = range ? range->base.itree.start : xe_vma_start(vma); - u64 end = range ? range->base.itree.last + 1 : xe_vma_end(vma); + u64 start = range ? xe_svm_range_start(range) : xe_vma_start(vma); + u64 end = range ? 
xe_svm_range_end(range) : xe_vma_end(vma); struct xe_pt_stage_unbind_walk xe_walk = { .base = { .ops = &xe_pt_stage_unbind_ops, @@ -1872,7 +1872,7 @@ static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile, vm_dbg(&xe_vma_vm(vma)->xe->drm, "Preparing bind, with range [%lx...%lx)\n", - range->base.itree.start, range->base.itree.last); + xe_svm_range_start(range), xe_svm_range_end(range) - 1); pt_op->vma = NULL; pt_op->bind = true; @@ -1887,8 +1887,8 @@ static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile, pt_op->num_entries, true); xe_pt_update_ops_rfence_interval(pt_update_ops, - range->base.itree.start, - range->base.itree.last + 1); + xe_svm_range_start(range), + xe_svm_range_end(range)); ++pt_update_ops->current_op; pt_update_ops->needs_svm_lock = true; @@ -1983,7 +1983,7 @@ static int unbind_range_prepare(struct xe_vm *vm, vm_dbg(&vm->xe->drm, "Preparing unbind, with range [%lx...%lx)\n", - range->base.itree.start, range->base.itree.last); + xe_svm_range_start(range), xe_svm_range_end(range) - 1); pt_op->vma = XE_INVALID_VMA; pt_op->bind = false; @@ -1994,8 +1994,8 @@ static int unbind_range_prepare(struct xe_vm *vm, xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, range->base.itree.start, - range->base.itree.last + 1); + xe_pt_update_ops_rfence_interval(pt_update_ops, xe_svm_range_start(range), + xe_svm_range_end(range)); ++pt_update_ops->current_op; pt_update_ops->needs_svm_lock = true; pt_update_ops->needs_invalidation |= xe_vm_has_scratch(vm) || -- cgit v1.2.3 From 5fa20ff843c691cfefd7dcd3d38fb2158566cec2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 21 Oct 2025 15:45:55 -0700 Subject: drm/xe/xe3p_xpc: Treat all PSMI MCR ranges as "INSTANCE0" Early versions of the B-spec originally indicated that Xe3p_XPC had two ranges of PSMI registers requiring MCR steering (one starting at 0xB500, one starting at 0xB600), and that reads of registers in these ranges required different grpid values to ensure that a non-terminated value is obtained. A late-breaking spec update has simplified this; both ranges can be safely steered to grpid=0 for reads. Drop the "PSMI19" replication type and related code, and consolidate both register ranges into a single entry in the "INSTANCE0" steering table. 
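For illustration, the table match itself is a plain linear walk over [start, end] pairs. A minimal standalone sketch (range_covers() is a hypothetical helper, not an xe API; the struct mirrors the xe_mmio_range entries in the diff below):

    #include <stdbool.h>
    #include <stdint.h>

    struct xe_mmio_range {
            uint32_t start;
            uint32_t end;
    };

    /* Tables are terminated by a zeroed sentinel entry (the trailing {}). */
    static bool range_covers(const struct xe_mmio_range *table, uint32_t reg)
    {
            for (; table->start || table->end; table++)
                    if (reg >= table->start && reg <= table->end)
                            return true;
            return false;
    }

    int main(void)
    {
            const struct xe_mmio_range instance0[] = {
                    { 0x00B500, 0x00B6FF }, /* PSMI, both former ranges */
                    { 0x00C800, 0x00CFFF }, /* GAMCTRL */
                    { 0x00F000, 0x00F0FF }, /* GAMCTRL */
                    {}
            };

            /* 0xB520 (old PSMI19 range) and 0xB610 (old PSMI0 range) now
             * hit the same entry and are steered to grpid=0, instanceid=0. */
            return range_covers(instance0, 0xB520) &&
                   range_covers(instance0, 0xB610) ? 0 : 1;
    }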
Bspec: 74418 Fixes: be614ea19dad ("drm/xe/xe3p_xpc: Add MCR steering") Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251021224556.437970-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_gt_mcr.c | 15 +-------------- drivers/gpu/drm/xe/xe_gt_types.h | 8 -------- 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 81ecd9382635..0b4b617d413b 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -268,13 +268,8 @@ static const struct xe_mmio_range xe3p_xpc_gam_grp1_steering_table[] = { {}, }; -static const struct xe_mmio_range xe3p_xpc_psmi_grp19_steering_table[] = { - { 0x00B500, 0x00B5FF }, - {}, -}; - static const struct xe_mmio_range xe3p_xpc_instance0_steering_table[] = { - { 0x00B600, 0x00B6FF }, /* PSMI0 */ + { 0x00B500, 0x00B6FF }, /* PSMI */ { 0x00C800, 0x00CFFF }, /* GAMCTRL */ { 0x00F000, 0x00F0FF }, /* GAMCTRL */ {}, @@ -452,12 +447,6 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt) gt->steering[SQIDI_PSMI].instance_target = select & 0x1; } -static void init_steering_psmi(struct xe_gt *gt) -{ - gt->steering[PSMI19].group_target = 19; - gt->steering[PSMI19].instance_target = 0; -} - static void init_steering_gam1(struct xe_gt *gt) { gt->steering[GAM1].group_target = 1; @@ -474,7 +463,6 @@ static const struct { [DSS] = { "DSS / XeCore", init_steering_dss }, [OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm }, [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi }, - [PSMI19] = { "PSMI[19]", init_steering_psmi }, [GAM1] = { "GAMWKRS / STLB / GAMREQSTRM", init_steering_gam1 }, [INSTANCE0] = { "INSTANCE 0", NULL }, [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, @@ -524,7 +512,6 @@ void xe_gt_mcr_init_early(struct xe_gt *gt) gt->steering[DSS].ranges = xe3p_xpc_xecore_steering_table; gt->steering[GAM1].ranges = xe3p_xpc_gam_grp1_steering_table; gt->steering[INSTANCE0].ranges = xe3p_xpc_instance0_steering_table; - gt->steering[PSMI19].ranges = xe3p_xpc_psmi_grp19_steering_table; } else if (GRAPHICS_VER(xe) >= 20) { gt->steering[DSS].ranges = xe2lpg_dss_steering_table; gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index d93faa1eedef..6e9c84b33b60 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -72,14 +72,6 @@ enum xe_steering_type { OADDRM, SQIDI_PSMI, - /* - * The bspec lists multiple ranges as "PSMI," but the different - * ranges with that label have different grpid steering values so we - * treat them independently in code. Note that the ranges with grpid=0 - * are included in the INSTANCE0 group above. - */ - PSMI19, - /* * Although most GAM ranges must be steered to (0,0) and thus use the * INSTANCE0 type farther down, some platforms have special rules -- cgit v1.2.3 From 6d5511e56b2d6474878cb7be44f5abb894842ef3 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 21 Oct 2025 15:45:56 -0700 Subject: drm/xe/xe3p_xpc: Add MCR steering for NODE and L3BANK ranges The bspec was originally missing the information related to steering of L3-related ranges. Now that a late-breaking spec update has added the necessary information, implement the steering rules in the code. Note that the sole L3BANK range is the same as the one used on Xe_LPG, so we can re-use the existing table for that MCR type. 
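As a worked example of the new derivation (a standalone sketch with a made-up fuse value; the real code takes first_bank from gt->fuse_topo.l3_bank_mask):

    #include <stdio.h>

    int main(void)
    {
            const unsigned int banks_per_node = 4;
            unsigned int first_bank = 6; /* hypothetical: first enabled L3 bank */
            unsigned int node = first_bank / banks_per_node; /* node 1 */

            /* L3BANK ranges: node in grpid, bank-within-node in instanceid */
            printf("L3BANK: grpid=%u instanceid=%u\n",
                   node, first_bank % banks_per_node); /* 1, 2 */

            /* NODE ranges: node number split across grpid and instanceid */
            printf("NODE:   grpid=%u instanceid=%u\n",
                   node >> 1, node & 1); /* 0, 1 */

            return 0;
    }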
Bspec: 74418 Fixes: be614ea19dad ("drm/xe/xe3p_xpc: Add MCR steering") Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20251021224556.437970-3-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_gt_mcr.c | 26 ++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_gt_topology.c | 7 +++++++ drivers/gpu/drm/xe/xe_gt_topology.h | 2 ++ drivers/gpu/drm/xe/xe_gt_types.h | 1 + 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 0b4b617d413b..164010860664 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -268,6 +268,12 @@ static const struct xe_mmio_range xe3p_xpc_gam_grp1_steering_table[] = { {}, }; +static const struct xe_mmio_range xe3p_xpc_node_steering_table[] = { + { 0x00B000, 0x00B0FF }, + { 0x00D880, 0x00D8FF }, + {}, +}; + static const struct xe_mmio_range xe3p_xpc_instance0_steering_table[] = { { 0x00B500, 0x00B6FF }, /* PSMI */ { 0x00C800, 0x00CFFF }, /* GAMCTRL */ @@ -277,9 +283,22 @@ static const struct xe_mmio_range xe3p_xpc_instance0_steering_table[] = { static void init_steering_l3bank(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); struct xe_mmio *mmio = >->mmio; - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + if (GRAPHICS_VER(xe) >= 35) { + unsigned int first_bank = xe_l3_bank_mask_ffs(gt->fuse_topo.l3_bank_mask); + const int banks_per_node = 4; + unsigned int node = first_bank / banks_per_node; + + /* L3BANK ranges place node in grpID, bank in instanceid */ + gt->steering[L3BANK].group_target = node; + gt->steering[L3BANK].instance_target = first_bank % banks_per_node; + + /* NODE ranges split the node across grpid and instanceid */ + gt->steering[NODE].group_target = node >> 1; + gt->steering[NODE].instance_target = node & 1; + } else if (GRAPHICS_VERx100(xe) >= 1270) { u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, xe_mmio_read32(mmio, MIRROR_FUSE3)); u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK, @@ -292,7 +311,7 @@ static void init_steering_l3bank(struct xe_gt *gt) gt->steering[L3BANK].group_target = __ffs(mslice_mask); gt->steering[L3BANK].instance_target = bank_mask & BIT(0) ? 
0 : 2; - } else if (gt_to_xe(gt)->info.platform == XE_DG2) { + } else if (xe->info.platform == XE_DG2) { u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, xe_mmio_read32(mmio, MIRROR_FUSE3)); u32 bank = __ffs(mslice_mask) * 8; @@ -458,6 +477,7 @@ static const struct { void (*init)(struct xe_gt *gt); } xe_steering_types[] = { [L3BANK] = { "L3BANK", init_steering_l3bank }, + [NODE] = { "NODE", NULL }, /* initialized by l3bank init */ [MSLICE] = { "MSLICE", init_steering_mslice }, [LNCF] = { "LNCF", NULL }, /* initialized by mslice init */ [DSS] = { "DSS / XeCore", init_steering_dss }, @@ -512,6 +532,8 @@ void xe_gt_mcr_init_early(struct xe_gt *gt) gt->steering[DSS].ranges = xe3p_xpc_xecore_steering_table; gt->steering[GAM1].ranges = xe3p_xpc_gam_grp1_steering_table; gt->steering[INSTANCE0].ranges = xe3p_xpc_instance0_steering_table; + gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table; + gt->steering[NODE].ranges = xe3p_xpc_node_steering_table; } else if (GRAPHICS_VER(xe) >= 20) { gt->steering[DSS].ranges = xe2lpg_dss_steering_table; gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table; diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 1e0516ba7422..bd5260221d8d 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -309,6 +309,13 @@ xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum) return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); } +/* Used to obtain the index of the first L3 bank. */ +unsigned int +xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask) +{ + return find_first_bit(mask, XE_MAX_L3_BANK_MASK_BITS); +} + /** * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant * @gt: GT to check diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index 3ff40f44bf2a..162d603c9b81 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -40,6 +40,8 @@ xe_gt_topology_mask_last_dss(const xe_dss_mask_t mask) unsigned int xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum); +unsigned int +xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask); bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 6e9c84b33b60..0b525643a048 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -66,6 +66,7 @@ struct xe_mmio_range { */ enum xe_steering_type { L3BANK, + NODE, MSLICE, LNCF, DSS, -- cgit v1.2.3 From 0790925dadad0997580df6e32cdccd54316807f2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 22 Oct 2025 19:10:54 +0300 Subject: drm/{i915,xe}/fbdev: add intel_fbdev_fb_pitch_align() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For reasons still unknown, xe appears to require a stride alignment of XE_PAGE_SIZE, and using 64 leads to sporadic failures. Go back to having separate stride alignment for i915 and xe, until the issue is root caused. 
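To make the difference concrete, a small standalone sketch of the two alignment policies (the values are illustrative; ALIGN is the usual round-up macro and 4096 stands in for XE_PAGE_SIZE):

    #include <stdio.h>

    #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
            unsigned int width = 1366, cpp = 4; /* hypothetical XR24 framebuffer */
            unsigned int stride = width * cpp;  /* 5464 bytes */

            printf("64-byte alignment (i915): %u\n", ALIGN(stride, 64u));   /* 5504 */
            printf("page alignment (xe):      %u\n", ALIGN(stride, 4096u)); /* 8192 */

            return 0;
    }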
v2: Add FIXME comment, reference issue with Link (Ville) Cc: Ville Syrjälä Cc: Jouni Högander Cc: Maarten Lankhorst Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6220 Fixes: 4a36b339a14a ("drm/xe/fbdev: use the same 64-byte stride alignment as i915") Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/ae51d1e224048bdc87bf7a56d8f5ebd0fbb6a383.1756931441.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Link: https://lore.kernel.org/r/20251022161054.708388-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 5 +++++ drivers/gpu/drm/i915/display/intel_fbdev_fb.h | 3 +++ drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 11 +++++++++++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 51d3d87caf43..d5c001761aa0 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -218,7 +218,7 @@ static void intel_fbdev_fill_mode_cmd(struct drm_fb_helper_surface_size *sizes, mode_cmd->width = sizes->surface_width; mode_cmd->height = sizes->surface_height; - mode_cmd->pitches[0] = ALIGN(mode_cmd->width * DIV_ROUND_UP(sizes->surface_bpp, 8), 64); + mode_cmd->pitches[0] = intel_fbdev_fb_pitch_align(mode_cmd->width * DIV_ROUND_UP(sizes->surface_bpp, 8)); mode_cmd->pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); mode_cmd->modifier[0] = DRM_FORMAT_MOD_LINEAR; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index 56b145841473..0838fdd37254 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -10,6 +10,11 @@ #include "i915_drv.h" #include "intel_fbdev_fb.h" +u32 intel_fbdev_fb_pitch_align(u32 stride) +{ + return ALIGN(stride, 64); +} + struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size) { struct drm_i915_private *dev_priv = to_i915(drm); diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h index 1fa44ed28543..fd0b3775dc1f 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h @@ -6,12 +6,15 @@ #ifndef __INTEL_FBDEV_FB_H__ #define __INTEL_FBDEV_FB_H__ +#include + struct drm_device; struct drm_gem_object; struct drm_mode_fb_cmd2; struct fb_info; struct i915_vma; +u32 intel_fbdev_fb_pitch_align(u32 stride); struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size); void intel_fbdev_fb_bo_destroy(struct drm_gem_object *obj); int intel_fbdev_fb_fill_info(struct drm_device *drm, struct fb_info *info, diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 35a5b07eeba4..83eaf9263c6b 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -12,6 +12,17 @@ #include +/* + * FIXME: There shouldn't be any reason to have XE_PAGE_SIZE stride + * alignment. The same 64 as i915 uses should be fine, and we shouldn't need to + * have driver specific values. However, dropping the stride alignment to 64 + * leads to underflowing the bo pin count in the atomic cleanup work. 
+ */ +u32 intel_fbdev_fb_pitch_align(u32 stride) +{ + return ALIGN(stride, XE_PAGE_SIZE); +} + struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size) { struct xe_device *xe = to_xe_device(drm); -- cgit v1.2.3 From aa883719281edcc929df654a5b6b1e98e272cb00 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:17 +0300 Subject: drm/i915/vrr: Fix intel_vrr_always_use_vrr_tg()==true on TGL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On TGL the hardware always needs TRANS_VBLANK.VBLANK_START to be programmed with VACTIVE+SCL. Make it so. The current way of programming it with crtc_vblank_start only works for the legacy timing generator, as there the delayed vblank does happen exactly at VACTIVE+SCL. But if one tries to change intel_vrr_always_use_vrr_tg() to always use the VRR timing generator on TGL, crtc_vblank_start will point to the VRR timing generator's delayed vblank, which may not match VACTIVE+SCL. Fortunately the state checker caught the issue right away when I tried intel_vrr_always_use_vrr_tg()==true on TGL. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-2-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_display.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a8b4619de347..09d3eb422ad4 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2631,6 +2631,9 @@ static void intel_set_transcoder_timings(const struct intel_crtc_state *crtc_sta * to make it stand out in register dumps. */ crtc_vblank_start = 1; + } else if (DISPLAY_VER(display) == 12) { + /* VBLANK_START - VACTIVE defines SCL on TGL */ + crtc_vblank_start = crtc_vdisplay + crtc_state->set_context_latency; } if (DISPLAY_VER(display) >= 4) @@ -2721,6 +2724,9 @@ static void intel_set_transcoder_timings_lrr(const struct intel_crtc_state *crtc * to make it stand out in register dumps. */ crtc_vblank_start = 1; + } else if (DISPLAY_VER(display) == 12) { + /* VBLANK_START - VACTIVE defines SCL on TGL */ + crtc_vblank_start = crtc_vdisplay + crtc_state->set_context_latency; } /* -- cgit v1.2.3 From d239335e3ccd6df1fa952195c1ba1b94794ef8cb Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:18 +0300 Subject: drm/i915/lrr: Include SCL in lrr_params_changed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If SCL is changing we need to take the LRR codepath to update it during a fastset. Account for that in lrr_params_changed(). The current code will only notice the SCL change if the position of the delayed vblank also changes. But that might not happen when using the VRR timing generator because the delayed vblank is then defined by the guardband instead of the SCL.
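A minimal standalone sketch of that failure mode, with made-up numbers (the struct models only the fields the pre-patch predicate compares):

    #include <stdbool.h>
    #include <stdio.h>

    struct timings {
            int crtc_vblank_start, crtc_vblank_end, crtc_vtotal;
            int set_context_latency; /* SCL */
    };

    /* the pre-patch predicate: looks only at the vblank/vtotal geometry */
    static bool lrr_params_changed_old(const struct timings *o, const struct timings *n)
    {
            return o->crtc_vblank_start != n->crtc_vblank_start ||
                   o->crtc_vblank_end != n->crtc_vblank_end ||
                   o->crtc_vtotal != n->crtc_vtotal;
    }

    int main(void)
    {
            /* With the VRR TG the delayed vblank sits at vmax - guardband,
             * so the geometry stays put even though SCL goes from 1 to 2. */
            struct timings o = { 1488, 1493, 1525, 1 };
            struct timings n = { 1488, 1493, 1525, 2 };

            /* prints 0: the SCL-only change would be missed */
            printf("%d\n", lrr_params_changed_old(&o, &n));

            return 0;
    }

The patched predicate below also compares set_context_latency, so the same input now forces the LRR update.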
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-3-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_display.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 09d3eb422ad4..490b4f2907e1 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5711,12 +5711,16 @@ static int intel_modeset_checks(struct intel_atomic_state *state) return 0; } -static bool lrr_params_changed(const struct drm_display_mode *old_adjusted_mode, - const struct drm_display_mode *new_adjusted_mode) +static bool lrr_params_changed(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) { + const struct drm_display_mode *old_adjusted_mode = &old_crtc_state->hw.adjusted_mode; + const struct drm_display_mode *new_adjusted_mode = &new_crtc_state->hw.adjusted_mode; + return old_adjusted_mode->crtc_vblank_start != new_adjusted_mode->crtc_vblank_start || old_adjusted_mode->crtc_vblank_end != new_adjusted_mode->crtc_vblank_end || - old_adjusted_mode->crtc_vtotal != new_adjusted_mode->crtc_vtotal; + old_adjusted_mode->crtc_vtotal != new_adjusted_mode->crtc_vtotal || + old_crtc_state->set_context_latency != new_crtc_state->set_context_latency; } static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_state, @@ -5742,8 +5746,7 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta &new_crtc_state->dp_m_n)) new_crtc_state->update_m_n = false; - if (!lrr_params_changed(&old_crtc_state->hw.adjusted_mode, - &new_crtc_state->hw.adjusted_mode)) + if (!lrr_params_changed(old_crtc_state, new_crtc_state)) new_crtc_state->update_lrr = false; if (intel_crtc_needs_modeset(new_crtc_state)) -- cgit v1.2.3 From ec5fd6e754ca6dbce76aa2e5a532183ae1cc299b Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:19 +0300 Subject: drm/i915: Remove the "vblank delay" state dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "vblank delay" we are including in the crtc state dump is meaningful only when running with fixed refresh rate timings. With VRR timings one has to look at the VRR state to figure out the same thing. Since we already dump the position of the delayed vblank for both fixed refresh rate and VRR timings, this "vblank delay" thing seems pretty much pointless now. Get rid of it. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-4-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_crtc_state_dump.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index 23e25e97d060..e6f300dbb5ee 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -289,9 +289,7 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config, drm_printf(&p, "scanline offset: %d\n", intel_crtc_scanline_offset(pipe_config)); - drm_printf(&p, "vblank delay: %d, framestart delay: %d, MSA timing delay: %d set context latency: %d\n", - pipe_config->hw.adjusted_mode.crtc_vblank_start - - pipe_config->hw.adjusted_mode.crtc_vdisplay, + drm_printf(&p, "framestart delay: %d, MSA timing delay: %d, set context latency: %d\n", pipe_config->framestart_delay, pipe_config->msa_timing_delay, pipe_config->set_context_latency); -- cgit v1.2.3 From a5fac6761c033347d68ed088d623ad03bb97d5e3 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:20 +0300 Subject: drm/i915/vrr: Compute fixed refresh rate timings the same way as CMRR timings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify the VRR timing computation stuff a bit by having both the fixed refresh rate and CMRR cases assign the crtc_state->vrr stuff in exactly the same way. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-5-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 92fb72b56f16..510dc199376f 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -305,12 +305,10 @@ void intel_vrr_set_fixed_rr_timings(const struct intel_crtc_state *crtc_state) static void intel_vrr_compute_fixed_rr_timings(struct intel_crtc_state *crtc_state) { - /* - * For fixed rr, vmin = vmax = flipline. - * vmin is already set to crtc_vtotal set vmax and flipline the same. - */ + /* For fixed rr, vmin = vmax = flipline */ crtc_state->vrr.vmax = crtc_state->hw.adjusted_mode.crtc_vtotal; - crtc_state->vrr.flipline = crtc_state->hw.adjusted_mode.crtc_vtotal; + crtc_state->vrr.vmin = crtc_state->vrr.vmax; + crtc_state->vrr.flipline = crtc_state->vrr.vmin; } static -- cgit v1.2.3 From 4082842e394da3dfe67bc14e2d4eb99ba749623e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:21 +0300 Subject: drm/i915/vrr: Reorganize intel_vrr_compute_cmrr_timings() a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the cmrr.enable assignment next to the mode_flags assignment to keep things in a bit more logical order in intel_vrr_compute_cmrr_timings().
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-6-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 510dc199376f..01cb9cfe08e1 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -220,7 +220,6 @@ cmrr_get_vtotal(struct intel_crtc_state *crtc_state, bool video_mode_required) static void intel_vrr_compute_cmrr_timings(struct intel_crtc_state *crtc_state) { - crtc_state->cmrr.enable = true; /* * TODO: Compute precise target refresh rate to determine * if video_mode_required should be true. Currently set to */ crtc_state->vrr.vmax = cmrr_get_vtotal(crtc_state, false); crtc_state->vrr.vmin = crtc_state->vrr.vmax; crtc_state->vrr.flipline = crtc_state->vrr.vmin; + + crtc_state->cmrr.enable = true; crtc_state->mode_flags |= I915_MODE_FLAG_VRR; } -- cgit v1.2.3 From 8a553374db132ef209f528678fac04818c8db121 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:22 +0300 Subject: drm/i915/vrr: Move crtc_state->vrr.{vmin,vmax} update into intel_vrr_compute_vrr_timings() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The way intel_vrr_compute_*_timings() works is rather confusing. First intel_vrr_compute_config() assigns the computed vmin/vmax into crtc_state->vrr.{vmin,vmax}, and then either intel_vrr_compute_vrr_timings() leaves them untouched or intel_vrr_compute_{cmrr,fixed_rr}_timings() overwrite them with something else. Clean this up by moving all crtc_state->vrr.{vmin,vmax} assignments into intel_vrr_compute_*_timings().
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-7-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 01cb9cfe08e1..9179ad53a2e7 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -235,8 +235,13 @@ void intel_vrr_compute_cmrr_timings(struct intel_crtc_state *crtc_state) } static -void intel_vrr_compute_vrr_timings(struct intel_crtc_state *crtc_state) +void intel_vrr_compute_vrr_timings(struct intel_crtc_state *crtc_state, + int vmin, int vmax) { + crtc_state->vrr.vmax = vmax; + crtc_state->vrr.vmin = vmin; + crtc_state->vrr.flipline = crtc_state->vrr.vmin; + crtc_state->vrr.enable = true; crtc_state->mode_flags |= I915_MODE_FLAG_VRR; } @@ -381,13 +386,8 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, vmax = vmin; } - crtc_state->vrr.vmin = vmin; - crtc_state->vrr.vmax = vmax; - - crtc_state->vrr.flipline = crtc_state->vrr.vmin; - if (crtc_state->uapi.vrr_enabled && vmin < vmax) - intel_vrr_compute_vrr_timings(crtc_state); + intel_vrr_compute_vrr_timings(crtc_state, vmin, vmax); else if (is_cmrr_frac_required(crtc_state) && is_edp) intel_vrr_compute_cmrr_timings(crtc_state); else -- cgit v1.2.3 From 291119eb180e2cc6a3517e33a6327296ad027321 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:23 +0300 Subject: drm/i915/vrr: Move compute_fixed_rr_timings() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relocate intel_vrr_compute_fixed_rr_timings() next to its VRR and CMRR counterparts. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-8-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 9179ad53a2e7..99e10943368d 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -246,6 +246,15 @@ void intel_vrr_compute_vrr_timings(struct intel_crtc_state *crtc_state, crtc_state->mode_flags |= I915_MODE_FLAG_VRR; } +static +void intel_vrr_compute_fixed_rr_timings(struct intel_crtc_state *crtc_state) +{ + /* For fixed rr, vmin = vmax = flipline */ + crtc_state->vrr.vmax = crtc_state->hw.adjusted_mode.crtc_vtotal; + crtc_state->vrr.vmin = crtc_state->vrr.vmax; + crtc_state->vrr.flipline = crtc_state->vrr.vmin; +} + static int intel_vrr_hw_value(const struct intel_crtc_state *crtc_state, int value) { @@ -308,15 +317,6 @@ void intel_vrr_set_fixed_rr_timings(const struct intel_crtc_state *crtc_state) intel_vrr_fixed_rr_hw_flipline(crtc_state) - 1); } -static -void intel_vrr_compute_fixed_rr_timings(struct intel_crtc_state *crtc_state) -{ - /* For fixed rr, vmin = vmax = flipline */ - crtc_state->vrr.vmax = crtc_state->hw.adjusted_mode.crtc_vtotal; - crtc_state->vrr.vmin = crtc_state->vrr.vmax; - crtc_state->vrr.flipline = crtc_state->vrr.vmin; -} - static int intel_vrr_compute_vmin(struct intel_crtc_state *crtc_state) { -- cgit v1.2.3 From 909cc33702d7e8106ea9949cbe2561fe9e35d727 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:24 +0300 Subject: drm/i915/vrr: Extract intel_vrr_set_vrr_timings() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract intel_vrr_set_vrr_timings() as the counterpart to intel_vrr_set_fixed_rr_timings(). 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-9-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 99e10943368d..b2f139addc8b 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -686,20 +686,28 @@ static int intel_vrr_hw_flipline(const struct intel_crtc_state *crtc_state) return intel_vrr_hw_value(crtc_state, crtc_state->vrr.flipline); } -void intel_vrr_enable(const struct intel_crtc_state *crtc_state) +static void intel_vrr_set_vrr_timings(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - if (!crtc_state->vrr.enable) - return; - intel_de_write(display, TRANS_VRR_VMIN(display, cpu_transcoder), intel_vrr_hw_vmin(crtc_state) - 1); intel_de_write(display, TRANS_VRR_VMAX(display, cpu_transcoder), intel_vrr_hw_vmax(crtc_state) - 1); intel_de_write(display, TRANS_VRR_FLIPLINE(display, cpu_transcoder), intel_vrr_hw_flipline(crtc_state) - 1); +} + +void intel_vrr_enable(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + + if (!crtc_state->vrr.enable) + return; + + intel_vrr_set_vrr_timings(crtc_state); intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), TRANS_PUSH_EN); -- cgit v1.2.3 From 587db4b31094972ad40a429cd8994c8bdcf443b8 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:25 +0300 Subject: drm/i915/vrr: Avoid redundant TRANS_PUSH write in intel_vrr_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We keep TRANS_PUSH_EN always set for always_use_vrr_tg() platforms, so there is no need to write it again in intel_vrr_enable().
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-10-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index b2f139addc8b..6e8f8e673312 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -709,12 +709,12 @@ void intel_vrr_enable(const struct intel_crtc_state *crtc_state) intel_vrr_set_vrr_timings(crtc_state); - intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), - TRANS_PUSH_EN); - if (!intel_vrr_always_use_vrr_tg(display)) { intel_vrr_set_db_point_and_transmission_line(crtc_state); + intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), + TRANS_PUSH_EN); + if (crtc_state->cmrr.enable) { intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), VRR_CTL_VRR_ENABLE | VRR_CTL_CMRR_ENABLE | -- cgit v1.2.3 From 43531282529d769b5df6552cf3ba25fb6c5964b2 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:26 +0300 Subject: drm/i915/vrr: Move EMP_AS_SDP_TL write into intel_vrr_set_transcoder_timings() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EMP_AS_SDL_TL replaces the TRANS_VRR_VSYNC for the purposes of setting the AS SDP transmission line. Move the EMP_AS_SDL_TL into intel_vrr_set_transcoder_timings() since that's where we write TRANS_VRR_VSYNC as well. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-11-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 35 +++++++++++--------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 6e8f8e673312..562a5feadaab 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -571,6 +571,18 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) TRANS_VRR_VSYNC(display, cpu_transcoder), VRR_VSYNC_END(crtc_state->vrr.vsync_end) | VRR_VSYNC_START(crtc_state->vrr.vsync_start)); + + /* + * For BMG and LNL+ onwards the EMP_AS_SDP_TL is used for programming + * double buffering point and transmission line for VRR packets for + * HDMI2.1/DP/eDP/DP->HDMI2.1 PCON. + * Since currently we support VRR only for DP/eDP, so this is programmed + * to for Adaptive Sync SDP to Vsync start. + */ + if (DISPLAY_VERx100(display) == 1401 || DISPLAY_VER(display) >= 20) + intel_de_write(display, + EMP_AS_SDP_TL(display, cpu_transcoder), + EMP_AS_SDP_DB_TL(crtc_state->vrr.vsync_start)); } void intel_vrr_send_push(struct intel_dsb *dsb, @@ -649,25 +661,6 @@ bool intel_vrr_always_use_vrr_tg(struct intel_display *display) return false; } -static -void intel_vrr_set_db_point_and_transmission_line(const struct intel_crtc_state *crtc_state) -{ - struct intel_display *display = to_intel_display(crtc_state); - enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - - /* - * For BMG and LNL+ onwards the EMP_AS_SDP_TL is used for programming - * double buffering point and transmission line for VRR packets for - * HDMI2.1/DP/eDP/DP->HDMI2.1 PCON. - * Since currently we support VRR only for DP/eDP, so this is programmed - * to for Adaptive Sync SDP to Vsync start. 
- */ - if (DISPLAY_VERx100(display) == 1401 || DISPLAY_VER(display) >= 20) - intel_de_write(display, - EMP_AS_SDP_TL(display, cpu_transcoder), - EMP_AS_SDP_DB_TL(crtc_state->vrr.vsync_start)); -} - static int intel_vrr_hw_vmin(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); @@ -710,8 +703,6 @@ void intel_vrr_enable(const struct intel_crtc_state *crtc_state) intel_vrr_set_vrr_timings(crtc_state); if (!intel_vrr_always_use_vrr_tg(display)) { - intel_vrr_set_db_point_and_transmission_line(crtc_state); - intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), TRANS_PUSH_EN); @@ -773,8 +764,6 @@ void intel_vrr_transcoder_enable(const struct intel_crtc_state *crtc_state) intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), TRANS_PUSH_EN); - intel_vrr_set_db_point_and_transmission_line(crtc_state); - intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), VRR_CTL_VRR_ENABLE | trans_vrr_ctl(crtc_state)); } -- cgit v1.2.3 From 60de04226687ac35309142e0c8d5b34e7665711d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:27 +0300 Subject: drm/i915/vrr: Use trans_vrr_ctl() in intel_vrr_transcoder_disable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently intel_vrr_disable() writes TRANS_VRR_CTL() with trans_vrr_ctl(), whereas intel_vrr_transcoder_disable() always writes just a plain 0. Write trans_vrr_ctl() in both places to unify the code, allowing for more shared code in the future. Since the VRR timing generator will be disabled by the TRANS_VRR_CTL write it doesn't really matter what we write to the register (other than VRR_CTL_VRR_ENABLE that is). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-12-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 562a5feadaab..19b38ad77189 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -779,7 +779,8 @@ void intel_vrr_transcoder_disable(const struct intel_crtc_state *crtc_state) if (!intel_vrr_possible(crtc_state)) return; - intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), 0); + intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), + trans_vrr_ctl(crtc_state)); intel_vrr_wait_for_live_status_clear(display, cpu_transcoder); -- cgit v1.2.3 From c4b44d182030edd7acefbfb7124dcb10a775f697 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:28 +0300 Subject: drm/i915/vrr: Extract intel_vrr_tg_disable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we always disable the VRR timing generator the same way we can extract the duplicated code into a helper. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-13-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 42 +++++++++++++++----------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 19b38ad77189..3ed6a56fb779 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -692,6 +692,22 @@ static void intel_vrr_set_vrr_timings(const struct intel_crtc_state *crtc_state) intel_vrr_hw_flipline(crtc_state) - 1); } +static void intel_vrr_tg_disable(const struct intel_crtc_state *old_crtc_state) +{ + struct intel_display *display = to_intel_display(old_crtc_state); + enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; + + intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), + trans_vrr_ctl(old_crtc_state)); + + if (intel_de_wait_for_clear(display, + TRANS_VRR_STATUS(display, cpu_transcoder), + VRR_STATUS_VRR_EN_LIVE, 1000)) + drm_err(display->drm, "Timed out waiting for VRR live status to clear\n"); + + intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), 0); +} + void intel_vrr_enable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); @@ -717,29 +733,15 @@ void intel_vrr_enable(const struct intel_crtc_state *crtc_state) } } -static void intel_vrr_wait_for_live_status_clear(struct intel_display *display, - enum transcoder cpu_transcoder) -{ - if (intel_de_wait_for_clear(display, - TRANS_VRR_STATUS(display, cpu_transcoder), - VRR_STATUS_VRR_EN_LIVE, 1000)) - drm_err(display->drm, "Timed out waiting for VRR live status to clear\n"); -} - void intel_vrr_disable(const struct intel_crtc_state *old_crtc_state) { struct intel_display *display = to_intel_display(old_crtc_state); - enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; if (!old_crtc_state->vrr.enable) return; - if (!intel_vrr_always_use_vrr_tg(display)) { - intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), - trans_vrr_ctl(old_crtc_state)); - intel_vrr_wait_for_live_status_clear(display, cpu_transcoder); - intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), 0); - } + if (!intel_vrr_always_use_vrr_tg(display)) + intel_vrr_tg_disable(old_crtc_state); intel_vrr_set_fixed_rr_timings(old_crtc_state); } @@ -771,7 +773,6 @@ void intel_vrr_transcoder_enable(const struct intel_crtc_state *crtc_state) void intel_vrr_transcoder_disable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (!HAS_VRR(display)) return; @@ -779,12 +780,7 @@ void intel_vrr_transcoder_disable(const struct intel_crtc_state *crtc_state) if (!intel_vrr_possible(crtc_state)) return; - intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), - trans_vrr_ctl(crtc_state)); - - intel_vrr_wait_for_live_status_clear(display, cpu_transcoder); - - intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), 0); + intel_vrr_tg_disable(crtc_state); } bool intel_vrr_is_fixed_rr(const struct intel_crtc_state *crtc_state) -- cgit v1.2.3 From eaa81a600ae7e5c4d972f2d84aaa9ae8a6452aca Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:29 +0300 Subject: drm/i915/vrr: Extract intel_vrr_tg_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Extract the VRR timing generator enable into intel_vrr_tg_enable(), as a counterpart to intel_vrr_tg_disable(). Note that the CMRR part is probably broken, but so are other things in the CMRR implementation, and thus it is currently disabled. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-14-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 44 ++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 3ed6a56fb779..b49121b2676c 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -692,6 +692,28 @@ static void intel_vrr_set_vrr_timings(const struct intel_crtc_state *crtc_state) intel_vrr_hw_flipline(crtc_state) - 1); } +static void intel_vrr_tg_enable(const struct intel_crtc_state *crtc_state, + bool cmrr_enable) +{ + struct intel_display *display = to_intel_display(crtc_state); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + u32 vrr_ctl; + + intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), TRANS_PUSH_EN); + + vrr_ctl = VRR_CTL_VRR_ENABLE | trans_vrr_ctl(crtc_state); + + /* + * FIXME this might be broken as bspec seems to imply that + * even VRR_CTL_CMRR_ENABLE is armed by TRANS_CMRR_N_HI + * when enabling CMRR (but not when disabling CMRR?). + */ + if (cmrr_enable) + vrr_ctl |= VRR_CTL_CMRR_ENABLE; + + intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), vrr_ctl); +} + static void intel_vrr_tg_disable(const struct intel_crtc_state *old_crtc_state) { struct intel_display *display = to_intel_display(old_crtc_state); @@ -711,26 +733,14 @@ static void intel_vrr_tg_disable(const struct intel_crtc_state *old_crtc_state) void intel_vrr_enable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (!crtc_state->vrr.enable) return; intel_vrr_set_vrr_timings(crtc_state); - if (!intel_vrr_always_use_vrr_tg(display)) { - intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), - TRANS_PUSH_EN); - - if (crtc_state->cmrr.enable) { - intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), - VRR_CTL_VRR_ENABLE | VRR_CTL_CMRR_ENABLE | - trans_vrr_ctl(crtc_state)); - } else { - intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), - VRR_CTL_VRR_ENABLE | trans_vrr_ctl(crtc_state)); - } - } + if (!intel_vrr_always_use_vrr_tg(display)) + intel_vrr_tg_enable(crtc_state, crtc_state->cmrr.enable); } void intel_vrr_disable(const struct intel_crtc_state *old_crtc_state) @@ -763,11 +773,7 @@ void intel_vrr_transcoder_enable(const struct intel_crtc_state *crtc_state) return; } - intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), - TRANS_PUSH_EN); - - intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), - VRR_CTL_VRR_ENABLE | trans_vrr_ctl(crtc_state)); + intel_vrr_tg_enable(crtc_state, false); } void intel_vrr_transcoder_disable(const struct intel_crtc_state *crtc_state) -- cgit v1.2.3 From b10e7c92556c2688959d84b419dcb49a4d838b4d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:30 +0300 Subject: drm/i915/vrr: Disable VRR TG in intel_vrr_transcoder_disable() only on always_use_vrr_tg() platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding:
8bit Currently we always disable the VRR timing generator in intel_vrr_transcoder_disable(). But doing so on !always_use_vrr_tg() platforms is redundant since we've already disabled the VRR timing generator earlier in intel_vrr_disable(). Do the disable in intel_vrr_transcoder_disable() only on always_use_vrr_tg() platforms. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-15-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index b49121b2676c..d8fbbef1ae23 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -786,7 +786,8 @@ void intel_vrr_transcoder_disable(const struct intel_crtc_state *crtc_state) if (!intel_vrr_possible(crtc_state)) return; - intel_vrr_tg_disable(crtc_state); + if (intel_vrr_always_use_vrr_tg(display)) + intel_vrr_tg_disable(crtc_state); } bool intel_vrr_is_fixed_rr(const struct intel_crtc_state *crtc_state) -- cgit v1.2.3 From 9a78c6dfd1f2fb89b5a45dc914df471e159cec85 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:31 +0300 Subject: drm/i915/vrr: Always write TRANS_VRR_CTL in intel_vrr_set_transcoder_timings() on !always_use_vrr_tg() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, depending on vrr.enable, we may write TRANS_VRR_CTL from both intel_vrr_set_transcoder_timings() and intel_vrr_transcoder_enable() on !always_use_vrr_tg() platforms. Streamline this so that we just always write it from intel_vrr_set_transcoder_timings(), and never from intel_vrr_transcoder_enable(). The main benefit is that intel_vrr_transcoder_enable() becomes symmetric to intel_vrr_transcoder_disable().
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-16-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index d8fbbef1ae23..67b1ed606d8f 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -562,7 +562,7 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) intel_vrr_set_fixed_rr_timings(crtc_state); - if (!intel_vrr_always_use_vrr_tg(display) && !crtc_state->vrr.enable) + if (!intel_vrr_always_use_vrr_tg(display)) intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), trans_vrr_ctl(crtc_state)); @@ -759,7 +759,6 @@ void intel_vrr_disable(const struct intel_crtc_state *old_crtc_state) void intel_vrr_transcoder_enable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (!HAS_VRR(display)) return; @@ -767,13 +766,8 @@ void intel_vrr_transcoder_enable(const struct intel_crtc_state *crtc_state) if (!intel_vrr_possible(crtc_state)) return; - if (!intel_vrr_always_use_vrr_tg(display)) { - intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), - trans_vrr_ctl(crtc_state)); - return; - } - - intel_vrr_tg_enable(crtc_state, false); + if (intel_vrr_always_use_vrr_tg(display)) + intel_vrr_tg_enable(crtc_state, false); } void intel_vrr_transcoder_disable(const struct intel_crtc_state *crtc_state) -- cgit v1.2.3 From 065d28dd32edc4c721b73c2b260b656536976668 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:32 +0300 Subject: drm/i915/vrr: Remove redundant HAS_VRR() checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_vrr_transcoder_{enable,disable}() already check for intel_vrr_possible(), so the extra HAS_VRR() checks are redundant. Remove them. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-17-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 67b1ed606d8f..b64a54d22991 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -760,9 +760,6 @@ void intel_vrr_transcoder_enable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - if (!HAS_VRR(display)) - return; - if (!intel_vrr_possible(crtc_state)) return; @@ -774,9 +771,6 @@ void intel_vrr_transcoder_disable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - if (!HAS_VRR(display)) - return; - if (!intel_vrr_possible(crtc_state)) return; -- cgit v1.2.3 From 78ea8eb5b6235b3ef68fa0fb8ffe0b3b490baf38 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:33 +0300 Subject: drm/i915/vrr: Move HAS_VRR() check into intel_vrr_set_transcoder_timings() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce the clutter in hsw_configure_cpu_transcoder() a bit by moving the HAS_VRR() check into intel_vrr_set_transcoder_timings(). 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-18-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_display.c | 3 +-- drivers/gpu/drm/i915/display/intel_vrr.c | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 490b4f2907e1..2744f83bda2e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1581,8 +1581,7 @@ static void hsw_configure_cpu_transcoder(const struct intel_crtc_state *crtc_sta } intel_set_transcoder_timings(crtc_state); - if (HAS_VRR(display)) - intel_vrr_set_transcoder_timings(crtc_state); + intel_vrr_set_transcoder_timings(crtc_state); if (cpu_transcoder != TRANSCODER_EDP) intel_de_write(display, TRANS_MULT(display, cpu_transcoder), diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index b64a54d22991..29143dd092a8 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -534,6 +534,9 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + if (!HAS_VRR(display)) + return; + /* * This bit seems to have two meanings depending on the platform: * TGL: generate VRR "safe window" for DSB vblank waits -- cgit v1.2.3 From cbdf2a930b1756abd3e7747f8a8131934e5a6e66 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:34 +0300 Subject: drm/i915/vrr: s/crtc_state/old_crtc_state/ in intel_vrr_transcoder_disable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We generally use the 'old_crtc_state' in the disable functions to make it clear these generally get called when the hardware is still using the old crtc state rather than the new crtc state. Rename the intel_vrr_transcoder_disable() 'crtc_state' parameter to 'old_crtc_state' for consistency. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-19-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 29143dd092a8..71c5d8bf7557 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -770,15 +770,15 @@ void intel_vrr_transcoder_enable(const struct intel_crtc_state *crtc_state) intel_vrr_tg_enable(crtc_state, false); } -void intel_vrr_transcoder_disable(const struct intel_crtc_state *crtc_state) +void intel_vrr_transcoder_disable(const struct intel_crtc_state *old_crtc_state) { - struct intel_display *display = to_intel_display(crtc_state); + struct intel_display *display = to_intel_display(old_crtc_state); - if (!intel_vrr_possible(crtc_state)) + if (!intel_vrr_possible(old_crtc_state)) return; if (intel_vrr_always_use_vrr_tg(display)) - intel_vrr_tg_disable(crtc_state); + intel_vrr_tg_disable(old_crtc_state); } bool intel_vrr_is_fixed_rr(const struct intel_crtc_state *crtc_state) -- cgit v1.2.3 From 24a23b39fda914f841e6ff1fb0dc8e690ce03395 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:35 +0300 Subject: drm/i915/vrr: Nuke intel_vrr_vblank_exit_length() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we always populate crtc_state->vrr.guardband even on ICL/TGL intel_vrr_vblank_exit_length() has become rather pointless. Get rid of it. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-20-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 71c5d8bf7557..ba92e0a76855 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -143,10 +143,6 @@ static int intel_vrr_pipeline_full_to_guardband(const struct intel_crtc_state *c * * framestart_delay is programmable 1-4. */ -static int intel_vrr_vblank_exit_length(const struct intel_crtc_state *crtc_state) -{ - return crtc_state->vrr.guardband; -} int intel_vrr_vmin_vtotal(const struct intel_crtc_state *crtc_state) { @@ -161,12 +157,12 @@ int intel_vrr_vmax_vtotal(const struct intel_crtc_state *crtc_state) int intel_vrr_vmin_vblank_start(const struct intel_crtc_state *crtc_state) { - return intel_vrr_vmin_vtotal(crtc_state) - intel_vrr_vblank_exit_length(crtc_state); + return intel_vrr_vmin_vtotal(crtc_state) - crtc_state->vrr.guardband; } int intel_vrr_vmax_vblank_start(const struct intel_crtc_state *crtc_state) { - return intel_vrr_vmax_vtotal(crtc_state) - intel_vrr_vblank_exit_length(crtc_state); + return intel_vrr_vmax_vtotal(crtc_state) - crtc_state->vrr.guardband; } static bool -- cgit v1.2.3 From 0f4f31d79e557226334be5a47bb2e4acdcf1e923 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:36 +0300 Subject: drm/i915/vrr: Nuke intel_vrr_vmin_flipline() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that intel_vrr_flipline_offset() is completely hidden from the higher level VRR code, intel_vrr_vmin_flipline() has become rather pointless. Remove it. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-21-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index ba92e0a76855..8875e5fe86aa 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -108,11 +108,6 @@ static int intel_vrr_vmin_flipline_offset(struct intel_display *display) return DISPLAY_VER(display) < 13 ? 1 : 0; } -static int intel_vrr_vmin_flipline(const struct intel_crtc_state *crtc_state) -{ - return crtc_state->vrr.vmin; -} - static int intel_vrr_guardband_to_pipeline_full(const struct intel_crtc_state *crtc_state, int guardband) { @@ -147,7 +142,7 @@ static int intel_vrr_pipeline_full_to_guardband(const struct intel_crtc_state *c int intel_vrr_vmin_vtotal(const struct intel_crtc_state *crtc_state) { /* Min vblank actually determined by flipline */ - return intel_vrr_vmin_flipline(crtc_state); + return crtc_state->vrr.vmin; } int intel_vrr_vmax_vtotal(const struct intel_crtc_state *crtc_state) @@ -781,7 +776,7 @@ bool intel_vrr_is_fixed_rr(const struct intel_crtc_state *crtc_state) { return crtc_state->vrr.flipline && crtc_state->vrr.flipline == crtc_state->vrr.vmax && - crtc_state->vrr.flipline == intel_vrr_vmin_flipline(crtc_state); + crtc_state->vrr.flipline == crtc_state->vrr.vmin; } void intel_vrr_get_config(struct intel_crtc_state *crtc_state) -- cgit v1.2.3 From be5fc552b3dd0fe8fbe6231ed754be207b5df890 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:37 +0300 Subject: drm/i915/vrr: Update the intel_vrr_extra_vblank_delay() comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comment in intel_vrr_extra_vblank_delay() is a bit outdated now that we generally got rid of the "vblank delay" stuff. Update the comment to better describe the current state of things. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-22-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 8875e5fe86aa..c28491b9002a 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -87,10 +87,8 @@ static int intel_vrr_extra_vblank_delay(struct intel_display *display) /* * On ICL/TGL VRR hardware inserts one extra scanline * just after vactive, which pushes the vmin decision - * boundary ahead accordingly. We'll include the extra - * scanline in our vblank delay estimates to make sure - * that we never underestimate how long we have until - * the delayed vblank has passed. + * boundary ahead accordingly, and thus reduces the + * max guardband length by one scanline. */ return DISPLAY_VER(display) < 13 ?
1 : 0; } -- cgit v1.2.3 From 4b274b0b61ab2a529e5c22e9aa033f3028e639fc Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Oct 2025 21:50:38 +0300 Subject: drm/i915/vrr: Check HAS_VRR() first in intel_vrr_is_capable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no point in doing all the other checks in intel_vrr_is_capable() if the platform doesn't support VRR at all. Check HAS_VRR() before wasting time on the other checks. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251020185038.4272-23-ville.syrjala@linux.intel.com Reviewed-by: Ankit Nautiyal --- drivers/gpu/drm/i915/display/intel_vrr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index c28491b9002a..00cbc126fb36 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -25,6 +25,9 @@ bool intel_vrr_is_capable(struct intel_connector *connector) const struct drm_display_info *info = &connector->base.display_info; struct intel_dp *intel_dp; + if (!HAS_VRR(display)) + return false; + /* * DP Sink is capable of VRR video timings if * Ignore MSA bit is set in DPCD. @@ -49,8 +52,7 @@ bool intel_vrr_is_capable(struct intel_connector *connector) return false; } - return HAS_VRR(display) && - info->monitor_range.max_vfreq - info->monitor_range.min_vfreq > 10; + return info->monitor_range.max_vfreq - info->monitor_range.min_vfreq > 10; } bool intel_vrr_is_in_range(struct intel_connector *connector, int vrefresh) -- cgit v1.2.3 From fab36494f26c1818f0d02230f30aca45b4888848 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 24 Oct 2025 13:08:35 -0700 Subject: drm/xe/configfs: Drop MAX_GT_TYPE_CHARS constant Early revisions of commit 7abd69278bb5 ("drm/xe/configfs: Add attribute to disable GT types") used MAX_GT_TYPE_CHARS not only to size the constant name field, but also for some of the string matching logic. By the time the patch finally landed, the constant was no longer needed for parsing. Stop using it for the string field definition as well; this eliminates the risk that we forget to update the constant if we ever add a GT type name longer than seven characters.
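As a minimal illustration of the sizing pitfall being avoided (hypothetical table, not the actual xe_configfs.c code): a fixed-size field is only correct while the constant tracks the longest name, while a plain pointer has nothing to keep in sync:

	/* Sketch only: hypothetical names mirroring the pattern above. */
	#define MAX_TYPE_CHARS 7

	static const struct {
		char name[MAX_TYPE_CHARS + 1];	/* a name longer than 7 chars no
						 * longer fits; at best the
						 * compiler only warns */
		int type;
	} fixed_table[] = {
		{ .name = "primary", .type = 0 },
	};

	static const struct {
		const char *name;	/* length-independent, nothing to update */
		int type;
	} ptr_table[] = {
		{ .name = "primary", .type = 0 },
	};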
Suggested-by: Gustavo Sousa Reviewed-by: Gustavo Sousa Link: https://patch.msgid.link/20251024200834.1512329-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_configfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index c1419a270fa4..fad75650f3dd 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -301,7 +301,6 @@ struct engine_info { /* Some helpful macros to aid on the sizing of buffer allocation when parsing */ #define MAX_ENGINE_CLASS_CHARS 5 #define MAX_ENGINE_INSTANCE_CHARS 2 -#define MAX_GT_TYPE_CHARS 7 static const struct engine_info engine_info[] = { { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK, .engine_class = XE_ENGINE_CLASS_RENDER }, @@ -313,7 +312,7 @@ static const struct engine_info engine_info[] = { }; static const struct { - const char name[MAX_GT_TYPE_CHARS + 1]; + const char *name; enum xe_gt_type type; } gt_types[] = { { .name = "primary", .type = XE_GT_TYPE_MAIN }, -- cgit v1.2.3 From dd5d11b657120a8fc8b2a62784d5c1d2f2579acb Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Thu, 23 Oct 2025 17:44:54 +0530 Subject: drm/xe: Fix spelling and typos across Xe driver files Corrected various spelling mistakes and typos in multiple files under the Xe directory. These fixes improve clarity and maintain consistency in documentation. v2 - Replaced all instances of "XE" with "Xe" where it referred to the driver name - of -> for - Typical -> Typically v3 - Revert "Xe" to "XE" for macro prefix reference Signed-off-by: Sanjay Yadav Reviewed-by: Matthew Auld Reviewed-by: Stuart Summers Signed-off-by: Matthew Auld Link: https://patch.msgid.link/20251023121453.1182035-2-sanjay.kumar.yadav@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 4 ++-- drivers/gpu/drm/xe/xe_bo_doc.h | 8 ++++---- drivers/gpu/drm/xe/xe_configfs.c | 2 +- drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 8 ++++---- drivers/gpu/drm/xe/xe_exec.c | 2 +- drivers/gpu/drm/xe/xe_force_wake_types.h | 4 ++-- drivers/gpu/drm/xe/xe_gt_freq.c | 2 +- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 2 +- drivers/gpu/drm/xe/xe_guc_ads_types.h | 2 +- drivers/gpu/drm/xe/xe_guc_ct_types.h | 2 +- drivers/gpu/drm/xe/xe_guc_log_types.h | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 2 +- drivers/gpu/drm/xe/xe_map.h | 4 ++-- drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- drivers/gpu/drm/xe/xe_migrate_doc.h | 2 +- drivers/gpu/drm/xe/xe_pm.c | 2 +- drivers/gpu/drm/xe/xe_preempt_fence_types.h | 2 +- drivers/gpu/drm/xe/xe_range_fence.h | 4 ++-- drivers/gpu/drm/xe/xe_sched_job.c | 6 +++--- drivers/gpu/drm/xe/xe_sched_job.h | 12 ++++++------ drivers/gpu/drm/xe/xe_sched_job_types.h | 2 +- drivers/gpu/drm/xe/xe_svm.c | 2 +- drivers/gpu/drm/xe/xe_tlb_inval.h | 2 +- drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 4 ++-- drivers/gpu/drm/xe/xe_uc_fw_types.h | 6 +++--- drivers/gpu/drm/xe/xe_uc_types.h | 2 +- drivers/gpu/drm/xe/xe_validation.h | 6 +++--- drivers/gpu/drm/xe/xe_vm.c | 10 +++++----- drivers/gpu/drm/xe/xe_vm_doc.h | 8 ++++---- drivers/gpu/drm/xe/xe_vm_types.h | 4 ++-- 32 files changed, 63 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index ad0da473ee65..bbda20d7c089 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2121,7 +2121,7 @@ void xe_bo_free(struct xe_bo *bo) * if the function should allocate a new one. 
* @tile: The tile to select for migration of this bo, and the tile used for * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. - * @resv: Pointer to a locked shared reservation object to use fo this bo, + * @resv: Pointer to a locked shared reservation object to use for this bo, * or NULL for the xe_bo to use its own. * @bulk: The bulk move to use for LRU bumping, or NULL for external bos. * @size: The storage size to use for the bo. @@ -2651,7 +2651,7 @@ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, * @size: The storage size to use for the bo. * @type: The TTM buffer object type. * @flags: XE_BO_FLAG_ flags. - * @intr: Whether to execut any waits for backing store interruptible. + * @intr: Whether to execute any waits for backing store interruptible. * * Create a pinned and mapped bo. The bo will be external and not associated * with a VM. diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h index 25a884c64bf1..401e7dd26ef3 100644 --- a/drivers/gpu/drm/xe/xe_bo_doc.h +++ b/drivers/gpu/drm/xe/xe_bo_doc.h @@ -12,7 +12,7 @@ * BO management * ============= * - * TTM manages (placement, eviction, etc...) all BOs in XE. + * TTM manages (placement, eviction, etc...) all BOs in Xe. * * BO creation * =========== @@ -29,7 +29,7 @@ * a kernel BO (e.g. engine state, memory for page tables, etc...). These BOs * are typically mapped in the GGTT (any kernel BOs aside memory for page tables * are in the GGTT), are pinned (can't move or be evicted at runtime), have a - * vmap (XE can access the memory via xe_map layer) and have contiguous physical + * vmap (Xe can access the memory via xe_map layer) and have contiguous physical * memory. * * More details of why kernel BOs are pinned and contiguous below. @@ -40,7 +40,7 @@ * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All - * user BOs are evictable and user BOs are never pinned by XE. The allocation of + * user BOs are evictable and user BOs are never pinned by Xe. The allocation of * the backing store can be deferred from creation time until first use which is * either mmap, bind, or pagefault. * @@ -84,7 +84,7 @@ * ==================== * * All eviction (or in other words, moving a BO from one memory location to - * another) is routed through TTM with a callback into XE. + * another) is routed through TTM with a callback into Xe. * * Runtime eviction * ---------------- diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index fad75650f3dd..9f6251b1008b 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -27,7 +27,7 @@ * Overview * ======== * - * Configfs is a filesystem-based manager of kernel objects. XE KMD registers a + * Configfs is a filesystem-based manager of kernel objects. Xe KMD registers a * configfs subsystem called ``xe`` that creates a directory in the mounted * configfs directory. The user can create devices under this directory and * configure them as necessary. 
See Documentation/filesystems/configfs.rst for diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 5f6a412b571c..47f5391ad8e9 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -1217,7 +1217,7 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg) * * /sys/bus/pci/devices//survivability_mode * - * - Admin/userpsace consumer can use firmware flashing tools like fwupd to flash + * - Admin/userspace consumer can use firmware flashing tools like fwupd to flash * firmware and restore device to normal operation. */ diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 8f62ee7a73ac..dc17f63f9353 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -222,7 +222,7 @@ struct xe_tile { }; /** - * struct xe_device - Top level struct of XE device + * struct xe_device - Top level struct of Xe device */ struct xe_device { /** @drm: drm device */ @@ -245,9 +245,9 @@ struct xe_device { u32 media_verx100; /** @info.mem_region_mask: mask of valid memory regions */ u32 mem_region_mask; - /** @info.platform: XE platform enum */ + /** @info.platform: Xe platform enum */ enum xe_platform platform; - /** @info.subplatform: XE subplatform enum */ + /** @info.subplatform: Xe subplatform enum */ enum xe_subplatform subplatform; /** @info.devid: device ID */ u16 devid; @@ -661,7 +661,7 @@ struct xe_device { }; /** - * struct xe_file - file handle for XE driver + * struct xe_file - file handle for Xe driver */ struct xe_file { /** @xe: xe DEVICE **/ diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 0dc27476832b..521467d976f7 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -33,7 +33,7 @@ * - Binding at exec time * - Flow controlling the ring at exec time * - * In XE we avoid all of this complication by not allowing a BO list to be + * In Xe we avoid all of this complication by not allowing a BO list to be * passed into an exec, using the dma-buf implicit sync uAPI, have binds as * separate operations, and using the DRM scheduler to flow control the ring. * Let's deep dive on each of these. diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h index 899fbbcb3ea9..12d6e2367455 100644 --- a/drivers/gpu/drm/xe/xe_force_wake_types.h +++ b/drivers/gpu/drm/xe/xe_force_wake_types.h @@ -52,7 +52,7 @@ enum xe_force_wake_domains { }; /** - * struct xe_force_wake_domain - XE force wake domains + * struct xe_force_wake_domain - Xe force wake domains */ struct xe_force_wake_domain { /** @id: domain force wake id */ @@ -70,7 +70,7 @@ struct xe_force_wake_domain { }; /** - * struct xe_force_wake - XE force wake + * struct xe_force_wake - Xe force wake */ struct xe_force_wake { /** @gt: back pointers to GT */ diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 701349251bbc..e88f113226bc 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -36,7 +36,7 @@ * - act_freq: The actual resolved frequency decided by PCODE. * - cur_freq: The current one requested by GuC PC to the PCODE. * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. - * - rpa_freq: The Render Performance (RP) A level, which is the achiveable one. + * - rpa_freq: The Render Performance (RP) A level, which is the achievable one. 
* Calculated by PCODE at runtime based on multiple running conditions * - rpe_freq: The Render Performance (RP) E level, which is the efficient one. * Calculated by PCODE at runtime based on multiple running conditions diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index d0b102ab6ce8..4c73a077d314 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -738,7 +738,7 @@ static void vf_start_migration_recovery(struct xe_gt *gt) gt->sriov.vf.migration.recovery_queued = true; WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true); WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, true); - smp_wmb(); /* Ensure above writes visable before wake */ + smp_wmb(); /* Ensure above writes visible before wake */ xe_guc_ct_wake_waiters(>->uc.guc.ct); diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h index 70c132458ac3..48a8e092023f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h @@ -14,7 +14,7 @@ struct xe_bo; * struct xe_guc_ads - GuC additional data structures (ADS) */ struct xe_guc_ads { - /** @bo: XE BO for GuC ads blob */ + /** @bo: Xe BO for GuC ads blob */ struct xe_bo *bo; /** @golden_lrc_size: golden LRC size */ size_t golden_lrc_size; diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index 8b03b50313d9..09d7ff1ef42a 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -126,7 +126,7 @@ struct xe_fast_req_fence { * for the H2G and G2H requests sent and received through the buffers. */ struct xe_guc_ct { - /** @bo: XE BO for CT */ + /** @bo: Xe BO for CT */ struct xe_bo *bo; /** @lock: protects everything in CT layer */ struct mutex lock; diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h index b3d5c72ac752..02851b924aa4 100644 --- a/drivers/gpu/drm/xe/xe_guc_log_types.h +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h @@ -44,7 +44,7 @@ struct xe_guc_log_snapshot { struct xe_guc_log { /** @level: GuC log level */ u32 level; - /** @bo: XE BO for GuC log */ + /** @bo: Xe BO for GuC log */ struct xe_bo *bo; /** @stats: logging related stats */ struct { diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 0ef67d3523a7..d4ffdb71ef3d 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1920,7 +1920,7 @@ static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) } /* - * All of these functions are an abstraction layer which other parts of XE can + * All of these functions are an abstraction layer which other parts of Xe can * use to trap into the GuC backend. All of these functions, aside from init, * really shouldn't do much other than trap into the DRM scheduler which * synchronizes these operations. diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c index 6bf2103602f8..a80175c7c478 100644 --- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c @@ -207,7 +207,7 @@ static const struct xe_tlb_inval_ops guc_tlb_inval_ops = { * @guc: GuC object * @tlb_inval: TLB invalidation client * - * Inititialize GuC TLB invalidation by setting back pointer in TLB invalidation + * Initialize GuC TLB invalidation by setting back pointer in TLB invalidation * client to the GuC and setting GuC backend ops. 
*/ void xe_guc_tlb_inval_init_early(struct xe_guc *guc, diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h index f62e0c8b67ab..c44777125691 100644 --- a/drivers/gpu/drm/xe/xe_map.h +++ b/drivers/gpu/drm/xe/xe_map.h @@ -14,9 +14,9 @@ * DOC: Map layer * * All access to any memory shared with a device (both sysmem and vram) in the - * XE driver should go through this layer (xe_map). This layer is built on top + * Xe driver should go through this layer (xe_map). This layer is built on top * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory` - * and with extra hooks into the XE driver that allows adding asserts to memory + * and with extra hooks into the Xe driver that allows adding asserts to memory * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics). */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 921c9c1ea41f..56a5804726e9 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -2060,7 +2060,7 @@ err: * * Copy from an array dma addresses to a VRAM device physical address * - * Return: dma fence for migrate to signal completion on succees, ERR_PTR on + * Return: dma fence for migrate to signal completion on success, ERR_PTR on * failure */ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, @@ -2081,7 +2081,7 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, * * Copy from a VRAM device physical address to an array dma addresses * - * Return: dma fence for migrate to signal completion on succees, ERR_PTR on + * Return: dma fence for migrate to signal completion on success, ERR_PTR on * failure */ struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h index 63c7d67b5b62..c082bc0b7068 100644 --- a/drivers/gpu/drm/xe/xe_migrate_doc.h +++ b/drivers/gpu/drm/xe/xe_migrate_doc.h @@ -9,7 +9,7 @@ /** * DOC: Migrate Layer * - * The XE migrate layer is used generate jobs which can copy memory (eviction), + * The Xe migrate layer is used generate jobs which can copy memory (eviction), * clear memory, or program tables (binds). This layer exists in every GT, has * a migrate engine, and uses a special VM for all generated jobs. * diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 53507e09f7bc..7b089e6fb63f 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -102,7 +102,7 @@ static void xe_pm_block_end_signalling(void) /** * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend * - * Annotation to use where the code might block or sieze to make + * Annotation to use where the code might block or seize to make * progress pending resume completion. */ void xe_pm_might_block_on_suspend(void) diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h index 312c3372a49f..ac125c697a41 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -12,7 +12,7 @@ struct xe_exec_queue; /** - * struct xe_preempt_fence - XE preempt fence + * struct xe_preempt_fence - Xe preempt fence * * hardware and triggers a callback once the xe_engine is complete. 
*/ diff --git a/drivers/gpu/drm/xe/xe_range_fence.h b/drivers/gpu/drm/xe/xe_range_fence.h index edd58b34f5c0..4934729dd904 100644 --- a/drivers/gpu/drm/xe/xe_range_fence.h +++ b/drivers/gpu/drm/xe/xe_range_fence.h @@ -13,13 +13,13 @@ struct xe_range_fence_tree; struct xe_range_fence; -/** struct xe_range_fence_ops - XE range fence ops */ +/** struct xe_range_fence_ops - Xe range fence ops */ struct xe_range_fence_ops { /** @free: free range fence op */ void (*free)(struct xe_range_fence *rfence); }; -/** struct xe_range_fence - XE range fence (address conflict tracking) */ +/** struct xe_range_fence - Xe range fence (address conflict tracking) */ struct xe_range_fence { /** @rb: RB tree node inserted into interval tree */ struct rb_node rb; diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index d21bf8f26964..6ae4cc6a3802 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -160,11 +160,11 @@ err_free: } /** - * xe_sched_job_destroy - Destroy XE schedule job - * @ref: reference to XE schedule job + * xe_sched_job_destroy - Destroy Xe schedule job + * @ref: reference to Xe schedule job * * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup - * base DRM schedule job, and free memory for XE schedule job. + * base DRM schedule job, and free memory for Xe schedule job. */ void xe_sched_job_destroy(struct kref *ref) { diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h index 3dc72c5c1f13..b467131b6d5f 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.h +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -23,10 +23,10 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, void xe_sched_job_destroy(struct kref *ref); /** - * xe_sched_job_get - get reference to XE schedule job - * @job: XE schedule job object + * xe_sched_job_get - get reference to Xe schedule job + * @job: Xe schedule job object * - * Increment XE schedule job's reference count + * Increment Xe schedule job's reference count */ static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job) { @@ -35,10 +35,10 @@ static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job) } /** - * xe_sched_job_put - put reference to XE schedule job - * @job: XE schedule job object + * xe_sched_job_put - put reference to Xe schedule job + * @job: Xe schedule job object * - * Decrement XE schedule job's reference count, call xe_sched_job_destroy when + * Decrement Xe schedule job's reference count, call xe_sched_job_destroy when * reference count == 0. 
*/ static inline void xe_sched_job_put(struct xe_sched_job *job) diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index 13e7a12b03ad..d26612abb4ca 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -32,7 +32,7 @@ struct xe_job_ptrs { }; /** - * struct xe_sched_job - XE schedule job (batch buffer tracking) + * struct xe_sched_job - Xe schedule job (batch buffer tracking) */ struct xe_sched_job { /** @drm: base DRM scheduler job */ diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 129e7818565c..13af589715a7 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -633,7 +633,7 @@ err_out: /* * XXX: We can't derive the GT here (or anywhere in this functions, but - * compute always uses the primary GT so accumlate stats on the likely + * compute always uses the primary GT so accumulate stats on the likely * GT of the fault. */ if (gt) diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h index 554634dfd4e2..05614915463a 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.h +++ b/drivers/gpu/drm/xe/xe_tlb_inval.h @@ -33,7 +33,7 @@ void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, * xe_tlb_inval_fence_wait() - TLB invalidiation fence wait * @fence: TLB invalidation fence to wait on * - * Wait on a TLB invalidiation fence until it signals, non interruptable + * Wait on a TLB invalidiation fence until it signals, non interruptible */ static inline void xe_tlb_inval_fence_wait(struct xe_tlb_inval_fence *fence) diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h index 1144f9232ebb..a71e14818ec2 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -10,7 +10,7 @@ #include /** - * struct xe_ttm_vram_mgr - XE TTM VRAM manager + * struct xe_ttm_vram_mgr - Xe TTM VRAM manager * * Manages placement of TTM resource in VRAM. 
*/ @@ -32,7 +32,7 @@ struct xe_ttm_vram_mgr { }; /** - * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource + * struct xe_ttm_vram_mgr_resource - Xe TTM VRAM resource */ struct xe_ttm_vram_mgr_resource { /** @base: Base TTM resource */ diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index 77a1dcf8b4ed..2ebe8c9db6ce 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -62,7 +62,7 @@ enum xe_uc_fw_type { }; /** - * struct xe_uc_fw_version - Version for XE micro controller firmware + * struct xe_uc_fw_version - Version for Xe micro controller firmware */ struct xe_uc_fw_version { /** @branch: branch version of the FW (not always available) */ @@ -84,7 +84,7 @@ enum xe_uc_fw_version_types { }; /** - * struct xe_uc_fw - XE micro controller firmware + * struct xe_uc_fw - Xe micro controller firmware */ struct xe_uc_fw { /** @type: type uC firmware */ @@ -112,7 +112,7 @@ struct xe_uc_fw { /** @size: size of uC firmware including css header */ size_t size; - /** @bo: XE BO for uC firmware */ + /** @bo: Xe BO for uC firmware */ struct xe_bo *bo; /** @has_gsc_headers: whether the FW image starts with GSC headers */ diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h index 9924e4484866..1708379dc834 100644 --- a/drivers/gpu/drm/xe/xe_uc_types.h +++ b/drivers/gpu/drm/xe/xe_uc_types.h @@ -12,7 +12,7 @@ #include "xe_wopcm_types.h" /** - * struct xe_uc - XE micro controllers + * struct xe_uc - Xe micro controllers */ struct xe_uc { /** @guc: Graphics micro controller */ diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h index fec331d791e7..1ef181c90434 100644 --- a/drivers/gpu/drm/xe/xe_validation.h +++ b/drivers/gpu/drm/xe/xe_validation.h @@ -108,7 +108,7 @@ struct xe_val_flags { * @request_exclusive: Whether to lock exclusively (write mode) the next time * the domain lock is locked. * @exec_flags: The drm_exec flags used for drm_exec (re-)initialization. - * @nr: The drm_exec nr parameter used for drm_exec (re-)initializaiton. + * @nr: The drm_exec nr parameter used for drm_exec (re-)initialization. */ struct xe_validation_ctx { struct drm_exec *exec; @@ -137,7 +137,7 @@ bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret); * @_ret: The current error value possibly holding -ENOMEM * * Use this in way similar to drm_exec_retry_on_contention(). - * If @_ret contains -ENOMEM the tranaction is restarted once in a way that + * If @_ret contains -ENOMEM the transaction is restarted once in a way that * blocks other transactions and allows exhastive eviction. If the transaction * was already restarted once, Just return the -ENOMEM. May also set * _ret to -EINTR if not retrying and waits are interruptible. @@ -180,7 +180,7 @@ static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) * @_val: The xe_validation_device. * @_exec: The struct drm_exec object * @_flags: Flags for the xe_validation_ctx initialization. - * @_ret: Return in / out parameter. May be set by this macro. Typicall 0 when called. + * @_ret: Return in / out parameter. May be set by this macro. Typically 0 when called. * * This macro is will initiate a drm_exec transaction with additional support for * exhaustive eviction. 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 10d77666a425..00f3520dec38 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -824,7 +824,7 @@ xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, * * (re)bind SVM range setting up GPU page tables for the range. * - * Return: dma fence for rebind to signal completion on succees, ERR_PTR on + * Return: dma fence for rebind to signal completion on success, ERR_PTR on * failure */ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, @@ -907,7 +907,7 @@ xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops, * * Unbind SVM range removing the GPU page tables for the range. * - * Return: dma fence for unbind to signal completion on succees, ERR_PTR on + * Return: dma fence for unbind to signal completion on success, ERR_PTR on * failure */ struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, @@ -1291,7 +1291,7 @@ static u16 pde_pat_index(struct xe_bo *bo) * selection of options. The user PAT index is only for encoding leaf * nodes, where we have use of more bits to do the encoding. The * non-leaf nodes are instead under driver control so the chosen index - * here should be distict from the user PAT index. Also the + * here should be distinct from the user PAT index. Also the * corresponding coherency of the PAT index should be tied to the * allocation type of the page table (or at least we should pick * something which is always safe). @@ -4172,7 +4172,7 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) /** * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations - * @xe: Pointer to the XE device structure + * @xe: Pointer to the Xe device structure * @vma: Pointer to the virtual memory area (VMA) structure * @is_atomic: In pagefault path and atomic operation * @@ -4319,7 +4319,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL); } else if (__op->op == DRM_GPUVA_OP_MAP) { vma = op->map.vma; - /* In case of madvise call, MAP will always be follwed by REMAP. + /* In case of madvise call, MAP will always be followed by REMAP. * Therefore temp_attr will always have sane values, making it safe to * copy them to new vma. */ diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h index 1030ce214032..02e5288373c9 100644 --- a/drivers/gpu/drm/xe/xe_vm_doc.h +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -7,7 +7,7 @@ #define _XE_VM_DOC_H_ /** - * DOC: XE VM (user address space) + * DOC: Xe VM (user address space) * * VM creation * =========== @@ -202,13 +202,13 @@ * User pointers are user allocated memory (malloc'd, mmap'd, etc..) for which the * user wants to create a GPU mapping. Typically in other DRM drivers a dummy BO * was created and then a binding was created. We bypass creating a dummy BO in - * XE and simply create a binding directly from the userptr. + * Xe and simply create a binding directly from the userptr. * * Invalidation * ------------ * * Since this a core kernel managed memory the kernel can move this memory - * whenever it wants. We register an invalidation MMU notifier to alert XE when + * whenever it wants. We register an invalidation MMU notifier to alert Xe when * a user pointer is about to move. The invalidation notifier needs to block * until all pending users (jobs or compute mode engines) of the userptr are * idle to ensure no faults. This done by waiting on all of VM's dma-resv slots. 
@@ -419,7 +419,7 @@ * ======= * * VM locking protects all of the core data paths (bind operations, execs, - * evictions, and compute mode rebind worker) in XE. + * evictions, and compute mode rebind worker) in Xe. * * Locks * ----- diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index d6e2a0fdd4b3..830ed7b05c27 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -52,7 +52,7 @@ struct xe_vm_pgtable_update_op; * struct xe_vma_mem_attr - memory attributes associated with vma */ struct xe_vma_mem_attr { - /** @preferred_loc: perferred memory_location */ + /** @preferred_loc: preferred memory_location */ struct { /** @preferred_loc.migration_policy: Pages migration policy */ u32 migration_policy; @@ -338,7 +338,7 @@ struct xe_vm { u64 tlb_flush_seqno; /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; - /** @xef: XE file handle for tracking this VM's drm client */ + /** @xef: Xe file handle for tracking this VM's drm client */ struct xe_file *xef; }; -- cgit v1.2.3 From 3764e9b99fa85ca51be9c08dc82c1121e4654f49 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sat, 25 Oct 2025 14:49:05 +0200 Subject: drm/xe/pf: Fix VF FLR synchronization between all GTs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If subsequent VF FLR request is triggered when previous VF FLR sequence is still being processed, we ignore it as not needed. But in case of the multi-GT platforms, one GT may already finish its VF FLR processing and will start a new sequence, which includes new cross-GT synchronization point. However, since other GT may be still busy with post-sync cleanup steps, this will put on hold this new FLR sequence, which might never finish due to lack of any future synchronization checkouts. Add additional cross-GT FLR synchronization point when each GT ends processing its own FLR sequence. This should also help to cover the case when one GT fails FLR processing before reaching the first synchronization point. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6287 Fixes: 2a8fcf7cc950 ("drm/xe/pf: Synchronize VF FLR between all GTs") Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patch.msgid.link/20251025124906.5264-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 2e6bd3d1fe1d..9de05db1f090 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -997,6 +997,8 @@ static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid) pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START); + + xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid); } } -- cgit v1.2.3 From 071089a69e199bd810ff31c4c933bd528e502743 Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Mon, 27 Oct 2025 14:56:43 +0530 Subject: drm/xe/xe3: Add WA_14024681466 for Xe3_LPG Apply WA_14024681466 to Xe3_LPG graphics IP versions from 30.00 to 30.05. v2: (Matthew Roper) - Remove stepping filter as workaround applies to all steppings. - Add an engine class filter so it only applies to the RENDER engine. 
Signed-off-by: Nitin Gote Link: https://patch.msgid.link/20251027092643.335904-1-nitin.r.gote@intel.com Reviewed-by: Matt Roper Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 3545e0be06da..a895a8e801a9 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -174,6 +174,7 @@ #define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) +#define FAST_CLEAR_VALIGN_FIX REG_BIT(13) #define XE2LPM_CCCHKNREG1 XE_REG(0x82a8) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index b6dcd9827354..ec638b431131 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -916,6 +916,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, + { XE_RTP_NAME("14024681466"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, FAST_CLEAR_VALIGN_FIX)) + }, }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { -- cgit v1.2.3 From 4504e780689245f01bee6ee4c19c74051bd87593 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 24 Oct 2025 22:58:25 +0200 Subject: drm/xe/pf: Access VF's register using dedicated MMIO view Instead of creating ad-hoc new register definitions with altered register addresses to mimic the VF's access to these registers, prepare new MMIO instance per required VF, with shifted internal location of the register map. This will allow to use unmodified register definitions in all calls to xe_mmio() functions. Signed-off-by: Michal Wajdeczko Reviewed-by: Matt Roper Link: https://patch.msgid.link/20251024205826.4652-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 36 ++++++++---------------------------- drivers/gpu/drm/xe/xe_mmio.c | 29 +++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_mmio.h | 4 ++++ 3 files changed, 41 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index c4dda87b47cc..0714c758b9c1 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -158,39 +158,19 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) xe_gt_sriov_pf_service_update(gt); } -static u32 pf_get_vf_regs_stride(struct xe_device *xe) -{ - return GRAPHICS_VERx100(xe) > 1200 ? 
0x400 : 0x1000; -} - -static struct xe_reg xe_reg_vf_to_pf(struct xe_reg vf_reg, unsigned int vfid, u32 stride) -{ - struct xe_reg pf_reg = vf_reg; - - pf_reg.vf = 0; - pf_reg.addr += stride * vfid; - - return pf_reg; -} - static void pf_clear_vf_scratch_regs(struct xe_gt *gt, unsigned int vfid) { - u32 stride = pf_get_vf_regs_stride(gt_to_xe(gt)); - struct xe_reg scratch; - int n, count; + struct xe_mmio mmio; + int n; + + xe_mmio_init_vf_view(&mmio, >->mmio, vfid); if (xe_gt_is_media_type(gt)) { - count = MED_VF_SW_FLAG_COUNT; - for (n = 0; n < count; n++) { - scratch = xe_reg_vf_to_pf(MED_VF_SW_FLAG(n), vfid, stride); - xe_mmio_write32(>->mmio, scratch, 0); - } + for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) + xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), 0); } else { - count = VF_SW_FLAG_COUNT; - for (n = 0; n < count; n++) { - scratch = xe_reg_vf_to_pf(VF_SW_FLAG(n), vfid, stride); - xe_mmio_write32(>->mmio, scratch, 0); - } + for (n = 0; n < VF_SW_FLAG_COUNT; n++) + xe_mmio_write32(&mmio, VF_SW_FLAG(n), 0); } } diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index ef6f3ea573a2..350dca1f0925 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -379,3 +379,32 @@ int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 va { return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, false); } + +#ifdef CONFIG_PCI_IOV +static size_t vf_regs_stride(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000; +} + +/** + * xe_mmio_init_vf_view() - Initialize an MMIO instance for accesses like the VF + * @mmio: the target &xe_mmio to initialize as VF's view + * @base: the source &xe_mmio to initialize from + * @vfid: the VF identifier + */ +void xe_mmio_init_vf_view(struct xe_mmio *mmio, const struct xe_mmio *base, unsigned int vfid) +{ + struct xe_tile *tile = base->tile; + struct xe_device *xe = tile->xe; + size_t offset = vf_regs_stride(xe) * vfid; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, vfid); + xe_assert(xe, !base->sriov_vf_gt); + xe_assert(xe, base->regs_size > offset); + + *mmio = *base; + mmio->regs += offset; + mmio->regs_size -= offset; +} +#endif diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index c151ba569003..15362789ab99 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -42,4 +42,8 @@ static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe) return &xe->tiles[0].mmio; } +#ifdef CONFIG_PCI_IOV +void xe_mmio_init_vf_view(struct xe_mmio *mmio, const struct xe_mmio *base, unsigned int vfid); +#endif + #endif -- cgit v1.2.3 From 3ac635367eb589bee8edcc722f812a89970e14b7 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 16 Oct 2025 13:55:27 -0500 Subject: drm/amd: Check that VPE has reached DPM0 in idle handler [Why] Newer VPE microcode has functionality that will decrease DPM level only when a workload has run for 2 or more seconds. If VPE is turned off before this DPM decrease and the PMFW doesn't reset it when power gating VPE, the SOC can get stuck with a higher DPM level. This can happen from amdgpu's ring buffer test because it's a short quick workload for VPE and VPE is turned off after 1s. [How] In idle handler besides checking fences are drained check PMFW version to determine if it will reset DPM when power gating VPE. If PMFW will not do this, then check VPE DPM level. If it is not DPM0 reschedule delayed work again until it is. 
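Schematically, the idle handler becomes a reschedule-until-idle loop (a simplified sketch of the flow described above, using the helpers added in the diff below, not the literal patch):

	/* Sketch: gate VPE power only when no fences remain and, on PMFW
	 * builds that will not reset DPM while power gating, only once the
	 * requested level has dropped back to DPM0; otherwise retry later. */
	if (amdgpu_fence_count_emitted(&adev->vpe.ring) ||
	    (vpe_need_dpm0_at_power_down(adev) && vpe_get_dpm_level(adev) != 0))
		schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
	else
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
						       AMD_PG_STATE_GATE);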
v2: squash in return fix (Alex) Cc: Peyton.Lee@amd.com Reported-by: Sultan Alsawaf Reviewed-by: Sultan Alsawaf Tested-by: Sultan Alsawaf Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4615 Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 34 +++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 474bfe36c0c2..aa78c2ee9e21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -322,6 +322,26 @@ static int vpe_early_init(struct amdgpu_ip_block *ip_block) return 0; } +static bool vpe_need_dpm0_at_power_down(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) { + case IP_VERSION(6, 1, 1): + return adev->pm.fw_version < 0x0a640500; + default: + return false; + } +} + +static int vpe_get_dpm_level(struct amdgpu_device *adev) +{ + struct amdgpu_vpe *vpe = &adev->vpe; + + if (!adev->pm.dpm_enabled) + return 0; + + return RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_lv)); +} + static void vpe_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = @@ -329,11 +349,17 @@ static void vpe_idle_work_handler(struct work_struct *work) unsigned int fences = 0; fences += amdgpu_fence_count_emitted(&adev->vpe.ring); + if (fences) + goto reschedule; - if (fences == 0) - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); - else - schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); + if (vpe_need_dpm0_at_power_down(adev) && vpe_get_dpm_level(adev) != 0) + goto reschedule; + + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); + return; + +reschedule: + schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); } static int vpe_common_init(struct amdgpu_vpe *vpe) -- cgit v1.2.3 From d0da43def215543d7b62a32af67e181619b3293a Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Sat, 18 Oct 2025 14:44:49 +0900 Subject: drm/radeon: Clean up pdev->dev instances in probe Get a struct device pointer from the start and use it. 
Signed-off-by: Daniel Palmer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 88e821d67af7..eb5138da8d5b 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -262,6 +262,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, unsigned long flags = 0; struct drm_device *ddev; struct radeon_device *rdev; + struct device *dev = &pdev->dev; const struct drm_format_info *format; int ret; @@ -277,7 +278,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, case CHIP_VERDE: case CHIP_OLAND: case CHIP_HAINAN: - dev_info(&pdev->dev, + dev_info(dev, "SI support disabled by module param\n"); return -ENODEV; } @@ -289,7 +290,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, case CHIP_HAWAII: case CHIP_KABINI: case CHIP_MULLINS: - dev_info(&pdev->dev, + dev_info(dev, "CIK support disabled by module param\n"); return -ENODEV; } @@ -303,11 +304,11 @@ static int radeon_pci_probe(struct pci_dev *pdev, if (ret) return ret; - rdev = devm_drm_dev_alloc(&pdev->dev, &kms_driver, typeof(*rdev), ddev); + rdev = devm_drm_dev_alloc(dev, &kms_driver, typeof(*rdev), ddev); if (IS_ERR(rdev)) return PTR_ERR(rdev); - rdev->dev = &pdev->dev; + rdev->dev = dev; rdev->pdev = pdev; ddev = rdev_to_drm(rdev); ddev->dev_private = rdev; -- cgit v1.2.3 From 16c0681617b8a045773d4d87b6140002fa75b03b Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Sat, 18 Oct 2025 14:44:50 +0900 Subject: drm/radeon: Do not kfree() devres managed rdev Since the allocation of the drivers main structure was changed to devm_drm_dev_alloc() rdev is managed by devres and we shouldn't be calling kfree() on it. This fixes things exploding if the driver probe fails and devres cleans up the rdev after we already free'd it. Fixes: a9ed2f052c5c ("drm/radeon: change drm_dev_alloc to devm_drm_dev_alloc") Signed-off-by: Daniel Palmer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_kms.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 645e33bf7947..ba1446acd703 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -84,7 +84,6 @@ void radeon_driver_unload_kms(struct drm_device *dev) rdev->agp = NULL; done_free: - kfree(rdev); dev->dev_private = NULL; } -- cgit v1.2.3 From 3eb8c0b4c091da0a623ade0d3ee7aa4a93df1ea4 Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Sat, 18 Oct 2025 14:44:51 +0900 Subject: drm/radeon: Remove calls to drm_put_dev() Since the allocation of the drivers main structure was changed to devm_drm_dev_alloc() drm_put_dev()'ing to trigger it to be free'd should be done by devres. However, drm_put_dev() is still in the probe error and device remove paths. When the driver fails to probe warnings like the following are shown because devres is trying to drm_put_dev() after the driver already did it. [ 5.642230] radeon 0000:01:05.0: probe with driver radeon failed with error -22 [ 5.649605] ------------[ cut here ]------------ [ 5.649607] refcount_t: underflow; use-after-free. 
[ 5.649620] WARNING: CPU: 0 PID: 357 at lib/refcount.c:28 refcount_warn_saturate+0xbe/0x110 Fixes: a9ed2f052c5c ("drm/radeon: change drm_dev_alloc to devm_drm_dev_alloc") Signed-off-by: Daniel Palmer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 25 ++++---------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index eb5138da8d5b..350f88af888d 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -315,17 +315,17 @@ static int radeon_pci_probe(struct pci_dev *pdev, ret = pci_enable_device(pdev); if (ret) - goto err_free; + return ret; pci_set_drvdata(pdev, ddev); ret = radeon_driver_load_kms(ddev, flags); if (ret) - goto err_agp; + goto err; ret = drm_dev_register(ddev, flags); if (ret) - goto err_agp; + goto err; if (rdev->mc.real_vram_size <= (8 * 1024 * 1024)) format = drm_format_info(DRM_FORMAT_C8); @@ -338,30 +338,14 @@ static int radeon_pci_probe(struct pci_dev *pdev, return 0; -err_agp: +err: pci_disable_device(pdev); -err_free: - drm_dev_put(ddev); return ret; } -static void -radeon_pci_remove(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); - - drm_put_dev(dev); -} - static void radeon_pci_shutdown(struct pci_dev *pdev) { - /* if we are running in a VM, make sure the device - * torn down properly on reboot/shutdown - */ - if (radeon_device_is_virtual()) - radeon_pci_remove(pdev); - #if defined(CONFIG_PPC64) || defined(CONFIG_MACH_LOONGSON64) /* * Some adapters need to be suspended before a @@ -614,7 +598,6 @@ static struct pci_driver radeon_kms_pci_driver = { .name = DRIVER_NAME, .id_table = pciidlist, .probe = radeon_pci_probe, - .remove = radeon_pci_remove, .shutdown = radeon_pci_shutdown, .driver.pm = &radeon_pm_ops, }; -- cgit v1.2.3 From bc6d54ac7e7436721a19443265f971f890c13cc5 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 28 Mar 2025 10:34:57 +0800 Subject: drm/amd/display: pause the workload setting in dm v1: Pause the workload setting in dm when doing idle optimization v2: Rebase patch to latest kernel code base (kernel 6.16) Reviewed-by: Alex Deucher Signed-off-by: Kenneth Feng Signed-off-by: Alex Deucher Signed-off-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 1ec9d03ad747..f08121a2b838 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -248,6 +248,8 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) struct vblank_control_work *vblank_work = container_of(work, struct vblank_control_work, work); struct amdgpu_display_manager *dm = vblank_work->dm; + struct amdgpu_device *adev = drm_to_adev(dm->ddev); + int r; mutex_lock(&dm->dc_lock); @@ -277,7 +279,16 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) if (dm->active_vblank_irq_count == 0) { dc_post_update_surfaces_to_stream(dm->dc); + + r = amdgpu_dpm_pause_power_profile(adev, true); + if (r) + dev_warn(adev->dev, "failed to set default power profile mode\n"); + dc_allow_idle_optimizations(dm->dc, true); + + r = amdgpu_dpm_pause_power_profile(adev, false); + if (r) + dev_warn(adev->dev, "failed to restore the power profile mode\n"); } mutex_unlock(&dm->dc_lock); --
cgit v1.2.3 From edddaada9e1c5620c184da8c2cdefbf31a930376 Mon Sep 17 00:00:00 2001 From: Jinzhou Su Date: Wed, 15 Oct 2025 09:39:44 +0800 Subject: drm/amdgpu: clear bad page info of ras module Clear bad page info of ras module. Signed-off-by: Jinzhou Su Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index ff34e1c0d9e2..b22ce0a567f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -612,6 +612,8 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, return size; } +static int amdgpu_uniras_clear_badpages_info(struct amdgpu_device *adev); + /** * DOC: AMDGPU RAS debugfs EEPROM table reset interface * @@ -636,6 +638,11 @@ static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, (struct amdgpu_device *)file_inode(f)->i_private; int ret; + if (amdgpu_uniras_enabled(adev)) { + ret = amdgpu_uniras_clear_badpages_info(adev); + return ret ? ret : size; + } + ret = amdgpu_ras_eeprom_reset_table( &(amdgpu_ras_get_context(adev)->eeprom_control)); @@ -1543,6 +1550,21 @@ out_fini_err_data: return ret; } +static int amdgpu_uniras_clear_badpages_info(struct amdgpu_device *adev) +{ + struct ras_cmd_dev_handle req = {0}; + int ret; + + ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__CLEAR_BAD_PAGE_INFO, + &req, sizeof(req), NULL, 0); + if (ret) { + dev_err(adev->dev, "Failed to clear bad pages info, ret: %d\n", ret); + return ret; + } + + return 0; +} + static int amdgpu_uniras_query_block_ecc(struct amdgpu_device *adev, struct ras_query_if *info) { -- cgit v1.2.3 From 28d4de7ebf288d628cd3a03c0e57311b94a93ee7 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 14 Oct 2025 15:05:19 +0800 Subject: drm/amdgpu: get rev_id from strap register or IP-discovery table Query the sub-revision field in the IP Discovery table for the VFs to obtain their revision ID. Meanwhile, read the revision ID from the strap register for the PF. 
Signed-off-by: Perry Yuan Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 24 +++++++++++++----------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 0d1dd587db5f..e716097dfde4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1843,6 +1843,10 @@ static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E; + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) && + adev->rev_id == 0x3) + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E; + if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) { vram_info = RREG32(regBIF_BIOS_SCRATCH_4); adev->gmc.vram_vendor = vram_info & 0xF; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index 1c22bc11c1f8..bdfd2917e3ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -41,19 +41,21 @@ static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev) static u32 nbio_v7_9_get_rev_id(struct amdgpu_device *adev) { - u32 tmp; - - tmp = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0)); - /* If it is VF or subrevision holds a non-zero value, that should be used */ - if (tmp || amdgpu_sriov_vf(adev)) - return tmp; + u32 rev_id; - /* If discovery subrev is not updated, use register version */ - tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); - tmp = REG_GET_FIELD(tmp, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0, - STRAP_ATI_REV_ID_DEV0_F0); + /* + * fetch the sub-revision field from the IP-discovery table + * (returns zero if the table entry is not populated). + */ + if (amdgpu_sriov_vf(adev)) { + rev_id = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0)); + } else { + rev_id = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); + rev_id = REG_GET_FIELD(rev_id, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0, + STRAP_ATI_REV_ID_DEV0_F0); + } - return tmp; + return rev_id; } static void nbio_v7_9_mc_access_enable(struct amdgpu_device *adev, bool enable) -- cgit v1.2.3 From 214eb7e83d222ed542d41e23f3296ee71eedf31b Mon Sep 17 00:00:00 2001 From: Jinzhou Su Date: Tue, 21 Oct 2025 09:31:11 +0800 Subject: drm/amdgpu: Add uniras version in sysfs Display the uniras version in the sysfs version interface when uniras is enabled.
v2: display ras version detail info Signed-off-by: Jinzhou Su Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index b22ce0a567f2..7f899d3e1bfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1950,12 +1950,42 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, return sysfs_emit(buf, "feature mask: 0x%x\n", con->features); } +static bool amdgpu_ras_get_version_info(struct amdgpu_device *adev, u32 *major, + u32 *minor, u32 *rev) +{ + int i; + + if (!adev || !major || !minor || !rev || !amdgpu_uniras_enabled(adev)) + return false; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_RAS) { + *major = adev->ip_blocks[i].version->major; + *minor = adev->ip_blocks[i].version->minor; + *rev = adev->ip_blocks[i].version->rev; + return true; + } + } + + return false; +} + static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct amdgpu_ras *con = container_of(attr, struct amdgpu_ras, version_attr); - return sysfs_emit(buf, "table version: 0x%x\n", con->eeprom_control.tbl_hdr.version); + u32 major, minor, rev; + ssize_t size = 0; + + size += sysfs_emit_at(buf, size, "table version: 0x%x\n", + con->eeprom_control.tbl_hdr.version); + + if (amdgpu_ras_get_version_info(con->adev, &major, &minor, &rev)) + size += sysfs_emit_at(buf, size, "ras version: %u.%u.%u\n", + major, minor, rev); + + return size; } static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev, -- cgit v1.2.3 From 13ff4f63fcddfc84ec8632f1443936b00aa26725 Mon Sep 17 00:00:00 2001 From: Matthew Schwartz Date: Mon, 20 Oct 2025 16:09:34 -0700 Subject: drm/amd/display: Don't program BLNDGAM_MEM_PWR_FORCE when CM low-power is disabled on DCN30 Before commit 33056a97ae5e ("drm/amd/display: Remove double checks for `debug.enable_mem_low_power.bits.cm`"), dpp3_program_blnd_lut(NULL) checked the low-power debug flag before calling dpp3_power_on_blnd_lut(false). After commit 33056a97ae5e ("drm/amd/display: Remove double checks for `debug.enable_mem_low_power.bits.cm`"), dpp3_program_blnd_lut(NULL) unconditionally calls dpp3_power_on_blnd_lut(false). The BLNDGAM power helper writes BLNDGAM_MEM_PWR_FORCE when CM low-power is disabled, causing immediate SRAM power toggles instead of deferring at vupdate. This can disrupt atomic color/LUT sequencing during transitions between direct scanout and composition within gamescope's DRM backend on Steam Deck OLED. To fix this, leave the BLNDGAM power state unchanged when low-power is disabled, matching dpp3_power_on_hdr3dlut and dpp3_power_on_shaper. 
Fixes: 33056a97ae5e ("drm/amd/display: Remove double checks for `debug.enable_mem_low_power.bits.cm`") Signed-off-by: Matthew Schwartz Reviewed-by: Harry Wentland Reviewed-by: Mario Limonciello Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c index 94d0dc3461d2..fe52df01caf7 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c @@ -582,9 +582,6 @@ static void dpp3_power_on_blnd_lut( dpp_base->ctx->dc->optimized_required = true; dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true; } - } else { - REG_SET(CM_MEM_PWR_CTRL, 0, - BLNDGAM_MEM_PWR_FORCE, power_on == true ? 0 : 1); } } -- cgit v1.2.3 From fca0c66b22303de0d1d6313059baf4dc960a4753 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 22 Oct 2025 14:12:21 +0800 Subject: drm/amd/pm: fix smu table id bound check issue in smu_cmn_update_table() 'table_index' is a variable defined by the smu driver (kmd). 'table_id' is a variable defined by the hw smu (pmfw). This code should use table_index in the bounds check. Fixes: caad2613dc4bd ("drm/amd/powerplay: move table setting common code to smu_cmn.c") Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index f532f7c69259..a8961a8f5c42 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -969,7 +969,7 @@ int smu_cmn_update_table(struct smu_context *smu, table_index); uint32_t table_size; int ret = 0; - if (!table_data || table_id >= SMU_TABLE_COUNT || table_id < 0) + if (!table_data || table_index >= SMU_TABLE_COUNT || table_id < 0) return -EINVAL; table_size = smu_table->tables[table_index].size; -- cgit v1.2.3 From c52238c9fb414555c68340cd80e487d982c1921c Mon Sep 17 00:00:00 2001 From: John Smith Date: Tue, 21 Oct 2025 11:08:13 +0200 Subject: drm/amd/pm/powerplay/smumgr: Fix PCIeBootLinkLevel value on Fiji Previously this was initialized with zero, which represents PCIe Gen 1.0, instead of using the maximum value from the speed table, which is the behaviour of all other smumgr implementations.
Fixes: 18edef19ea44 ("drm/amd/powerplay: implement fw image related smu interface for Fiji.") Signed-off-by: John Smith Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c index d2dbd90bb427..0a876c840c79 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c @@ -2024,7 +2024,7 @@ static int fiji_init_smc_table(struct pp_hwmgr *hwmgr) table->VoltageResponseTime = 0; table->PhaseResponseTime = 0; table->MemoryThermThrottleEnable = 1; - table->PCIeBootLinkLevel = 0; /* 0:Gen1 1:Gen2 2:Gen3*/ + table->PCIeBootLinkLevel = (uint8_t) (data->dpm_table.pcie_speed_table.count); table->PCIeGenInterval = 1; table->VRConfig = 0; -- cgit v1.2.3 From 92b0a6ae6672857ddeabf892223943d2f0e06c97 Mon Sep 17 00:00:00 2001 From: John Smith Date: Tue, 21 Oct 2025 11:09:09 +0200 Subject: drm/amd/pm/powerplay/smumgr: Fix PCIeBootLinkLevel value on Iceland Previously this was initialized with zero, which represents PCIe Gen 1.0, instead of using the maximum value from the speed table, which is the behaviour of all other smumgr implementations. Fixes: 18aafc59b106 ("drm/amd/powerplay: implement fw related smu interface for iceland.") Signed-off-by: John Smith Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c index 1f50f1e74c48..aa3ae9b115c4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c @@ -2028,7 +2028,7 @@ static int iceland_init_smc_table(struct pp_hwmgr *hwmgr) table->VoltageResponseTime = 0; table->PhaseResponseTime = 0; table->MemoryThermThrottleEnable = 1; - table->PCIeBootLinkLevel = 0; + table->PCIeBootLinkLevel = (uint8_t) (data->dpm_table.pcie_speed_table.count); table->PCIeGenInterval = 1; result = iceland_populate_smc_svi2_config(hwmgr, table); -- cgit v1.2.3 From 6142aa066061919a436c34c8a182674b48636311 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 22 Oct 2025 14:03:24 +0300 Subject: drm/amdgpu/userqueue: Fix use after free in amdgpu_userq_buffer_vas_list_cleanup() The amdgpu_userq_buffer_va_list_del() function frees "va_cursor" but it is dereferenced on the next line when we print the debug message. Print the debug message first and then free it.
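The bug class is easy to reproduce outside the driver. A minimal userspace C sketch of the same ordering fix (the struct and names here are hypothetical, not the driver's): read everything needed for the log message first, then free the object.

#include <stdio.h>
#include <stdlib.h>

struct va_node {
        unsigned long long gpu_addr;
};

static void log_then_free(struct va_node *n)
{
        /* Read the field for the message while the object is alive... */
        printf("delete va:%llx\n", n->gpu_addr);
        /*
         * ...then release it. Swapping these two lines dereferences
         * freed memory -- the exact use-after-free fixed above.
         */
        free(n);
}

int main(void)
{
        struct va_node *n = malloc(sizeof(*n));

        if (!n)
                return 1;
        n->gpu_addr = 0x10000000ULL;
        log_then_free(n);
        return 0;
}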
Fixes: 2a28f9665dca ("drm/amdgpu: track the userq bo va for its obj management") Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 9d4751a39c20..2200e0bbf040 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -159,9 +159,9 @@ static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, r = -EINVAL; goto err; } - amdgpu_userq_buffer_va_list_del(mapping, va_cursor); dev_dbg(adev->dev, "delete the userq:%p va:%llx\n", queue, va_cursor->gpu_addr); + amdgpu_userq_buffer_va_list_del(mapping, va_cursor); } err: amdgpu_bo_unreserve(queue->vm->root.bo); -- cgit v1.2.3 From abd3f876404cafb107cb34bacb74706bfee11cbe Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 Oct 2025 09:12:54 -0400 Subject: drm/amdgpu: fix SPDX headers on amdgpu_cper.c/h These should be MIT. The driver in general is MIT and the license text at the top of the files is MIT so fix it. Fixes: 92d5d2a09de1 ("drm/amdgpu: Introduce funcs for populating CPER") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4654 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index ef996493115f..425a3e564360 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: MIT /* * Copyright 2025 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h index bcb97d245673..353421807387 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2025 Advanced Micro Devices, Inc. * -- cgit v1.2.3 From 72c5482cb0f3d3c772c9de50e5a4265258a53f81 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 Oct 2025 09:14:55 -0400 Subject: drm/amdgpu: fix SPDX header on amd_cper.h This should be MIT. The driver in general is MIT and the license text at the top of the file is MIT so fix it. Fixes: 523b69c65445 ("drm/amd/include: Add amd cper header") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4654 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/amd_cper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/amd_cper.h b/drivers/gpu/drm/amd/include/amd_cper.h index 086869264425..a252ee4c7874 100644 --- a/drivers/gpu/drm/amd/include/amd_cper.h +++ b/drivers/gpu/drm/amd/include/amd_cper.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2025 Advanced Micro Devices, Inc. * -- cgit v1.2.3 From 68c20d7b1779f97d600e61b9e95726c0cd609e2a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 Oct 2025 09:17:37 -0400 Subject: drm/amdgpu: fix SPDX header on irqsrcs_vcn_5_0.h This should be MIT. The driver in general is MIT and the license text at the top of the file is MIT so fix it. 
Fixes: d1bb64651095 ("drm/amdgpu: add irq source ids for VCN5_0/JPEG5_0") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4654 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h index 64b553e7de1a..e7fdcee22a71 100644 --- a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h +++ b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. -- cgit v1.2.3 From 102c4f7c554ac5a5ecf0023fa0612beb58e3b0bd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 Oct 2025 09:19:55 -0400 Subject: drm/amdgpu: fix SPDX header on cyan_skillfish_reg_init.c This should be MIT. The driver in general is MIT and the license text at the top of the file is MIT so fix it. Fixes: e8529dbc75ca ("drm/amdgpu: add ip offset support for cyan skillfish") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4654 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c index 96616a865aac..ed1e25661706 100644 --- a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: MIT /* * Copyright 2018 Advanced Micro Devices, Inc. * -- cgit v1.2.3 From 00dc2ff519781309f1f1ff344a638f61c33884c7 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 22 Oct 2025 17:46:26 +0530 Subject: drm/amdgpu: Make SR-IOV critical region checks overflow-safe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function amdgpu_virt_init_critical_region() contained an invalid check for a negative init_hdr_offset value: if (init_hdr_offset < 0) Since init_hdr_offset is an unsigned 32-bit integer, this condition can never be true and triggers a Smatch warning: warn: unsigned 'init_hdr_offset' is never less than zero In addition, the subsequent bounds check: if ((init_hdr_offset + init_hdr_size) > vram_size) was vulnerable to integer overflow when adding the two unsigned values. Fix this by promoting offset and size to 64-bit and using check_add_overflow() to safely validate the sum against the VRAM size.
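For reference, a small userspace sketch of the overflow-safe bounds check, modelling the kernel's check_add_overflow() with the compiler's __builtin_add_overflow() (which is what the kernel macro uses for integer types); the sizes below are made-up values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's check_add_overflow(). */
#define check_add_overflow(a, b, d) __builtin_add_overflow(a, b, d)

static bool region_fits(uint64_t offset, uint64_t size, uint64_t vram_size)
{
        uint64_t end;

        /*
         * A plain (offset + size) > vram_size test can wrap around
         * and falsely pass; detect the wrap explicitly instead.
         */
        if (check_add_overflow(offset, size, &end))
                return false;

        return end <= vram_size;
}

int main(void)
{
        uint64_t vram = 16ULL << 30; /* 16 GiB, arbitrary */

        printf("%d\n", region_fits(1ULL << 20, 64ULL << 10, vram)); /* 1: fits */
        printf("%d\n", region_fits(UINT64_MAX, 1, vram));           /* 0: wraps */
        return 0;
}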
Fixes: 07009df6494d ("drm/amdgpu: Introduce SRIOV critical regions v2 during VF init") Reported-by: Dan Carpenter Cc: Ellen Pan Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Ellen Pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 66e9cd103597..45f2ad083338 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -937,9 +937,10 @@ static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t int amdgpu_virt_init_critical_region(struct amdgpu_device *adev) { struct amd_sriov_msg_init_data_header *init_data_hdr = NULL; - uint32_t init_hdr_offset = adev->virt.init_data_header.offset; - uint32_t init_hdr_size = adev->virt.init_data_header.size_kb << 10; - uint64_t vram_size; + u64 init_hdr_offset = adev->virt.init_data_header.offset; + u64 init_hdr_size = (u64)adev->virt.init_data_header.size_kb << 10; /* KB → bytes */ + u64 vram_size; + u64 end; int r = 0; uint8_t checksum = 0; @@ -957,7 +958,7 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev) return -EINVAL; vram_size <<= 20; - if ((init_hdr_offset + init_hdr_size) > vram_size) { + if (check_add_overflow(init_hdr_offset, init_hdr_size, &end) || end > vram_size) { dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n"); return -EINVAL; } -- cgit v1.2.3 From 6f1ee58a5e56c4a7b5b3fe9323e4510df156449c Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Tue, 21 Oct 2025 10:01:46 +0800 Subject: drm/amd/pm: smu13: Enable VCN_RESET for pgm 7 with appropriate firmware version This patch extends the VCN_RESET capability check to include pgm 7 when the firmware version is 0x07551400 or newer. Signed-off-by: Jesse Zhang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index cc7a0e061ba1..0a7d2cea7dc6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -450,7 +450,8 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu) ((pgm == 4) && (fw_ver >= 0x4557000))) smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); - if ((pgm == 0) && (fw_ver >= 0x00558200)) + if ((pgm == 0 && fw_ver >= 0x00558200) || + (pgm == 7 && fw_ver >= 0x07551400)) smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); } -- cgit v1.2.3 From 84564d2920b8c858d96cb7471b45203d35f63b61 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 21 Oct 2025 15:14:06 +0530 Subject: drm/amdgpu: null check for hmm_pfns ptr before freeing it Due to low memory, or when the number of pages is too big to be accommodated, the allocation could fail for the pfns. Check hmm_pfns for NULL before calling kvfree on it.
Signed-off-by: Sunil Khatri Acked-by: Arunpravin Paneer Selvam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index d6f903a2d573..7e5a09b0bc78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -286,7 +286,9 @@ void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range) if (!range) return; - kvfree(range->hmm_range.hmm_pfns); + if (range->hmm_range.hmm_pfns) + kvfree(range->hmm_range.hmm_pfns); + amdgpu_bo_unref(&range->bo); kfree(range); } -- cgit v1.2.3 From 90ef1dcb1d2bb84ad998e845e26a2a297a7ddfd6 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 23 Oct 2025 10:52:21 +0530 Subject: drm/amdgpu: Fix pointer casts when reading dynamic region sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function amdgpu_virt_get_dynamic_data_info() writes a 64-bit size value. In two places (amdgpu_bios.c and amdgpu_discovery.c), the code passed the address of a smaller variable by casting it to u64 *, which is unsafe. This could make the function write more bytes than the smaller variable can hold, possibly overwriting nearby memory. Reported by static analysis tools. v2: Dynamic region size comes from the host (SR-IOV setup) and is always fixed to 5 MB. (Lijo/Ellen) 5 MB easily fits inside a 32-bit value, so using a 64-bit type is not needed. It also avoids extra type casts. Fixes: b4a8fcc7826a ("drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets") Reported-by: Dan Carpenter Cc: Ellen Pan Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index db705bf723f1..35d04e69aec0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -103,7 +103,7 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev) { uint8_t __iomem *bios = NULL; resource_size_t vram_base; - resource_size_t size = 256 * 1024; /* ??? */ + u32 size = 256U * 1024U; /* ???
*/ if (!(adev->flags & AMD_IS_APU)) if (amdgpu_device_need_post(adev)) @@ -126,7 +126,7 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev) */ if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) { if (amdgpu_virt_get_dynamic_data_info(adev, - AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, adev->bios, (uint64_t *)&size)) { + AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, adev->bios, &size)) { amdgpu_bios_release(adev); return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index a7cb4665f485..fa2a22dfa048 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -311,7 +311,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, */ if (amdgpu_virt_get_dynamic_data_info(adev, AMD_SRIOV_MSG_IPD_TABLE_ID, binary, - (uint64_t *)&adev->discovery.size)) { + &adev->discovery.size)) { dev_err(adev->dev, "failed to read discovery info from dynamic critical region."); ret = -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 45f2ad083338..f2ce8f506aa8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1102,7 +1102,7 @@ out: } int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev, - int data_id, uint8_t *binary, uint64_t *size) + int data_id, uint8_t *binary, u32 *size) { uint32_t data_offset = 0; uint32_t data_size = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 2a13cc892a13..14d864be5800 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -443,7 +443,7 @@ void amdgpu_virt_init(struct amdgpu_device *adev); int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev, - int data_id, uint8_t *binary, uint64_t *size); + int data_id, uint8_t *binary, u32 *size); bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); -- cgit v1.2.3 From 1454642960b0a4fafd48b08919d39350ee2ac372 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 18 Jul 2025 14:20:45 -0500 Subject: drm/amd: Re-introduce property to control adaptive backlight modulation commit 0887054d14ae ("drm/amd: Drop abm_level property") dropped the abm level property in favor of sysfs control. Since then there have been discussions that compositors showed an interest in modifying a vendor-specific property instead. So re-introduce the abm level property, but with different semantics. Rather than being an integer, it's now an enum. One of the enum options is 'sysfs', and that is because there is still a sysfs file for use by userspace when the compositor doesn't support this property. If userspace has not modified this property, the default value will be for sysfs to control it. Once userspace has set the property, stop allowing sysfs control. The property is only attached to non-OLED eDP panels.
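The new semantics boil down to a small mapping from the property value to the pair (abm_sysfs_forbidden, abm_level). A standalone C sketch of that decision; the constants mirror the patch, except ABM_LEVEL_IMMEDIATE_DISABLE, whose value of 255 is an assumption about the DC define:

#include <stdbool.h>
#include <stdio.h>

#define ABM_SYSFS_CONTROL -1
#define ABM_LEVEL_OFF 0
#define ABM_LEVEL_IMMEDIATE_DISABLE 255 /* assumed value */

struct conn_state {
        bool abm_sysfs_forbidden;
        int abm_level;
};

static void set_abm_property(struct conn_state *s, int val)
{
        switch (val) {
        case ABM_SYSFS_CONTROL:
                /* Hand control back to the panel_power_savings sysfs file. */
                s->abm_sysfs_forbidden = false;
                break;
        case ABM_LEVEL_OFF:
                s->abm_sysfs_forbidden = true;
                s->abm_level = ABM_LEVEL_IMMEDIATE_DISABLE;
                break;
        default:
                /* 'min'/'bias min'/'bias max'/'max': compositor owns it. */
                s->abm_sysfs_forbidden = true;
                s->abm_level = val;
        }
}

int main(void)
{
        struct conn_state s = { false, 0 };

        set_abm_property(&s, 3); /* 'bias max' */
        printf("forbidden=%d level=%d\n", s.abm_sysfs_forbidden, s.abm_level);
        return 0;
}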
Cc: Xaver Hugl Reviewed-by: Harry Wentland Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 60 ++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_display.h | 7 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 2 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 39 ++++++++++++++- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 1 + 5 files changed, 106 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 51bab32fd8c6..c74b95bd41b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1365,6 +1365,64 @@ static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = { { AMDGPU_FMT_DITHER_ENABLE, "on" }, }; +/** + * DOC: property for adaptive backlight modulation + * + * The 'adaptive backlight modulation' property is used for the compositor to + * directly control the adaptive backlight modulation power savings feature + * that is part of DCN hardware. + * + * The property will be attached specifically to eDP panels that support it. + * + * The property is by default set to 'sysfs' to allow the sysfs file 'panel_power_savings' + * to be able to control it. + * If set to 'off' the compositor will ensure it stays off. + * The other values 'min', 'bias min', 'bias max', and 'max' will control the + * intensity of the power savings. + * + * Modifying this value can have implications on color accuracy, so tread + * carefully. + */ +static int amdgpu_display_setup_abm_prop(struct amdgpu_device *adev) +{ + const struct drm_prop_enum_list props[] = { + { ABM_SYSFS_CONTROL, "sysfs" }, + { ABM_LEVEL_OFF, "off" }, + { ABM_LEVEL_MIN, "min" }, + { ABM_LEVEL_BIAS_MIN, "bias min" }, + { ABM_LEVEL_BIAS_MAX, "bias max" }, + { ABM_LEVEL_MAX, "max" }, + }; + struct drm_property *prop; + int i; + + if (!adev->dc_enabled) + return 0; + + prop = drm_property_create(adev_to_drm(adev), DRM_MODE_PROP_ENUM, + "adaptive backlight modulation", + 6); + if (!prop) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(props); i++) { + int ret; + + ret = drm_property_add_enum(prop, props[i].type, + props[i].name); + + if (ret) { + drm_property_destroy(adev_to_drm(adev), prop); + + return ret; + } + } + + adev->mode_info.abm_level_property = prop; + + return 0; +} + int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) { int sz; @@ -1411,7 +1469,7 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) "dither", amdgpu_dither_enum_list, sz); - return 0; + return amdgpu_display_setup_abm_prop(adev); } void amdgpu_display_update_priority(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 930c171473b4..49a29bf47a37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -55,4 +55,11 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev); int amdgpu_display_get_scanout_buffer(struct drm_plane *plane, struct drm_scanout_buffer *sb); +#define ABM_SYSFS_CONTROL -1 +#define ABM_LEVEL_OFF 0 +#define ABM_LEVEL_MIN 1 +#define ABM_LEVEL_BIAS_MIN 2 +#define ABM_LEVEL_BIAS_MAX 3 +#define ABM_LEVEL_MAX 4 + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 20460cfd09bc..dc8d2f52c7d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -326,6 
+326,8 @@ struct amdgpu_mode_info { struct drm_property *audio_property; /* FMT dithering */ struct drm_property *dither_property; + /* Adaptive Backlight Modulation (power feature) */ + struct drm_property *abm_level_property; /* hardcoded DFP edid from BIOS */ const struct drm_edid *bios_hardcoded_edid; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e3be2620f959..5b446e4000ef 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5200,6 +5200,7 @@ static int initialize_plane(struct amdgpu_display_manager *dm, static void setup_backlight_device(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *aconnector) { + struct amdgpu_dm_backlight_caps *caps; struct dc_link *link = aconnector->dc_link; int bl_idx = dm->num_of_edps; @@ -5219,6 +5220,13 @@ static void setup_backlight_device(struct amdgpu_display_manager *dm, dm->num_of_edps++; update_connector_ext_caps(aconnector); + caps = &dm->backlight_caps[aconnector->bl_idx]; + + /* Only offer ABM property when non-OLED and user didn't turn off by module parameter */ + if (!caps->ext_caps->bits.oled && amdgpu_dm_abm_level < 0) + drm_object_attach_property(&aconnector->base.base, + dm->adev->mode_info.abm_level_property, + ABM_SYSFS_CONTROL); } static void amdgpu_set_panel_orientation(struct drm_connector *connector); @@ -7291,6 +7299,20 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector, } else if (property == adev->mode_info.underscan_property) { dm_new_state->underscan_enable = val; ret = 0; + } else if (property == adev->mode_info.abm_level_property) { + switch (val) { + case ABM_SYSFS_CONTROL: + dm_new_state->abm_sysfs_forbidden = false; + break; + case ABM_LEVEL_OFF: + dm_new_state->abm_sysfs_forbidden = true; + dm_new_state->abm_level = ABM_LEVEL_IMMEDIATE_DISABLE; + break; + default: + dm_new_state->abm_sysfs_forbidden = true; + dm_new_state->abm_level = val; + }; + ret = 0; } return ret; @@ -7333,6 +7355,13 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector, } else if (property == adev->mode_info.underscan_property) { *val = dm_state->underscan_enable; ret = 0; + } else if (property == adev->mode_info.abm_level_property) { + if (!dm_state->abm_sysfs_forbidden) + *val = ABM_SYSFS_CONTROL; + else + *val = (dm_state->abm_level != ABM_LEVEL_IMMEDIATE_DISABLE) ? 
+ dm_state->abm_level : 0; + ret = 0; } return ret; @@ -7385,10 +7414,16 @@ static ssize_t panel_power_savings_store(struct device *device, return -EINVAL; drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - to_dm_connector_state(connector->state)->abm_level = val ?: - ABM_LEVEL_IMMEDIATE_DISABLE; + if (to_dm_connector_state(connector->state)->abm_sysfs_forbidden) + ret = -EBUSY; + else + to_dm_connector_state(connector->state)->abm_level = val ?: + ABM_LEVEL_IMMEDIATE_DISABLE; drm_modeset_unlock(&dev->mode_config.connection_mutex); + if (ret) + return ret; + drm_kms_helper_hotplug_event(dev); return count; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index db75e991ac7b..5a7aa903bd3c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -993,6 +993,7 @@ struct dm_connector_state { bool underscan_enable; bool freesync_capable; bool update_hdcp; + bool abm_sysfs_forbidden; uint8_t abm_level; int vcpi_slots; uint64_t pbn; -- cgit v1.2.3 From 9daa8f19e45b94cff1164082dd9d187f8753379c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 6 Oct 2025 10:09:09 -0500 Subject: drm/amd/display: Bump minimum for frame_warn_limit [Why] The bigger of CONFIG_FRAME_WARN and frame_warn_limit is used to trigger warnings about large stack frames. The dml_core_mode_support() stack frame has grown to 2056. [How] Update frame_warn_limit to 2056 so that CONFIG_FRAME_WARN of 2048 doesn't cause a failure. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4609 Reviewed-by: Aurabindo Pillai Signed-off-by: Mario Limonciello Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 4c21ce42054c..4ff0adef233a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -35,7 +35,7 @@ ifneq ($(CONFIG_FRAME_WARN),0) frame_warn_limit := 3072 endif else - frame_warn_limit := 2048 + frame_warn_limit := 2056 endif ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y) -- cgit v1.2.3 From 423ef48d41cbcfd5ee2039085d59e1f29b020ef4 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Tue, 30 Sep 2025 12:17:13 -0400 Subject: drm/amd/display: Fix DMUB reset sequence for DCN32 [WHY&HOW] Backport reset sequence fixes implemented on DCN401 to DCN32 to address stability issues when resetting the DMUB. 
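One detail of the backported sequence worth noting: the loop counter i is shared across the consecutive polls, so the stop-FW wait and the pwait-mode wait draw from a single 1s budget rather than getting 1s each. A userspace C sketch of that pattern, with stub predicates standing in for the DMCUB_SCRATCH7 and PWAIT status reads (both stubs are assumptions):

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Stubs for the two hardware conditions being polled. */
static int fw_stopped(void) { return 1; }
static int in_pwait(void)   { return 1; }

int main(void)
{
        const uint32_t timeout_us = 1 * 1000 * 1000; /* 1s total budget */
        uint32_t i = 0;

        /* First poll: wait for the stop-FW response... */
        for (; i < timeout_us; i++) {
                if (fw_stopped())
                        break;
                usleep(1);
        }

        /*
         * ...the second poll continues from the same i, so both waits
         * together can never exceed the one shared timeout.
         */
        for (; i < timeout_us; i++) {
                if (in_pwait())
                        break;
                usleep(1);
        }

        if (i >= timeout_us)
                fprintf(stderr, "timeout: force reset anyway\n");

        return 0;
}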
Reviewed-by: Nicholas Kazlauskas Signed-off-by: Dillon Varone Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c | 50 ++++++++++++++-------- drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c | 17 ++++++-- 2 files changed, 45 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c index ce041f6239dc..7e9856289910 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c @@ -89,50 +89,58 @@ static inline void dmub_dcn32_translate_addr(const union dmub_addr *addr_in, void dmub_dcn32_reset(struct dmub_srv *dmub) { union dmub_gpint_data_register cmd; - const uint32_t timeout = 100000; - uint32_t in_reset, is_enabled, scratch, i, pwait_mode; + const uint32_t timeout_us = 1 * 1000 * 1000; //1s + const uint32_t poll_delay_us = 1; //1us + uint32_t i = 0; + uint32_t enabled, in_reset, scratch, pwait_mode; - REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset); - REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enabled); + REG_GET(DMCUB_CNTL, + DMCUB_ENABLE, &enabled); + REG_GET(DMCUB_CNTL2, + DMCUB_SOFT_RESET, &in_reset); - if (in_reset == 0 && is_enabled != 0) { + if (enabled && in_reset == 0) { cmd.bits.status = 1; cmd.bits.command_code = DMUB_GPINT__STOP_FW; cmd.bits.param = 0; dmub->hw_funcs.set_gpint(dmub, cmd); - for (i = 0; i < timeout; ++i) { - if (dmub->hw_funcs.is_gpint_acked(dmub, cmd)) - break; - - udelay(1); - } - - for (i = 0; i < timeout; ++i) { + for (; i < timeout_us; i++) { scratch = REG_READ(DMCUB_SCRATCH7); if (scratch == DMUB_GPINT__STOP_FW_RESPONSE) break; - udelay(1); + udelay(poll_delay_us); } - for (i = 0; i < timeout; ++i) { + for (; i < timeout_us; i++) { REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &pwait_mode); if (pwait_mode & (1 << 0)) break; - udelay(1); + udelay(poll_delay_us); } - /* Force reset in case we timed out, DMCUB is likely hung. */ } - if (is_enabled) { + if (enabled) { REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); udelay(1); REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); } + if (i >= timeout_us) { + /* timeout should never occur */ + BREAK_TO_DEBUGGER(); + } + + REG_UPDATE(DMCUB_REGION3_CW2_TOP_ADDRESS, DMCUB_REGION3_CW2_ENABLE, 0); + REG_UPDATE(DMCUB_REGION3_CW3_TOP_ADDRESS, DMCUB_REGION3_CW3_ENABLE, 0); + REG_UPDATE(DMCUB_REGION3_CW4_TOP_ADDRESS, DMCUB_REGION3_CW4_ENABLE, 0); + REG_UPDATE(DMCUB_REGION3_CW5_TOP_ADDRESS, DMCUB_REGION3_CW5_ENABLE, 0); + REG_UPDATE(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, 0); + REG_UPDATE(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_ENABLE, 0); + REG_WRITE(DMCUB_INBOX1_RPTR, 0); REG_WRITE(DMCUB_INBOX1_WPTR, 0); REG_WRITE(DMCUB_OUTBOX1_RPTR, 0); @@ -141,7 +149,7 @@ void dmub_dcn32_reset(struct dmub_srv *dmub) REG_WRITE(DMCUB_OUTBOX0_WPTR, 0); REG_WRITE(DMCUB_SCRATCH0, 0); - /* Clear the GPINT command manually so we don't send anything during boot. */ + /* Clear the GPINT command manually so we don't reset again. 
*/ cmd.all = 0; dmub->hw_funcs.set_gpint(dmub, cmd); } @@ -163,7 +171,9 @@ void dmub_dcn32_backdoor_load(struct dmub_srv *dmub, dmub_dcn32_get_fb_base_offset(dmub, &fb_base, &fb_offset); + /* reset and disable DMCUB and MMHUBBUB DMUIF */ REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1); + REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); dmub_dcn32_translate_addr(&cw0->offset, fb_base, fb_offset, &offset); @@ -193,7 +203,9 @@ void dmub_dcn32_backdoor_load_zfb_mode(struct dmub_srv *dmub, { union dmub_addr offset; + /* reset and disable DMCUB and MMHUBBUB DMUIF */ REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1); + REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); offset = cw0->offset; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index b31adbd0d685..95542299e3b3 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -81,7 +81,7 @@ void dmub_dcn401_reset(struct dmub_srv *dmub) dmub->hw_funcs.set_gpint(dmub, cmd); for (; i < timeout_us; i++) { - scratch = dmub->hw_funcs.get_gpint_response(dmub); + scratch = REG_READ(DMCUB_SCRATCH7); if (scratch == DMUB_GPINT__STOP_FW_RESPONSE) break; @@ -97,11 +97,24 @@ void dmub_dcn401_reset(struct dmub_srv *dmub) } } + if (enabled) { + REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); + udelay(1); + REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); + } + if (i >= timeout_us) { /* timeout should never occur */ BREAK_TO_DEBUGGER(); } + REG_UPDATE(DMCUB_REGION3_CW2_TOP_ADDRESS, DMCUB_REGION3_CW2_ENABLE, 0); + REG_UPDATE(DMCUB_REGION3_CW3_TOP_ADDRESS, DMCUB_REGION3_CW3_ENABLE, 0); + REG_UPDATE(DMCUB_REGION3_CW4_TOP_ADDRESS, DMCUB_REGION3_CW4_ENABLE, 0); + REG_UPDATE(DMCUB_REGION3_CW5_TOP_ADDRESS, DMCUB_REGION3_CW5_ENABLE, 0); + REG_UPDATE(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, 0); + REG_UPDATE(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_ENABLE, 0); + REG_WRITE(DMCUB_INBOX1_RPTR, 0); REG_WRITE(DMCUB_INBOX1_WPTR, 0); REG_WRITE(DMCUB_OUTBOX1_RPTR, 0); @@ -134,7 +147,6 @@ void dmub_dcn401_backdoor_load(struct dmub_srv *dmub, /* reset and disable DMCUB and MMHUBBUB DMUIF */ REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1); - REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); dmub_dcn401_translate_addr(&cw0->offset, fb_base, fb_offset, &offset); @@ -168,7 +180,6 @@ void dmub_dcn401_backdoor_load_zfb_mode(struct dmub_srv *dmub, /* reset and disable DMCUB and MMHUBBUB DMUIF */ REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1); - REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); offset = cw0->offset; -- cgit v1.2.3 From 610cf76e9453b71f19e8c18e49876a8c5952e170 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Tue, 14 Oct 2025 13:05:26 -0400 Subject: drm/amd/display: Add opp count validation to dml2.1 Newer asics can have mismatching dpp and opp counts and dml needs to account for this. 
Reviewed-by: Charlene Liu Signed-off-by: Dmytro Laktyushkin Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 1 + .../dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 27 +++++++++++++++++++--- .../dml21/src/dml2_core/dml2_core_shared_types.h | 3 +++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 6ee37386f672..eba948e187c1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -28,6 +28,7 @@ struct dml2_core_ip_params core_dcn4_ip_caps_base = { .writeback_interface_buffer_size_kbytes = 90, //Number of pipes after DCN Pipe harvesting .max_num_dpp = 4, + .max_num_opp = 4, .max_num_otg = 4, .max_num_wb = 1, .max_dchub_pscl_bw_pix_per_clk = 4, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 4ccdb179b001..f16792f79bef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -4047,7 +4047,9 @@ static bool ValidateODMMode(enum dml2_odm_mode ODMMode, bool UseDSC, unsigned int NumberOfDSCSlices, unsigned int TotalNumberOfActiveDPP, + unsigned int TotalNumberOfActiveOPP, unsigned int MaxNumDPP, + unsigned int MaxNumOPP, double DISPCLKRequired, unsigned int NumberOfDPPRequired, unsigned int MaxHActiveForDSC, @@ -4063,7 +4065,7 @@ static bool ValidateODMMode(enum dml2_odm_mode ODMMode, if (DISPCLKRequired > MaxDispclk) return false; - if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP) + if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP || (TotalNumberOfActiveOPP + NumberOfDPPRequired) > MaxNumOPP) return false; if (are_odm_segments_symmetrical) { if (HActive % (NumberOfDPPRequired * pixels_per_clock_cycle)) @@ -4109,7 +4111,9 @@ static noinline_for_stack void CalculateODMMode( double MaxDispclk, bool DSCEnable, unsigned int TotalNumberOfActiveDPP, + unsigned int TotalNumberOfActiveOPP, unsigned int MaxNumDPP, + unsigned int MaxNumOPP, double PixelClock, unsigned int NumberOfDSCSlices, @@ -4179,7 +4183,9 @@ static noinline_for_stack void CalculateODMMode( UseDSC, NumberOfDSCSlices, TotalNumberOfActiveDPP, + TotalNumberOfActiveOPP, MaxNumDPP, + MaxNumOPP, DISPCLKRequired, NumberOfDPPRequired, MaxHActiveForDSC, @@ -8358,6 +8364,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); mode_lib->ms.TotalNumberOfActiveDPP = 0; + mode_lib->ms.TotalNumberOfActiveOPP = 0; mode_lib->ms.support.TotalAvailablePipesSupport = true; for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { @@ -8393,7 +8400,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.max_dispclk_freq_mhz, false, // DSCEnable mode_lib->ms.TotalNumberOfActiveDPP, + mode_lib->ms.TotalNumberOfActiveOPP, mode_lib->ip.max_num_dpp, + mode_lib->ip.max_num_opp, ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), mode_lib->ms.support.NumberOfDSCSlices[k], @@ -8412,7 +8421,9 @@ static bool 
dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.max_dispclk_freq_mhz, true, // DSCEnable mode_lib->ms.TotalNumberOfActiveDPP, + mode_lib->ms.TotalNumberOfActiveOPP, mode_lib->ip.max_num_dpp, + mode_lib->ip.max_num_opp, ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), mode_lib->ms.support.NumberOfDSCSlices[k], @@ -8516,20 +8527,23 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { mode_lib->ms.MPCCombine[k] = false; mode_lib->ms.NoOfDPP[k] = 1; + mode_lib->ms.NoOfOPP[k] = 1; if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) { mode_lib->ms.MPCCombine[k] = false; mode_lib->ms.NoOfDPP[k] = 4; + mode_lib->ms.NoOfOPP[k] = 4; } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) { mode_lib->ms.MPCCombine[k] = false; mode_lib->ms.NoOfDPP[k] = 3; + mode_lib->ms.NoOfOPP[k] = 3; } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) { mode_lib->ms.MPCCombine[k] = false; mode_lib->ms.NoOfDPP[k] = 2; + mode_lib->ms.NoOfOPP[k] = 2; } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) { mode_lib->ms.MPCCombine[k] = true; mode_lib->ms.NoOfDPP[k] = 2; - mode_lib->ms.TotalNumberOfActiveDPP++; } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) { mode_lib->ms.MPCCombine[k] = false; mode_lib->ms.NoOfDPP[k] = 1; @@ -8540,7 +8554,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { mode_lib->ms.MPCCombine[k] = true; mode_lib->ms.NoOfDPP[k] = 2; - mode_lib->ms.TotalNumberOfActiveDPP++; } } #if defined(__DML_VBA_DEBUG__) @@ -8548,8 +8561,16 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #endif } + mode_lib->ms.TotalNumberOfActiveDPP = 0; + mode_lib->ms.TotalNumberOfActiveOPP = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.TotalNumberOfActiveDPP += mode_lib->ms.NoOfDPP[k]; + mode_lib->ms.TotalNumberOfActiveOPP += mode_lib->ms.NoOfOPP[k]; + } if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp) mode_lib->ms.support.TotalAvailablePipesSupport = false; + if (mode_lib->ms.TotalNumberOfActiveOPP > (unsigned int)mode_lib->ip.max_num_opp) + mode_lib->ms.support.TotalAvailablePipesSupport = false; mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index ff1c47347610..051c31ec2f0e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -36,6 +36,7 @@ struct dml2_core_ip_params { unsigned int max_line_buffer_lines; unsigned int writeback_interface_buffer_size_kbytes; unsigned int max_num_dpp; + unsigned int max_num_opp; unsigned int max_num_otg; unsigned int TDLUT_33cube_count; unsigned int max_num_wb; @@ -570,6 +571,7 @@ struct dml2_core_internal_mode_support { enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; unsigned int SurfaceSizeInMALL[DML2_MAX_PLANES]; unsigned int NoOfDPP[DML2_MAX_PLANES]; + unsigned int NoOfOPP[DML2_MAX_PLANES]; bool MPCCombine[DML2_MAX_PLANES]; double 
dcfclk_deepsleep; double MinDPPCLKUsingSingleDPP[DML2_MAX_PLANES]; @@ -580,6 +582,7 @@ struct dml2_core_internal_mode_support { bool PTEBufferSizeNotExceeded[DML2_MAX_PLANES]; bool DCCMetaBufferSizeNotExceeded[DML2_MAX_PLANES]; unsigned int TotalNumberOfActiveDPP; + unsigned int TotalNumberOfActiveOPP; unsigned int TotalNumberOfSingleDPPSurfaces; unsigned int TotalNumberOfDCCActiveDPP; unsigned int Total3dlutActive; -- cgit v1.2.3 From 7c228b1aabce1ee9c4d050cb68a43638dc5b3900 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 7 Oct 2025 10:40:26 -0400 Subject: drm/amd/display: Fix wrong index for DCN401 cursor offload [Why] Payloads are ignored because the wrong index is written as part of the pipe update implementation for DCN401. [How] Align it to the DCN35 implementation and ensure the + 1 is added. Reviewed-by: Alvin Lee Signed-off-by: Nicholas Kazlauskas Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 23ecab4bcbba..b9c357a40707 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -2978,7 +2978,7 @@ void dcn401_update_cursor_offload_pipe(struct dc *dc, const struct pipe_ctx *pip return; stream_idx = top_pipe->pipe_idx; - write_idx = cs->offload_streams[stream_idx].write_idx; + write_idx = cs->offload_streams[stream_idx].write_idx + 1; /* new payload (+1) */ payload_idx = write_idx % ARRAY_SIZE(cs->offload_streams[stream_idx].payloads); p = &cs->offload_streams[stream_idx].payloads[payload_idx].pipe_data[pipe->pipe_idx].dcn401; -- cgit v1.2.3 From 6cf0552e03033da74eff5137a10b33030c8a450a Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 7 Oct 2025 10:39:07 -0400 Subject: drm/amd/display: Fix notification of vtotal to DMU for cursor offload [Why] It was placed after the early return and the notification is never sent. [How] Place it after .set_drr and before the return. Reviewed-by: Alvin Lee Signed-off-by: Nicholas Kazlauskas Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 4c964cf28f68..c4ef63011f47 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -493,13 +493,14 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, 1, *adjust); stream->adjust.timing_adjust_pending = false; + + if (dc->hwss.notify_cursor_offload_drr_update) + dc->hwss.notify_cursor_offload_drr_update(dc, dc->current_state, stream); + return true; } } - if (dc->hwss.notify_cursor_offload_drr_update) - dc->hwss.notify_cursor_offload_drr_update(dc, dc->current_state, stream); - return false; } -- cgit v1.2.3 From e6a8a000cfe6a1106c17ab4a47eb6dd21596968c Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Tue, 21 Oct 2025 13:52:58 +0800 Subject: drm/amd/display: Rename dml2 to dml2_0 folder [Why] dml2 folder contains all logic for all versions of DML2 This is currently DML2.0 and DML2.1. Rename dml2 to dml2_0 folder to reflect this better (dml2_0 for DML2.0). [How] Rename dml2 to dml2_0 folder and update dml2 references to use dml2_0 folder. 
Reviewed-by: Dillon Varone Signed-off-by: Austin Zheng Signed-off-by: waynelin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/core/dc_state.c | 4 +- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- drivers/gpu/drm/amd/display/dc/dml2/Makefile | 141 - drivers/gpu/drm/amd/display/dc/dml2/cmntypes.h | 94 - .../drm/amd/display/dc/dml2/display_mode_core.c | 10335 -------------- .../drm/amd/display/dc/dml2/display_mode_core.h | 204 - .../display/dc/dml2/display_mode_core_structs.h | 2033 --- .../amd/display/dc/dml2/display_mode_lib_defines.h | 79 - .../drm/amd/display/dc/dml2/display_mode_util.c | 798 -- .../drm/amd/display/dc/dml2/display_mode_util.h | 76 - .../dc/dml2/dml21/dml21_translation_helper.c | 929 -- .../dc/dml2/dml21/dml21_translation_helper.h | 28 - .../drm/amd/display/dc/dml2/dml21/dml21_utils.c | 516 - .../drm/amd/display/dc/dml2/dml21/dml21_utils.h | 50 - .../drm/amd/display/dc/dml2/dml21/dml21_wrapper.c | 468 - .../drm/amd/display/dc/dml2/dml21/dml21_wrapper.h | 135 - .../dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h | 373 - .../dc/dml2/dml21/inc/dml2_external_lib_deps.h | 10 - .../drm/amd/display/dc/dml2/dml21/inc/dml_top.h | 46 - .../dc/dml2/dml21/inc/dml_top_dchub_registers.h | 189 - .../dc/dml2/dml21/inc/dml_top_display_cfg_types.h | 520 - .../dc/dml2/dml21/inc/dml_top_policy_types.h | 13 - .../dml2/dml21/inc/dml_top_soc_parameter_types.h | 215 - .../amd/display/dc/dml2/dml21/inc/dml_top_types.h | 750 -- .../dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 661 - .../dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h | 12 - .../dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 13339 ------------------ .../dml21/src/dml2_core/dml2_core_dcn4_calcs.h | 39 - .../dml2/dml21/src/dml2_core/dml2_core_factory.c | 39 - .../dml2/dml21/src/dml2_core/dml2_core_factory.h | 13 - .../dml21/src/dml2_core/dml2_core_shared_types.h | 2335 ---- .../dc/dml2/dml21/src/dml2_core/dml2_core_utils.c | 786 -- .../dc/dml2/dml21/src/dml2_core/dml2_core_utils.h | 43 - .../dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c | 785 -- .../dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h | 14 - .../dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c | 50 - .../dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h | 13 - .../dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c | 198 - .../dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h | 13 - .../dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c | 39 - .../dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h | 13 - .../dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c | 706 - .../dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h | 22 - .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 2394 ---- .../dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h | 33 - .../dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c | 83 - .../dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h | 13 - .../src/dml2_standalone_libraries/lib_float_math.c | 147 - .../src/dml2_standalone_libraries/lib_float_math.h | 25 - .../dml2/dml21/src/dml2_top/dml2_top_interfaces.c | 49 - .../dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c | 10 - .../dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h | 9 - .../dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c | 1170 -- .../dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h | 14 - .../amd/display/dc/dml2/dml21/src/inc/dml2_debug.h | 189 - .../dml21/src/inc/dml2_internal_shared_types.h | 1004 -- .../amd/display/dc/dml2/dml2_dc_resource_mgmt.c | 1174 -- 
.../amd/display/dc/dml2/dml2_dc_resource_mgmt.h | 52 - .../gpu/drm/amd/display/dc/dml2/dml2_dc_types.h | 43 - .../drm/amd/display/dc/dml2/dml2_internal_types.h | 157 - .../drm/amd/display/dc/dml2/dml2_mall_phantom.c | 910 -- .../drm/amd/display/dc/dml2/dml2_mall_phantom.h | 52 - drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c | 311 - drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.h | 47 - .../amd/display/dc/dml2/dml2_translation_helper.c | 1525 --- .../amd/display/dc/dml2/dml2_translation_helper.h | 41 - drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 560 - drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h | 149 - drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 704 - drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h | 309 - drivers/gpu/drm/amd/display/dc/dml2/dml_assert.h | 32 - .../gpu/drm/amd/display/dc/dml2/dml_depedencies.h | 33 - .../amd/display/dc/dml2/dml_display_rq_dlg_calc.c | 573 - .../amd/display/dc/dml2/dml_display_rq_dlg_calc.h | 63 - drivers/gpu/drm/amd/display/dc/dml2/dml_logging.h | 31 - drivers/gpu/drm/amd/display/dc/dml2_0/Makefile | 140 + drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h | 94 + .../drm/amd/display/dc/dml2_0/display_mode_core.c | 10337 ++++++++++++++ .../drm/amd/display/dc/dml2_0/display_mode_core.h | 204 + .../display/dc/dml2_0/display_mode_core_structs.h | 2032 +++ .../display/dc/dml2_0/display_mode_lib_defines.h | 79 + .../drm/amd/display/dc/dml2_0/display_mode_util.c | 798 ++ .../drm/amd/display/dc/dml2_0/display_mode_util.h | 74 + .../dc/dml2_0/dml21/dml21_translation_helper.c | 929 ++ .../dc/dml2_0/dml21/dml21_translation_helper.h | 28 + .../drm/amd/display/dc/dml2_0/dml21/dml21_utils.c | 516 + .../drm/amd/display/dc/dml2_0/dml21/dml21_utils.h | 50 + .../amd/display/dc/dml2_0/dml21/dml21_wrapper.c | 466 + .../amd/display/dc/dml2_0/dml21/dml21_wrapper.h | 135 + .../dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h | 372 + .../dc/dml2_0/dml21/inc/dml2_external_lib_deps.h | 10 + .../drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h | 46 + .../dc/dml2_0/dml21/inc/dml_top_dchub_registers.h | 189 + .../dml2_0/dml21/inc/dml_top_display_cfg_types.h | 520 + .../dc/dml2_0/dml21/inc/dml_top_policy_types.h | 13 + .../dml2_0/dml21/inc/dml_top_soc_parameter_types.h | 215 + .../display/dc/dml2_0/dml21/inc/dml_top_types.h | 750 ++ .../dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c | 661 + .../dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h | 12 + .../dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 13341 +++++++++++++++++++ .../dml21/src/dml2_core/dml2_core_dcn4_calcs.h | 39 + .../dml2_0/dml21/src/dml2_core/dml2_core_factory.c | 39 + .../dml2_0/dml21/src/dml2_core/dml2_core_factory.h | 13 + .../dml21/src/dml2_core/dml2_core_shared_types.h | 2335 ++++ .../dml2_0/dml21/src/dml2_core/dml2_core_utils.c | 786 ++ .../dml2_0/dml21/src/dml2_core/dml2_core_utils.h | 43 + .../dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c | 785 ++ .../dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h | 14 + .../dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c | 50 + .../dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.h | 13 + .../dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.c | 198 + .../dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.h | 13 + .../dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c | 39 + .../dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.h | 13 + .../dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.c | 706 + .../dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.h | 22 + .../dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 2394 ++++ .../dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h | 33 + 
 .../dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c | 83 +
 .../dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.h | 13 +
 .../src/dml2_standalone_libraries/lib_float_math.c | 147 +
 .../src/dml2_standalone_libraries/lib_float_math.h | 25 +
 .../dml21/src/dml2_top/dml2_top_interfaces.c | 49 +
 .../dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.c | 10 +
 .../dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.h | 9 +
 .../dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c | 1170 ++
 .../dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.h | 14 +
 .../display/dc/dml2_0/dml21/src/inc/dml2_debug.h | 189 +
 .../dml21/src/inc/dml2_internal_shared_types.h | 1004 ++
 .../amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c | 1174 ++
 .../amd/display/dc/dml2_0/dml2_dc_resource_mgmt.h | 52 +
 .../gpu/drm/amd/display/dc/dml2_0/dml2_dc_types.h | 43 +
 .../amd/display/dc/dml2_0/dml2_internal_types.h | 157 +
 .../drm/amd/display/dc/dml2_0/dml2_mall_phantom.c | 911 ++
 .../drm/amd/display/dc/dml2_0/dml2_mall_phantom.h | 52 +
 .../gpu/drm/amd/display/dc/dml2_0/dml2_policy.c | 311 +
 .../gpu/drm/amd/display/dc/dml2_0/dml2_policy.h | 47 +
 .../display/dc/dml2_0/dml2_translation_helper.c | 1528 +++
 .../display/dc/dml2_0/dml2_translation_helper.h | 41 +
 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c | 560 +
 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.h | 149 +
 .../gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c | 704 +
 .../gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.h | 309 +
 drivers/gpu/drm/amd/display/dc/dml2_0/dml_assert.h | 32 +
 .../drm/amd/display/dc/dml2_0/dml_depedencies.h | 34 +
 .../display/dc/dml2_0/dml_display_rq_dlg_calc.c | 573 +
 .../display/dc/dml2_0/dml_display_rq_dlg_calc.h | 63 +
 .../gpu/drm/amd/display/dc/dml2_0/dml_logging.h | 32 +
 .../drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h | 2 +-
 drivers/gpu/drm/amd/display/dc/inc/core_types.h | 4 +-
 drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 4 +-
 drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h | 2 +-
 .../amd/display/dc/resource/dcn32/dcn32_resource.c | 2 +-
 .../amd/display/dc/resource/dcn35/dcn35_resource.c | 2 +-
 .../display/dc/resource/dcn351/dcn351_resource.c | 2 +-
 .../amd/display/dc/resource/dcn36/dcn36_resource.c | 2 +-
 .../display/dc/resource/dcn401/dcn401_resource.c | 2 +-
 .../dcn401/dcn401_soc_and_ip_translator.h | 2 +-
 161 files changed, 49049 insertions(+), 49046 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/Makefile
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/cmntypes.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/display_mode_lib_defines.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml2_external_lib_deps.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml_assert.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml_depedencies.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.h
 delete mode 100644 drivers/gpu/drm/amd/display/dc/dml2/dml_logging.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/Makefile
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_lib_defines.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_debug.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_types.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_internal_types.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml_assert.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml_depedencies.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.h
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml2_0/dml_logging.h
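The mode list above closes the preamble; everything from here down is the patch body. Aside from the directory move itself, the only hand-edited hunks are the build and include paths that name the library, and they come first below. As a minimal sketch of what any caller of these headers sees after the rename (this mirrors the dc.h hunk below; the surrounding caller code is hypothetical, and only the path changes):

    /* before this patch: DML2.0 headers are reached via dc/dml2/ */
    #include "dml2/dml2_wrapper.h"

    /* after it: the identical header is reached via dc/dml2_0/ */
    #include "dml2_0/dml2_wrapper.h"
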
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index dc943abd6dba..7277ed21552f 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -36,7 +36,7 @@
 DC_LIBS += dcn30
 DC_LIBS += dcn301
 DC_LIBS += dcn31
 DC_LIBS += dml
-DC_LIBS += dml2
+DC_LIBS += dml2_0
 DC_LIBS += soc_and_ip_translator
 endif
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index c4ef63011f47..246ea9754810 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -83,7 +83,7 @@
 #include "hw_sequencer_private.h"
 
 #if defined(CONFIG_DRM_AMD_DC_FP)
-#include "dml2/dml2_internal_types.h"
+#include "dml2_0/dml2_internal_types.h"
 #include "soc_and_ip_translator.h"
 #endif
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index bc5dedf5f60c..8d0cdbf14e17 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -95,7 +95,7 @@
 #define DC_LOGGER \
 	dc->ctx->logger
 #define DC_LOGGER_INIT(logger)
 
-#include "dml2/dml2_wrapper.h"
+#include "dml2_0/dml2_wrapper.h"
 
 #define UNABLE_TO_SPLIT -1
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index c61300a7cb1c..2de8ef4a58ec 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -35,8 +35,8 @@
 #include "link_enc_cfg.h"
 
 #if defined(CONFIG_DRM_AMD_DC_FP)
-#include "dml2/dml2_wrapper.h"
-#include "dml2/dml2_internal_types.h"
+#include "dml2_0/dml2_wrapper.h"
+#include "dml2_0/dml2_internal_types.h"
 #endif
 
 #define DC_LOGGER \
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 66d9da5426f1..182a35f6b439 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -42,7 +42,7 @@
 #include "inc/hw/dmcu.h"
 
 #include "dml/display_mode_lib.h"
-#include "dml2/dml2_wrapper.h"
+#include "dml2_0/dml2_wrapper.h"
 
 #include "dmub/inc/dmub_cmd.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
deleted file mode 100644
index 4ff0adef233a..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ /dev/null
@@ -1,141 +0,0 @@
-# SPDX-License-Identifier: MIT */
-#
-# Copyright 2023 Advanced Micro Devices, Inc.
-# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# Authors: AMD -# -# Makefile for dml2. - -dml2_ccflags := $(CC_FLAGS_FPU) -dml2_rcflags := $(CC_FLAGS_NO_FPU) - -ifneq ($(CONFIG_FRAME_WARN),0) - ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) - ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) - frame_warn_limit := 4096 - else - frame_warn_limit := 3072 - endif - else - frame_warn_limit := 2056 - endif - - ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y) - frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit) - endif -endif - -subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2 -subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_core -subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_mcg/ -subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_dpmm/ -subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_pmo/ -subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_standalone_libraries/ -subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/inc -subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/inc -subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/ - -CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag) -CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_wrapper.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_utils.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_policy.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_translation_helper.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_mall_phantom.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml_display_rq_dlg_calc.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_dc_resource_mgmt.o := $(dml2_ccflags) - -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml2_wrapper.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml2_utils.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml2_policy.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml2_translation_helper.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml2_mall_phantom.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml_display_rq_dlg_calc.o := $(dml2_rcflags) 
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml2_dc_resource_mgmt.o := $(dml2_rcflags) - -DML2 = display_mode_core.o display_mode_util.o dml2_wrapper.o \ - dml2_utils.o dml2_policy.o dml2_translation_helper.o dml2_dc_resource_mgmt.o dml2_mall_phantom.o \ - dml_display_rq_dlg_calc.o - -AMD_DAL_DML2 = $(addprefix $(AMDDALPATH)/dc/dml2/,$(DML2)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DML2) - -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_utils.o := $(dml2_ccflags) $(frame_warn_flag) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml21_wrapper.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_ccflags) - -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_utils.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml21_wrapper.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_rcflags) - -DML21 := src/dml2_top/dml2_top_interfaces.o -DML21 += src/dml2_top/dml2_top_soc15.o 
-DML21 += src/dml2_core/dml2_core_dcn4.o -DML21 += src/dml2_core/dml2_core_utils.o -DML21 += src/dml2_core/dml2_core_factory.o -DML21 += src/dml2_core/dml2_core_dcn4_calcs.o -DML21 += src/dml2_dpmm/dml2_dpmm_dcn4.o -DML21 += src/dml2_dpmm/dml2_dpmm_factory.o -DML21 += src/dml2_mcg/dml2_mcg_dcn4.o -DML21 += src/dml2_mcg/dml2_mcg_factory.o -DML21 += src/dml2_pmo/dml2_pmo_dcn3.o -DML21 += src/dml2_pmo/dml2_pmo_factory.o -DML21 += src/dml2_pmo/dml2_pmo_dcn4_fams2.o -DML21 += src/dml2_standalone_libraries/lib_float_math.o -DML21 += dml21_translation_helper.o -DML21 += dml21_wrapper.o -DML21 += dml21_utils.o - -AMD_DAL_DML21 = $(addprefix $(AMDDALPATH)/dc/dml2/dml21/,$(DML21)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DML21) - diff --git a/drivers/gpu/drm/amd/display/dc/dml2/cmntypes.h b/drivers/gpu/drm/amd/display/dc/dml2/cmntypes.h deleted file mode 100644 index e450445bc05d..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/cmntypes.h +++ /dev/null @@ -1,94 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __CMNTYPES_H__ -#define __CMNTYPES_H__ - -#ifdef __GNUC__ -#if __GNUC__ == 4 && __GNUC_MINOR__ > 7 -typedef unsigned int uint; -#endif -#endif - -typedef signed char int8, *pint8; -typedef signed short int16, *pint16; -typedef signed int int32, *pint32; -typedef signed int64, *pint64; - -typedef unsigned char uint8, *puint8; -typedef unsigned short uint16, *puint16; -typedef unsigned int uint32, *puint32; -typedef unsigned uint64, *puint64; - -typedef unsigned long int ulong; -typedef unsigned char uchar; -typedef unsigned int uint; - -typedef void *pvoid; -typedef char *pchar; -typedef const void *const_pvoid; -typedef const char *const_pchar; - -typedef struct rgba_struct { - uint8 a; - uint8 r; - uint8 g; - uint8 b; -} rgba_t; - -typedef struct { - uint8 blue; - uint8 green; - uint8 red; - uint8 alpha; -} gen_color_t; - -typedef union { - uint32 val; - gen_color_t f; -} gen_color_u; - -// -// Types to make it easy to get or set the bits of a float/double. -// Avoids automatic casting from int to float and back. 
-// -#if 0 -typedef union { - uint32 i; - float f; -} uintfloat32; - -typedef union { - uint64 i; - double f; -} uintfloat64; - -#ifndef UNREFERENCED_PARAMETER -#define UNREFERENCED_PARAMETER(x) x = x -#endif -#endif - -#endif //__CMNTYPES_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c deleted file mode 100644 index 4b9b2e84d381..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ /dev/null @@ -1,10335 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "display_mode_core.h" -#include "display_mode_util.h" -#include "display_mode_lib_defines.h" - -#include "dml_assert.h" - -#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 -#define TB_BORROWED_MAX 400 -#define DML_MAX_VSTARTUP_START 1023 - -// --------------------------- -// Declaration Begins -// --------------------------- -static void CalculateBytePerPixelAndBlockSizes( - enum dml_source_format_class SourcePixelFormat, - enum dml_swizzle_mode SurfaceTiling, - // Output - dml_uint_t *BytePerPixelY, - dml_uint_t *BytePerPixelC, - dml_float_t *BytePerPixelDETY, - dml_float_t *BytePerPixelDETC, - dml_uint_t *BlockHeight256BytesY, - dml_uint_t *BlockHeight256BytesC, - dml_uint_t *BlockWidth256BytesY, - dml_uint_t *BlockWidth256BytesC, - dml_uint_t *MacroTileHeightY, - dml_uint_t *MacroTileHeightC, - dml_uint_t *MacroTileWidthY, - dml_uint_t *MacroTileWidthC); - -static dml_float_t CalculateWriteBackDISPCLK( - enum dml_source_format_class WritebackPixelFormat, - dml_float_t PixelClock, - dml_float_t WritebackHRatio, - dml_float_t WritebackVRatio, - dml_uint_t WritebackHTaps, - dml_uint_t WritebackVTaps, - dml_uint_t WritebackSourceWidth, - dml_uint_t WritebackDestinationWidth, - dml_uint_t HTotal, - dml_uint_t WritebackLineBufferSize, - dml_float_t DISPCLKDPPCLKVCOSpeed); - -static void CalculateVMRowAndSwath( - struct display_mode_lib_scratch_st *s, - struct CalculateVMRowAndSwath_params_st *p); - -static void CalculateOutputLink( - dml_float_t PHYCLKPerState, - dml_float_t PHYCLKD18PerState, - dml_float_t PHYCLKD32PerState, - dml_float_t Downspreading, - dml_bool_t IsMainSurfaceUsingTheIndicatedTiming, - enum dml_output_encoder_class Output, - enum dml_output_format_class OutputFormat, - dml_uint_t HTotal, - dml_uint_t HActive, - dml_float_t PixelClockBackEnd, - dml_float_t 
ForcedOutputLinkBPP, - dml_uint_t DSCInputBitPerComponent, - dml_uint_t NumberOfDSCSlices, - dml_float_t AudioSampleRate, - dml_uint_t AudioSampleLayout, - enum dml_odm_mode ODMModeNoDSC, - enum dml_odm_mode ODMModeDSC, - enum dml_dsc_enable DSCEnable, - dml_uint_t OutputLinkDPLanes, - enum dml_output_link_dp_rate OutputLinkDPRate, - - // Output - dml_bool_t *RequiresDSC, - dml_bool_t *RequiresFEC, - dml_float_t *OutBpp, - enum dml_output_type_and_rate__type *OutputType, - enum dml_output_type_and_rate__rate *OutputRate, - dml_uint_t *RequiredSlots); - -static void CalculateODMMode( - dml_uint_t MaximumPixelsPerLinePerDSCUnit, - dml_uint_t HActive, - enum dml_output_encoder_class Output, - enum dml_output_format_class OutputFormat, - enum dml_odm_use_policy ODMUse, - dml_float_t StateDispclk, - dml_float_t MaxDispclk, - dml_bool_t DSCEnable, - dml_uint_t TotalNumberOfActiveDPP, - dml_uint_t MaxNumDPP, - dml_float_t PixelClock, - dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, - dml_float_t DISPCLKRampingMargin, - dml_float_t DISPCLKDPPCLKVCOSpeed, - dml_uint_t NumberOfDSCSlices, - - // Output - dml_bool_t *TotalAvailablePipesSupport, - dml_uint_t *NumberOfDPP, - enum dml_odm_mode *ODMMode, - dml_float_t *RequiredDISPCLKPerSurface); - -static dml_float_t CalculateRequiredDispclk( - enum dml_odm_mode ODMMode, - dml_float_t PixelClock, - dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, - dml_float_t DISPCLKRampingMargin, - dml_float_t DISPCLKDPPCLKVCOSpeed, - dml_float_t MaxDispclkSingle); - -static void CalculateSinglePipeDPPCLKAndSCLThroughput( - dml_float_t HRatio, - dml_float_t HRatioChroma, - dml_float_t VRatio, - dml_float_t VRatioChroma, - dml_float_t MaxDCHUBToPSCLThroughput, - dml_float_t MaxPSCLToLBThroughput, - dml_float_t PixelClock, - enum dml_source_format_class SourcePixelFormat, - dml_uint_t HTaps, - dml_uint_t HTapsChroma, - dml_uint_t VTaps, - dml_uint_t VTapsChroma, - - // Output - dml_float_t *PSCL_THROUGHPUT, - dml_float_t *PSCL_THROUGHPUT_CHROMA, - dml_float_t *DPPCLKUsingSingleDPP); - -static void CalculateDPPCLK( - dml_uint_t NumberOfActiveSurfaces, - dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, - dml_float_t DISPCLKDPPCLKVCOSpeed, - dml_float_t DPPCLKUsingSingleDPP[], - dml_uint_t DPPPerSurface[], - - // Output - dml_float_t *GlobalDPPCLK, - dml_float_t Dppclk[]); - -static void CalculateMALLUseForStaticScreen( - dml_uint_t NumberOfActiveSurfaces, - dml_uint_t MALLAllocatedForDCNFinal, - enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen, - dml_uint_t SurfaceSizeInMALL[], - dml_bool_t one_row_per_frame_fits_in_buffer[], - - // Output - dml_bool_t UsesMALLForStaticScreen[]); - -static dml_uint_t dscceComputeDelay( - dml_uint_t bpc, - dml_float_t BPP, - dml_uint_t sliceWidth, - dml_uint_t numSlices, - enum dml_output_format_class pixelFormat, - enum dml_output_encoder_class Output); - -static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, - enum dml_output_encoder_class Output); - -static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch, - struct CalculatePrefetchSchedule_params_st *p); - -static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed); - -static void CalculateDCCConfiguration( - dml_bool_t DCCEnabled, - dml_bool_t DCCProgrammingAssumesScanDirectionUnknown, - enum dml_source_format_class SourcePixelFormat, - dml_uint_t SurfaceWidthLuma, - dml_uint_t SurfaceWidthChroma, - dml_uint_t SurfaceHeightLuma, - dml_uint_t SurfaceHeightChroma, - dml_uint_t 
nomDETInKByte, - dml_uint_t RequestHeight256ByteLuma, - dml_uint_t RequestHeight256ByteChroma, - enum dml_swizzle_mode TilingFormat, - dml_uint_t BytePerPixelY, - dml_uint_t BytePerPixelC, - dml_float_t BytePerPixelDETY, - dml_float_t BytePerPixelDETC, - enum dml_rotation_angle SourceScan, - // Output - dml_uint_t *MaxUncompressedBlockLuma, - dml_uint_t *MaxUncompressedBlockChroma, - dml_uint_t *MaxCompressedBlockLuma, - dml_uint_t *MaxCompressedBlockChroma, - dml_uint_t *IndependentBlockLuma, - dml_uint_t *IndependentBlockChroma); - -static dml_uint_t CalculatePrefetchSourceLines( - dml_float_t VRatio, - dml_uint_t VTaps, - dml_bool_t Interlace, - dml_bool_t ProgressiveToInterlaceUnitInOPP, - dml_uint_t SwathHeight, - enum dml_rotation_angle SourceScan, - dml_bool_t ViewportStationary, - dml_uint_t SwathWidth, - dml_uint_t ViewportHeight, - dml_uint_t ViewportXStart, - dml_uint_t ViewportYStart, - - // Output - dml_uint_t *VInitPreFill, - dml_uint_t *MaxNumSwath); - -static dml_uint_t CalculateVMAndRowBytes( - dml_bool_t ViewportStationary, - dml_bool_t DCCEnable, - dml_uint_t NumberOfDPPs, - dml_uint_t BlockHeight256Bytes, - dml_uint_t BlockWidth256Bytes, - enum dml_source_format_class SourcePixelFormat, - dml_uint_t SurfaceTiling, - dml_uint_t BytePerPixel, - enum dml_rotation_angle SourceScan, - dml_uint_t SwathWidth, - dml_uint_t ViewportHeight, - dml_uint_t ViewportXStart, - dml_uint_t ViewportYStart, - dml_bool_t GPUVMEnable, - dml_uint_t GPUVMMaxPageTableLevels, - dml_uint_t GPUVMMinPageSizeKBytes, - dml_uint_t PTEBufferSizeInRequests, - dml_uint_t Pitch, - dml_uint_t DCCMetaPitch, - dml_uint_t MacroTileWidth, - dml_uint_t MacroTileHeight, - - // Output - dml_uint_t *MetaRowByte, - dml_uint_t *PixelPTEBytesPerRow, - dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check - dml_uint_t *dpte_row_width_ub, - dml_uint_t *dpte_row_height, - dml_uint_t *dpte_row_height_linear, - dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame, - dml_uint_t *dpte_row_width_ub_one_row_per_frame, - dml_uint_t *dpte_row_height_one_row_per_frame, - dml_uint_t *MetaRequestWidth, - dml_uint_t *MetaRequestHeight, - dml_uint_t *meta_row_width, - dml_uint_t *meta_row_height, - dml_uint_t *PixelPTEReqWidth, - dml_uint_t *PixelPTEReqHeight, - dml_uint_t *PTERequestSize, - dml_uint_t *DPDE0BytesFrame, - dml_uint_t *MetaPTEBytesFrame); - -static dml_float_t CalculateTWait( - dml_uint_t PrefetchMode, - enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, - dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - dml_bool_t DRRDisplay, - dml_float_t DRAMClockChangeLatency, - dml_float_t FCLKChangeLatency, - dml_float_t UrgentLatency, - dml_float_t SREnterPlusExitTime); - -static void CalculatePrefetchMode( - enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank, - dml_uint_t *MinPrefetchMode, - dml_uint_t *MaxPrefetchMode); - -static void CalculateRowBandwidth( - dml_bool_t GPUVMEnable, - enum dml_source_format_class SourcePixelFormat, - dml_float_t VRatio, - dml_float_t VRatioChroma, - dml_bool_t DCCEnable, - dml_float_t LineTime, - dml_uint_t MetaRowByteLuma, - dml_uint_t MetaRowByteChroma, - dml_uint_t meta_row_height_luma, - dml_uint_t meta_row_height_chroma, - dml_uint_t PixelPTEBytesPerRowLuma, - dml_uint_t PixelPTEBytesPerRowChroma, - dml_uint_t dpte_row_height_luma, - dml_uint_t dpte_row_height_chroma, - // Output - dml_float_t *meta_row_bw, - dml_float_t *dpte_row_bw); - -static void CalculateFlipSchedule( - dml_float_t HostVMInefficiencyFactor, - dml_float_t 
UrgentExtraLatency, - dml_float_t UrgentLatency, - dml_uint_t GPUVMMaxPageTableLevels, - dml_bool_t HostVMEnable, - dml_uint_t HostVMMaxNonCachedPageTableLevels, - dml_bool_t GPUVMEnable, - dml_uint_t HostVMMinPageSize, - dml_float_t PDEAndMetaPTEBytesPerFrame, - dml_float_t MetaRowBytes, - dml_float_t DPTEBytesPerRow, - dml_float_t BandwidthAvailableForImmediateFlip, - dml_uint_t TotImmediateFlipBytes, - enum dml_source_format_class SourcePixelFormat, - dml_float_t LineTime, - dml_float_t VRatio, - dml_float_t VRatioChroma, - dml_float_t Tno_bw, - dml_bool_t DCCEnable, - dml_uint_t dpte_row_height, - dml_uint_t meta_row_height, - dml_uint_t dpte_row_height_chroma, - dml_uint_t meta_row_height_chroma, - dml_bool_t use_one_row_for_frame_flip, - - // Output - dml_float_t *DestinationLinesToRequestVMInImmediateFlip, - dml_float_t *DestinationLinesToRequestRowInImmediateFlip, - dml_float_t *final_flip_bw, - dml_bool_t *ImmediateFlipSupportedForPipe); - -static dml_float_t CalculateWriteBackDelay( - enum dml_source_format_class WritebackPixelFormat, - dml_float_t WritebackHRatio, - dml_float_t WritebackVRatio, - dml_uint_t WritebackVTaps, - dml_uint_t WritebackDestinationWidth, - dml_uint_t WritebackDestinationHeight, - dml_uint_t WritebackSourceHeight, - dml_uint_t HTotal); - -static void CalculateVUpdateAndDynamicMetadataParameters( - dml_uint_t MaxInterDCNTileRepeaters, - dml_float_t Dppclk, - dml_float_t DISPCLK, - dml_float_t DCFClkDeepSleep, - dml_float_t PixelClock, - dml_uint_t HTotal, - dml_uint_t VBlank, - dml_uint_t DynamicMetadataTransmittedBytes, - dml_uint_t DynamicMetadataLinesBeforeActiveRequired, - dml_uint_t InterlaceEnable, - dml_bool_t ProgressiveToInterlaceUnitInOPP, - dml_float_t *TSetup, - dml_float_t *Tdmbf, - dml_float_t *Tdmec, - dml_float_t *Tdmsks, - dml_uint_t *VUpdateOffsetPix, - dml_uint_t *VUpdateWidthPix, - dml_uint_t *VReadyOffsetPix); - -static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported); - -static dml_float_t TruncToValidBPP( - dml_float_t LinkBitRate, - dml_uint_t Lanes, - dml_uint_t HTotal, - dml_uint_t HActive, - dml_float_t PixelClock, - dml_float_t DesiredBPP, - dml_bool_t DSCEnable, - enum dml_output_encoder_class Output, - enum dml_output_format_class Format, - dml_uint_t DSCInputBitPerComponent, - dml_uint_t DSCSlices, - dml_uint_t AudioRate, - dml_uint_t AudioLayout, - enum dml_odm_mode ODMModeNoDSC, - enum dml_odm_mode ODMModeDSC, - // Output - dml_uint_t *RequiredSlotsSingle); - -static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - struct display_mode_lib_scratch_st *s, - struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p); - -static void CalculateDCFCLKDeepSleep( - dml_uint_t NumberOfActiveSurfaces, - dml_uint_t BytePerPixelY[], - dml_uint_t BytePerPixelC[], - dml_float_t VRatio[], - dml_float_t VRatioChroma[], - dml_uint_t SwathWidthY[], - dml_uint_t SwathWidthC[], - dml_uint_t DPPPerSurface[], - dml_float_t HRatio[], - dml_float_t HRatioChroma[], - dml_float_t PixelClock[], - dml_float_t PSCL_THROUGHPUT[], - dml_float_t PSCL_THROUGHPUT_CHROMA[], - dml_float_t Dppclk[], - dml_float_t ReadBandwidthLuma[], - dml_float_t ReadBandwidthChroma[], - dml_uint_t ReturnBusWidth, - - // Output - dml_float_t *DCFCLKDeepSleep); - -static void CalculateUrgentBurstFactor( - enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, - dml_uint_t swath_width_luma_ub, - dml_uint_t swath_width_chroma_ub, - dml_uint_t SwathHeightY, - dml_uint_t 
SwathHeightC, - dml_float_t LineTime, - dml_float_t UrgentLatency, - dml_float_t CursorBufferSize, - dml_uint_t CursorWidth, - dml_uint_t CursorBPP, - dml_float_t VRatio, - dml_float_t VRatioC, - dml_float_t BytePerPixelInDETY, - dml_float_t BytePerPixelInDETC, - dml_uint_t DETBufferSizeY, - dml_uint_t DETBufferSizeC, - // Output - dml_float_t *UrgentBurstFactorCursor, - dml_float_t *UrgentBurstFactorLuma, - dml_float_t *UrgentBurstFactorChroma, - dml_bool_t *NotEnoughUrgentLatencyHiding); - -static dml_float_t RequiredDTBCLK( - dml_bool_t DSCEnable, - dml_float_t PixelClock, - enum dml_output_format_class OutputFormat, - dml_float_t OutputBpp, - dml_uint_t DSCSlices, - dml_uint_t HTotal, - dml_uint_t HActive, - dml_uint_t AudioRate, - dml_uint_t AudioLayoutSingle); - -static void UseMinimumDCFCLK( - struct display_mode_lib_scratch_st *scratch, - struct UseMinimumDCFCLK_params_st *p); - -static void CalculatePixelDeliveryTimes( - dml_uint_t NumberOfActiveSurfaces, - dml_float_t VRatio[], - dml_float_t VRatioChroma[], - dml_float_t VRatioPrefetchY[], - dml_float_t VRatioPrefetchC[], - dml_uint_t swath_width_luma_ub[], - dml_uint_t swath_width_chroma_ub[], - dml_uint_t DPPPerSurface[], - dml_float_t HRatio[], - dml_float_t HRatioChroma[], - dml_float_t PixelClock[], - dml_float_t PSCL_THROUGHPUT[], - dml_float_t PSCL_THROUGHPUT_CHROMA[], - dml_float_t Dppclk[], - dml_uint_t BytePerPixelC[], - enum dml_rotation_angle SourceScan[], - dml_uint_t NumberOfCursors[], - dml_uint_t CursorWidth[], - dml_uint_t CursorBPP[], - dml_uint_t BlockWidth256BytesY[], - dml_uint_t BlockHeight256BytesY[], - dml_uint_t BlockWidth256BytesC[], - dml_uint_t BlockHeight256BytesC[], - - // Output - dml_float_t DisplayPipeLineDeliveryTimeLuma[], - dml_float_t DisplayPipeLineDeliveryTimeChroma[], - dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[], - dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[], - dml_float_t DisplayPipeRequestDeliveryTimeLuma[], - dml_float_t DisplayPipeRequestDeliveryTimeChroma[], - dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[], - dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[], - dml_float_t CursorRequestDeliveryTime[], - dml_float_t CursorRequestDeliveryTimePrefetch[]); - -static void CalculateMetaAndPTETimes( - dml_bool_t use_one_row_for_frame[], - dml_uint_t NumberOfActiveSurfaces, - dml_bool_t GPUVMEnable, - dml_uint_t MetaChunkSize, - dml_uint_t MinMetaChunkSizeBytes, - dml_uint_t HTotal[], - dml_float_t VRatio[], - dml_float_t VRatioChroma[], - dml_float_t DestinationLinesToRequestRowInVBlank[], - dml_float_t DestinationLinesToRequestRowInImmediateFlip[], - dml_bool_t DCCEnable[], - dml_float_t PixelClock[], - dml_uint_t BytePerPixelY[], - dml_uint_t BytePerPixelC[], - enum dml_rotation_angle SourceScan[], - dml_uint_t dpte_row_height[], - dml_uint_t dpte_row_height_chroma[], - dml_uint_t meta_row_width[], - dml_uint_t meta_row_width_chroma[], - dml_uint_t meta_row_height[], - dml_uint_t meta_row_height_chroma[], - dml_uint_t meta_req_width[], - dml_uint_t meta_req_width_chroma[], - dml_uint_t meta_req_height[], - dml_uint_t meta_req_height_chroma[], - dml_uint_t dpte_group_bytes[], - dml_uint_t PTERequestSizeY[], - dml_uint_t PTERequestSizeC[], - dml_uint_t PixelPTEReqWidthY[], - dml_uint_t PixelPTEReqHeightY[], - dml_uint_t PixelPTEReqWidthC[], - dml_uint_t PixelPTEReqHeightC[], - dml_uint_t dpte_row_width_luma_ub[], - dml_uint_t dpte_row_width_chroma_ub[], - - // Output - dml_float_t DST_Y_PER_PTE_ROW_NOM_L[], - dml_float_t DST_Y_PER_PTE_ROW_NOM_C[], - 
dml_float_t DST_Y_PER_META_ROW_NOM_L[], - dml_float_t DST_Y_PER_META_ROW_NOM_C[], - dml_float_t TimePerMetaChunkNominal[], - dml_float_t TimePerChromaMetaChunkNominal[], - dml_float_t TimePerMetaChunkVBlank[], - dml_float_t TimePerChromaMetaChunkVBlank[], - dml_float_t TimePerMetaChunkFlip[], - dml_float_t TimePerChromaMetaChunkFlip[], - dml_float_t time_per_pte_group_nom_luma[], - dml_float_t time_per_pte_group_vblank_luma[], - dml_float_t time_per_pte_group_flip_luma[], - dml_float_t time_per_pte_group_nom_chroma[], - dml_float_t time_per_pte_group_vblank_chroma[], - dml_float_t time_per_pte_group_flip_chroma[]); - -static void CalculateVMGroupAndRequestTimes( - dml_uint_t NumberOfActiveSurfaces, - dml_bool_t GPUVMEnable, - dml_uint_t GPUVMMaxPageTableLevels, - dml_uint_t HTotal[], - dml_uint_t BytePerPixelC[], - dml_float_t DestinationLinesToRequestVMInVBlank[], - dml_float_t DestinationLinesToRequestVMInImmediateFlip[], - dml_bool_t DCCEnable[], - dml_float_t PixelClock[], - dml_uint_t dpte_row_width_luma_ub[], - dml_uint_t dpte_row_width_chroma_ub[], - dml_uint_t vm_group_bytes[], - dml_uint_t dpde0_bytes_per_frame_ub_l[], - dml_uint_t dpde0_bytes_per_frame_ub_c[], - dml_uint_t meta_pte_bytes_per_frame_ub_l[], - dml_uint_t meta_pte_bytes_per_frame_ub_c[], - - // Output - dml_float_t TimePerVMGroupVBlank[], - dml_float_t TimePerVMGroupFlip[], - dml_float_t TimePerVMRequestVBlank[], - dml_float_t TimePerVMRequestFlip[]); - -static void CalculateStutterEfficiency( - struct display_mode_lib_scratch_st *scratch, - struct CalculateStutterEfficiency_params_st *p); - -static void CalculateSwathAndDETConfiguration( - struct display_mode_lib_scratch_st *scratch, - struct CalculateSwathAndDETConfiguration_params_st *p); - -static void CalculateSwathWidth( - dml_bool_t ForceSingleDPP, - dml_uint_t NumberOfActiveSurfaces, - enum dml_source_format_class SourcePixelFormat[], - enum dml_rotation_angle SourceScan[], - dml_bool_t ViewportStationary[], - dml_uint_t ViewportWidth[], - dml_uint_t ViewportHeight[], - dml_uint_t ViewportXStart[], - dml_uint_t ViewportYStart[], - dml_uint_t ViewportXStartC[], - dml_uint_t ViewportYStartC[], - dml_uint_t SurfaceWidthY[], - dml_uint_t SurfaceWidthC[], - dml_uint_t SurfaceHeightY[], - dml_uint_t SurfaceHeightC[], - enum dml_odm_mode ODMMode[], - dml_uint_t BytePerPixY[], - dml_uint_t BytePerPixC[], - dml_uint_t Read256BytesBlockHeightY[], - dml_uint_t Read256BytesBlockHeightC[], - dml_uint_t Read256BytesBlockWidthY[], - dml_uint_t Read256BytesBlockWidthC[], - dml_uint_t BlendingAndTiming[], - dml_uint_t HActive[], - dml_float_t HRatio[], - dml_uint_t DPPPerSurface[], - - // Output - dml_uint_t SwathWidthSingleDPPY[], - dml_uint_t SwathWidthSingleDPPC[], - dml_uint_t SwathWidthY[], - dml_uint_t SwathWidthC[], - dml_uint_t MaximumSwathHeightY[], - dml_uint_t MaximumSwathHeightC[], - dml_uint_t swath_width_luma_ub[], - dml_uint_t swath_width_chroma_ub[]); - -static dml_float_t CalculateExtraLatency( - dml_uint_t RoundTripPingLatencyCycles, - dml_uint_t ReorderingBytes, - dml_float_t DCFCLK, - dml_uint_t TotalNumberOfActiveDPP, - dml_uint_t PixelChunkSizeInKByte, - dml_uint_t TotalNumberOfDCCActiveDPP, - dml_uint_t MetaChunkSize, - dml_float_t ReturnBW, - dml_bool_t GPUVMEnable, - dml_bool_t HostVMEnable, - dml_uint_t NumberOfActiveSurfaces, - dml_uint_t NumberOfDPP[], - dml_uint_t dpte_group_bytes[], - dml_float_t HostVMInefficiencyFactor, - dml_uint_t HostVMMinPageSize, - dml_uint_t HostVMMaxNonCachedPageTableLevels); - -static dml_uint_t 
CalculateExtraLatencyBytes( - dml_uint_t ReorderingBytes, - dml_uint_t TotalNumberOfActiveDPP, - dml_uint_t PixelChunkSizeInKByte, - dml_uint_t TotalNumberOfDCCActiveDPP, - dml_uint_t MetaChunkSize, - dml_bool_t GPUVMEnable, - dml_bool_t HostVMEnable, - dml_uint_t NumberOfActiveSurfaces, - dml_uint_t NumberOfDPP[], - dml_uint_t dpte_group_bytes[], - dml_float_t HostVMInefficiencyFactor, - dml_uint_t HostVMMinPageSize, - dml_uint_t HostVMMaxNonCachedPageTableLevels); - -static dml_float_t CalculateUrgentLatency( - dml_float_t UrgentLatencyPixelDataOnly, - dml_float_t UrgentLatencyPixelMixedWithVMData, - dml_float_t UrgentLatencyVMDataOnly, - dml_bool_t DoUrgentLatencyAdjustment, - dml_float_t UrgentLatencyAdjustmentFabricClockComponent, - dml_float_t UrgentLatencyAdjustmentFabricClockReference, - dml_float_t FabricClockSingle); - -static dml_bool_t UnboundedRequest( - enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal, - dml_uint_t TotalNumberOfActiveDPP, - dml_bool_t NoChromaOrLinear, - enum dml_output_encoder_class Output); - -static void CalculateSurfaceSizeInMall( - dml_uint_t NumberOfActiveSurfaces, - dml_uint_t MALLAllocatedForDCN, - enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[], - dml_bool_t DCCEnable[], - dml_bool_t ViewportStationary[], - dml_uint_t ViewportXStartY[], - dml_uint_t ViewportYStartY[], - dml_uint_t ViewportXStartC[], - dml_uint_t ViewportYStartC[], - dml_uint_t ViewportWidthY[], - dml_uint_t ViewportHeightY[], - dml_uint_t BytesPerPixelY[], - dml_uint_t ViewportWidthC[], - dml_uint_t ViewportHeightC[], - dml_uint_t BytesPerPixelC[], - dml_uint_t SurfaceWidthY[], - dml_uint_t SurfaceWidthC[], - dml_uint_t SurfaceHeightY[], - dml_uint_t SurfaceHeightC[], - dml_uint_t Read256BytesBlockWidthY[], - dml_uint_t Read256BytesBlockWidthC[], - dml_uint_t Read256BytesBlockHeightY[], - dml_uint_t Read256BytesBlockHeightC[], - dml_uint_t ReadBlockWidthY[], - dml_uint_t ReadBlockWidthC[], - dml_uint_t ReadBlockHeightY[], - dml_uint_t ReadBlockHeightC[], - - // Output - dml_uint_t SurfaceSizeInMALL[], - dml_bool_t *ExceededMALLSize); - -static void CalculateDETBufferSize( - dml_uint_t DETSizeOverride[], - enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], - dml_bool_t ForceSingleDPP, - dml_uint_t NumberOfActiveSurfaces, - dml_bool_t UnboundedRequestEnabled, - dml_uint_t nomDETInKByte, - dml_uint_t MaxTotalDETInKByte, - dml_uint_t ConfigReturnBufferSizeInKByte, - dml_uint_t MinCompressedBufferSizeInKByte, - dml_uint_t ConfigReturnBufferSegmentSizeInkByte, - dml_uint_t CompressedBufferSegmentSizeInkByteFinal, - enum dml_source_format_class SourcePixelFormat[], - dml_float_t ReadBandwidthLuma[], - dml_float_t ReadBandwidthChroma[], - dml_uint_t RoundedUpMaxSwathSizeBytesY[], - dml_uint_t RoundedUpMaxSwathSizeBytesC[], - dml_uint_t DPPPerSurface[], - // Output - dml_uint_t DETBufferSizeInKByte[], - dml_uint_t *CompressedBufferSizeInkByte); - -static void CalculateMaxDETAndMinCompressedBufferSize( - dml_uint_t ConfigReturnBufferSizeInKByte, - dml_uint_t ConfigReturnBufferSegmentSizeInKByte, - dml_uint_t ROBBufferSizeInKByte, - dml_uint_t MaxNumDPP, - dml_bool_t nomDETInKByteOverrideEnable, - dml_uint_t nomDETInKByteOverrideValue, - - // Output - dml_uint_t *MaxTotalDETInKByte, - dml_uint_t *nomDETInKByte, - dml_uint_t *MinCompressedBufferSizeInKByte); - -static dml_uint_t DSCDelayRequirement( - dml_bool_t DSCEnabled, - enum dml_odm_mode ODMMode, - dml_uint_t DSCInputBitPerComponent, - dml_float_t OutputBpp, - dml_uint_t HActive, - dml_uint_t HTotal, - 
dml_uint_t NumberOfDSCSlices, - enum dml_output_format_class OutputFormat, - enum dml_output_encoder_class Output, - dml_float_t PixelClock, - dml_float_t PixelClockBackEnd); - -static dml_bool_t CalculateVActiveBandwithSupport( - dml_uint_t NumberOfActiveSurfaces, - dml_float_t ReturnBW, - dml_bool_t NotUrgentLatencyHiding[], - dml_float_t ReadBandwidthLuma[], - dml_float_t ReadBandwidthChroma[], - dml_float_t cursor_bw[], - dml_float_t meta_row_bandwidth[], - dml_float_t dpte_row_bandwidth[], - dml_uint_t NumberOfDPP[], - dml_float_t UrgentBurstFactorLuma[], - dml_float_t UrgentBurstFactorChroma[], - dml_float_t UrgentBurstFactorCursor[]); - -static void CalculatePrefetchBandwithSupport( - dml_uint_t NumberOfActiveSurfaces, - dml_float_t ReturnBW, - enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], - dml_bool_t NotUrgentLatencyHiding[], - dml_float_t ReadBandwidthLuma[], - dml_float_t ReadBandwidthChroma[], - dml_float_t PrefetchBandwidthLuma[], - dml_float_t PrefetchBandwidthChroma[], - dml_float_t cursor_bw[], - dml_float_t meta_row_bandwidth[], - dml_float_t dpte_row_bandwidth[], - dml_float_t cursor_bw_pre[], - dml_float_t prefetch_vmrow_bw[], - dml_uint_t NumberOfDPP[], - dml_float_t UrgentBurstFactorLuma[], - dml_float_t UrgentBurstFactorChroma[], - dml_float_t UrgentBurstFactorCursor[], - dml_float_t UrgentBurstFactorLumaPre[], - dml_float_t UrgentBurstFactorChromaPre[], - dml_float_t UrgentBurstFactorCursorPre[], - - // Output - dml_float_t *PrefetchBandwidth, - dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch, - dml_float_t *FractionOfUrgentBandwidth, - dml_bool_t *PrefetchBandwidthSupport); - -static dml_float_t CalculateBandwidthAvailableForImmediateFlip( - dml_uint_t NumberOfActiveSurfaces, - dml_float_t ReturnBW, - dml_float_t ReadBandwidthLuma[], - dml_float_t ReadBandwidthChroma[], - dml_float_t PrefetchBandwidthLuma[], - dml_float_t PrefetchBandwidthChroma[], - dml_float_t cursor_bw[], - dml_float_t cursor_bw_pre[], - dml_uint_t NumberOfDPP[], - dml_float_t UrgentBurstFactorLuma[], - dml_float_t UrgentBurstFactorChroma[], - dml_float_t UrgentBurstFactorCursor[], - dml_float_t UrgentBurstFactorLumaPre[], - dml_float_t UrgentBurstFactorChromaPre[], - dml_float_t UrgentBurstFactorCursorPre[]); - -static void CalculateImmediateFlipBandwithSupport( - dml_uint_t NumberOfActiveSurfaces, - dml_float_t ReturnBW, - enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], - enum dml_immediate_flip_requirement ImmediateFlipRequirement[], - dml_float_t final_flip_bw[], - dml_float_t ReadBandwidthLuma[], - dml_float_t ReadBandwidthChroma[], - dml_float_t PrefetchBandwidthLuma[], - dml_float_t PrefetchBandwidthChroma[], - dml_float_t cursor_bw[], - dml_float_t meta_row_bandwidth[], - dml_float_t dpte_row_bandwidth[], - dml_float_t cursor_bw_pre[], - dml_float_t prefetch_vmrow_bw[], - dml_uint_t NumberOfDPP[], - dml_float_t UrgentBurstFactorLuma[], - dml_float_t UrgentBurstFactorChroma[], - dml_float_t UrgentBurstFactorCursor[], - dml_float_t UrgentBurstFactorLumaPre[], - dml_float_t UrgentBurstFactorChromaPre[], - dml_float_t UrgentBurstFactorCursorPre[], - - // Output - dml_float_t *TotalBandwidth, - dml_float_t *TotalBandwidthNotIncludingMALLPrefetch, - dml_float_t *FractionOfUrgentBandwidth, - dml_bool_t *ImmediateFlipBandwidthSupport); - -// --------------------------- -// Declaration Ends -// --------------------------- - -static dml_uint_t dscceComputeDelay( - dml_uint_t bpc, - dml_float_t BPP, - dml_uint_t sliceWidth, - dml_uint_t numSlices, - 
enum dml_output_format_class pixelFormat, - enum dml_output_encoder_class Output) -{ - // valid bpc = source bits per component in the set of {8, 10, 12} - // valid bpp = increments of 1/16 of a bit - // min = 6/7/8 in N420/N422/444, respectively - // max = such that compression is 1:1 - //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) - //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} - //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} - - // fixed value - dml_uint_t rcModelSize = 8192; - - // N422/N420 operate at 2 pixels per clock - dml_uint_t pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L, - Delay, pixels; - - if (pixelFormat == dml_420) - pixelsPerClock = 2; - // #all other modes operate at 1 pixel per clock - else if (pixelFormat == dml_444) - pixelsPerClock = 1; - else if (pixelFormat == dml_n422) - pixelsPerClock = 2; - else - pixelsPerClock = 1; - - //initial transmit delay as per PPS - initalXmitDelay = (dml_uint_t)(dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock, 1)); - - //compute ssm delay - if (bpc == 8) - D = 81; - else if (bpc == 10) - D = 89; - else - D = 113; - - //divide by pixel per cycle to compute slice width as seen by DSC - w = sliceWidth / pixelsPerClock; - - //422 mode has an additional cycle of delay - if (pixelFormat == dml_420 || pixelFormat == dml_444 || pixelFormat == dml_n422) - s = 0; - else - s = 1; - - //main calculation for the dscce - ix = initalXmitDelay + 45; - wx = (w + 2) / 3; - p = 3 * wx - w; - l0 = ix / w; - a = ix + p * l0; - ax = (a + 2) / 3 + D + 6 + 1; - L = (ax + wx - 1) / wx; - if ((ix % w) == 0 && p != 0) - lstall = 1; - else - lstall = 0; - Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; - - //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels - pixels = Delay * 3 * pixelsPerClock; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: bpc: %u\n", __func__, bpc); - dml_print("DML::%s: BPP: %f\n", __func__, BPP); - dml_print("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); - dml_print("DML::%s: numSlices: %u\n", __func__, numSlices); - dml_print("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); - dml_print("DML::%s: Output: %u\n", __func__, Output); - dml_print("DML::%s: pixels: %u\n", __func__, pixels); -#endif - return pixels; -} - -static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, enum dml_output_encoder_class Output) -{ - dml_uint_t Delay = 0; - - if (pixelFormat == dml_420) { - // sfr - Delay = Delay + 2; - // dsccif - Delay = Delay + 0; - // dscc - input deserializer - Delay = Delay + 3; - // dscc gets pixels every other cycle - Delay = Delay + 2; - // dscc - input cdc fifo - Delay = Delay + 12; - // dscc gets pixels every other cycle - Delay = Delay + 13; - // dscc - cdc uncertainty - Delay = Delay + 2; - // dscc - output cdc fifo - Delay = Delay + 7; - // dscc gets pixels every other cycle - Delay = Delay + 3; - // dscc - cdc uncertainty - Delay = Delay + 2; - // dscc - output serializer - Delay = Delay + 1; - // sft - Delay = Delay + 1; - } else if (pixelFormat == dml_n422) { - // sfr - Delay = Delay + 2; - // dsccif - Delay = Delay + 1; - // dscc - input deserializer - Delay = Delay + 5; - // dscc - input cdc fifo - Delay = Delay + 25; - // dscc - cdc uncertainty - Delay = Delay + 2; - // dscc - output cdc fifo - Delay = Delay + 10; - // dscc - cdc 
-static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, enum dml_output_encoder_class Output)
-{
-    dml_uint_t Delay = 0;
-
-    if (pixelFormat == dml_420) {
-        // sfr
-        Delay = Delay + 2;
-        // dsccif
-        Delay = Delay + 0;
-        // dscc - input deserializer
-        Delay = Delay + 3;
-        // dscc gets pixels every other cycle
-        Delay = Delay + 2;
-        // dscc - input cdc fifo
-        Delay = Delay + 12;
-        // dscc gets pixels every other cycle
-        Delay = Delay + 13;
-        // dscc - cdc uncertainty
-        Delay = Delay + 2;
-        // dscc - output cdc fifo
-        Delay = Delay + 7;
-        // dscc gets pixels every other cycle
-        Delay = Delay + 3;
-        // dscc - cdc uncertainty
-        Delay = Delay + 2;
-        // dscc - output serializer
-        Delay = Delay + 1;
-        // sft
-        Delay = Delay + 1;
-    } else if (pixelFormat == dml_n422) {
-        // sfr
-        Delay = Delay + 2;
-        // dsccif
-        Delay = Delay + 1;
-        // dscc - input deserializer
-        Delay = Delay + 5;
-        // dscc - input cdc fifo
-        Delay = Delay + 25;
-        // dscc - cdc uncertainty
-        Delay = Delay + 2;
-        // dscc - output cdc fifo
-        Delay = Delay + 10;
-        // dscc - cdc uncertainty
-        Delay = Delay + 2;
-        // dscc - output serializer
-        Delay = Delay + 1;
-        // sft
-        Delay = Delay + 1;
-    } else {
-        // sfr
-        Delay = Delay + 2;
-        // dsccif
-        Delay = Delay + 0;
-        // dscc - input deserializer
-        Delay = Delay + 3;
-        // dscc - input cdc fifo
-        Delay = Delay + 12;
-        // dscc - cdc uncertainty
-        Delay = Delay + 2;
-        // dscc - output cdc fifo
-        Delay = Delay + 7;
-        // dscc - output serializer
-        Delay = Delay + 1;
-        // dscc - cdc uncertainty
-        Delay = Delay + 2;
-        // sft
-        Delay = Delay + 1;
-    }
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
-    dml_print("DML::%s: Delay = %u\n", __func__, Delay);
-#endif
-
-    return Delay;
-}
-
-static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
-    struct CalculatePrefetchSchedule_params_st *p)
-{
-    struct CalculatePrefetchSchedule_locals_st *s = &scratch->CalculatePrefetchSchedule_locals;
-
-    s->MyError = false;
-    s->DPPCycles = 0;
-    s->DISPCLKCycles = 0;
-    s->DSTTotalPixelsAfterScaler = 0.0;
-    s->LineTime = 0.0;
-    s->dst_y_prefetch_equ = 0.0;
-    s->prefetch_bw_oto = 0.0;
-    s->Tvm_oto = 0.0;
-    s->Tr0_oto = 0.0;
-    s->Tvm_oto_lines = 0.0;
-    s->Tr0_oto_lines = 0.0;
-    s->dst_y_prefetch_oto = 0.0;
-    s->TimeForFetchingMetaPTE = 0.0;
-    s->TimeForFetchingRowInVBlank = 0.0;
-    s->LinesToRequestPrefetchPixelData = 0.0;
-    s->HostVMDynamicLevelsTrips = 0;
-    s->trip_to_mem = 0.0;
-    s->Tvm_trips = 0.0;
-    s->Tr0_trips = 0.0;
-    s->Tvm_trips_rounded = 0.0;
-    s->Tr0_trips_rounded = 0.0;
-    s->max_Tsw = 0.0;
-    s->Lsw_oto = 0.0;
-    s->Tpre_rounded = 0.0;
-    s->prefetch_bw_equ = 0.0;
-    s->Tvm_equ = 0.0;
-    s->Tr0_equ = 0.0;
-    s->Tdmbf = 0.0;
-    s->Tdmec = 0.0;
-    s->Tdmsks = 0.0;
-    s->prefetch_sw_bytes = 0.0;
-    s->prefetch_bw_pr = 0.0;
-    s->bytes_pp = 0.0;
-    s->dep_bytes = 0.0;
-    s->min_Lsw_oto = 0.0;
-    s->Tsw_est1 = 0.0;
-    s->Tsw_est3 = 0.0;
-
-    if (p->GPUVMEnable == true && p->HostVMEnable == true) {
-        s->HostVMDynamicLevelsTrips = p->HostVMMaxNonCachedPageTableLevels;
-    } else {
-        s->HostVMDynamicLevelsTrips = 0;
-    }
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
-    dml_print("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->GPUVMPageTableLevels);
-    dml_print("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
-    dml_print("DML::%s: VStartup = %u\n", __func__, p->VStartup);
-    dml_print("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup);
-    dml_print("DML::%s: HostVMEnable = %u\n", __func__, p->HostVMEnable);
-    dml_print("DML::%s: HostVMInefficiencyFactor= %f\n", __func__, p->HostVMInefficiencyFactor);
-    dml_print("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
-#endif
-    CalculateVUpdateAndDynamicMetadataParameters(
-        p->MaxInterDCNTileRepeaters,
-        p->myPipe->Dppclk,
-        p->myPipe->Dispclk,
-        p->myPipe->DCFClkDeepSleep,
-        p->myPipe->PixelClock,
-        p->myPipe->HTotal,
-        p->myPipe->VBlank,
-        p->DynamicMetadataTransmittedBytes,
-        p->DynamicMetadataLinesBeforeActiveRequired,
-        p->myPipe->InterlaceEnable,
-        p->myPipe->ProgressiveToInterlaceUnitInOPP,
-        p->TSetup,
-
-        // Output
-        &s->Tdmbf,
-        &s->Tdmec,
-        &s->Tdmsks,
-        p->VUpdateOffsetPix,
-        p->VUpdateWidthPix,
-        p->VReadyOffsetPix);
-
-    s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
-    s->trip_to_mem = p->UrgentLatency;
-    s->Tvm_trips = p->UrgentExtraLatency + s->trip_to_mem * (p->GPUVMPageTableLevels * (s->HostVMDynamicLevelsTrips + 1) - 1);
-
-    if (p->DynamicMetadataVMEnabled == true) {
-        *p->Tdmdl = p->TWait + s->Tvm_trips + s->trip_to_mem;
-    } else {
-        *p->Tdmdl = p->TWait + p->UrgentExtraLatency;
-    }
-
-#ifdef __DML_VBA_ALLOW_DELTA__
-    if (DynamicMetadataEnable == false) {
-        *Tdmdl = 0.0;
-    }
-#endif
-
-    if (p->DynamicMetadataEnable == true) {
-        if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
-            *p->NotEnoughTimeForDynamicMetadata = true;
-            dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
-            dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
-            dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
-            dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
-            dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
-        } else {
-            *p->NotEnoughTimeForDynamicMetadata = false;
-        }
-    } else {
-        *p->NotEnoughTimeForDynamicMetadata = false;
-    }
-
-    *p->Tdmdl_vm = (p->DynamicMetadataEnable == true && p->DynamicMetadataVMEnabled == true && p->GPUVMEnable == true ? p->TWait + s->Tvm_trips : 0);
-
-    if (p->myPipe->ScalerEnabled)
-        s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
-    else
-        s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
-
-    s->DPPCycles = (dml_uint_t)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
-
-    s->DISPCLKCycles = (dml_uint_t)p->DISPCLKDelaySubtotal;
-
-    if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
-        return true;
-
-    *p->DSTXAfterScaler = (dml_uint_t) dml_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay, 1.0);
-    *p->DSTXAfterScaler = (dml_uint_t) dml_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
-        ((p->myPipe->ODMMode == dml_odm_mode_split_1to2 || p->myPipe->ODMMode == dml_odm_mode_mso_1to2) ? (dml_float_t)p->myPipe->HActive / 2.0 : 0) +
-        ((p->myPipe->ODMMode == dml_odm_mode_mso_1to4) ? (dml_float_t)p->myPipe->HActive * 3.0 / 4.0 : 0), 1.0);
-
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
-    dml_print("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
-    dml_print("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
-    dml_print("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
-    dml_print("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
-    dml_print("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
-    dml_print("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
-    dml_print("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
-    dml_print("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
-#endif
-
-    if (p->OutputFormat == dml_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
-        *p->DSTYAfterScaler = 1;
-    else
-        *p->DSTYAfterScaler = 0;
-
-    s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
-    *p->DSTYAfterScaler = (dml_uint_t)(dml_floor(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
-    *p->DSTXAfterScaler = (dml_uint_t)(s->DSTTotalPixelsAfterScaler - ((dml_float_t) (*p->DSTYAfterScaler * p->myPipe->HTotal)));
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
-    dml_print("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
-#endif
-
-    s->MyError = false;
-
-    s->Tr0_trips = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
-
-    if (p->GPUVMEnable == true) {
-        s->Tvm_trips_rounded = dml_ceil(4.0 * s->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
-        s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
-        if (p->GPUVMPageTableLevels >= 3) {
-            *p->Tno_bw = p->UrgentExtraLatency + s->trip_to_mem * (dml_float_t) ((p->GPUVMPageTableLevels - 2) * (s->HostVMDynamicLevelsTrips + 1) - 1);
-        } else if (p->GPUVMPageTableLevels == 1 && p->myPipe->DCCEnable != true) {
-            s->Tr0_trips_rounded = dml_ceil(4.0 * p->UrgentExtraLatency / s->LineTime, 1.0) / 4.0 * s->LineTime;
-            *p->Tno_bw = p->UrgentExtraLatency;
-        } else {
-            *p->Tno_bw = 0;
-        }
-    } else if (p->myPipe->DCCEnable == true) {
-        s->Tvm_trips_rounded = s->LineTime / 4.0;
-        s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
-        *p->Tno_bw = 0;
-    } else {
-        s->Tvm_trips_rounded = s->LineTime / 4.0;
-        s->Tr0_trips_rounded = s->LineTime / 2.0;
-        *p->Tno_bw = 0;
-    }
-    s->Tvm_trips_rounded = dml_max(s->Tvm_trips_rounded, s->LineTime / 4.0);
-    s->Tr0_trips_rounded = dml_max(s->Tr0_trips_rounded, s->LineTime / 4.0);
-
-    if (p->myPipe->SourcePixelFormat == dml_420_8 || p->myPipe->SourcePixelFormat == dml_420_10 || p->myPipe->SourcePixelFormat == dml_420_12) {
-        s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4;
-    } else {
-        s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
-    }
-
-    s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (dml_float_t)p->myPipe->DPPPerSurface;
-    if (p->myPipe->VRatio < 1.0)
-        s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr;
-
-    s->max_Tsw = (dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime);
-
-    s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
-    s->prefetch_bw_oto = dml_max(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw);
-    s->min_Lsw_oto = dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML_MAX_VRATIO_PRE_OTO__;
-    s->min_Lsw_oto = dml_max(s->min_Lsw_oto, 1.0);
-    s->Lsw_oto = dml_ceil(4.0 * dml_max(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0;
-
-    if (p->GPUVMEnable == true) {
-        s->Tvm_oto = dml_max3(
-            s->Tvm_trips,
-            *p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
-            s->LineTime / 4.0);
-    } else
-        s->Tvm_oto = s->LineTime / 4.0;
-
-    if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
-        s->Tr0_oto = dml_max4(
-            s->Tr0_trips,
-            (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto,
-            (s->LineTime - s->Tvm_oto)/2.0,
-            s->LineTime / 4.0);
-#ifdef __DML_VBA_DEBUG__
-        dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto);
-        dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, s->Tr0_trips);
-        dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime - s->Tvm_oto);
-        dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, s->LineTime / 4);
-#endif
-    } else
-        s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 2.0;
-
-    s->Tvm_oto_lines = dml_ceil(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
-    s->Tr0_oto_lines = dml_ceil(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
-    s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
-
-    s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal);
-    s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
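    /*
     * Editor's note (illustration, not from the patch): the schedule above
     * repeatedly snaps times to quarter-line granularity with the
     * dml_ceil(4.0 * t / LineTime, 1) / 4.0 idiom. Worked example: with
     * LineTime = 8us and Tvm_oto = 9us, 4 * 9 / 8 = 4.5 -> ceil = 5 ->
     * 5 / 4 = 1.25, so Tvm_oto_lines rounds 1.125 lines up to 1.25 lines.
     */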
-
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
-    dml_print("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
-    dml_print("DML::%s: *Tno_bw = %f\n", __func__, *p->Tno_bw);
-    dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, p->UrgentExtraLatency);
-    dml_print("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
-    dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
-    dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
-    dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
-    dml_print("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
-    dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
-    dml_print("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
-    dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes);
-    dml_print("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
-    dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
-    dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
-    dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
-    dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
-    dml_print("DML::%s: Tvm_trips = %f\n", __func__, s->Tvm_trips);
-    dml_print("DML::%s: Tr0_trips = %f\n", __func__, s->Tr0_trips);
-    dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
-    dml_print("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
-    dml_print("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
-    dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
-    dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
-    dml_print("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
-    dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
-    dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
-#endif
-
-    s->dst_y_prefetch_equ = dml_floor(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
-    s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
-
-    dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
-
-    dml_print("DML::%s: LineTime: %f\n", __func__, s->LineTime);
-    dml_print("DML::%s: VStartup: %u\n", __func__, p->VStartup);
-    dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
-    dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
-    dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
-    dml_print("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
-    dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
-    dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
-    dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
-    dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
-    dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
-    dml_print("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
-    dml_print("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
-
-    s->dep_bytes = dml_max(p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor, p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor);
-
-    if (s->prefetch_sw_bytes < s->dep_bytes) {
-        s->prefetch_sw_bytes = 2 * s->dep_bytes;
-    }
-
-    *p->DestinationLinesToRequestVMInVBlank = 0;
-    *p->DestinationLinesToRequestRowInVBlank = 0;
-    *p->VRatioPrefetchY = 0;
-    *p->VRatioPrefetchC = 0;
-    *p->RequiredPrefetchPixDataBWLuma = 0;
-    if (s->dst_y_prefetch_equ > 1) {
-
-        if (s->Tpre_rounded - *p->Tno_bw > 0) {
-            s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte
-                + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor
-                + s->prefetch_sw_bytes)
-                / (s->Tpre_rounded - *p->Tno_bw);
-            s->Tsw_est1 = s->prefetch_sw_bytes / s->PrefetchBandwidth1;
-        } else
-            s->PrefetchBandwidth1 = 0;
-
-        if (p->VStartup == p->MaxVStartup && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0) {
-            s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) /
-                (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
-        }
-
-        if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0)
-            s->PrefetchBandwidth2 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
-                (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded);
-        else
-            s->PrefetchBandwidth2 = 0;
-
-        if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
-            s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
-                (s->Tpre_rounded - s->Tvm_trips_rounded);
-            s->Tsw_est3 = s->prefetch_sw_bytes / s->PrefetchBandwidth3;
-        }
-        else
-            s->PrefetchBandwidth3 = 0;
-
-
-        if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded > 0) {
-            s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
-        }
-
-        if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
-            s->PrefetchBandwidth4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
-        else
-            s->PrefetchBandwidth4 = 0;
-
-#ifdef __DML_VBA_DEBUG__
-        dml_print("DML::%s: Tpre_rounded: %f\n", __func__, s->Tpre_rounded);
-        dml_print("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
-        dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, s->Tvm_trips_rounded);
-        dml_print("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
-        dml_print("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
-        dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, s->PrefetchBandwidth1);
-        dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, s->PrefetchBandwidth2);
-        dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, s->PrefetchBandwidth3);
-        dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, s->PrefetchBandwidth4);
-#endif
-        {
-            dml_bool_t Case1OK;
-            dml_bool_t Case2OK;
-            dml_bool_t Case3OK;
-
-            if (s->PrefetchBandwidth1 > 0) {
-                if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth1 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth1 >= s->Tr0_trips_rounded) {
-                    Case1OK = true;
-                } else {
-                    Case1OK = false;
-                }
-            } else {
-                Case1OK = false;
-            }
-
-            if (s->PrefetchBandwidth2 > 0) {
-                if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth2 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth2 < s->Tr0_trips_rounded) {
-                    Case2OK = true;
-                } else {
-                    Case2OK = false;
-                }
-            } else {
-                Case2OK = false;
-            }
-
-            if (s->PrefetchBandwidth3 > 0) {
-                if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth3 < s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth3 >= s->Tr0_trips_rounded) {
-                    Case3OK = true;
-                } else {
-                    Case3OK = false;
-                }
-            } else {
-                Case3OK = false;
-            }
-
-            if (Case1OK) {
-                s->prefetch_bw_equ = s->PrefetchBandwidth1;
-            } else if (Case2OK) {
-                s->prefetch_bw_equ = s->PrefetchBandwidth2;
-            } else if (Case3OK) {
-                s->prefetch_bw_equ = s->PrefetchBandwidth3;
-            } else {
-                s->prefetch_bw_equ = s->PrefetchBandwidth4;
-            }
-
-#ifdef __DML_VBA_DEBUG__
-            dml_print("DML::%s: Case1OK: %u\n", __func__, Case1OK);
-            dml_print("DML::%s: Case2OK: %u\n", __func__, Case2OK);
-            dml_print("DML::%s: Case3OK: %u\n", __func__, Case3OK);
-            dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
-#endif
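            /*
             * Editor's note (illustration, not from the patch): the
             * Case1OK/Case2OK/Case3OK ladder above just picks the first
             * candidate bandwidth whose implied Tvm/Tr0 ordering is
             * consistent with the rounded trip times, i.e.
             *
             *     bw = Case1OK ? BW1 : Case2OK ? BW2 : Case3OK ? BW3 : BW4;
             *
             * with PrefetchBandwidth4 as the unconditional fallback.
             */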
-
-            if (s->prefetch_bw_equ > 0) {
-                if (p->GPUVMEnable == true) {
-                    s->Tvm_equ = dml_max3(*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, s->Tvm_trips, s->LineTime / 4);
-                } else {
-                    s->Tvm_equ = s->LineTime / 4;
-                }
-
-                if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
-                    s->Tr0_equ = dml_max4((p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_equ, s->Tr0_trips, (s->LineTime - s->Tvm_equ) / 2, s->LineTime / 4);
-                } else {
-                    s->Tr0_equ = (s->LineTime - s->Tvm_equ) / 2;
-                }
-            } else {
-                s->Tvm_equ = 0;
-                s->Tr0_equ = 0;
-                dml_print("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
-            }
-        }
-
-
-        if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
-            *p->DestinationLinesForPrefetch = s->dst_y_prefetch_oto;
-            s->TimeForFetchingMetaPTE = s->Tvm_oto;
-            s->TimeForFetchingRowInVBlank = s->Tr0_oto;
-
-            *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
-            *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
-        } else {
-            *p->DestinationLinesForPrefetch = s->dst_y_prefetch_equ;
-            s->TimeForFetchingMetaPTE = s->Tvm_equ;
-            s->TimeForFetchingRowInVBlank = s->Tr0_equ;
-
-            if (p->VStartup == p->MaxVStartup && p->EnhancedPrefetchScheduleAccelerationFinal != 0) {
-                *p->DestinationLinesToRequestVMInVBlank = dml_floor(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
-                *p->DestinationLinesToRequestRowInVBlank = dml_floor(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
-            } else {
-                *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
-                *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
-            }
-        }
-
-        s->LinesToRequestPrefetchPixelData = *p->DestinationLinesForPrefetch - *p->DestinationLinesToRequestVMInVBlank - 2 * *p->DestinationLinesToRequestRowInVBlank;
-
-#ifdef __DML_VBA_DEBUG__
-        dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *p->DestinationLinesForPrefetch);
-        dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
-        dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
-        dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
-        dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
-        dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
-        dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
-#endif
-
-        if (s->LinesToRequestPrefetchPixelData >= 1 && s->prefetch_bw_equ > 0) {
-            *p->VRatioPrefetchY = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
-            *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY, 1.0);
-#ifdef __DML_VBA_DEBUG__
-            dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
-            dml_print("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
-            dml_print("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
-#endif
-            if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
-                if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
-                    *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY,
-                        (dml_float_t)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
-                } else {
-                    s->MyError = true;
-                    dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
-                    *p->VRatioPrefetchY = 0;
-                }
-#ifdef __DML_VBA_DEBUG__
-                dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
-                dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
-                dml_print("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
-#endif
-            }
-
-            *p->VRatioPrefetchC = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
-            *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, 1.0);
-
-#ifdef __DML_VBA_DEBUG__
-            dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
-            dml_print("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
-            dml_print("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
-#endif
-            if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
-                if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
-                    *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, (dml_float_t)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
-                } else {
-                    s->MyError = true;
-                    dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
-                    *p->VRatioPrefetchC = 0;
-                }
-#ifdef __DML_VBA_DEBUG__
-                dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
-                dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
-                dml_print("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
-#endif
-            }
-
-            *p->RequiredPrefetchPixDataBWLuma = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData
-                * p->myPipe->BytePerPixelY
-                * p->swath_width_luma_ub / s->LineTime;
-
-#ifdef __DML_VBA_DEBUG__
-            dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
-            dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
-            dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
-            dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixDataBWLuma);
-#endif
-            *p->RequiredPrefetchPixDataBWChroma = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData
-                *p->myPipe->BytePerPixelC
-                *p->swath_width_chroma_ub / s->LineTime;
-        } else {
-            s->MyError = true;
-            dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", __func__, s->LinesToRequestPrefetchPixelData);
-            *p->VRatioPrefetchY = 0;
-            *p->VRatioPrefetchC = 0;
-            *p->RequiredPrefetchPixDataBWLuma = 0;
-            *p->RequiredPrefetchPixDataBWChroma = 0;
-        }
-
-        dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingMetaPTE);
-        dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", s->TimeForFetchingMetaPTE);
-        dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", s->TimeForFetchingRowInVBlank);
-        dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime);
-        dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime);
-        dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
-        dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingMetaPTE - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
-        dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
-
-    } else {
-        s->MyError = true;
-        dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
-        s->TimeForFetchingMetaPTE = 0;
-        s->TimeForFetchingRowInVBlank = 0;
-        *p->DestinationLinesToRequestVMInVBlank = 0;
-        *p->DestinationLinesToRequestRowInVBlank = 0;
-        s->LinesToRequestPrefetchPixelData = 0;
-        *p->VRatioPrefetchY = 0;
-        *p->VRatioPrefetchC = 0;
-        *p->RequiredPrefetchPixDataBWLuma = 0;
-        *p->RequiredPrefetchPixDataBWChroma = 0;
-    }
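    /*
     * Editor's note (illustration, not from the patch): the trace above
     * decomposes the prefetch as Tpre = Tvm + 2 * Tr0 + Tsw, and the
     * schedule fits when Tvstartup - TSetup - TCalc - TWait - Tpre - To > 0.
     * E.g. with Tvm = 2us, Tr0 = 1.5us and 10 swath lines at LineTime = 8us,
     * Tpre = 2 + 3 + 80 = 85us must still fit under the VStartup budget.
     */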
-
-    {
-        dml_float_t prefetch_vm_bw;
-        dml_float_t prefetch_row_bw;
-
-        if (p->PDEAndMetaPTEBytesFrame == 0) {
-            prefetch_vm_bw = 0;
-        } else if (*p->DestinationLinesToRequestVMInVBlank > 0) {
-#ifdef __DML_VBA_DEBUG__
-            dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
-            dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
-            dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
-            dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
-#endif
-            prefetch_vm_bw = p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / (*p->DestinationLinesToRequestVMInVBlank * s->LineTime);
-#ifdef __DML_VBA_DEBUG__
-            dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
-#endif
-        } else {
-            prefetch_vm_bw = 0;
-            s->MyError = true;
-            dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
-        }
-
-        if (p->MetaRowByte + p->PixelPTEBytesPerRow == 0) {
-            prefetch_row_bw = 0;
-        } else if (*p->DestinationLinesToRequestRowInVBlank > 0) {
-            prefetch_row_bw = (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (*p->DestinationLinesToRequestRowInVBlank * s->LineTime);
-
-#ifdef __DML_VBA_DEBUG__
-            dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
-            dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
-            dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
-            dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
-#endif
-        } else {
-            prefetch_row_bw = 0;
-            s->MyError = true;
-            dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
-        }
-
-        *p->prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
-    }
-
-    if (s->MyError) {
-        s->TimeForFetchingMetaPTE = 0;
-        s->TimeForFetchingRowInVBlank = 0;
-        *p->DestinationLinesToRequestVMInVBlank = 0;
-        *p->DestinationLinesToRequestRowInVBlank = 0;
-        *p->DestinationLinesForPrefetch = 0;
-        s->LinesToRequestPrefetchPixelData = 0;
-        *p->VRatioPrefetchY = 0;
-        *p->VRatioPrefetchC = 0;
-        *p->RequiredPrefetchPixDataBWLuma = 0;
-        *p->RequiredPrefetchPixDataBWChroma = 0;
-    }
-
-    return s->MyError;
-} // CalculatePrefetchSchedule
-
-static void CalculateBytePerPixelAndBlockSizes(
-    enum dml_source_format_class SourcePixelFormat,
-    enum dml_swizzle_mode SurfaceTiling,
-
-    // Output
-    dml_uint_t *BytePerPixelY,
-    dml_uint_t *BytePerPixelC,
-    dml_float_t *BytePerPixelDETY,
-    dml_float_t *BytePerPixelDETC,
-    dml_uint_t *BlockHeight256BytesY,
-    dml_uint_t *BlockHeight256BytesC,
-    dml_uint_t *BlockWidth256BytesY,
-    dml_uint_t *BlockWidth256BytesC,
-    dml_uint_t *MacroTileHeightY,
-    dml_uint_t *MacroTileHeightC,
-    dml_uint_t *MacroTileWidthY,
-    dml_uint_t *MacroTileWidthC)
-{
-    if (SourcePixelFormat == dml_444_64) {
-        *BytePerPixelDETY = 8;
-        *BytePerPixelDETC = 0;
-        *BytePerPixelY = 8;
-        *BytePerPixelC = 0;
-    } else if (SourcePixelFormat == dml_444_32 || SourcePixelFormat == dml_rgbe) {
-        *BytePerPixelDETY = 4;
-        *BytePerPixelDETC = 0;
-        *BytePerPixelY = 4;
-        *BytePerPixelC = 0;
-    } else if (SourcePixelFormat == dml_444_16 || SourcePixelFormat == dml_mono_16) {
-        *BytePerPixelDETY = 2;
-        *BytePerPixelDETC = 0;
-        *BytePerPixelY = 2;
-        *BytePerPixelC = 0;
-    } else if (SourcePixelFormat == dml_444_8 || SourcePixelFormat == dml_mono_8) {
-        *BytePerPixelDETY = 1;
-        *BytePerPixelDETC = 0;
-        *BytePerPixelY = 1;
-        *BytePerPixelC = 0;
-    } else if (SourcePixelFormat == dml_rgbe_alpha) {
-        *BytePerPixelDETY = 4;
-        *BytePerPixelDETC = 1;
-        *BytePerPixelY = 4;
-        *BytePerPixelC = 1;
-    } else if (SourcePixelFormat == dml_420_8) {
-        *BytePerPixelDETY = 1;
-        *BytePerPixelDETC = 2;
-        *BytePerPixelY = 1;
-        *BytePerPixelC = 2;
-    } else if (SourcePixelFormat == dml_420_12) {
-        *BytePerPixelDETY = 2;
-        *BytePerPixelDETC = 4;
-        *BytePerPixelY = 2;
-        *BytePerPixelC = 4;
-    } else {
-        *BytePerPixelDETY = (dml_float_t) (4.0 / 3);
-        *BytePerPixelDETC = (dml_float_t) (8.0 / 3);
-        *BytePerPixelY = 2;
-        *BytePerPixelC = 4;
-    }
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
-    dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
-    dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
-    dml_print("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
-    dml_print("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
-#endif
-    if ((SourcePixelFormat == dml_444_64 || SourcePixelFormat == dml_444_32
-        || SourcePixelFormat == dml_444_16
-        || SourcePixelFormat == dml_444_8
-        || SourcePixelFormat == dml_mono_16
-        || SourcePixelFormat == dml_mono_8
-        || SourcePixelFormat == dml_rgbe)) {
-        if (SurfaceTiling == dml_sw_linear) {
-            *BlockHeight256BytesY = 1;
-        } else if (SourcePixelFormat == dml_444_64) {
-            *BlockHeight256BytesY = 4;
-        } else if (SourcePixelFormat == dml_444_8) {
-            *BlockHeight256BytesY = 16;
-        } else {
-            *BlockHeight256BytesY = 8;
-        }
-        *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
-        *BlockHeight256BytesC = 0;
-        *BlockWidth256BytesC = 0;
-    } else {
-        if (SurfaceTiling == dml_sw_linear) {
-            *BlockHeight256BytesY = 1;
-            *BlockHeight256BytesC = 1;
-        } else if (SourcePixelFormat == dml_rgbe_alpha) {
-            *BlockHeight256BytesY = 8;
-            *BlockHeight256BytesC = 16;
-        } else if (SourcePixelFormat == dml_420_8) {
-            *BlockHeight256BytesY = 16;
-            *BlockHeight256BytesC = 8;
-        } else {
-            *BlockHeight256BytesY = 8;
-            *BlockHeight256BytesC = 8;
-        }
-        *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
-        *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
-    }
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
-    dml_print("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
-    dml_print("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
-    dml_print("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
-#endif
-
-    if (SurfaceTiling == dml_sw_linear) {
-        *MacroTileHeightY = *BlockHeight256BytesY;
-        *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
-        *MacroTileHeightC = *BlockHeight256BytesC;
-        if (*MacroTileHeightC == 0) {
-            *MacroTileWidthC = 0;
-        } else {
-            *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
-        }
-    } else if (SurfaceTiling == dml_sw_64kb_d || SurfaceTiling == dml_sw_64kb_d_t || SurfaceTiling == dml_sw_64kb_d_x || SurfaceTiling == dml_sw_64kb_r_x) {
-        *MacroTileHeightY = 16 * *BlockHeight256BytesY;
-        *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
-        *MacroTileHeightC = 16 * *BlockHeight256BytesC;
-        if (*MacroTileHeightC == 0) {
-            *MacroTileWidthC = 0;
-        } else {
-            *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
-        }
-    } else {
-        *MacroTileHeightY = 32 * *BlockHeight256BytesY;
-        *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
-        *MacroTileHeightC = 32 * *BlockHeight256BytesC;
-        if (*MacroTileHeightC == 0) {
-            *MacroTileWidthC = 0;
-        } else {
-            *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
-        }
-    }
-
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
-    dml_print("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
-    dml_print("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
-    dml_print("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
-#endif
-} // CalculateBytePerPixelAndBlockSizes
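/*
 * Editor's sketch (illustrative, not from the patch): each 256-byte request
 * block above satisfies width * height * BytePerPixel == 256, so the width
 * follows directly from the chosen height (hypothetical helper):
 */
static dml_uint_t block_width_256b_sketch(dml_uint_t BytePerPixel, dml_uint_t BlockHeight256Bytes)
{
    return 256U / BytePerPixel / BlockHeight256Bytes; /* e.g. 32bpp, height 8 -> 8 pixels wide */
}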
-
-static noinline_for_stack dml_float_t CalculateTWait(
-    dml_uint_t PrefetchMode,
-    enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
-    dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
-    dml_bool_t DRRDisplay,
-    dml_float_t DRAMClockChangeLatency,
-    dml_float_t FCLKChangeLatency,
-    dml_float_t UrgentLatency,
-    dml_float_t SREnterPlusExitTime)
-{
-    dml_float_t TWait = 0.0;
-
-    if (PrefetchMode == 0 &&
-        !(UseMALLForPStateChange == dml_use_mall_pstate_change_full_frame) && !(UseMALLForPStateChange == dml_use_mall_pstate_change_sub_viewport) &&
-        !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe) && !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
-        TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
-    } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe)) {
-        TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
-    } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe)) {
-        TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
-    } else {
-        TWait = UrgentLatency;
-    }
-
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: PrefetchMode = %u\n", __func__, PrefetchMode);
-    dml_print("DML::%s: TWait = %f\n", __func__, TWait);
-#endif
-    return TWait;
-} // CalculateTWait
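/*
 * Editor's worked example (not from the patch): CalculateTWait() above
 * degrades from covering a DRAM clock change, to an FCLK change, to stutter,
 * to bare urgent latency as PrefetchMode rises. At PrefetchMode == 0 with
 * DRAMClockChangeLatency = 40us, UrgentLatency = 4us and SREnterPlusExitTime
 * = 16us, it returns max3(40 + 4, 16, 4) = 44us; at PrefetchMode == 3 the
 * same inputs collapse to just UrgentLatency = 4us.
 */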
val=%u\n", AllowForPStateChangeOrStutterInVBlank); - ASSERT(0); - } - *MaxPrefetchMode = *MinPrefetchMode; - } -} // CalculatePrefetchMode - -static dml_float_t CalculateWriteBackDISPCLK( - enum dml_source_format_class WritebackPixelFormat, - dml_float_t PixelClock, - dml_float_t WritebackHRatio, - dml_float_t WritebackVRatio, - dml_uint_t WritebackHTaps, - dml_uint_t WritebackVTaps, - dml_uint_t WritebackSourceWidth, - dml_uint_t WritebackDestinationWidth, - dml_uint_t HTotal, - dml_uint_t WritebackLineBufferSize, - dml_float_t DISPCLKDPPCLKVCOSpeed) -{ - dml_float_t DISPCLK_H, DISPCLK_V, DISPCLK_HB; - - DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; - DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / (dml_float_t) HTotal; - DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (dml_float_t) WritebackSourceWidth; - return RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed); -} - -static dml_float_t CalculateWriteBackDelay( - enum dml_source_format_class WritebackPixelFormat, - dml_float_t WritebackHRatio, - dml_float_t WritebackVRatio, - dml_uint_t WritebackVTaps, - dml_uint_t WritebackDestinationWidth, - dml_uint_t WritebackDestinationHeight, - dml_uint_t WritebackSourceHeight, - dml_uint_t HTotal) -{ - dml_float_t CalculateWriteBackDelay; - dml_float_t Line_length; - dml_float_t Output_lines_last_notclamped; - dml_float_t WritebackVInit; - - WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; - Line_length = dml_max((dml_float_t) WritebackDestinationWidth, dml_ceil((dml_float_t)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); - Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil(((dml_float_t)WritebackSourceHeight - (dml_float_t) WritebackVInit) / (dml_float_t)WritebackVRatio, 1.0); - if (Output_lines_last_notclamped < 0) { - CalculateWriteBackDelay = 0; - } else { - CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; - } - return CalculateWriteBackDelay; -} - -static void CalculateVUpdateAndDynamicMetadataParameters( - dml_uint_t MaxInterDCNTileRepeaters, - dml_float_t Dppclk, - dml_float_t Dispclk, - dml_float_t DCFClkDeepSleep, - dml_float_t PixelClock, - dml_uint_t HTotal, - dml_uint_t VBlank, - dml_uint_t DynamicMetadataTransmittedBytes, - dml_uint_t DynamicMetadataLinesBeforeActiveRequired, - dml_uint_t InterlaceEnable, - dml_bool_t ProgressiveToInterlaceUnitInOPP, - - // Output - dml_float_t *TSetup, - dml_float_t *Tdmbf, - dml_float_t *Tdmec, - dml_float_t *Tdmsks, - dml_uint_t *VUpdateOffsetPix, - dml_uint_t *VUpdateWidthPix, - dml_uint_t *VReadyOffsetPix) -{ - dml_float_t TotalRepeaterDelayTime; - TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); - *VUpdateWidthPix = (dml_uint_t)(dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0)); - *VReadyOffsetPix = (dml_uint_t)(dml_ceil(dml_max(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0)); - *VUpdateOffsetPix = (dml_uint_t)(dml_ceil(HTotal / 4.0, 1.0)); - *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; - *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; - *Tdmec = HTotal / PixelClock; - - if (DynamicMetadataLinesBeforeActiveRequired == 0) { - *Tdmsks = VBlank * HTotal / PixelClock / 
-
-static void CalculateVUpdateAndDynamicMetadataParameters(
-    dml_uint_t MaxInterDCNTileRepeaters,
-    dml_float_t Dppclk,
-    dml_float_t Dispclk,
-    dml_float_t DCFClkDeepSleep,
-    dml_float_t PixelClock,
-    dml_uint_t HTotal,
-    dml_uint_t VBlank,
-    dml_uint_t DynamicMetadataTransmittedBytes,
-    dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
-    dml_uint_t InterlaceEnable,
-    dml_bool_t ProgressiveToInterlaceUnitInOPP,
-
-    // Output
-    dml_float_t *TSetup,
-    dml_float_t *Tdmbf,
-    dml_float_t *Tdmec,
-    dml_float_t *Tdmsks,
-    dml_uint_t *VUpdateOffsetPix,
-    dml_uint_t *VUpdateWidthPix,
-    dml_uint_t *VReadyOffsetPix)
-{
-    dml_float_t TotalRepeaterDelayTime;
-    TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
-    *VUpdateWidthPix = (dml_uint_t)(dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0));
-    *VReadyOffsetPix = (dml_uint_t)(dml_ceil(dml_max(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0));
-    *VUpdateOffsetPix = (dml_uint_t)(dml_ceil(HTotal / 4.0, 1.0));
-    *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
-    *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
-    *Tdmec = HTotal / PixelClock;
-
-    if (DynamicMetadataLinesBeforeActiveRequired == 0) {
-        *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
-    } else {
-        *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
-    }
-    if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
-        *Tdmsks = *Tdmsks / 2;
-    }
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
-    dml_print("DML::%s: VBlank = %u\n", __func__, VBlank);
-    dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
-    dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
-    dml_print("DML::%s: Dppclk = %f\n", __func__, Dppclk);
-    dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
-    dml_print("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
-    dml_print("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime);
-
-    dml_print("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
-    dml_print("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
-    dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);
-
-    dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
-#endif
-}
-
-static void CalculateRowBandwidth(
-    dml_bool_t GPUVMEnable,
-    enum dml_source_format_class SourcePixelFormat,
-    dml_float_t VRatio,
-    dml_float_t VRatioChroma,
-    dml_bool_t DCCEnable,
-    dml_float_t LineTime,
-    dml_uint_t MetaRowByteLuma,
-    dml_uint_t MetaRowByteChroma,
-    dml_uint_t meta_row_height_luma,
-    dml_uint_t meta_row_height_chroma,
-    dml_uint_t PixelPTEBytesPerRowLuma,
-    dml_uint_t PixelPTEBytesPerRowChroma,
-    dml_uint_t dpte_row_height_luma,
-    dml_uint_t dpte_row_height_chroma,
-    // Output
-    dml_float_t *meta_row_bw,
-    dml_float_t *dpte_row_bw)
-{
-    if (DCCEnable != true) {
-        *meta_row_bw = 0;
-    } else if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha) {
-        *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
-            + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
-    } else {
-        *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
-    }
-
-    if (GPUVMEnable != true) {
-        *dpte_row_bw = 0;
-    } else if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha) {
-        *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
-            + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
-    } else {
-        *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
-    }
-}
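/*
 * Editor's worked example (not from the patch): CalculateRowBandwidth()
 * above amortizes one row's worth of PTE/meta bytes over the lines that row
 * covers. With VRatio = 1.0, PixelPTEBytesPerRowLuma = 2048,
 * dpte_row_height_luma = 16 and LineTime = 8us:
 * dpte_row_bw = 1.0 * 2048 / (16 * 8) = 16 bytes/us.
 */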
-
-/// @brief Determine immediate flip schedule given bw remaining after considering the prefetch schedule
-/// @param BandwidthAvailableForImmediateFlip Bandwidth available for iflip for all planes
-static void CalculateFlipSchedule(
-    dml_float_t HostVMInefficiencyFactor,
-    dml_float_t UrgentExtraLatency,
-    dml_float_t UrgentLatency,
-    dml_uint_t GPUVMMaxPageTableLevels,
-    dml_bool_t HostVMEnable,
-    dml_uint_t HostVMMaxNonCachedPageTableLevels,
-    dml_bool_t GPUVMEnable,
-    dml_uint_t HostVMMinPageSize,
-    dml_float_t PDEAndMetaPTEBytesPerFrame,
-    dml_float_t MetaRowBytes,
-    dml_float_t DPTEBytesPerRow,
-    dml_float_t BandwidthAvailableForImmediateFlip,
-    dml_uint_t TotImmediateFlipBytes,
-    enum dml_source_format_class SourcePixelFormat,
-    dml_float_t LineTime,
-    dml_float_t VRatio,
-    dml_float_t VRatioChroma,
-    dml_float_t Tno_bw,
-    dml_bool_t DCCEnable,
-    dml_uint_t dpte_row_height,
-    dml_uint_t meta_row_height,
-    dml_uint_t dpte_row_height_chroma,
-    dml_uint_t meta_row_height_chroma,
-    dml_bool_t use_one_row_for_frame_flip,
-
-    // Output
-    dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
-    dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
-    dml_float_t *final_flip_bw,
-    dml_bool_t *ImmediateFlipSupportedForPipe)
-{
-    dml_float_t min_row_time = 0.0;
-    dml_uint_t HostVMDynamicLevelsTrips = 0;
-    dml_float_t TimeForFetchingMetaPTEImmediateFlip = 0;
-    dml_float_t TimeForFetchingRowInVBlankImmediateFlip = 0;
-    dml_float_t ImmediateFlipBW = 0; // @brief The immediate flip bandwidth for this pipe
-
-    if (GPUVMEnable == true && HostVMEnable == true) {
-        HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
-    } else {
-        HostVMDynamicLevelsTrips = 0;
-    }
-
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
-    dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
-    dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
-    dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
-#endif
-
-    if (TotImmediateFlipBytes > 0) {
-        if (use_one_row_for_frame_flip) {
-            ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2.0 * DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes;
-        } else {
-            ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes;
-        }
-        if (GPUVMEnable == true) {
-            TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
-                UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
-                LineTime / 4.0);
-        } else {
-            TimeForFetchingMetaPTEImmediateFlip = 0;
-        }
-        if ((GPUVMEnable == true || DCCEnable == true)) {
-            TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
-        } else {
-            TimeForFetchingRowInVBlankImmediateFlip = 0;
-        }
-
-        *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
-        *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
-
-        if (GPUVMEnable == true) {
-            *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
-                (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
-        } else if ((GPUVMEnable == true || DCCEnable == true)) {
-            *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
-        } else {
-            *final_flip_bw = 0;
-        }
-    } else {
-        TimeForFetchingMetaPTEImmediateFlip = 0;
-        TimeForFetchingRowInVBlankImmediateFlip = 0;
-        *DestinationLinesToRequestVMInImmediateFlip = 0;
-        *DestinationLinesToRequestRowInImmediateFlip = 0;
-        *final_flip_bw = 0;
-    }
-
-    if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_rgbe_alpha) {
-        if (GPUVMEnable == true && DCCEnable != true) {
-            min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
-        } else if (GPUVMEnable != true && DCCEnable == true) {
-            min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
-        } else {
-            min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
-        }
-    } else {
-        if (GPUVMEnable == true && DCCEnable != true) {
-            min_row_time = dpte_row_height * LineTime / VRatio;
-        } else if (GPUVMEnable != true && DCCEnable == true) {
-            min_row_time = meta_row_height * LineTime / VRatio;
-        } else {
-            min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
-        }
-    }
-
-    if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
-        *ImmediateFlipSupportedForPipe = false;
-    } else {
-        *ImmediateFlipSupportedForPipe = true;
-    }
-
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
-    dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
-
-    dml_print("DML::%s: MetaRowBytes = %f\n", __func__, MetaRowBytes);
-    dml_print("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
-    dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
-    dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
-    dml_print("DML::%s: ImmediateFlipBW = %f\n", __func__, ImmediateFlipBW);
-    dml_print("DML::%s: PDEAndMetaPTEBytesPerFrame = %f\n", __func__, PDEAndMetaPTEBytesPerFrame);
-    dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
-    dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
-    dml_print("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
-
-    dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
-    dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
-    dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
-    dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
-    dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
-    dml_print("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
-#endif
-} // CalculateFlipSchedule
-
-static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed)
-{
-    if (Clock <= 0.0)
-        return 0.0;
-    else {
-        if (round_up)
-            return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0);
-        else
-            return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);
-    }
-}
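/*
 * Editor's worked example (not from the patch): RoundToDFSGranularity()
 * above quantizes a clock onto the VCO/4 divider grid. With VCOSpeed = 3600
 * and Clock = 605: 3600 * 4 / 605 ~= 23.8, so rounding up uses dml_floor()
 * -> divider 23 -> 14400 / 23 ~= 626.1, while rounding down uses dml_ceil()
 * -> divider 24 -> 14400 / 24 = 600.0.
 */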
-
-static void CalculateDCCConfiguration(
-    dml_bool_t DCCEnabled,
-    dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
-    enum dml_source_format_class SourcePixelFormat,
-    dml_uint_t SurfaceWidthLuma,
-    dml_uint_t SurfaceWidthChroma,
-    dml_uint_t SurfaceHeightLuma,
-    dml_uint_t SurfaceHeightChroma,
-    dml_uint_t nomDETInKByte,
-    dml_uint_t RequestHeight256ByteLuma,
-    dml_uint_t RequestHeight256ByteChroma,
-    enum dml_swizzle_mode TilingFormat,
-    dml_uint_t BytePerPixelY,
-    dml_uint_t BytePerPixelC,
-    dml_float_t BytePerPixelDETY,
-    dml_float_t BytePerPixelDETC,
-    enum dml_rotation_angle SourceScan,
-    // Output
-    dml_uint_t *MaxUncompressedBlockLuma,
-    dml_uint_t *MaxUncompressedBlockChroma,
-    dml_uint_t *MaxCompressedBlockLuma,
-    dml_uint_t *MaxCompressedBlockChroma,
-    dml_uint_t *IndependentBlockLuma,
-    dml_uint_t *IndependentBlockChroma)
-{
-    dml_uint_t DETBufferSizeForDCC = nomDETInKByte * 1024;
-
-    dml_uint_t yuv420;
-    dml_uint_t horz_div_l;
-    dml_uint_t horz_div_c;
-    dml_uint_t vert_div_l;
-    dml_uint_t vert_div_c;
-
-    dml_uint_t swath_buf_size;
-    dml_float_t detile_buf_vp_horz_limit;
-    dml_float_t detile_buf_vp_vert_limit;
-
-    dml_uint_t MAS_vp_horz_limit;
-    dml_uint_t MAS_vp_vert_limit;
-    dml_uint_t max_vp_horz_width;
-    dml_uint_t max_vp_vert_height;
-    dml_uint_t eff_surf_width_l;
-    dml_uint_t eff_surf_width_c;
-    dml_uint_t eff_surf_height_l;
-    dml_uint_t eff_surf_height_c;
-
-    dml_uint_t full_swath_bytes_horz_wc_l;
-    dml_uint_t full_swath_bytes_horz_wc_c;
-    dml_uint_t full_swath_bytes_vert_wc_l;
-    dml_uint_t full_swath_bytes_vert_wc_c;
-
-    dml_uint_t req128_horz_wc_l;
-    dml_uint_t req128_horz_wc_c;
-    dml_uint_t req128_vert_wc_l;
-    dml_uint_t req128_vert_wc_c;
-
-    dml_uint_t segment_order_horz_contiguous_luma;
-    dml_uint_t segment_order_horz_contiguous_chroma;
-    dml_uint_t segment_order_vert_contiguous_luma;
-    dml_uint_t segment_order_vert_contiguous_chroma;
-
-    typedef enum{
-        REQ_256Bytes,
-        REQ_128BytesNonContiguous,
-        REQ_128BytesContiguous,
-        REQ_NA
-    } RequestType;
-
-    RequestType RequestLuma;
-    RequestType RequestChroma;
-
-    yuv420 = ((SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12) ? 1 : 0);
-    horz_div_l = 1;
-    horz_div_c = 1;
-    vert_div_l = 1;
-    vert_div_c = 1;
-
-    if (BytePerPixelY == 1)
-        vert_div_l = 0;
-    if (BytePerPixelC == 1)
-        vert_div_c = 0;
-
-    if (BytePerPixelC == 0) {
-        swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
-        detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
-        detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
-    } else {
-        swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
-        detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (dml_float_t) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
-        detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
-    }
-
-    if (SourcePixelFormat == dml_420_10) {
-        detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
-        detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
-    }
-
-    detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
-    detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
-
-    MAS_vp_horz_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : 6144;
-    MAS_vp_vert_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
-    max_vp_horz_width = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_horz_limit, detile_buf_vp_horz_limit));
-    max_vp_vert_height = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_vert_limit, detile_buf_vp_vert_limit));
-    eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
-    eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
-    eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
-    eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
-
-    full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
-    full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
-    if (BytePerPixelC > 0) {
-        full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
-        full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
-    } else {
-        full_swath_bytes_horz_wc_c = 0;
-        full_swath_bytes_vert_wc_c = 0;
-    }
-
-    if (SourcePixelFormat == dml_420_10) {
-        full_swath_bytes_horz_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
-        full_swath_bytes_horz_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
-        full_swath_bytes_vert_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
-        full_swath_bytes_vert_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
-    }
-
-    if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
-        req128_horz_wc_l = 0;
-        req128_horz_wc_c = 0;
-    } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
-        req128_horz_wc_l = 0;
-        req128_horz_wc_c = 1;
-    } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
-        req128_horz_wc_l = 1;
-        req128_horz_wc_c = 0;
-    } else {
-        req128_horz_wc_l = 1;
-        req128_horz_wc_c = 1;
-    }
-
-    if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
-        req128_vert_wc_l = 0;
-        req128_vert_wc_c = 0;
-    } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
-        req128_vert_wc_l = 0;
-        req128_vert_wc_c = 1;
-    } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
-        req128_vert_wc_l = 1;
-        req128_vert_wc_c = 0;
-    } else {
-        req128_vert_wc_l = 1;
-        req128_vert_wc_c = 1;
-    }
-
-    if (BytePerPixelY == 2) {
-        segment_order_horz_contiguous_luma = 0;
-        segment_order_vert_contiguous_luma = 1;
-    } else {
-        segment_order_horz_contiguous_luma = 1;
-        segment_order_vert_contiguous_luma = 0;
-    }
-
-    if (BytePerPixelC == 2) {
-        segment_order_horz_contiguous_chroma = 0;
-        segment_order_vert_contiguous_chroma = 1;
-    } else {
-        segment_order_horz_contiguous_chroma = 1;
-        segment_order_vert_contiguous_chroma = 0;
-    }
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
-    dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
-    dml_print("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
-    dml_print("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
-    dml_print("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
-    dml_print("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
-    dml_print("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
-    dml_print("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
-    dml_print("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
-#endif
-
-    if (DCCProgrammingAssumesScanDirectionUnknown == true) {
-        if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
-            RequestLuma = REQ_256Bytes;
-        } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
-            RequestLuma = REQ_128BytesNonContiguous;
-        } else {
-            RequestLuma = REQ_128BytesContiguous;
-        }
-        if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
-            RequestChroma = REQ_256Bytes;
-        } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
-            RequestChroma = REQ_128BytesNonContiguous;
-        } else {
-            RequestChroma = REQ_128BytesContiguous;
-        }
-    } else if (!dml_is_vertical_rotation(SourceScan)) {
-        if (req128_horz_wc_l == 0) {
-            RequestLuma = REQ_256Bytes;
-        } else if (segment_order_horz_contiguous_luma == 0) {
-            RequestLuma = REQ_128BytesNonContiguous;
-        } else {
-            RequestLuma = REQ_128BytesContiguous;
-        }
-        if (req128_horz_wc_c == 0) {
-            RequestChroma = REQ_256Bytes;
-        } else if (segment_order_horz_contiguous_chroma == 0) {
-            RequestChroma = REQ_128BytesNonContiguous;
-        } else {
-            RequestChroma = REQ_128BytesContiguous;
-        }
-    } else {
-        if (req128_vert_wc_l == 0) {
-            RequestLuma = REQ_256Bytes;
-        } else if (segment_order_vert_contiguous_luma == 0) {
-            RequestLuma = REQ_128BytesNonContiguous;
-        } else {
-            RequestLuma = REQ_128BytesContiguous;
-        }
-        if (req128_vert_wc_c == 0) {
-            RequestChroma = REQ_256Bytes;
-        } else if (segment_order_vert_contiguous_chroma == 0) {
-            RequestChroma = REQ_128BytesNonContiguous;
-        } else {
-            RequestChroma = REQ_128BytesContiguous;
-        }
-    }
-
-    if (RequestLuma == REQ_256Bytes) {
-        *MaxUncompressedBlockLuma = 256;
-        *MaxCompressedBlockLuma = 256;
-        *IndependentBlockLuma = 0;
-    } else if (RequestLuma == REQ_128BytesContiguous) {
-        *MaxUncompressedBlockLuma = 256;
-        *MaxCompressedBlockLuma = 128;
-        *IndependentBlockLuma = 128;
-    } else {
-        *MaxUncompressedBlockLuma = 256;
-        *MaxCompressedBlockLuma = 64;
-        *IndependentBlockLuma = 64;
-    }
-
-    if (RequestChroma == REQ_256Bytes) {
-        *MaxUncompressedBlockChroma = 256;
-        *MaxCompressedBlockChroma = 256;
-        *IndependentBlockChroma = 0;
-    } else if (RequestChroma == REQ_128BytesContiguous) {
-        *MaxUncompressedBlockChroma = 256;
-        *MaxCompressedBlockChroma = 128;
-        *IndependentBlockChroma = 128;
-    } else {
-        *MaxUncompressedBlockChroma = 256;
-        *MaxCompressedBlockChroma = 64;
-        *IndependentBlockChroma = 64;
-    }
-
-    if (DCCEnabled != true || BytePerPixelC == 0) {
-        *MaxUncompressedBlockChroma = 0;
-        *MaxCompressedBlockChroma = 0;
-        *IndependentBlockChroma = 0;
-    }
-
-    if (DCCEnabled != true) {
-        *MaxUncompressedBlockLuma = 0;
-        *MaxCompressedBlockLuma = 0;
-        *IndependentBlockLuma = 0;
-    }
-
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
-    dml_print("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
-    dml_print("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
-    dml_print("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
-    dml_print("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
-    dml_print("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
-#endif
-
-} // CalculateDCCConfiguration
dml_uint_t VTaps, - dml_bool_t Interlace, - dml_bool_t ProgressiveToInterlaceUnitInOPP, - dml_uint_t SwathHeight, - enum dml_rotation_angle SourceScan, - dml_bool_t ViewportStationary, - dml_uint_t SwathWidth, - dml_uint_t ViewportHeight, - dml_uint_t ViewportXStart, - dml_uint_t ViewportYStart, - - // Output - dml_uint_t *VInitPreFill, - dml_uint_t *MaxNumSwath) -{ - - dml_uint_t vp_start_rot = 0; - dml_uint_t sw0_tmp = 0; - dml_uint_t MaxPartialSwath = 0; - dml_float_t numLines = 0; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); - dml_print("DML::%s: VTaps = %u\n", __func__, VTaps); - dml_print("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); - dml_print("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); - dml_print("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); - dml_print("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); -#endif - if (ProgressiveToInterlaceUnitInOPP) - *VInitPreFill = (dml_uint_t)(dml_floor((VRatio + (dml_float_t) VTaps + 1) / 2.0, 1)); - else - *VInitPreFill = (dml_uint_t)(dml_floor((VRatio + (dml_float_t) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1)); - - if (ViewportStationary) { - if (SourceScan == dml_rotation_180 || SourceScan == dml_rotation_180m) { - vp_start_rot = SwathHeight - (((dml_uint_t) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); - } else if (SourceScan == dml_rotation_270 || SourceScan == dml_rotation_90m) { - vp_start_rot = ViewportXStart; - } else if (SourceScan == dml_rotation_90 || SourceScan == dml_rotation_270m) { - vp_start_rot = SwathHeight - (((dml_uint_t)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); - } else { - vp_start_rot = ViewportYStart; - } - sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); - if (sw0_tmp < *VInitPreFill) { - *MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - sw0_tmp) / (dml_float_t) SwathHeight, 1) + 1); - } else { - *MaxNumSwath = 1; - } - MaxPartialSwath = (dml_uint_t)(dml_max(1, (dml_uint_t) (vp_start_rot + *VInitPreFill - 1) % SwathHeight)); - } else { - *MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - 1.0) / (dml_float_t) SwathHeight, 1) + 1); - if (*VInitPreFill > 1) { - MaxPartialSwath = (dml_uint_t)(dml_max(1, (dml_uint_t) (*VInitPreFill - 2) % SwathHeight)); - } else { - MaxPartialSwath = (dml_uint_t)(dml_max(1, (dml_uint_t) (*VInitPreFill + SwathHeight - 2) % SwathHeight)); - } - } - numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); - dml_print("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); - dml_print("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); - dml_print("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); - dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); -#endif - return (dml_uint_t)(numLines); - -} // CalculatePrefetchSourceLines - -static dml_uint_t CalculateVMAndRowBytes( - dml_bool_t ViewportStationary, - dml_bool_t DCCEnable, - dml_uint_t NumberOfDPPs, - dml_uint_t BlockHeight256Bytes, - dml_uint_t BlockWidth256Bytes, - enum dml_source_format_class SourcePixelFormat, - dml_uint_t SurfaceTiling, - dml_uint_t BytePerPixel, - enum dml_rotation_angle SourceScan, - dml_uint_t SwathWidth, - dml_uint_t ViewportHeight, - dml_uint_t ViewportXStart, - dml_uint_t ViewportYStart, - dml_bool_t GPUVMEnable, - dml_uint_t GPUVMMaxPageTableLevels, - dml_uint_t GPUVMMinPageSizeKBytes, - dml_uint_t PTEBufferSizeInRequests, - dml_uint_t 
Pitch, - dml_uint_t DCCMetaPitch, - dml_uint_t MacroTileWidth, - dml_uint_t MacroTileHeight, - - // Output - dml_uint_t *MetaRowByte, - dml_uint_t *PixelPTEBytesPerRow, // for bandwidth calculation - dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check - dml_uint_t *dpte_row_width_ub, - dml_uint_t *dpte_row_height, - dml_uint_t *dpte_row_height_linear, - dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame, - dml_uint_t *dpte_row_width_ub_one_row_per_frame, - dml_uint_t *dpte_row_height_one_row_per_frame, - dml_uint_t *MetaRequestWidth, - dml_uint_t *MetaRequestHeight, - dml_uint_t *meta_row_width, - dml_uint_t *meta_row_height, - dml_uint_t *PixelPTEReqWidth, - dml_uint_t *PixelPTEReqHeight, - dml_uint_t *PTERequestSize, - dml_uint_t *DPDE0BytesFrame, - dml_uint_t *MetaPTEBytesFrame) -{ - dml_uint_t MPDEBytesFrame; - dml_uint_t DCCMetaSurfaceBytes; - dml_uint_t ExtraDPDEBytesFrame; - dml_uint_t PDEAndMetaPTEBytesFrame; - dml_uint_t MacroTileSizeBytes; - dml_uint_t vp_height_meta_ub; - dml_uint_t vp_height_dpte_ub; - - dml_uint_t PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this - - *MetaRequestHeight = 8 * BlockHeight256Bytes; - *MetaRequestWidth = 8 * BlockWidth256Bytes; - if (SurfaceTiling == dml_sw_linear) { - *meta_row_height = 32; - *meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth)); - } else if (!dml_is_vertical_rotation(SourceScan)) { - *meta_row_height = *MetaRequestHeight; - if (ViewportStationary && NumberOfDPPs == 1) { - *meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth)); - } else { - *meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth); - } - *MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0); - } else { - *meta_row_height = *MetaRequestWidth; - if (ViewportStationary && NumberOfDPPs == 1) { - *meta_row_width = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight)); - } else { - *meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight); - } - *MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0); - } - - if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) { - vp_height_meta_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes)); - } else if (!dml_is_vertical_rotation(SourceScan)) { - vp_height_meta_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes); - } else { - vp_height_meta_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes); - } - - DCCMetaSurfaceBytes = (dml_uint_t)(DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0); - - if (GPUVMEnable == true) { - *MetaPTEBytesFrame = (dml_uint_t)((dml_ceil((dml_float_t) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64); - MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); - } else { - *MetaPTEBytesFrame = 0; - MPDEBytesFrame = 0; - } - - if (DCCEnable != true) { - *MetaPTEBytesFrame = 0; - MPDEBytesFrame = 0; - *MetaRowByte = 0; - } - - MacroTileSizeBytes = 
MacroTileWidth * BytePerPixel * MacroTileHeight; - - if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) { - vp_height_dpte_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + MacroTileHeight - 1, MacroTileHeight) - dml_floor(ViewportYStart, MacroTileHeight)); - } else if (!dml_is_vertical_rotation(SourceScan)) { - vp_height_dpte_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight); - } else { - vp_height_dpte_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight); - } - - if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { - *DPDE0BytesFrame = (dml_uint_t)(64 * (dml_ceil((dml_float_t) (Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / (dml_float_t) (8 * 2097152), 1) + 1)); - ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); - } else { - *DPDE0BytesFrame = 0; - ExtraDPDEBytesFrame = 0; - } - - PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable); - dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); - dml_print("DML::%s: SwModeLinear = %u\n", __func__, SurfaceTiling == dml_sw_linear); - dml_print("DML::%s: BytePerPixel = %u\n", __func__, BytePerPixel); - dml_print("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, GPUVMMaxPageTableLevels); - dml_print("DML::%s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); - dml_print("DML::%s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); - dml_print("DML::%s: MacroTileHeight = %u\n", __func__, MacroTileHeight); - dml_print("DML::%s: MacroTileWidth = %u\n", __func__, MacroTileWidth); - dml_print("DML::%s: MetaPTEBytesFrame = %u\n", __func__, *MetaPTEBytesFrame); - dml_print("DML::%s: MPDEBytesFrame = %u\n", __func__, MPDEBytesFrame); - dml_print("DML::%s: DPDE0BytesFrame = %u\n", __func__, *DPDE0BytesFrame); - dml_print("DML::%s: ExtraDPDEBytesFrame= %u\n", __func__, ExtraDPDEBytesFrame); - dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, PDEAndMetaPTEBytesFrame); - dml_print("DML::%s: ViewportHeight = %u\n", __func__, ViewportHeight); - dml_print("DML::%s: SwathWidth = %u\n", __func__, SwathWidth); - dml_print("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub); -#endif - - if (SurfaceTiling == dml_sw_linear) { - *PixelPTEReqHeight = 1; - *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; - PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; - *PTERequestSize = 64; - } else if (GPUVMMinPageSizeKBytes == 4) { - *PixelPTEReqHeight = 16 * BlockHeight256Bytes; - *PixelPTEReqWidth = 16 * BlockWidth256Bytes; - *PTERequestSize = 128; - } else { - *PixelPTEReqHeight = MacroTileHeight; - *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); - *PTERequestSize = 64; - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); - dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); - dml_print("DML::%s: PixelPTEReqHeight = %u\n", __func__, *PixelPTEReqHeight); - dml_print("DML::%s: PixelPTEReqWidth = %u\n", __func__, *PixelPTEReqWidth); - dml_print("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); - dml_print("DML::%s: PTERequestSize = %u\n", __func__, *PTERequestSize); - dml_print("DML::%s: Pitch = %u\n", 
__func__, Pitch); -#endif - - *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; - *dpte_row_width_ub_one_row_per_frame = (dml_uint_t)((dml_ceil(((dml_float_t)Pitch * (dml_float_t) *dpte_row_height_one_row_per_frame / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * (dml_float_t) *PixelPTEReqWidth); - *PixelPTEBytesPerRow_one_row_per_frame = (dml_uint_t)((dml_float_t) *dpte_row_width_ub_one_row_per_frame / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize); - - if (SurfaceTiling == dml_sw_linear) { - *dpte_row_height = (dml_uint_t)(dml_min(128, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1))); - dml_print("DML::%s: dpte_row_height term 1 = %u\n", __func__, PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); - dml_print("DML::%s: dpte_row_height term 2 = %f\n", __func__, dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); - dml_print("DML::%s: dpte_row_height term 3 = %f\n", __func__, dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); - dml_print("DML::%s: dpte_row_height term 4 = %u\n", __func__, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); - dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height); - - *dpte_row_width_ub = (dml_uint_t)(dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height - 1), (dml_float_t) *PixelPTEReqWidth) + *PixelPTEReqWidth); - *PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize); - - // VBA_DELTA, VBA doesn't have programming value for pte row height linear. - *dpte_row_height_linear = 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * PixelPTEReqWidth_linear / Pitch), 1); - if (*dpte_row_height_linear > 128) - *dpte_row_height_linear = 128; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *dpte_row_width_ub); -#endif - - } else if (!dml_is_vertical_rotation(SourceScan)) { - *dpte_row_height = *PixelPTEReqHeight; - - if (GPUVMMinPageSizeKBytes > 64) { - *dpte_row_width_ub = (dml_uint_t)((dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth); - } else if (ViewportStationary && (NumberOfDPPs == 1)) { - *dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - dml_floor(ViewportXStart, *PixelPTEReqWidth)); - } else { - *dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t)*PixelPTEReqWidth, 1) + 1.0) * *PixelPTEReqWidth); - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *dpte_row_width_ub); -#endif - - ASSERT(*PixelPTEReqWidth); - if (*PixelPTEReqWidth != 0) - *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; - } else { - *dpte_row_height = (dml_uint_t)(dml_min(*PixelPTEReqWidth, MacroTileWidth)); - - if (ViewportStationary && (NumberOfDPPs == 1)) { - *dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight)); - } else { - *dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t) *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight); - } - - *PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / 
(dml_float_t) *PixelPTEReqHeight * *PTERequestSize); -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *dpte_row_width_ub); -#endif - } - - if (GPUVMEnable != true) - *PixelPTEBytesPerRow = 0; - - *PixelPTEBytesPerRowStorage = *PixelPTEBytesPerRow; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); - dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); - dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height); - dml_print("DML::%s: dpte_row_height_linear = %u\n", __func__, *dpte_row_height_linear); - dml_print("DML::%s: dpte_row_width_ub = %u\n", __func__, *dpte_row_width_ub); - dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *PixelPTEBytesPerRow); - dml_print("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *PixelPTEBytesPerRowStorage); - dml_print("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); - dml_print("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *dpte_row_height_one_row_per_frame); - dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *dpte_row_width_ub_one_row_per_frame); - dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *PixelPTEBytesPerRow_one_row_per_frame); -#endif - - dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); - - return PDEAndMetaPTEBytesFrame; -} // CalculateVMAndRowBytes - -static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported) -{ - dml_uint_t num_active_planes = dml_get_num_active_planes(display_cfg); - - //Progressive To Interlace Unit Effect - for (dml_uint_t k = 0; k < num_active_planes; ++k) { - display_cfg->output.PixelClockBackEnd[k] = display_cfg->timing.PixelClock[k]; - if (display_cfg->timing.Interlace[k] == 1 && ptoi_supported == true) { - display_cfg->timing.PixelClock[k] = 2 * display_cfg->timing.PixelClock[k]; - } - } -} - -static dml_float_t TruncToValidBPP( - dml_float_t LinkBitRate, - dml_uint_t Lanes, - dml_uint_t HTotal, - dml_uint_t HActive, - dml_float_t PixelClock, - dml_float_t DesiredBPP, - dml_bool_t DSCEnable, - enum dml_output_encoder_class Output, - enum dml_output_format_class Format, - dml_uint_t DSCInputBitPerComponent, - dml_uint_t DSCSlices, - dml_uint_t AudioRate, - dml_uint_t AudioLayout, - enum dml_odm_mode ODMModeNoDSC, - enum dml_odm_mode ODMModeDSC, - - // Output - dml_uint_t *RequiredSlots) -{ - dml_float_t MaxLinkBPP; - dml_uint_t MinDSCBPP; - dml_float_t MaxDSCBPP; - dml_uint_t NonDSCBPP0; - dml_uint_t NonDSCBPP1; - dml_uint_t NonDSCBPP2; - - if (Format == dml_420) { - NonDSCBPP0 = 12; - NonDSCBPP1 = 15; - NonDSCBPP2 = 18; - MinDSCBPP = 6; - MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16; - } else if (Format == dml_444) { - NonDSCBPP0 = 24; - NonDSCBPP1 = 30; - NonDSCBPP2 = 36; - MinDSCBPP = 8; - MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; - } else { - if (Output == dml_hdmi) { - NonDSCBPP0 = 24; - NonDSCBPP1 = 24; - NonDSCBPP2 = 24; - } else { - NonDSCBPP0 = 16; - NonDSCBPP1 = 20; - NonDSCBPP2 = 24; - } - if (Format == dml_n422) { - MinDSCBPP = 7; - MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; - } else { - MinDSCBPP = 8; - MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; - } - } - - if (Output == dml_dp2p0) { - MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 
65540.0; - } else if (DSCEnable && Output == dml_dp) { - MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100); - } else { - MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock; - } - - if (DSCEnable) { - if (ODMModeDSC == dml_odm_mode_combine_4to1) { - MaxLinkBPP = dml_min(MaxLinkBPP, 16); - } else if (ODMModeDSC == dml_odm_mode_combine_2to1) { - MaxLinkBPP = dml_min(MaxLinkBPP, 32); - } else if (ODMModeDSC == dml_odm_mode_split_1to2) { - MaxLinkBPP = 2 * MaxLinkBPP; - } - } else { - if (ODMModeNoDSC == dml_odm_mode_combine_4to1) { - MaxLinkBPP = dml_min(MaxLinkBPP, 16); - } else if (ODMModeNoDSC == dml_odm_mode_combine_2to1) { - MaxLinkBPP = dml_min(MaxLinkBPP, 32); - } else if (ODMModeNoDSC == dml_odm_mode_split_1to2) { - MaxLinkBPP = 2 * MaxLinkBPP; - } - } - - *RequiredSlots = (dml_uint_t)(dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1)); - - if (DesiredBPP == 0) { - if (DSCEnable) { - if (MaxLinkBPP < MinDSCBPP) { - return __DML_DPP_INVALID__; - } else if (MaxLinkBPP >= MaxDSCBPP) { - return MaxDSCBPP; - } else { - return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; - } - } else { - if (MaxLinkBPP >= NonDSCBPP2) { - return NonDSCBPP2; - } else if (MaxLinkBPP >= NonDSCBPP1) { - return NonDSCBPP1; - } else if (MaxLinkBPP >= NonDSCBPP0) { - return NonDSCBPP0; - } else { - return __DML_DPP_INVALID__; - } - } - } else { - if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) || - (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { - return __DML_DPP_INVALID__; - } else { - return DesiredBPP; - } - } -} // TruncToValidBPP - -static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - struct display_mode_lib_scratch_st *scratch, - struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p) -{ - struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; - - s->TotalActiveWriteback = 0; - p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency; - p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency; - p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark; - p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark; - p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep; - p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep; - p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep; - p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); - dml_print("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); - dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); - dml_print("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); - dml_print("DML::%s: USRRetrainingWatermark = %f\n", 
__func__, p->Watermark->USRRetrainingWatermark); - dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); - dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); - dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); - dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); - dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); - dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); -#endif - - s->TotalActiveWriteback = 0; - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->WritebackEnable[k] == true) { - s->TotalActiveWriteback = s->TotalActiveWriteback + 1; - } - } - - if (s->TotalActiveWriteback <= 1) { - p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency; - } else { - p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; - } - if (p->USRRetrainingRequiredFinal) - p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency; - - if (s->TotalActiveWriteback <= 1) { - p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency; - p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency; - } else { - p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; - p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK; - } - - if (p->USRRetrainingRequiredFinal) - p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; - - if (p->USRRetrainingRequiredFinal) - p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); - dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); - dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); - dml_print("DML::%s: USRRetrainingRequiredFinal = %u\n", __func__, p->USRRetrainingRequiredFinal); - dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); -#endif - - s->TotalPixelBW = 0.0; - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k] - * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k] + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * p->VRatioChroma[k]) / (p->HTotal[k] / p->PixelClock[k]); - } - - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - - s->LBLatencyHidingSourceLinesY[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthY[k] / dml_max(p->HRatio[k], 
1.0)), 1)) - (p->VTaps[k] - 1)); - s->LBLatencyHidingSourceLinesC[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthC[k] / dml_max(p->HRatioChroma[k], 1.0)), 1)) - (p->VTapsChroma[k] - 1)); - - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); - dml_print("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); - dml_print("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, p->LBBitPerPixel[k]); - dml_print("DML::%s: k=%u, HRatio = %f\n", __func__, k, p->HRatio[k]); - dml_print("DML::%s: k=%u, VTaps = %u\n", __func__, k, p->VTaps[k]); -#endif - - s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / p->VRatio[k] * (p->HTotal[k] / p->PixelClock[k]); - s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / p->VRatioChroma[k] * (p->HTotal[k] / p->PixelClock[k]); - - s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k]; - if (p->UnboundedRequestEnabled) { - s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k]) / (p->HTotal[k] / p->PixelClock[k]) / s->TotalPixelBW; - } - - s->LinesInDETY[k] = (dml_float_t)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; - s->LinesInDETYRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETY[k], p->SwathHeightY[k])); - s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k]; - - s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k]; - - if (p->NumberOfActiveSurfaces > 1) { - s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightY[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatio[k]; - } - - if (p->BytePerPixelDETC[k] > 0) { - s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k]; - s->LinesInDETCRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETC[k], p->SwathHeightC[k])); - s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatioChroma[k]; - s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k]; - if (p->NumberOfActiveSurfaces > 1) { - s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightC[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatioChroma[k]; - } - s->ActiveClockChangeLatencyHiding = dml_min(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC); - } else { - s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY; - } - - s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->DRAMClockChangeWatermark; - s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->FCLKChangeWatermark; - 
s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark; - - if (p->WritebackEnable[k]) { - s->WritebackLatencyHiding = (dml_float_t)p->WritebackInterfaceBufferSize * 1024.0 / ((dml_float_t)p->WritebackDestinationWidth[k] * (dml_float_t)p->WritebackDestinationHeight[k] / ((dml_float_t)p->WritebackSourceHeight[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k]) * 4.0); - if (p->WritebackPixelFormat[k] == dml_444_64) { - s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2; - } - s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark; - - s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark; - - s->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin); - s->ActiveFCLKChangeLatencyMargin[k] = dml_min(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin); - } - p->MaxActiveDRAMClockChangeLatencySupported[k] = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency); - p->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k]; - } - - *p->USRRetrainingSupport = true; - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->USRRetrainingLatencyMargin[k] < 0)) { - *p->USRRetrainingSupport = false; - } - } - - s->FoundCriticalSurface = false; - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && ((!s->FoundCriticalSurface) - || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) { - s->FoundCriticalSurface = true; - *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency; - } - } - - for (dml_uint_t i = 0; i < p->NumberOfActiveSurfaces; ++i) { - for (dml_uint_t j = 0; j < p->NumberOfActiveSurfaces; ++j) { - if (i == j || - (p->BlendingAndTiming[i] == i && p->BlendingAndTiming[j] == i) || - (p->BlendingAndTiming[j] == j && p->BlendingAndTiming[i] == j) || - (p->BlendingAndTiming[i] == p->BlendingAndTiming[j] && p->BlendingAndTiming[i] != i) || - (p->SynchronizeTimingsFinal && p->PixelClock[i] == p->PixelClock[j] && p->HTotal[i] == p->HTotal[j] && p->VTotal[i] == p->VTotal[j] && p->VActive[i] == p->VActive[j]) || - (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (p->DRRDisplay[i] || p->DRRDisplay[j]))) { - s->SynchronizedSurfaces[i][j] = true; - } else { - s->SynchronizedSurfaces[i][j] = false; - } - } - } - - s->FCLKChangeSupportNumber = 0; - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->ActiveFCLKChangeLatencyMargin[k] < 0)) { - if (!(p->PrefetchMode[k] <= 1)) { - s->FCLKChangeSupportNumber = 3; - } else if (s->FCLKChangeSupportNumber == 0) { - s->FCLKChangeSupportNumber = ((p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 
2 : 1); - s->LastSurfaceWithoutMargin = k; - } else if (((s->FCLKChangeSupportNumber == 1) && (p->DRRDisplay[k] || (!s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k]))) || (s->FCLKChangeSupportNumber == 2)) - s->FCLKChangeSupportNumber = 3; - } - } - - if (s->FCLKChangeSupportNumber == 0) { - *p->FCLKChangeSupport = dml_fclock_change_vactive; - } else if ((s->FCLKChangeSupportNumber == 1) || (s->FCLKChangeSupportNumber == 2)) { - *p->FCLKChangeSupport = dml_fclock_change_vblank; - } else { - *p->FCLKChangeSupport = dml_fclock_change_unsupported; - } - - s->DRAMClockChangeMethod = 0; - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame) - s->DRAMClockChangeMethod = 1; - else if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) - s->DRAMClockChangeMethod = 2; - } - - s->DRAMClockChangeSupportNumber = 0; - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (((s->DRAMClockChangeMethod == 0) && (s->ActiveDRAMClockChangeLatencyMargin[k] < 0)) || - ((s->DRAMClockChangeMethod == 1) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_full_frame)) || - ((s->DRAMClockChangeMethod == 2) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_sub_viewport) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe))) { - if (p->PrefetchMode[k] != 0) { // Don't need to support DRAM clock change, PrefetchMode 0 means needs DRAM clock change support - s->DRAMClockChangeSupportNumber = 3; - } else if (s->DRAMClockChangeSupportNumber == 0) { - s->DRAMClockChangeSupportNumber = (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1; - s->LastSurfaceWithoutMargin = k; - } else if (((s->DRAMClockChangeSupportNumber == 1) && (p->DRRDisplay[k] || !s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k])) || (s->DRAMClockChangeSupportNumber == 2)) { - s->DRAMClockChangeSupportNumber = 3; - } - } - } - - if (s->DRAMClockChangeMethod == 0) { // No MALL usage - if (s->DRAMClockChangeSupportNumber == 0) { - *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive; - } else if (s->DRAMClockChangeSupportNumber == 1) { - *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank; - } else if (s->DRAMClockChangeSupportNumber == 2) { - *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr; - } else { - *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported; - } - } else if (s->DRAMClockChangeMethod == 1) { // Any pipe using MALL full frame - if (s->DRAMClockChangeSupportNumber == 0) { - *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_full_frame; - } else if (s->DRAMClockChangeSupportNumber == 1) { - *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_full_frame; - } else if (s->DRAMClockChangeSupportNumber == 2) { - *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_full_frame; - } else { - *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported; - } - } else { // Any pipe using MALL subviewport - if (s->DRAMClockChangeSupportNumber == 0) { - *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_sub_vp; - } else if (s->DRAMClockChangeSupportNumber == 1) { - *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_sub_vp; - } else if (s->DRAMClockChangeSupportNumber == 2) { - *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_sub_vp; - } else { - *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported; - } - } - - for 
(dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - s->dst_y_pstate = (dml_uint_t)(dml_ceil((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (p->HTotal[k] / p->PixelClock[k]), 1)); - s->src_y_pstate_l = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatio[k], p->SwathHeightY[k])); - s->src_y_ahead_l = (dml_uint_t)(dml_floor(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]); - s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height[k]; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); - dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); - dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); - dml_print("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); - dml_print("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); - dml_print("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); - dml_print("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l); - dml_print("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l); - dml_print("DML::%s: k=%u, meta_row_height = %u\n", __func__, k, p->meta_row_height[k]); - dml_print("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l); -#endif - p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l; - - if (p->BytePerPixelDETC[k] > 0) { - s->src_y_pstate_c = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatioChroma[k], p->SwathHeightC[k])); - s->src_y_ahead_c = (dml_uint_t)(dml_floor(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]); - s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_chroma[k]; - p->SubViewportLinesNeededInMALL[k] = (dml_uint_t)(dml_max(s->sub_vp_lines_l, s->sub_vp_lines_c)); - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); - dml_print("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); - dml_print("DML::%s: k=%u, meta_row_height_chroma = %u\n", __func__, k, p->meta_row_height_chroma[k]); - dml_print("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); -#endif - } - } - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->DRAMClockChangeSupport); - dml_print("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->FCLKChangeSupport); - dml_print("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported); - dml_print("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport); -#endif -} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport - -static void CalculateDCFCLKDeepSleep( - dml_uint_t NumberOfActiveSurfaces, - dml_uint_t BytePerPixelY[], - dml_uint_t BytePerPixelC[], - dml_float_t VRatio[], - dml_float_t VRatioChroma[], - dml_uint_t SwathWidthY[], - dml_uint_t SwathWidthC[], - dml_uint_t DPPPerSurface[], - dml_float_t HRatio[], - dml_float_t HRatioChroma[], - dml_float_t PixelClock[], - dml_float_t PSCL_THROUGHPUT[], - dml_float_t PSCL_THROUGHPUT_CHROMA[], - dml_float_t Dppclk[], - dml_float_t ReadBandwidthLuma[], - dml_float_t ReadBandwidthChroma[], - dml_uint_t ReturnBusWidth, - - // Output - dml_float_t *DCFClkDeepSleep) -{ - dml_float_t 
DisplayPipeLineDeliveryTimeLuma; - dml_float_t DisplayPipeLineDeliveryTimeChroma; - dml_float_t DCFClkDeepSleepPerSurface[__DML_NUM_PLANES__]; - dml_float_t ReadBandwidth = 0.0; - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - - if (VRatio[k] <= 1) { - DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; - } else { - DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; - } - if (BytePerPixelC[k] == 0) { - DisplayPipeLineDeliveryTimeChroma = 0; - } else { - if (VRatioChroma[k] <= 1) { - DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; - } else { - DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; - } - } - - if (BytePerPixelC[k] > 0) { - DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, - __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); - } else { - DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; - } - DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, PixelClock[k]); - dml_print("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); -#endif - } - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; - } - - *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (dml_float_t) ReturnBusWidth); - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); - dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); - dml_print("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); - dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); -#endif - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); - } - dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); -} // CalculateDCFCLKDeepSleep - -static void CalculateUrgentBurstFactor( - enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, - dml_uint_t swath_width_luma_ub, - dml_uint_t swath_width_chroma_ub, - dml_uint_t SwathHeightY, - dml_uint_t SwathHeightC, - dml_float_t LineTime, - dml_float_t UrgentLatency, - dml_float_t CursorBufferSize, - dml_uint_t CursorWidth, - dml_uint_t CursorBPP, - dml_float_t VRatio, - dml_float_t VRatioC, - dml_float_t BytePerPixelInDETY, - dml_float_t BytePerPixelInDETC, - dml_uint_t DETBufferSizeY, - dml_uint_t DETBufferSizeC, - // Output - dml_float_t *UrgentBurstFactorCursor, - dml_float_t *UrgentBurstFactorLuma, - dml_float_t *UrgentBurstFactorChroma, - dml_bool_t *NotEnoughUrgentLatencyHiding) -{ - dml_float_t LinesInDETLuma; - dml_float_t LinesInDETChroma; - dml_uint_t LinesInCursorBuffer; - dml_float_t CursorBufferSizeInTime; - dml_float_t DETBufferSizeInTimeLuma; - dml_float_t DETBufferSizeInTimeChroma; - - *NotEnoughUrgentLatencyHiding = 0; - - if (CursorWidth > 0) { - LinesInCursorBuffer = 1 << (dml_uint_t) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); - if 
(VRatio > 0) { - CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; - if (CursorBufferSizeInTime - UrgentLatency <= 0) { - *NotEnoughUrgentLatencyHiding = 1; - *UrgentBurstFactorCursor = 0; - } else { - *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); - } - } else { - *UrgentBurstFactorCursor = 1; - } - } - - LinesInDETLuma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; - - if (VRatio > 0) { - DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; - if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { - *NotEnoughUrgentLatencyHiding = 1; - *UrgentBurstFactorLuma = 0; - } else { - *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); - } - } else { - *UrgentBurstFactorLuma = 1; - } - - if (BytePerPixelInDETC > 0) { - LinesInDETChroma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub; - - if (VRatioC > 0) { - DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC; - if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { - *NotEnoughUrgentLatencyHiding = 1; - *UrgentBurstFactorChroma = 0; - } else { - *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); - } - } else { - *UrgentBurstFactorChroma = 1; - } - } -} // CalculateUrgentBurstFactor - -static void CalculatePixelDeliveryTimes( - dml_uint_t NumberOfActiveSurfaces, - dml_float_t VRatio[], - dml_float_t VRatioChroma[], - dml_float_t VRatioPrefetchY[], - dml_float_t VRatioPrefetchC[], - dml_uint_t swath_width_luma_ub[], - dml_uint_t swath_width_chroma_ub[], - dml_uint_t DPPPerSurface[], - dml_float_t HRatio[], - dml_float_t HRatioChroma[], - dml_float_t PixelClock[], - dml_float_t PSCL_THROUGHPUT[], - dml_float_t PSCL_THROUGHPUT_CHROMA[], - dml_float_t Dppclk[], - dml_uint_t BytePerPixelC[], - enum dml_rotation_angle SourceScan[], - dml_uint_t NumberOfCursors[], - dml_uint_t CursorWidth[], - dml_uint_t CursorBPP[], - dml_uint_t BlockWidth256BytesY[], - dml_uint_t BlockHeight256BytesY[], - dml_uint_t BlockWidth256BytesC[], - dml_uint_t BlockHeight256BytesC[], - - // Output - dml_float_t DisplayPipeLineDeliveryTimeLuma[], - dml_float_t DisplayPipeLineDeliveryTimeChroma[], - dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[], - dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[], - dml_float_t DisplayPipeRequestDeliveryTimeLuma[], - dml_float_t DisplayPipeRequestDeliveryTimeChroma[], - dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[], - dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[], - dml_float_t CursorRequestDeliveryTime[], - dml_float_t CursorRequestDeliveryTimePrefetch[]) -{ - dml_float_t req_per_swath_ub; - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u : HRatio = %f\n", __func__, k, HRatio[k]); - dml_print("DML::%s: k=%u : VRatio = %f\n", __func__, k, VRatio[k]); - dml_print("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); - dml_print("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); - dml_print("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); - dml_print("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); - dml_print("DML::%s: k=%u : 
PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); - dml_print("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); - dml_print("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); - dml_print("DML::%s: k=%u : PixelClock = %f\n", __func__, k, PixelClock[k]); - dml_print("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]); -#endif - - if (VRatio[k] <= 1) { - DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; - } else { - DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; - } - - if (BytePerPixelC[k] == 0) { - DisplayPipeLineDeliveryTimeChroma[k] = 0; - } else { - if (VRatioChroma[k] <= 1) { - DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; - } else { - DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; - } - } - - if (VRatioPrefetchY[k] <= 1) { - DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; - } else { - DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; - } - - if (BytePerPixelC[k] == 0) { - DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; - } else { - if (VRatioPrefetchC[k] <= 1) { - DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; - } else { - DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; - } - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); - dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); - dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); - dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); -#endif - } - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - if (!dml_is_vertical_rotation(SourceScan[k])) { - req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; - } else { - req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub); -#endif - - DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; - DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; - if (BytePerPixelC[k] == 0) { - DisplayPipeRequestDeliveryTimeChroma[k] = 0; - DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; - } else { - if (!dml_is_vertical_rotation(SourceScan[k])) { - req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; - } else { - req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub); -#endif - DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; - DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; - } -#ifdef 
__DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); - dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); - dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); - dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); -#endif - } - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - dml_uint_t cursor_req_per_width; - cursor_req_per_width = (dml_uint_t)(dml_ceil((dml_float_t) CursorWidth[k] * (dml_float_t) CursorBPP[k] / 256.0 / 8.0, 1.0)); - if (NumberOfCursors[k] > 0) { - if (VRatio[k] <= 1) { - CursorRequestDeliveryTime[k] = (dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width; - } else { - CursorRequestDeliveryTime[k] = (dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; - } - if (VRatioPrefetchY[k] <= 1) { - CursorRequestDeliveryTimePrefetch[k] = (dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width; - } else { - CursorRequestDeliveryTimePrefetch[k] = (dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; - } - } else { - CursorRequestDeliveryTime[k] = 0; - CursorRequestDeliveryTimePrefetch[k] = 0; - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u : NumberOfCursors = %u\n", __func__, k, NumberOfCursors[k]); - dml_print("DML::%s: k=%u : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); - dml_print("DML::%s: k=%u : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); -#endif - } -} // CalculatePixelDeliveryTimes - -static void CalculateMetaAndPTETimes( - dml_bool_t use_one_row_for_frame[], - dml_uint_t NumberOfActiveSurfaces, - dml_bool_t GPUVMEnable, - dml_uint_t MetaChunkSize, - dml_uint_t MinMetaChunkSizeBytes, - dml_uint_t HTotal[], - dml_float_t VRatio[], - dml_float_t VRatioChroma[], - dml_float_t DestinationLinesToRequestRowInVBlank[], - dml_float_t DestinationLinesToRequestRowInImmediateFlip[], - dml_bool_t DCCEnable[], - dml_float_t PixelClock[], - dml_uint_t BytePerPixelY[], - dml_uint_t BytePerPixelC[], - enum dml_rotation_angle SourceScan[], - dml_uint_t dpte_row_height[], - dml_uint_t dpte_row_height_chroma[], - dml_uint_t meta_row_width[], - dml_uint_t meta_row_width_chroma[], - dml_uint_t meta_row_height[], - dml_uint_t meta_row_height_chroma[], - dml_uint_t meta_req_width[], - dml_uint_t meta_req_width_chroma[], - dml_uint_t meta_req_height[], - dml_uint_t meta_req_height_chroma[], - dml_uint_t dpte_group_bytes[], - dml_uint_t PTERequestSizeY[], - dml_uint_t PTERequestSizeC[], - dml_uint_t PixelPTEReqWidthY[], - dml_uint_t PixelPTEReqHeightY[], - dml_uint_t PixelPTEReqWidthC[], - dml_uint_t PixelPTEReqHeightC[], - dml_uint_t dpte_row_width_luma_ub[], - dml_uint_t dpte_row_width_chroma_ub[], - - // Output - dml_float_t DST_Y_PER_PTE_ROW_NOM_L[], - dml_float_t DST_Y_PER_PTE_ROW_NOM_C[], - dml_float_t DST_Y_PER_META_ROW_NOM_L[], - dml_float_t DST_Y_PER_META_ROW_NOM_C[], - dml_float_t TimePerMetaChunkNominal[], - dml_float_t TimePerChromaMetaChunkNominal[], - dml_float_t TimePerMetaChunkVBlank[], - dml_float_t TimePerChromaMetaChunkVBlank[], - dml_float_t TimePerMetaChunkFlip[], - dml_float_t TimePerChromaMetaChunkFlip[], - dml_float_t 
time_per_pte_group_nom_luma[], - dml_float_t time_per_pte_group_vblank_luma[], - dml_float_t time_per_pte_group_flip_luma[], - dml_float_t time_per_pte_group_nom_chroma[], - dml_float_t time_per_pte_group_vblank_chroma[], - dml_float_t time_per_pte_group_flip_chroma[]) -{ - dml_uint_t meta_chunk_width; - dml_uint_t min_meta_chunk_width; - dml_uint_t meta_chunk_per_row_int; - dml_uint_t meta_row_remainder; - dml_uint_t meta_chunk_threshold; - dml_uint_t meta_chunks_per_row_ub; - dml_uint_t meta_chunk_width_chroma; - dml_uint_t min_meta_chunk_width_chroma; - dml_uint_t meta_chunk_per_row_int_chroma; - dml_uint_t meta_row_remainder_chroma; - dml_uint_t meta_chunk_threshold_chroma; - dml_uint_t meta_chunks_per_row_ub_chroma; - dml_uint_t dpte_group_width_luma; - dml_uint_t dpte_groups_per_row_luma_ub; - dml_uint_t dpte_group_width_chroma; - dml_uint_t dpte_groups_per_row_chroma_ub; - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; - if (BytePerPixelC[k] == 0) { - DST_Y_PER_PTE_ROW_NOM_C[k] = 0; - } else { - DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; - } - DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; - if (BytePerPixelC[k] == 0) { - DST_Y_PER_META_ROW_NOM_C[k] = 0; - } else { - DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; - } - } - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - if (DCCEnable[k] == true) { - meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; - min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; - meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; - meta_row_remainder = meta_row_width[k] % meta_chunk_width; - if (!dml_is_vertical_rotation(SourceScan[k])) { - meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; - } else { - meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; - } - if (meta_row_remainder <= meta_chunk_threshold) { - meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; - } else { - meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; - } - TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; - TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; - TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; - if (BytePerPixelC[k] == 0) { - TimePerChromaMetaChunkNominal[k] = 0; - TimePerChromaMetaChunkVBlank[k] = 0; - TimePerChromaMetaChunkFlip[k] = 0; - } else { - meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; - min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; - meta_chunk_per_row_int_chroma = (dml_uint_t)((dml_float_t) meta_row_width_chroma[k] / meta_chunk_width_chroma); - meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; - if (!dml_is_vertical_rotation(SourceScan[k])) { - meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; - } else { - meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; - } - if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { - meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; - } else { - meta_chunks_per_row_ub_chroma = 
meta_chunk_per_row_int_chroma + 2; - } - TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; - TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; - TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; - } - } else { - TimePerMetaChunkNominal[k] = 0; - TimePerMetaChunkVBlank[k] = 0; - TimePerMetaChunkFlip[k] = 0; - TimePerChromaMetaChunkNominal[k] = 0; - TimePerChromaMetaChunkVBlank[k] = 0; - TimePerChromaMetaChunkFlip[k] = 0; - } - } - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - if (GPUVMEnable == true) { - if (!dml_is_vertical_rotation(SourceScan[k])) { - dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqWidthY[k]); - } else { - dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqHeightY[k]); - } - - if (use_one_row_for_frame[k]) { - dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma / 2.0, 1.0)); - } else { - dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma, 1.0)); - } - - dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, use_one_row_for_frame[k]); - dml_print("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, dpte_group_bytes[k]); - dml_print("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, PTERequestSizeY[k]); - dml_print("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, PixelPTEReqWidthY[k]); - dml_print("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, PixelPTEReqHeightY[k]); - dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, dpte_row_width_luma_ub[k]); - dml_print("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); - dml_print("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); - - time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; - time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; - time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; - if (BytePerPixelC[k] == 0) { - time_per_pte_group_nom_chroma[k] = 0; - time_per_pte_group_vblank_chroma[k] = 0; - time_per_pte_group_flip_chroma[k] = 0; - } else { - if (!dml_is_vertical_rotation(SourceScan[k])) { - dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqWidthC[k]); - } else { - dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqHeightC[k]); - } - - if (use_one_row_for_frame[k]) { - dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma / 2.0, 1.0)); - } else { - dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma, 1.0)); - } - dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, 
dpte_row_width_chroma_ub[k]); - dml_print("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); - dml_print("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); - - time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; - time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; - time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; - } - } else { - time_per_pte_group_nom_luma[k] = 0; - time_per_pte_group_vblank_luma[k] = 0; - time_per_pte_group_flip_luma[k] = 0; - time_per_pte_group_nom_chroma[k] = 0; - time_per_pte_group_vblank_chroma[k] = 0; - time_per_pte_group_flip_chroma[k] = 0; - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInVBlank = %f\n", __func__, k, DestinationLinesToRequestRowInVBlank[k]); - dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]); - - dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]); - dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]); - dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_L[k]); - dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_C[k]); - dml_print("DML::%s: k=%u, TimePerMetaChunkNominal = %f\n", __func__, k, TimePerMetaChunkNominal[k]); - dml_print("DML::%s: k=%u, TimePerMetaChunkVBlank = %f\n", __func__, k, TimePerMetaChunkVBlank[k]); - dml_print("DML::%s: k=%u, TimePerMetaChunkFlip = %f\n", __func__, k, TimePerMetaChunkFlip[k]); - dml_print("DML::%s: k=%u, TimePerChromaMetaChunkNominal = %f\n", __func__, k, TimePerChromaMetaChunkNominal[k]); - dml_print("DML::%s: k=%u, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, TimePerChromaMetaChunkVBlank[k]); - dml_print("DML::%s: k=%u, TimePerChromaMetaChunkFlip = %f\n", __func__, k, TimePerChromaMetaChunkFlip[k]); - dml_print("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, time_per_pte_group_nom_luma[k]); - dml_print("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, time_per_pte_group_vblank_luma[k]); - dml_print("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, time_per_pte_group_flip_luma[k]); - dml_print("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, time_per_pte_group_nom_chroma[k]); - dml_print("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, time_per_pte_group_vblank_chroma[k]); - dml_print("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, time_per_pte_group_flip_chroma[k]); -#endif - } -} // CalculateMetaAndPTETimes - -static void CalculateVMGroupAndRequestTimes( - dml_uint_t NumberOfActiveSurfaces, - dml_bool_t GPUVMEnable, - dml_uint_t GPUVMMaxPageTableLevels, - dml_uint_t HTotal[], - dml_uint_t BytePerPixelC[], - dml_float_t DestinationLinesToRequestVMInVBlank[], - dml_float_t DestinationLinesToRequestVMInImmediateFlip[], - dml_bool_t DCCEnable[], - dml_float_t PixelClock[], - dml_uint_t dpte_row_width_luma_ub[], - dml_uint_t dpte_row_width_chroma_ub[], - dml_uint_t vm_group_bytes[], - dml_uint_t dpde0_bytes_per_frame_ub_l[], - dml_uint_t 
dpde0_bytes_per_frame_ub_c[], - dml_uint_t meta_pte_bytes_per_frame_ub_l[], - dml_uint_t meta_pte_bytes_per_frame_ub_c[], - - // Output - dml_float_t TimePerVMGroupVBlank[], - dml_float_t TimePerVMGroupFlip[], - dml_float_t TimePerVMRequestVBlank[], - dml_float_t TimePerVMRequestFlip[]) -{ - dml_uint_t num_group_per_lower_vm_stage; - dml_uint_t num_req_per_lower_vm_stage; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); - dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); -#endif - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, DCCEnable = %u\n", __func__, k, DCCEnable[k]); - dml_print("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); - dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); - dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]); - dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_l = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); - dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_c = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); -#endif - - if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { - if (DCCEnable[k] == false) { - if (BytePerPixelC[k] > 0) { - num_group_per_lower_vm_stage = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0) + - dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_c[k] / (dml_float_t) vm_group_bytes[k], 1.0)); - } else { - num_group_per_lower_vm_stage = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0)); - } - } else { - if (GPUVMMaxPageTableLevels == 1) { - if (BytePerPixelC[k] > 0) { - num_group_per_lower_vm_stage = (dml_uint_t)(dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0) + - dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0)); - } else { - num_group_per_lower_vm_stage = (dml_uint_t)(dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0)); - } - } else { - if (BytePerPixelC[k] > 0) { - num_group_per_lower_vm_stage = (dml_uint_t)(2.0 + dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) + - dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1) + - dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) + - dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1)); - } else { - num_group_per_lower_vm_stage = (dml_uint_t)(1.0 + dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) + - dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1)); - } - } - } - - if (DCCEnable[k] == false) { - if (BytePerPixelC[k] > 0) { - num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; - } else { - num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; - } - } else { - if (GPUVMMaxPageTableLevels == 1) { - if (BytePerPixelC[k] > 0) { - num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; - } else { - num_req_per_lower_vm_stage 
= meta_pte_bytes_per_frame_ub_l[k] / 64; - } - } else { - if (BytePerPixelC[k] > 0) { - num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; - } else { - num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; - } - } - } - - TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; - TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; - TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; - TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; - - if (GPUVMMaxPageTableLevels > 2) { - TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; - TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; - TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; - TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; - } - - } else { - TimePerVMGroupVBlank[k] = 0; - TimePerVMGroupFlip[k] = 0; - TimePerVMRequestVBlank[k] = 0; - TimePerVMRequestFlip[k] = 0; - } - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); - dml_print("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); - dml_print("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); - dml_print("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); -#endif - } -} // CalculateVMGroupAndRequestTimes - -static void CalculateStutterEfficiency(struct display_mode_lib_scratch_st *scratch, - struct CalculateStutterEfficiency_params_st *p) -{ - dml_float_t DETBufferingTimeY = 0; - dml_float_t SwathWidthYCriticalSurface = 0; - dml_float_t SwathHeightYCriticalSurface = 0; - dml_float_t VActiveTimeCriticalSurface = 0; - dml_float_t FrameTimeCriticalSurface = 0; - dml_uint_t BytePerPixelYCriticalSurface = 0; - dml_float_t LinesToFinishSwathTransferStutterCriticalSurface = 0; - dml_uint_t DETBufferSizeYCriticalSurface = 0; - dml_float_t MinTTUVBlankCriticalSurface = 0; - dml_uint_t BlockWidth256BytesYCriticalSurface = 0; - dml_bool_t SinglePlaneCriticalSurface = 0; - dml_bool_t SinglePipeCriticalSurface = 0; - dml_float_t TotalCompressedReadBandwidth = 0; - dml_float_t TotalRowReadBandwidth = 0; - dml_float_t AverageDCCCompressionRate = 0; - dml_float_t EffectiveCompressedBufferSize = 0; - dml_float_t PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = 0; - dml_float_t StutterBurstTime = 0; - dml_uint_t TotalActiveWriteback = 0; - dml_float_t LinesInDETY = 0; - dml_float_t LinesInDETYRoundedDownToSwath = 0; - dml_float_t MaximumEffectiveCompressionLuma = 0; - dml_float_t MaximumEffectiveCompressionChroma = 0; - dml_float_t TotalZeroSizeRequestReadBandwidth = 0; - dml_float_t TotalZeroSizeCompressedReadBandwidth = 0; - dml_float_t AverageDCCZeroSizeFraction = 0; - dml_float_t AverageZeroSizeCompressionRate = 0; - - dml_bool_t FoundCriticalSurface = false; - - dml_uint_t TotalNumberOfActiveOTG = 0; - dml_float_t SinglePixelClock = 0; - dml_uint_t SingleHTotal = 0; - dml_uint_t SingleVTotal = 0; - dml_bool_t SameTiming = true; - - dml_float_t LastStutterPeriod = 0.0; - dml_float_t LastZ8StutterPeriod = 0.0; - - dml_uint_t 
SwathSizeCriticalSurface; - dml_uint_t LastChunkOfSwathSize; - dml_uint_t MissingPartOfLastSwathOfDETSize; - - TotalZeroSizeRequestReadBandwidth = 0; - TotalZeroSizeCompressedReadBandwidth = 0; - TotalRowReadBandwidth = 0; - TotalCompressedReadBandwidth = 0; - - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) { - if (p->DCCEnable[k] == true) { - if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) { - MaximumEffectiveCompressionLuma = 2; - } else { - MaximumEffectiveCompressionLuma = 4; - } - TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / dml_min(p->NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); - dml_print("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->NetDCCRateLuma[k]); - dml_print("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, MaximumEffectiveCompressionLuma); -#endif - TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k]; - TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; - - if (p->ReadBandwidthSurfaceChroma[k] > 0) { - if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) { - MaximumEffectiveCompressionChroma = 2; - } else { - MaximumEffectiveCompressionChroma = 4; - } - TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / dml_min(p->NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); - dml_print("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->NetDCCRateChroma[k]); - dml_print("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, MaximumEffectiveCompressionChroma); -#endif - TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k]; - TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; - } - } else { - TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k]; - } - TotalRowReadBandwidth = TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]); - } - } - - AverageDCCCompressionRate = p->TotalDataReadBandwidth / TotalCompressedReadBandwidth; - AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); - dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, 
TotalCompressedReadBandwidth); - dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); - dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth); - dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); - dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); - dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); - dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); - dml_print("DML::%s: CompbufReservedSpace64B = %u\n", __func__, p->CompbufReservedSpace64B); - dml_print("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); - dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, p->CompressedBufferSizeInkByte); -#endif - if (AverageDCCZeroSizeFraction == 1) { - AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; - EffectiveCompressedBufferSize = (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * AverageZeroSizeCompressionRate; - } else if (AverageDCCZeroSizeFraction > 0) { - AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; - EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, - (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) + - dml_min(((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate, - ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); - dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); - dml_print("DML::%s: min 3 = %f\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate); - dml_print("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); -#endif - } else { - EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, - (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + - ((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); - dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); -#endif - } - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); - dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); - dml_print("DML::%s: 
EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); -#endif - - *p->StutterPeriod = 0; - - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) { - LinesInDETY = ((dml_float_t)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; - LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, p->SwathHeightY[k]); - DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((dml_float_t)p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k]; -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); - dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); - dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); - dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); - dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); - dml_print("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, LinesInDETY); - dml_print("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); - dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, p->HTotal[k]); - dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, p->PixelClock[k]); - dml_print("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->VRatio[k]); - dml_print("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); - dml_print("DML::%s: k=%u,PixelClock = %f\n", __func__, k, p->PixelClock[k]); -#endif - - if (!FoundCriticalSurface || DETBufferingTimeY < *p->StutterPeriod) { - dml_bool_t isInterlaceTiming = p->Interlace[k] && !p->ProgressiveToInterlaceUnitInOPP; - - FoundCriticalSurface = true; - *p->StutterPeriod = DETBufferingTimeY; - FrameTimeCriticalSurface = (isInterlaceTiming ? dml_floor((dml_float_t)p->VTotal[k]/2.0, 1.0) : p->VTotal[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k]; - VActiveTimeCriticalSurface = (isInterlaceTiming ? 
dml_floor((dml_float_t)p->VActive[k]/2.0, 1.0) : p->VActive[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k]; - BytePerPixelYCriticalSurface = p->BytePerPixelY[k]; - SwathWidthYCriticalSurface = p->SwathWidthY[k]; - SwathHeightYCriticalSurface = p->SwathHeightY[k]; - BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k]; - LinesToFinishSwathTransferStutterCriticalSurface = p->SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); - DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k]; - MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k]; - SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0); - SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1); - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface); - dml_print("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod); - dml_print("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, MinTTUVBlankCriticalSurface); - dml_print("DML::%s: k=%u, FrameTimeCriticalSurface = %f\n", __func__, k, FrameTimeCriticalSurface); - dml_print("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, VActiveTimeCriticalSurface); - dml_print("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, BytePerPixelYCriticalSurface); - dml_print("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, SwathWidthYCriticalSurface); - dml_print("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, SwathHeightYCriticalSurface); - dml_print("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, BlockWidth256BytesYCriticalSurface); - dml_print("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, SinglePlaneCriticalSurface); - dml_print("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, SinglePipeCriticalSurface); - dml_print("DML::%s: k=%u, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", __func__, k, LinesToFinishSwathTransferStutterCriticalSurface); -#endif - } - } - } - - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*p->StutterPeriod * p->TotalDataReadBandwidth, EffectiveCompressedBufferSize); -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, p->ROBBufferSizeInKByte); - dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); - dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth); - dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, p->ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); - dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); - dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); - dml_print("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); - dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); - dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); - dml_print("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); -#endif - - StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW + (*p->StutterPeriod * p->TotalDataReadBandwidth - 
PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64) + *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW; -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW); - dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth)); - dml_print("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); - dml_print("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW); - dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); -#endif - StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW); - - dml_print("DML::%s: Time to finish residue swath=%f\n", __func__, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW); - - TotalActiveWriteback = 0; - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->WritebackEnable[k]) { - TotalActiveWriteback = TotalActiveWriteback + 1; - } - } - - if (TotalActiveWriteback == 0) { -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); - dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); - dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); - dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); -#endif - *p->StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitTime + StutterBurstTime) / *p->StutterPeriod) * 100; - *p->Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitZ8Time + StutterBurstTime) / *p->StutterPeriod) * 100; - *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); - *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
(dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); - } else { - *p->StutterEfficiencyNotIncludingVBlank = 0.; - *p->Z8StutterEfficiencyNotIncludingVBlank = 0.; - *p->NumberOfStutterBurstsPerFrame = 0; - *p->Z8NumberOfStutterBurstsPerFrame = 0; - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface); - dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); - dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); - dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); - dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); -#endif - - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) { - if (p->BlendingAndTiming[k] == k) { - if (TotalNumberOfActiveOTG == 0) { - SinglePixelClock = p->PixelClock[k]; - SingleHTotal = p->HTotal[k]; - SingleVTotal = p->VTotal[k]; - } else if (SinglePixelClock != p->PixelClock[k] || SingleHTotal != p->HTotal[k] || SingleVTotal != p->VTotal[k]) { - SameTiming = false; - } - TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; - } - } - } - - if (*p->StutterEfficiencyNotIncludingVBlank > 0) { - LastStutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; - - if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && - LastStutterPeriod + MinTTUVBlankCriticalSurface > p->StutterEnterPlusExitWatermark) { - *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100; - } else { - *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank; - } - } else { - *p->StutterEfficiency = 0; - } - - if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) { - LastZ8StutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; - if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + MinTTUVBlankCriticalSurface > p->Z8StutterEnterPlusExitWatermark) { - *p->Z8StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100; - } else { - *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank; - } - } else { - *p->Z8StutterEfficiency = 0.; - } - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); - dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); - dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); - dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); - dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); - dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); - dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); - dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); -#endif - - SwathSizeCriticalSurface = 
(dml_uint_t)(BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface)); - LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024); - MissingPartOfLastSwathOfDETSize = (dml_uint_t)(dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - DETBufferSizeYCriticalSurface); - - *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && SinglePlaneCriticalSurface && SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) && - (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface); - dml_print("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, DETBufferSizeYCriticalSurface); - dml_print("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); - dml_print("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize); - dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize); - dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); -#endif -} // CalculateStutterEfficiency - -/// \CalculateSwathAndDETConfiguration -/// @brief Calculates swath width and different return buffers sizing (DET, CDB, etc.) -static void CalculateSwathAndDETConfiguration(struct display_mode_lib_scratch_st *scratch, - struct CalculateSwathAndDETConfiguration_params_st *p) -{ - dml_uint_t MaximumSwathHeightY[__DML_NUM_PLANES__]; - dml_uint_t MaximumSwathHeightC[__DML_NUM_PLANES__]; - dml_uint_t RoundedUpMaxSwathSizeBytesY[__DML_NUM_PLANES__]; - dml_uint_t RoundedUpMaxSwathSizeBytesC[__DML_NUM_PLANES__]; - dml_uint_t RoundedUpSwathSizeBytesY[__DML_NUM_PLANES__]; - dml_uint_t RoundedUpSwathSizeBytesC[__DML_NUM_PLANES__]; - dml_uint_t SwathWidthSingleDPP[__DML_NUM_PLANES__]; - dml_uint_t SwathWidthSingleDPPChroma[__DML_NUM_PLANES__]; - - dml_uint_t TotalActiveDPP = 0; - dml_bool_t NoChromaOrLinearSurfaces = true; - dml_uint_t SurfaceDoingUnboundedRequest = 0; - - dml_uint_t DETBufferSizeInKByteForSwathCalculation; - - const long TTUFIFODEPTH = 8; - const long MAXIMUMCOMPRESSION = 4; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - dml_print("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); - } -#endif - CalculateSwathWidth(p->ForceSingleDPP, - p->NumberOfActiveSurfaces, - p->SourcePixelFormat, - p->SourceScan, - p->ViewportStationary, - p->ViewportWidth, - p->ViewportHeight, - p->ViewportXStart, - p->ViewportYStart, - p->ViewportXStartC, - p->ViewportYStartC, - p->SurfaceWidthY, - p->SurfaceWidthC, - p->SurfaceHeightY, - p->SurfaceHeightC, - p->ODMMode, - p->BytePerPixY, - p->BytePerPixC, - p->Read256BytesBlockHeightY, - p->Read256BytesBlockHeightC, - p->Read256BytesBlockWidthY, - p->Read256BytesBlockWidthC, - p->BlendingAndTiming, - p->HActive, - p->HRatio, - p->DPPPerSurface, - - // Output - SwathWidthSingleDPP, - SwathWidthSingleDPPChroma, - p->SwathWidth, - p->SwathWidthChroma, - MaximumSwathHeightY, - MaximumSwathHeightC, - p->swath_width_luma_ub, - p->swath_width_chroma_ub); - - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - RoundedUpMaxSwathSizeBytesY[k] = 
(dml_uint_t)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]); - RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]); -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); - dml_print("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); - dml_print("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); - dml_print("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); - dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]); - dml_print("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); - dml_print("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); - dml_print("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); - dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]); -#endif - if (p->SourcePixelFormat[k] == dml_420_10) { - RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesY[k], 256)); - RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesC[k], 256)); - } - } - - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]); - if (p->DPPPerSurface[k] > 0) - SurfaceDoingUnboundedRequest = k; - if (p->SourcePixelFormat[k] == dml_420_8 || p->SourcePixelFormat[k] == dml_420_10 || - p->SourcePixelFormat[k] == dml_420_12 || p->SourcePixelFormat[k] == dml_rgbe_alpha - || p->SurfaceTiling[k] == dml_sw_linear) { - NoChromaOrLinearSurfaces = false; - } - } - - *p->UnboundedRequestEnabled = UnboundedRequest(p->UseUnboundedRequestingFinal, TotalActiveDPP, - NoChromaOrLinearSurfaces, p->Output[0]); - - CalculateDETBufferSize(p->DETSizeOverride, - p->UseMALLForPStateChange, - p->ForceSingleDPP, - p->NumberOfActiveSurfaces, - *p->UnboundedRequestEnabled, - p->nomDETInKByte, - p->MaxTotalDETInKByte, - p->ConfigReturnBufferSizeInKByte, - p->MinCompressedBufferSizeInKByte, - p->ConfigReturnBufferSegmentSizeInkByte, - p->CompressedBufferSegmentSizeInkByteFinal, - p->SourcePixelFormat, - p->ReadBandwidthLuma, - p->ReadBandwidthChroma, - RoundedUpMaxSwathSizeBytesY, - RoundedUpMaxSwathSizeBytesC, - p->DPPPerSurface, - - // Output - p->DETBufferSizeInKByte, // per hubp pipe - p->CompressedBufferSizeInkByte); - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); - dml_print("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); - dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); - dml_print("DML::%s: UseUnboundedRequestingFinal = %u\n", __func__, p->UseUnboundedRequestingFinal); - dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); - dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); -#endif - - *p->ViewportSizeSupport = true; - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - - DETBufferSizeInKByteForSwathCalculation = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe ? 
1024 : p->DETBufferSizeInKByte[k]); -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); -#endif - - if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - p->SwathHeightY[k] = MaximumSwathHeightY[k]; - p->SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k]; - RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k]; - } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; - p->SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2; - RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k]; - } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - p->SwathHeightY[k] = MaximumSwathHeightY[k]; - p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; - RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k]; - RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2; - } else { - p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; - p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; - RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2; - RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2; - } - - if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || - p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) { - *p->ViewportSizeSupport = false; - p->ViewportSizeSupportPerSurface[k] = false; - } else { - p->ViewportSizeSupportPerSurface[k] = true; - } - - if (p->SwathHeightC[k] == 0) { -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u All DET for plane0\n", __func__, k); -#endif - p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024; - p->DETBufferSizeC[k] = 0; - } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) { -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u Half DET for plane0, half for plane1\n", __func__, k); -#endif - p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; - p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; - } else { -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u 2/3 DET for plane0, 1/3 for plane1\n", __func__, k); -#endif - p->DETBufferSizeY[k] = (dml_uint_t)(dml_floor(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024)); - p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k]; - } - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); - dml_print("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); - dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]); - dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]); - dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); - dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, 
k, RoundedUpSwathSizeBytesC[k]); - dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); - dml_print("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); - dml_print("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); - dml_print("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); -#endif - - } - - *p->compbuf_reserved_space_64b = 2 * p->PixelChunkSizeInKByte * 1024 / 64; - if (*p->UnboundedRequestEnabled) { - *p->compbuf_reserved_space_64b = dml_max(*p->compbuf_reserved_space_64b, - (dml_float_t)(p->ROBBufferSizeInKByte * 1024/64) - - (dml_float_t)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / MAXIMUMCOMPRESSION/64)); - } - *p->compbuf_reserved_space_zs = 2 * p->PixelChunkSizeInKByte * 1024 / 256; -} // CalculateSwathAndDETConfiguration - -static void CalculateSwathWidth( - dml_bool_t ForceSingleDPP, - dml_uint_t NumberOfActiveSurfaces, - enum dml_source_format_class SourcePixelFormat[], - enum dml_rotation_angle SourceScan[], - dml_bool_t ViewportStationary[], - dml_uint_t ViewportWidth[], - dml_uint_t ViewportHeight[], - dml_uint_t ViewportXStart[], - dml_uint_t ViewportYStart[], - dml_uint_t ViewportXStartC[], - dml_uint_t ViewportYStartC[], - dml_uint_t SurfaceWidthY[], - dml_uint_t SurfaceWidthC[], - dml_uint_t SurfaceHeightY[], - dml_uint_t SurfaceHeightC[], - enum dml_odm_mode ODMMode[], - dml_uint_t BytePerPixY[], - dml_uint_t BytePerPixC[], - dml_uint_t Read256BytesBlockHeightY[], - dml_uint_t Read256BytesBlockHeightC[], - dml_uint_t Read256BytesBlockWidthY[], - dml_uint_t Read256BytesBlockWidthC[], - dml_uint_t BlendingAndTiming[], - dml_uint_t HActive[], - dml_float_t HRatio[], - dml_uint_t DPPPerSurface[], - - // Output - dml_uint_t SwathWidthSingleDPPY[], - dml_uint_t SwathWidthSingleDPPC[], - dml_uint_t SwathWidthY[], // per-pipe - dml_uint_t SwathWidthC[], // per-pipe - dml_uint_t MaximumSwathHeightY[], - dml_uint_t MaximumSwathHeightC[], - dml_uint_t swath_width_luma_ub[], // per-pipe - dml_uint_t swath_width_chroma_ub[]) // per-pipe -{ - enum dml_odm_mode MainSurfaceODMMode; - dml_uint_t surface_width_ub_l; - dml_uint_t surface_height_ub_l; - dml_uint_t surface_width_ub_c = 0; - dml_uint_t surface_height_ub_c = 0; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); - dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); -#endif - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - if (!dml_is_vertical_rotation(SourceScan[k])) { - SwathWidthSingleDPPY[k] = ViewportWidth[k]; - } else { - SwathWidthSingleDPPY[k] = ViewportHeight[k]; - } - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, ViewportWidth[k]); - dml_print("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, ViewportHeight[k]); - dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); -#endif - - MainSurfaceODMMode = ODMMode[k]; - for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) { - if (BlendingAndTiming[k] == j) { - MainSurfaceODMMode = ODMMode[j]; - } - } - - if (ForceSingleDPP) { - SwathWidthY[k] = SwathWidthSingleDPPY[k]; - } else { - if (MainSurfaceODMMode == dml_odm_mode_combine_4to1) { - SwathWidthY[k] = (dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k], true))); - } else if (MainSurfaceODMMode == dml_odm_mode_combine_2to1) { - SwathWidthY[k] = 
(dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k], true))); - } else if (DPPPerSurface[k] == 2) { - SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; - } else { - SwathWidthY[k] = SwathWidthSingleDPPY[k]; - } - } - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u HActive=%u\n", __func__, k, HActive[k]); - dml_print("DML::%s: k=%u HRatio=%f\n", __func__, k, HRatio[k]); - dml_print("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); - dml_print("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]); - dml_print("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); -#endif - - if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) { - SwathWidthC[k] = SwathWidthY[k] / 2; - SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; - } else { - SwathWidthC[k] = SwathWidthY[k]; - SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; - } - - if (ForceSingleDPP == true) { - SwathWidthY[k] = SwathWidthSingleDPPY[k]; - SwathWidthC[k] = SwathWidthSingleDPPC[k]; - } - - surface_width_ub_l = (dml_uint_t)dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); - surface_height_ub_l = (dml_uint_t)dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); - - if (!dml_is_vertical_rotation(SourceScan[k])) { - MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; - MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; - if (ViewportStationary[k] && DPPPerSurface[k] == 1) { - swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_floor(ViewportXStart[k] + SwathWidthY[k] + Read256BytesBlockWidthY[k] - 1, Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStart[k], Read256BytesBlockWidthY[k]))); - } else { - swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k])); - } - if (BytePerPixC[k] > 0) { - surface_width_ub_c = (dml_uint_t)dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); - if (ViewportStationary[k] && DPPPerSurface[k] == 1) { - swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_floor(ViewportXStartC[k] + SwathWidthC[k] + Read256BytesBlockWidthC[k] - 1, Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], Read256BytesBlockWidthC[k]))); - } else { - swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k])); - } - } else { - swath_width_chroma_ub[k] = 0; - } - } else { - MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; - MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; - - if (ViewportStationary[k] && DPPPerSurface[k] == 1) { - swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]))); - } else { - swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k])); - } - if (BytePerPixC[k] > 0) { - surface_height_ub_c = (dml_uint_t)dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); - if (ViewportStationary[k] && DPPPerSurface[k] == 1) { - swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_floor(ViewportYStartC[k] + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - 
dml_floor(ViewportYStartC[k], Read256BytesBlockHeightC[k]))); - } else { - swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k])); - } - } else { - swath_width_chroma_ub[k] = 0; - } - } - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); - dml_print("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l); - dml_print("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); - dml_print("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); - dml_print("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); - dml_print("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); - dml_print("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); - dml_print("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); - dml_print("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, ViewportStationary[k]); - dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); - dml_print("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); - dml_print("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); - dml_print("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]); - dml_print("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]); -#endif - - } -} // CalculateSwathWidth - -static noinline_for_stack dml_float_t CalculateExtraLatency( - dml_uint_t RoundTripPingLatencyCycles, - dml_uint_t ReorderingBytes, - dml_float_t DCFCLK, - dml_uint_t TotalNumberOfActiveDPP, - dml_uint_t PixelChunkSizeInKByte, - dml_uint_t TotalNumberOfDCCActiveDPP, - dml_uint_t MetaChunkSize, - dml_float_t ReturnBW, - dml_bool_t GPUVMEnable, - dml_bool_t HostVMEnable, - dml_uint_t NumberOfActiveSurfaces, - dml_uint_t NumberOfDPP[], - dml_uint_t dpte_group_bytes[], - dml_float_t HostVMInefficiencyFactor, - dml_uint_t HostVMMinPageSize, - dml_uint_t HostVMMaxNonCachedPageTableLevels) -{ - dml_float_t ExtraLatencyBytes; - dml_float_t ExtraLatency; - - ExtraLatencyBytes = CalculateExtraLatencyBytes( - ReorderingBytes, - TotalNumberOfActiveDPP, - PixelChunkSizeInKByte, - TotalNumberOfDCCActiveDPP, - MetaChunkSize, - GPUVMEnable, - HostVMEnable, - NumberOfActiveSurfaces, - NumberOfDPP, - dpte_group_bytes, - HostVMInefficiencyFactor, - HostVMMinPageSize, - HostVMMaxNonCachedPageTableLevels); - - ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); - dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); - dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); - dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); - dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); -#endif - - return ExtraLatency; -} // CalculateExtraLatency - -static dml_uint_t CalculateHostVMDynamicLevels( - dml_bool_t GPUVMEnable, - dml_bool_t HostVMEnable, - dml_uint_t HostVMMinPageSize, - dml_uint_t HostVMMaxNonCachedPageTableLevels) -{ - dml_uint_t HostVMDynamicLevels = 0; - - if (GPUVMEnable && HostVMEnable) { - if (HostVMMinPageSize < 2048) - HostVMDynamicLevels = 
HostVMMaxNonCachedPageTableLevels; - else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) - HostVMDynamicLevels = (dml_uint_t) dml_max(0, (dml_float_t) HostVMMaxNonCachedPageTableLevels - 1); - else - HostVMDynamicLevels = (dml_uint_t) dml_max(0, (dml_float_t) HostVMMaxNonCachedPageTableLevels - 2); - } else { - HostVMDynamicLevels = 0; - } - return HostVMDynamicLevels; -} - -static dml_uint_t CalculateExtraLatencyBytes(dml_uint_t ReorderingBytes, - dml_uint_t TotalNumberOfActiveDPP, - dml_uint_t PixelChunkSizeInKByte, - dml_uint_t TotalNumberOfDCCActiveDPP, - dml_uint_t MetaChunkSize, - dml_bool_t GPUVMEnable, - dml_bool_t HostVMEnable, - dml_uint_t NumberOfActiveSurfaces, - dml_uint_t NumberOfDPP[], - dml_uint_t dpte_group_bytes[], - dml_float_t HostVMInefficiencyFactor, - dml_uint_t HostVMMinPageSize, - dml_uint_t HostVMMaxNonCachedPageTableLevels) -{ - dml_uint_t HostVMDynamicLevels = CalculateHostVMDynamicLevels(GPUVMEnable, HostVMEnable, HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels); - dml_float_t ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; - - if (GPUVMEnable == true) { - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; - } - } - return (dml_uint_t)(ret); -} - -static dml_float_t CalculateUrgentLatency( - dml_float_t UrgentLatencyPixelDataOnly, - dml_float_t UrgentLatencyPixelMixedWithVMData, - dml_float_t UrgentLatencyVMDataOnly, - dml_bool_t DoUrgentLatencyAdjustment, - dml_float_t UrgentLatencyAdjustmentFabricClockComponent, - dml_float_t UrgentLatencyAdjustmentFabricClockReference, - dml_float_t FabricClock) -{ - dml_float_t ret; - - ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); - if (DoUrgentLatencyAdjustment == true) { - ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); - } - return ret; -} - -static dml_float_t RequiredDTBCLK( - dml_bool_t DSCEnable, - dml_float_t PixelClock, - enum dml_output_format_class OutputFormat, - dml_float_t OutputBpp, - dml_uint_t DSCSlices, - dml_uint_t HTotal, - dml_uint_t HActive, - dml_uint_t AudioRate, - dml_uint_t AudioLayout) -{ - if (DSCEnable != true) { - return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); - } else { - dml_float_t PixelWordRate = PixelClock / (OutputFormat == dml_444 ? 1 : 2); - dml_float_t HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); - dml_float_t HCBlank = 64 + 32 * dml_ceil(AudioRate * (AudioLayout == 1 ? 
1 : 0.25) * HTotal / (PixelClock * 1000), 1); - dml_float_t AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; - dml_float_t HActiveTribyteRate = PixelWordRate * HCActive / HActive; - return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; - } -} - -static void UseMinimumDCFCLK(struct display_mode_lib_scratch_st *scratch, struct UseMinimumDCFCLK_params_st *p) -{ - struct UseMinimumDCFCLK_locals_st *s = &scratch->UseMinimumDCFCLK_locals; - - s->NormalEfficiency = p->PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; - for (dml_uint_t j = 0; j < 2; ++j) { - - - s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = 0; - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = s->TotalMaxPrefetchFlipDPTERowBandwidth[j] + p->NoOfDPP[j][k] * p->DPTEBytesPerRow[j][k] / (15.75 * p->HTotal[k] / p->PixelClock[k]); - } - - for (dml_uint_t k = 0; k <= p->NumberOfActiveSurfaces - 1; ++k) { - s->NoOfDPPState[k] = p->NoOfDPP[j][k]; - } - - s->DPTEBandwidth = s->TotalMaxPrefetchFlipDPTERowBandwidth[j]; - - s->DCFCLKRequiredForAverageBandwidth = dml_max(p->ProjectedDCFCLKDeepSleep[j], s->DPTEBandwidth / s->NormalEfficiency / p->ReturnBusWidth); - - s->ExtraLatencyBytes = CalculateExtraLatencyBytes(p->ReorderingBytes, p->TotalNumberOfActiveDPP[j], p->PixelChunkSizeInKByte, p->TotalNumberOfDCCActiveDPP[j], - p->MetaChunkSize, p->GPUVMEnable, p->HostVMEnable, p->NumberOfActiveSurfaces, s->NoOfDPPState, p->dpte_group_bytes, - 1, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels); - s->ExtraLatencyCycles = p->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + s->ExtraLatencyBytes / s->NormalEfficiency / p->ReturnBusWidth; - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - dml_float_t DCFCLKCyclesRequiredInPrefetch; - dml_float_t PrefetchTime; - - s->PixelDCFCLKCyclesRequiredInPrefetch[k] = (p->PrefetchLinesY[j][k] * p->swath_width_luma_ub_all_states[j][k] * p->BytePerPixelY[k] + p->PrefetchLinesC[j][k] * p->swath_width_chroma_ub_all_states[j][k] * p->BytePerPixelC[k]) / s->NormalEfficiency / p->ReturnBusWidth; - DCFCLKCyclesRequiredInPrefetch = 2 * s->ExtraLatencyCycles / s->NoOfDPPState[k] + p->PDEAndMetaPTEBytesPerFrame[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth * (p->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * p->DPTEBytesPerRow[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth + 2 * p->MetaRowBytes[j][k] / s->NormalEfficiency / p->ReturnBusWidth + s->PixelDCFCLKCyclesRequiredInPrefetch[k]; - s->PrefetchPixelLinesTime[k] = dml_max(p->PrefetchLinesY[j][k], p->PrefetchLinesC[j][k]) * p->HTotal[k] / p->PixelClock[k]; - s->DynamicMetadataVMExtraLatency[k] = (p->GPUVMEnable == true && p->DynamicMetadataEnable[k] == true && p->DynamicMetadataVMEnabled == true) ? p->UrgLatency * p->GPUVMMaxPageTableLevels * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; - - s->MinimumTWait = CalculateTWait(p->MaxPrefetchMode, - p->UseMALLForPStateChange[k], - p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - p->DRRDisplay[k], - p->DRAMClockChangeLatencyFinal, - p->FCLKChangeLatency, - p->UrgLatency, - p->SREnterPlusExitTime); - - PrefetchTime = (p->MaximumVStartup[j][k] - 1) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - p->UrgLatency * ((p->GPUVMMaxPageTableLevels <= 2 ? p->GPUVMMaxPageTableLevels : p->GPUVMMaxPageTableLevels - 2) * (p->HostVMEnable == true ? 
p->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - s->DynamicMetadataVMExtraLatency[k]; - - if (PrefetchTime > 0) { - dml_float_t ExpectedVRatioPrefetch; - ExpectedVRatioPrefetch = s->PrefetchPixelLinesTime[k] / (PrefetchTime * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); - s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->NoOfDPPState[k] * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / s->PrefetchPixelLinesTime[k] * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4); - if (p->HostVMEnable == true || p->ImmediateFlipRequirement == true) { - s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->DCFCLKRequiredForPeakBandwidthPerSurface[k] + s->NoOfDPPState[k] * s->DPTEBandwidth / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth; - } - } else { - s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState; - } - if (p->DynamicMetadataEnable[k] == true) { - dml_float_t TSetupPipe; - dml_float_t TdmbfPipe; - dml_float_t TdmsksPipe; - dml_float_t TdmecPipe; - dml_float_t AllowedTimeForUrgentExtraLatency; - - CalculateVUpdateAndDynamicMetadataParameters( - p->MaxInterDCNTileRepeaters, - p->RequiredDPPCLKPerSurface[j][k], - p->RequiredDISPCLK[j], - p->ProjectedDCFCLKDeepSleep[j], - p->PixelClock[k], - p->HTotal[k], - p->VTotal[k] - p->VActive[k], - p->DynamicMetadataTransmittedBytes[k], - p->DynamicMetadataLinesBeforeActiveRequired[k], - p->Interlace[k], - p->ProgressiveToInterlaceUnitInOPP, - - // Output - &TSetupPipe, - &TdmbfPipe, - &TdmecPipe, - &TdmsksPipe, - &s->dummy1, - &s->dummy2, - &s->dummy3); - - AllowedTimeForUrgentExtraLatency = p->MaximumVStartup[j][k] * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe - TdmsksPipe - s->DynamicMetadataVMExtraLatency[k]; - if (AllowedTimeForUrgentExtraLatency > 0) { - s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = dml_max(s->DCFCLKRequiredForPeakBandwidthPerSurface[k], s->ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); - } else { - s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState; - } - } - } - s->DCFCLKRequiredForPeakBandwidth = 0; - for (dml_uint_t k = 0; k <= p->NumberOfActiveSurfaces - 1; ++k) { - s->DCFCLKRequiredForPeakBandwidth = s->DCFCLKRequiredForPeakBandwidth + s->DCFCLKRequiredForPeakBandwidthPerSurface[k]; - } - s->MinimumTvmPlus2Tr0 = p->UrgLatency * (p->GPUVMEnable == true ? (p->HostVMEnable == true ? 
(p->GPUVMMaxPageTableLevels + 2) * (p->HostVMMaxNonCachedPageTableLevels + 1) - 1 : p->GPUVMMaxPageTableLevels + 1) : 0); - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - dml_float_t MaximumTvmPlus2Tr0PlusTsw; - MaximumTvmPlus2Tr0PlusTsw = (p->MaximumVStartup[j][k] - 2) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - s->DynamicMetadataVMExtraLatency[k]; - if (MaximumTvmPlus2Tr0PlusTsw <= s->MinimumTvmPlus2Tr0 + s->PrefetchPixelLinesTime[k] / 4) { - s->DCFCLKRequiredForPeakBandwidth = p->DCFCLKPerState; - } else { - s->DCFCLKRequiredForPeakBandwidth = dml_max3(s->DCFCLKRequiredForPeakBandwidth, - 2 * s->ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0 - s->PrefetchPixelLinesTime[k] / 4), - (2 * s->ExtraLatencyCycles + s->PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0)); - } - } - p->DCFCLKState[j] = dml_min(p->DCFCLKPerState, 1.05 * dml_max(s->DCFCLKRequiredForAverageBandwidth, s->DCFCLKRequiredForPeakBandwidth)); - } -} - - -static dml_bool_t UnboundedRequest(enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal, - dml_uint_t TotalNumberOfActiveDPP, - dml_bool_t NoChromaOrLinear, - enum dml_output_encoder_class Output) -{ - dml_bool_t ret_val = false; - - ret_val = (UseUnboundedRequestingFinal != dml_unbounded_requesting_disable - && TotalNumberOfActiveDPP == 1 && NoChromaOrLinear); - if (UseUnboundedRequestingFinal == dml_unbounded_requesting_edp_only && Output != dml_edp) { - ret_val = false; - } - return (ret_val); -} - -static void CalculateSurfaceSizeInMall( - dml_uint_t NumberOfActiveSurfaces, - dml_uint_t MALLAllocatedForDCN, - enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[], - dml_bool_t DCCEnable[], - dml_bool_t ViewportStationary[], - dml_uint_t ViewportXStartY[], - dml_uint_t ViewportYStartY[], - dml_uint_t ViewportXStartC[], - dml_uint_t ViewportYStartC[], - dml_uint_t ViewportWidthY[], - dml_uint_t ViewportHeightY[], - dml_uint_t BytesPerPixelY[], - dml_uint_t ViewportWidthC[], - dml_uint_t ViewportHeightC[], - dml_uint_t BytesPerPixelC[], - dml_uint_t SurfaceWidthY[], - dml_uint_t SurfaceWidthC[], - dml_uint_t SurfaceHeightY[], - dml_uint_t SurfaceHeightC[], - dml_uint_t Read256BytesBlockWidthY[], - dml_uint_t Read256BytesBlockWidthC[], - dml_uint_t Read256BytesBlockHeightY[], - dml_uint_t Read256BytesBlockHeightC[], - dml_uint_t ReadBlockWidthY[], - dml_uint_t ReadBlockWidthC[], - dml_uint_t ReadBlockHeightY[], - dml_uint_t ReadBlockHeightC[], - - // Output - dml_uint_t SurfaceSizeInMALL[], - dml_bool_t *ExceededMALLSize) -{ - dml_uint_t TotalSurfaceSizeInMALL = 0; - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - if (ViewportStationary[k]) { - SurfaceSizeInMALL[k] = (dml_uint_t)(dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], ReadBlockWidthY[k])) * - dml_min(dml_ceil(SurfaceHeightY[k], ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * - BytesPerPixelY[k]); - - if (ReadBlockWidthC[k] > 0) { - SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + - dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * - 
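/*
 * Illustrative sketch: the unbounded-request decision made by the deleted
 * UnboundedRequest() above.  Unbounded requesting is only considered when a
 * single DPP is active on a plane with no chroma and no linear surface, and
 * the "edp only" policy further restricts it to eDP outputs.  The enum and
 * parameter names here are simplified stand-ins for the DML types.
 */
#include <stdbool.h>

enum unbounded_policy { UNBOUNDED_DISABLE, UNBOUNDED_ENABLE, UNBOUNDED_EDP_ONLY };

static bool use_unbounded_requesting(enum unbounded_policy policy,
				     unsigned int active_dpp_count,
				     bool no_chroma_or_linear,
				     bool output_is_edp)
{
	bool ret = policy != UNBOUNDED_DISABLE &&
		   active_dpp_count == 1 && no_chroma_or_linear;

	if (policy == UNBOUNDED_EDP_ONLY && !output_is_edp)
		ret = false;
	return ret;
}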
dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * BytesPerPixelC[k]); - } - if (DCCEnable[k] == true) { - SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + - dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) * - dml_min(dml_ceil(SurfaceHeightY[k], 8 * Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256); - if (Read256BytesBlockWidthC[k] > 0) { - SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + - dml_min(dml_ceil(SurfaceWidthC[k], 8 * Read256BytesBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1, 8 * Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], 8 * Read256BytesBlockWidthC[k])) * - dml_min(dml_ceil(SurfaceHeightC[k], 8 * Read256BytesBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1, 8 * Read256BytesBlockHeightC[k]) - dml_floor(ViewportYStartC[k], 8 * Read256BytesBlockHeightC[k])) * BytesPerPixelC[k] / 256); - } - } - } else { - SurfaceSizeInMALL[k] = (dml_uint_t)(dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]); - if (ReadBlockWidthC[k] > 0) { - SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + - dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * - dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]); - } - if (DCCEnable[k] == true) { - SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + - dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1), 8 * Read256BytesBlockWidthY[k]) * - dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1), 8 * Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256); - - if (Read256BytesBlockWidthC[k] > 0) { - SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + - dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1), 8 * Read256BytesBlockWidthC[k]) * - dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1), 8 * Read256BytesBlockHeightC[k]) * BytesPerPixelC[k] / 256); - } - } - } - } - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - if (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable) - TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; - } - *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024); -} // CalculateSurfaceSizeInMall - -static void CalculateDETBufferSize( - dml_uint_t DETSizeOverride[], - enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], - dml_bool_t ForceSingleDPP, - dml_uint_t NumberOfActiveSurfaces, - dml_bool_t UnboundedRequestEnabled, - dml_uint_t nomDETInKByte, - dml_uint_t MaxTotalDETInKByte, - 
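/*
 * Illustrative sketch: the final capacity test in the deleted
 * CalculateSurfaceSizeInMall() above.  Only surfaces whose static-screen
 * policy is "enable" count against the MALL budget, and the budget is
 * given in MiB while the per-surface sizes are in bytes.  Names are
 * simplified stand-ins for the DML types.
 */
#include <stdbool.h>

static bool mall_size_exceeded(unsigned int num_surfaces,
			       const bool use_mall_static_screen[],
			       const unsigned int surface_size_bytes[],
			       unsigned int mall_allocated_mib)
{
	unsigned long long total = 0;

	for (unsigned int k = 0; k < num_surfaces; ++k)
		if (use_mall_static_screen[k])
			total += surface_size_bytes[k];

	return total > (unsigned long long)mall_allocated_mib * 1024 * 1024;
}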
dml_uint_t ConfigReturnBufferSizeInKByte, - dml_uint_t MinCompressedBufferSizeInKByte, - dml_uint_t ConfigReturnBufferSegmentSizeInkByte, - dml_uint_t CompressedBufferSegmentSizeInkByteFinal, - enum dml_source_format_class SourcePixelFormat[], - dml_float_t ReadBandwidthLuma[], - dml_float_t ReadBandwidthChroma[], - dml_uint_t RoundedUpMaxSwathSizeBytesY[], - dml_uint_t RoundedUpMaxSwathSizeBytesC[], - dml_uint_t DPPPerSurface[], - // Output - dml_uint_t DETBufferSizeInKByte[], - dml_uint_t *CompressedBufferSizeInkByte) -{ - dml_uint_t DETBufferSizePoolInKByte; - dml_uint_t NextDETBufferPieceInKByte; - dml_bool_t DETPieceAssignedToThisSurfaceAlready[__DML_NUM_PLANES__]; - dml_bool_t NextPotentialSurfaceToAssignDETPieceFound; - dml_uint_t NextSurfaceToAssignDETPiece; - dml_float_t TotalBandwidth; - dml_float_t BandwidthOfSurfacesNotAssignedDETPiece; - dml_uint_t max_minDET; - dml_uint_t minDET; - dml_uint_t minDET_pipe; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); - dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); - dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); - dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled); - dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte); - dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); - dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte); - dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %u\n", __func__, CompressedBufferSegmentSizeInkByteFinal); -#endif - - // Note: Will use default det size if that fits 2 swaths - if (UnboundedRequestEnabled) { - if (DETSizeOverride[0] > 0) { - DETBufferSizeInKByte[0] = DETSizeOverride[0]; - } else { - DETBufferSizeInKByte[0] = (dml_uint_t) dml_max(128.0, dml_ceil(2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[0] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)); - } - *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; - } else { - DETBufferSizePoolInKByte = MaxTotalDETInKByte; - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - DETBufferSizeInKByte[k] = 0; - if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) { - max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte; - } else { - max_minDET = nomDETInKByte; - } - minDET = 128; - minDET_pipe = 0; - - // add DET resource until can hold 2 full swaths - while (minDET <= max_minDET && minDET_pipe == 0) { - if (2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) - minDET_pipe = minDET; - minDET = minDET + ConfigReturnBufferSegmentSizeInkByte; - } - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u minDET = %u\n", __func__, k, minDET); - dml_print("DML::%s: k=%u max_minDET = %u\n", __func__, k, max_minDET); - dml_print("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, minDET_pipe); - dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]); - dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]); -#endif - - if (minDET_pipe == 0) { - minDET_pipe = (dml_uint_t)(dml_max(128, dml_ceil(((dml_float_t)RoundedUpMaxSwathSizeBytesY[k] + 
(dml_float_t)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte))); -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, minDET_pipe); -#endif - } - - if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) { - DETBufferSizeInKByte[k] = 0; - } else if (DETSizeOverride[k] > 0) { - DETBufferSizeInKByte[k] = DETSizeOverride[k]; - DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; - } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { - DETBufferSizeInKByte[k] = minDET_pipe; - DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe; - } - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); - dml_print("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, DETSizeOverride[k]); - dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); - dml_print("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, DETBufferSizePoolInKByte); -#endif - } - - TotalBandwidth = 0; - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) - TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); - } - dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); -#endif - dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); - BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - - if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) { - DETPieceAssignedToThisSurfaceAlready[k] = true; - } else if (DETSizeOverride[k] > 0 || (((dml_float_t) (ForceSingleDPP ? 
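/*
 * Illustrative sketch: the minimum-DET search in the deleted
 * CalculateDETBufferSize() above.  A pipe's DET grows in
 * return-buffer-segment increments from a 128 KiB floor until it can hold
 * two full swaths; if no candidate up to max_min_det_kb fits, the code
 * above falls back to one swath pair (each plane then takes half the DET).
 * Parameter names are simplified stand-ins.
 */
#include <math.h>

static unsigned int min_det_for_two_swaths_kb(unsigned int swath_y_bytes,
					      unsigned int swath_c_bytes,
					      unsigned int segment_kb,
					      unsigned int max_min_det_kb)
{
	double two_swaths_kb = 2.0 * (swath_y_bytes + swath_c_bytes) / 1024.0;
	unsigned int min_det = 128;

	while (min_det <= max_min_det_kb) {
		if (two_swaths_kb <= min_det)
			return min_det;	/* smallest candidate that fits */
		min_det += segment_kb;
	}
	/* nothing fit: size for one swath pair, 128 KiB minimum */
	return (unsigned int)fmax(128.0,
		ceil((swath_y_bytes + swath_c_bytes) / 1024.0 / segment_kb) * segment_kb);
}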
1 : DPPPerSurface[k]) * (dml_float_t) DETBufferSizeInKByte[k] / (dml_float_t) MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { - DETPieceAssignedToThisSurfaceAlready[k] = true; - BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; - } else { - DETPieceAssignedToThisSurfaceAlready[k] = false; - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); - dml_print("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, BandwidthOfSurfacesNotAssignedDETPiece); -#endif - } - - for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) { - NextPotentialSurfaceToAssignDETPieceFound = false; - NextSurfaceToAssignDETPiece = 0; - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]); - dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]); - dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); - dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); - dml_print("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, NextSurfaceToAssignDETPiece); -#endif - if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound || - ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { - NextSurfaceToAssignDETPiece = k; - NextPotentialSurfaceToAssignDETPieceFound = true; - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); - dml_print("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); -#endif - } - - if (NextPotentialSurfaceToAssignDETPieceFound) { - // Note: To show the banker's rounding behavior in VBA and also the fact that the DET buffer size varies due to precision issue - // - //dml_float_t tmp1 = ((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece / - // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); - //dml_float_t tmp2 = dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece / - // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); - // - //dml_print("DML::%s: j=%u, tmp1 = %f\n", __func__, j, tmp1); - //dml_print("DML::%s: j=%u, tmp2 = %f\n", __func__, j, tmp2); - - NextDETBufferPieceInKByte = (dml_uint_t)(dml_min( - dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece / - ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte), true) - * (ForceSingleDPP ? 
1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte, - dml_floor((dml_float_t) DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))); - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, DETBufferSizePoolInKByte); - dml_print("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, NextSurfaceToAssignDETPiece); - dml_print("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); - dml_print("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); - dml_print("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); - dml_print("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, NextDETBufferPieceInKByte); - dml_print("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); -#endif - - DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] + NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); -#ifdef __DML_VBA_DEBUG__ - dml_print("to %u\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); -#endif - - DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; - DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; - BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); - } - } - *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; - } - *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / ConfigReturnBufferSegmentSizeInkByte; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); - dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte); - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); - } -#endif -} // CalculateDETBufferSize - - -/// @brief Calculate the bound for return buffer sizing -static void CalculateMaxDETAndMinCompressedBufferSize( - dml_uint_t ConfigReturnBufferSizeInKByte, - dml_uint_t ConfigReturnBufferSegmentSizeInKByte, - dml_uint_t ROBBufferSizeInKByte, - dml_uint_t MaxNumDPP, - dml_bool_t nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size - dml_uint_t nomDETInKByteOverrideValue, // VBA_DELTA - - // Output - dml_uint_t *MaxTotalDETInKByte, - dml_uint_t *nomDETInKByte, - dml_uint_t *MinCompressedBufferSizeInKByte) -{ - *MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte; - *nomDETInKByte = (dml_uint_t)(dml_floor((dml_float_t) *MaxTotalDETInKByte / (dml_float_t) MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte)); - *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); - 
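/*
 * Illustrative sketch: how the deleted CalculateDETBufferSize() above carves
 * the remaining DET pool for the next surface.  The surface's bandwidth
 * share of the pool is rounded to a multiple of (DPP count x segment size),
 * then clamped so it never exceeds the pool itself rounded down to the same
 * granularity.  The original notes banker's rounding via dml_round(); plain
 * round() is used here for illustration.  Names are simplified stand-ins.
 */
#include <math.h>

static unsigned int next_det_piece_kb(unsigned int pool_kb,
				      double surface_bw, double unassigned_bw,
				      unsigned int dpp_per_surface,
				      unsigned int segment_kb)
{
	double gran = (double)dpp_per_surface * segment_kb;	/* allocation unit */
	double share = (double)pool_kb * surface_bw / unassigned_bw;
	double rounded = round(share / gran) * gran;		/* nearest unit */
	double capped = floor(pool_kb / gran) * gran;		/* what is left */

	return (unsigned int)fmin(rounded, capped);
}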
dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); - dml_print("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); - dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); - dml_print("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); - dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); -#endif - - if (nomDETInKByteOverrideEnable) { - *nomDETInKByte = nomDETInKByteOverrideValue; - dml_print("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); - } -} // CalculateMaxDETAndMinCompressedBufferSize - -/// @brief Calculate all the RQ request attributes, like row height and # swath -static void CalculateVMRowAndSwath(struct display_mode_lib_scratch_st *scratch, - struct CalculateVMRowAndSwath_params_st *p) -{ - struct CalculateVMRowAndSwath_locals_st *s = &scratch->CalculateVMRowAndSwath_locals; - - s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->GPUVMEnable, p->HostVMEnable, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels); - - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->HostVMEnable == true) { - p->vm_group_bytes[k] = 512; - p->dpte_group_bytes[k] = 512; - } else if (p->GPUVMEnable == true) { - p->vm_group_bytes[k] = 2048; - if (p->GPUVMMinPageSizeKBytes[k] >= 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan)) { - p->dpte_group_bytes[k] = 512; - } else { - p->dpte_group_bytes[k] = 2048; - } - } else { - p->vm_group_bytes[k] = 0; - p->dpte_group_bytes[k] = 0; - } - - if (p->myPipe[k].SourcePixelFormat == dml_420_8 || p->myPipe[k].SourcePixelFormat == dml_420_10 || - p->myPipe[k].SourcePixelFormat == dml_420_12 || p->myPipe[k].SourcePixelFormat == dml_rgbe_alpha) { - if ((p->myPipe[k].SourcePixelFormat == dml_420_10 || p->myPipe[k].SourcePixelFormat == dml_420_12) && !dml_is_vertical_rotation(p->myPipe[k].SourceScan)) { - s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2; - s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k]; - } else { - s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma; - s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma; - } - - s->PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( - p->myPipe[k].ViewportStationary, - p->myPipe[k].DCCEnable, - p->myPipe[k].DPPPerSurface, - p->myPipe[k].BlockHeight256BytesC, - p->myPipe[k].BlockWidth256BytesC, - p->myPipe[k].SourcePixelFormat, - p->myPipe[k].SurfaceTiling, - p->myPipe[k].BytePerPixelC, - p->myPipe[k].SourceScan, - p->SwathWidthC[k], - p->myPipe[k].ViewportHeightChroma, - p->myPipe[k].ViewportXStartC, - p->myPipe[k].ViewportYStartC, - p->GPUVMEnable, - p->GPUVMMaxPageTableLevels, - p->GPUVMMinPageSizeKBytes[k], - s->PTEBufferSizeInRequestsForChroma[k], - p->myPipe[k].PitchC, - p->myPipe[k].DCCMetaPitchC, - p->myPipe[k].BlockWidthC, - p->myPipe[k].BlockHeightC, - - // Output - &s->MetaRowByteC[k], - &s->PixelPTEBytesPerRowC[k], - &s->PixelPTEBytesPerRowStorageC[k], - &p->dpte_row_width_chroma_ub[k], - &p->dpte_row_height_chroma[k], - &p->dpte_row_height_linear_chroma[k], - &s->PixelPTEBytesPerRowC_one_row_per_frame[k], - &s->dpte_row_width_chroma_ub_one_row_per_frame[k], - &s->dpte_row_height_chroma_one_row_per_frame[k], - &p->meta_req_width_chroma[k], - &p->meta_req_height_chroma[k], - &p->meta_row_width_chroma[k], - &p->meta_row_height_chroma[k], - &p->PixelPTEReqWidthC[k], - &p->PixelPTEReqHeightC[k], - 
&p->PTERequestSizeC[k], - &p->dpde0_bytes_per_frame_ub_c[k], - &p->meta_pte_bytes_per_frame_ub_c[k]); - - p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines ( - p->myPipe[k].VRatioChroma, - p->myPipe[k].VTapsChroma, - p->myPipe[k].InterlaceEnable, - p->myPipe[k].ProgressiveToInterlaceUnitInOPP, - p->myPipe[k].SwathHeightC, - p->myPipe[k].SourceScan, - p->myPipe[k].ViewportStationary, - p->SwathWidthC[k], - p->myPipe[k].ViewportHeightChroma, - p->myPipe[k].ViewportXStartC, - p->myPipe[k].ViewportYStartC, - - // Output - &p->VInitPreFillC[k], - &p->MaxNumSwathC[k]); - } else { - s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma; - s->PTEBufferSizeInRequestsForChroma[k] = 0; - s->PixelPTEBytesPerRowC[k] = 0; - s->PixelPTEBytesPerRowStorageC[k] = 0; - s->PDEAndMetaPTEBytesFrameC = 0; - s->MetaRowByteC[k] = 0; - p->MaxNumSwathC[k] = 0; - p->PrefetchSourceLinesC[k] = 0; - s->dpte_row_height_chroma_one_row_per_frame[k] = 0; - s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; - s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; - } - - s->PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( - p->myPipe[k].ViewportStationary, - p->myPipe[k].DCCEnable, - p->myPipe[k].DPPPerSurface, - p->myPipe[k].BlockHeight256BytesY, - p->myPipe[k].BlockWidth256BytesY, - p->myPipe[k].SourcePixelFormat, - p->myPipe[k].SurfaceTiling, - p->myPipe[k].BytePerPixelY, - p->myPipe[k].SourceScan, - p->SwathWidthY[k], - p->myPipe[k].ViewportHeight, - p->myPipe[k].ViewportXStart, - p->myPipe[k].ViewportYStart, - p->GPUVMEnable, - p->GPUVMMaxPageTableLevels, - p->GPUVMMinPageSizeKBytes[k], - s->PTEBufferSizeInRequestsForLuma[k], - p->myPipe[k].PitchY, - p->myPipe[k].DCCMetaPitchY, - p->myPipe[k].BlockWidthY, - p->myPipe[k].BlockHeightY, - - // Output - &s->MetaRowByteY[k], - &s->PixelPTEBytesPerRowY[k], - &s->PixelPTEBytesPerRowStorageY[k], - &p->dpte_row_width_luma_ub[k], - &p->dpte_row_height_luma[k], - &p->dpte_row_height_linear_luma[k], - &s->PixelPTEBytesPerRowY_one_row_per_frame[k], - &s->dpte_row_width_luma_ub_one_row_per_frame[k], - &s->dpte_row_height_luma_one_row_per_frame[k], - &p->meta_req_width[k], - &p->meta_req_height[k], - &p->meta_row_width[k], - &p->meta_row_height[k], - &p->PixelPTEReqWidthY[k], - &p->PixelPTEReqHeightY[k], - &p->PTERequestSizeY[k], - &p->dpde0_bytes_per_frame_ub_l[k], - &p->meta_pte_bytes_per_frame_ub_l[k]); - - p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( - p->myPipe[k].VRatio, - p->myPipe[k].VTaps, - p->myPipe[k].InterlaceEnable, - p->myPipe[k].ProgressiveToInterlaceUnitInOPP, - p->myPipe[k].SwathHeightY, - p->myPipe[k].SourceScan, - p->myPipe[k].ViewportStationary, - p->SwathWidthY[k], - p->myPipe[k].ViewportHeight, - p->myPipe[k].ViewportXStart, - p->myPipe[k].ViewportYStart, - - // Output - &p->VInitPreFillY[k], - &p->MaxNumSwathY[k]); - - p->PDEAndMetaPTEBytesFrame[k] = (s->PDEAndMetaPTEBytesFrameY + s->PDEAndMetaPTEBytesFrameC) * (1 + 8 * s->HostVMDynamicLevels); - p->MetaRowByte[k] = s->MetaRowByteY[k] + s->MetaRowByteC[k]; - - if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) { - p->PTEBufferSizeNotExceeded[k] = true; - } else { - p->PTEBufferSizeNotExceeded[k] = false; -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]); - dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, 
s->PixelPTEBytesPerRowC[k]); - dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); - dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); - dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); - dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); - dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); -#endif - } - s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] && - s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]); - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrame = %u\n", __func__, k, p->PDEAndMetaPTEBytesFrame[k]); - dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameY = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameY); - dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameC = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameC); - dml_print("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); - dml_print("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); - dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); - dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); -#endif - } - - CalculateMALLUseForStaticScreen( - p->NumberOfActiveSurfaces, - p->MALLAllocatedForDCN, - p->UseMALLForStaticScreen, // mode - p->SurfaceSizeInMALL, - s->one_row_per_frame_fits_in_buffer, - // Output - p->UsesMALLForStaticScreen); // boolen - - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->PTEBufferModeOverrideEn[k] == 1) { - p->PTE_BUFFER_MODE[k] = p->PTEBufferModeOverrideVal[k]; - } - p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) || - (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64); - p->BIGK_FRAGMENT_SIZE[k] = (dml_uint_t)(dml_log2(p->GPUVMMinPageSizeKBytes[k] * 1024) - 12); - } - - for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); - dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, p->UsesMALLForStaticScreen[k]); -#endif - p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) || - (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan)); - - p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame); - - if (p->use_one_row_for_frame[k]) { - p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k]; - p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k]; - s->PixelPTEBytesPerRowY[k] = 
s->PixelPTEBytesPerRowY_one_row_per_frame[k]; - p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k]; - p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k]; - s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k]; - p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k]; - } - - if (p->MetaRowByte[k] <= p->DCCMetaBufferSizeBytes) { - p->DCCMetaBufferSizeNotExceeded[k] = true; - } else { - p->DCCMetaBufferSizeNotExceeded[k] = false; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, MetaRowByte = %u\n", __func__, k, p->MetaRowByte[k]); - dml_print("DML::%s: k=%u, DCCMetaBufferSizeBytes = %u\n", __func__, k, p->DCCMetaBufferSizeBytes); - dml_print("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); -#endif - } - s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels); - s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels); - p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k]; - if (p->use_one_row_for_frame[k]) - p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2; - - CalculateRowBandwidth( - p->GPUVMEnable, - p->myPipe[k].SourcePixelFormat, - p->myPipe[k].VRatio, - p->myPipe[k].VRatioChroma, - p->myPipe[k].DCCEnable, - p->myPipe[k].HTotal / p->myPipe[k].PixelClock, - s->MetaRowByteY[k], - s->MetaRowByteC[k], - p->meta_row_height[k], - p->meta_row_height_chroma[k], - s->PixelPTEBytesPerRowY[k], - s->PixelPTEBytesPerRowC[k], - p->dpte_row_height_luma[k], - p->dpte_row_height_chroma[k], - - // Output - &p->meta_row_bw[k], - &p->dpte_row_bw[k]); -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); - dml_print("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); - dml_print("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->UseMALLForPStateChange[k]); - dml_print("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); - dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); - dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]); - dml_print("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); - dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); - dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]); - dml_print("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); - dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); - dml_print("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); - dml_print("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); -#endif - } -} - -static void CalculateOutputLink( - dml_float_t PHYCLKPerState, - dml_float_t PHYCLKD18PerState, - dml_float_t PHYCLKD32PerState, - dml_float_t Downspreading, - dml_bool_t IsMainSurfaceUsingTheIndicatedTiming, - enum dml_output_encoder_class Output, - enum dml_output_format_class OutputFormat, - dml_uint_t HTotal, - dml_uint_t HActive, - dml_float_t PixelClockBackEnd, - dml_float_t ForcedOutputLinkBPP, - dml_uint_t DSCInputBitPerComponent, - dml_uint_t 
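/*
 * Illustrative sketch: the host-VM adjustment applied to the per-row PTE
 * sizes near the end of the deleted CalculateVMRowAndSwath() above.  Each
 * dynamic host-VM page-table level multiplies the PTE traffic by 8 (the
 * 1 + 8 * levels factor), and the total is halved when one row per frame
 * is used, exactly as in the code above.  Names are simplified stand-ins.
 */
#include <stdbool.h>

static unsigned int pte_bytes_per_row(unsigned int luma_pte_bytes,
				      unsigned int chroma_pte_bytes,
				      unsigned int hostvm_dynamic_levels,
				      bool one_row_per_frame)
{
	unsigned int factor = 1 + 8 * hostvm_dynamic_levels;
	unsigned int total = luma_pte_bytes * factor + chroma_pte_bytes * factor;

	/* one-row-per-frame rows are charged at half the row cost */
	return one_row_per_frame ? total / 2 : total;
}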
NumberOfDSCSlices, - dml_float_t AudioSampleRate, - dml_uint_t AudioSampleLayout, - enum dml_odm_mode ODMModeNoDSC, - enum dml_odm_mode ODMModeDSC, - enum dml_dsc_enable DSCEnable, - dml_uint_t OutputLinkDPLanes, - enum dml_output_link_dp_rate OutputLinkDPRate, - - // Output - dml_bool_t *RequiresDSC, - dml_bool_t *RequiresFEC, - dml_float_t *OutBpp, - enum dml_output_type_and_rate__type *OutputType, - enum dml_output_type_and_rate__rate *OutputRate, - dml_uint_t *RequiredSlots) -{ - dml_bool_t LinkDSCEnable; - dml_uint_t dummy; - *RequiresDSC = false; - *RequiresFEC = false; - *OutBpp = 0; - - *OutputType = dml_output_type_unknown; - *OutputRate = dml_output_rate_unknown; - - if (IsMainSurfaceUsingTheIndicatedTiming) { - if (Output == dml_hdmi) { - *RequiresDSC = false; - *RequiresFEC = false; - *OutBpp = TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); - //OutputTypeAndRate = "HDMI"; - *OutputType = dml_output_type_hdmi; - - } else if (Output == dml_dp || Output == dml_dp2p0 || Output == dml_edp) { - if (DSCEnable == dml_dsc_enable) { - *RequiresDSC = true; - LinkDSCEnable = true; - if (Output == dml_dp || Output == dml_dp2p0) { - *RequiresFEC = true; - } else { - *RequiresFEC = false; - } - } else { - *RequiresDSC = false; - LinkDSCEnable = false; - if (Output == dml_dp2p0) { - *RequiresFEC = true; - } else { - *RequiresFEC = false; - } - } - if (Output == dml_dp2p0) { - *OutBpp = 0; - if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr10) && PHYCLKD32PerState >= 10000 / 32.0) { - *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32.0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - } - //OutputTypeAndRate = Output & " UHBR10"; - *OutputType = dml_output_type_dp2p0; - *OutputRate = dml_output_rate_dp_rate_uhbr10; - } - if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32.0) { - *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - - if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, 
(dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - } - //OutputTypeAndRate = Output & " UHBR13p5"; - *OutputType = dml_output_type_dp2p0; - *OutputRate = dml_output_rate_dp_rate_uhbr13p5; - } - if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) { - *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - } - //OutputTypeAndRate = Output & " UHBR20"; - *OutputType = dml_output_type_dp2p0; - *OutputRate = dml_output_rate_dp_rate_uhbr20; - } - } else { // output is dp or edp - *OutBpp = 0; - if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr) && PHYCLKPerState >= 270) { - *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - if (Output == dml_dp) { - *RequiresFEC = true; - } - *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - } - //OutputTypeAndRate = Output & " HBR"; - *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp; - *OutputRate = dml_output_rate_dp_rate_hbr; - } - if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr2) && *OutBpp == 0 && PHYCLKPerState >= 540) { - *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - - if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - if (Output == dml_dp) { - *RequiresFEC = true; - } - *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - } - //OutputTypeAndRate = Output & " HBR2"; - *OutputType = (Output == dml_dp) ? 
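/*
 * Illustrative sketch: the link-rate ladder walked by the deleted
 * CalculateOutputLink() above for DP/eDP.  Each rate is tried at the
 * downspread-reduced link bandwidth; when no valid bpp fits and policy
 * allows, DSC is enabled and the same rate is retried before moving on.
 * Simplifications: FEC bookkeeping, the UHBR path, and the "only retry
 * with DSC when no faster rate is reachable" gate are omitted, and
 * trunc_to_valid_bpp() is a dummy stand-in for TruncToValidBPP().
 */
#include <stdbool.h>

/* placeholder for TruncToValidBPP(): returns 0 when nothing fits */
static double trunc_to_valid_bpp(double link_mbps, unsigned int lanes,
				 bool dsc_enabled)
{
	(void)lanes;
	(void)dsc_enabled;
	return link_mbps > 0 ? 24.0 : 0.0;	/* dummy value */
}

static double pick_dp_out_bpp(const double rates_mbps[], unsigned int n_rates,
			      unsigned int lanes, double downspread_pct,
			      bool dsc_allowed, bool *requires_dsc)
{
	*requires_dsc = false;

	for (unsigned int i = 0; i < n_rates; ++i) {
		double bw = (1.0 - downspread_pct / 100.0) * rates_mbps[i];
		double bpp = trunc_to_valid_bpp(bw, lanes, false);

		if (bpp == 0.0 && dsc_allowed) {	/* retry with DSC on */
			*requires_dsc = true;
			bpp = trunc_to_valid_bpp(bw, lanes, true);
		}
		if (bpp != 0.0)
			return bpp;	/* first rate with a valid bpp wins */
	}
	return 0.0;	/* no rate supports the mode */
}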
dml_output_type_dp : dml_output_type_edp; - *OutputRate = dml_output_rate_dp_rate_hbr2; - } - if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check - *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - - if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - if (Output == dml_dp) { - *RequiresFEC = true; - } - *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - } - //OutputTypeAndRate = Output & " HBR3"; - *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp; - *OutputRate = dml_output_rate_dp_rate_hbr3; - } - } - } - } -} - -/// @brief Determine the ODM mode and number of DPP used per plane based on dispclk, dsc usage, odm usage policy -static void CalculateODMMode( - dml_uint_t MaximumPixelsPerLinePerDSCUnit, - dml_uint_t HActive, - enum dml_output_encoder_class Output, - enum dml_output_format_class OutputFormat, - enum dml_odm_use_policy ODMUse, - dml_float_t StateDispclk, - dml_float_t MaxDispclk, - dml_bool_t DSCEnable, - dml_uint_t TotalNumberOfActiveDPP, - dml_uint_t MaxNumDPP, - dml_float_t PixelClock, - dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, - dml_float_t DISPCLKRampingMargin, - dml_float_t DISPCLKDPPCLKVCOSpeed, - dml_uint_t NumberOfDSCSlices, - - // Output - dml_bool_t *TotalAvailablePipesSupport, - dml_uint_t *NumberOfDPP, - enum dml_odm_mode *ODMMode, - dml_float_t *RequiredDISPCLKPerSurface) -{ - - dml_float_t SurfaceRequiredDISPCLKWithoutODMCombine; - dml_float_t SurfaceRequiredDISPCLKWithODMCombineTwoToOne; - dml_float_t SurfaceRequiredDISPCLKWithODMCombineFourToOne; - - SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml_odm_mode_bypass, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk); - SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml_odm_mode_combine_2to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk); - SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml_odm_mode_combine_4to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk); - *TotalAvailablePipesSupport = true; - - if (OutputFormat == dml_420) { - if (HActive > 4 * DML2_MAX_FMT_420_BUFFER_WIDTH) - *TotalAvailablePipesSupport = false; - else if (HActive > 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) - ODMUse = dml_odm_use_policy_combine_4to1; - else if (HActive > DML2_MAX_FMT_420_BUFFER_WIDTH && ODMUse != dml_odm_use_policy_combine_4to1) - ODMUse = dml_odm_use_policy_combine_2to1; - if (Output == dml_hdmi && ODMUse == dml_odm_use_policy_combine_2to1) - *TotalAvailablePipesSupport = false; - if ((Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && ODMUse == dml_odm_use_policy_combine_4to1) - *TotalAvailablePipesSupport = false; - } - - if (ODMUse 
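/*
 * Illustrative sketch: the core of the ODM combine decision in
 * CalculateODMMode() below.  4:1 combine is taken when policy forces it or
 * "as needed" finds 2:1 still short on DISPCLK, the DSC line-width limit
 * exceeded twice over, or more than eight DSC slices; 2:1 follows the
 * analogous one-level tests; otherwise a single pipe is used.  The pipe
 * budget check is kept; the HDMI/DP/eDP exclusions and the precomputed
 * DISPCLK comparisons are folded into the need_* inputs.  Names are
 * simplified stand-ins.
 */
enum odm { ODM_BYPASS = 1, ODM_2TO1 = 2, ODM_4TO1 = 4 };

static enum odm pick_odm(int need_4to1, int need_2to1,
			 unsigned int active_dpp, unsigned int max_dpp,
			 int *pipes_ok)
{
	*pipes_ok = 1;
	if (need_4to1) {
		if (active_dpp + 4 <= max_dpp)
			return ODM_4TO1;
	} else if (need_2to1) {
		if (active_dpp + 2 <= max_dpp)
			return ODM_2TO1;
	} else if (active_dpp + 1 <= max_dpp) {
		return ODM_BYPASS;
	}
	*pipes_ok = 0;	/* not enough pipes at the required combine level */
	return ODM_BYPASS;
}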
== dml_odm_use_policy_bypass || ODMUse == dml_odm_use_policy_combine_as_needed)
-		*ODMMode = dml_odm_mode_bypass;
-	else if (ODMUse == dml_odm_use_policy_combine_2to1)
-		*ODMMode = dml_odm_mode_combine_2to1;
-	else if (ODMUse == dml_odm_use_policy_combine_4to1)
-		*ODMMode = dml_odm_mode_combine_4to1;
-	else if (ODMUse == dml_odm_use_policy_split_1to2)
-		*ODMMode = dml_odm_mode_split_1to2;
-	else if (ODMUse == dml_odm_use_policy_mso_1to2)
-		*ODMMode = dml_odm_mode_mso_1to2;
-	else if (ODMUse == dml_odm_use_policy_mso_1to4)
-		*ODMMode = dml_odm_mode_mso_1to4;
-
-	*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
-	*NumberOfDPP = 0;
-
-	if (!(Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && (ODMUse == dml_odm_use_policy_combine_4to1 || (ODMUse == dml_odm_use_policy_combine_as_needed &&
-		(SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) || NumberOfDSCSlices > 8)))) {
-		if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
-			*ODMMode = dml_odm_mode_combine_4to1;
-			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
-			*NumberOfDPP = 4;
-		} else {
-			*TotalAvailablePipesSupport = false;
-		}
-	} else if (Output != dml_hdmi && (ODMUse == dml_odm_use_policy_combine_2to1 || (ODMUse == dml_odm_use_policy_combine_as_needed &&
-		((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
-		(DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
-		if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
-			*ODMMode = dml_odm_mode_combine_2to1;
-			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
-			*NumberOfDPP = 2;
-		} else {
-			*TotalAvailablePipesSupport = false;
-		}
-	} else {
-		if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) {
-			*NumberOfDPP = 1;
-		} else {
-			*TotalAvailablePipesSupport = false;
-		}
-	}
-}
-
-/// @brief Calculate the required DISPCLK given the odm mode and pixclk
-static dml_float_t CalculateRequiredDispclk(
-		enum dml_odm_mode ODMMode,
-		dml_float_t PixelClock,
-		dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
-		dml_float_t DISPCLKRampingMargin,
-		dml_float_t DISPCLKDPPCLKVCOSpeed,
-		dml_float_t MaxDispclk)
-{
-	dml_float_t RequiredDispclk = 0.;
-	dml_float_t PixelClockAfterODM;
-
-	dml_float_t DISPCLKWithRampingRoundedToDFSGranularity;
-	dml_float_t DISPCLKWithoutRampingRoundedToDFSGranularity;
-	dml_float_t MaxDispclkRoundedDownToDFSGranularity;
-
-	if (ODMMode == dml_odm_mode_combine_4to1) {
-		PixelClockAfterODM = PixelClock / 4;
-	} else if (ODMMode == dml_odm_mode_combine_2to1) {
-		PixelClockAfterODM = PixelClock / 2;
-	} else {
-		PixelClockAfterODM = PixelClock;
-	}
-
-	DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularity(PixelClockAfterODM * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + DISPCLKRampingMargin / 100.0), 1, DISPCLKDPPCLKVCOSpeed);
-	DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularity(PixelClockAfterODM * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0), 1, DISPCLKDPPCLKVCOSpeed);
-	MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
-
-	if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) {
-		RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
-	} else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) {
-		RequiredDispclk =
MaxDispclkRoundedDownToDFSGranularity; - } else { - RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity; - } - - return RequiredDispclk; -} - -/// @brief Determine DPPCLK if there only one DPP per plane, main factor is the pixel rate and DPP scaling parameter -static void CalculateSinglePipeDPPCLKAndSCLThroughput( - dml_float_t HRatio, - dml_float_t HRatioChroma, - dml_float_t VRatio, - dml_float_t VRatioChroma, - dml_float_t MaxDCHUBToPSCLThroughput, - dml_float_t MaxPSCLToLBThroughput, - dml_float_t PixelClock, - enum dml_source_format_class SourcePixelFormat, - dml_uint_t HTaps, - dml_uint_t HTapsChroma, - dml_uint_t VTaps, - dml_uint_t VTapsChroma, - - // Output - dml_float_t *PSCL_THROUGHPUT, - dml_float_t *PSCL_THROUGHPUT_CHROMA, - dml_float_t *DPPCLKUsingSingleDPP) -{ - dml_float_t DPPCLKUsingSingleDPPLuma; - dml_float_t DPPCLKUsingSingleDPPChroma; - - if (HRatio > 1) { - *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / dml_ceil((dml_float_t) HTaps / 6.0, 1.0)); - } else { - *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); - } - - DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1); - - if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) - DPPCLKUsingSingleDPPLuma = 2 * PixelClock; - - if ((SourcePixelFormat != dml_420_8 && SourcePixelFormat != dml_420_10 && SourcePixelFormat != dml_420_12 && SourcePixelFormat != dml_rgbe_alpha)) { - *PSCL_THROUGHPUT_CHROMA = 0; - *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; - } else { - if (HRatioChroma > 1) { - *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / dml_ceil((dml_float_t) HTapsChroma / 6.0, 1.0)); - } else { - *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); - } - DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), - HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); - if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) - DPPCLKUsingSingleDPPChroma = 2 * PixelClock; - *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); - } -} - -/// @brief Calculate the actual dppclk freq -/// @param DPPCLKUsingSingleDPP DppClk freq required if there is only 1 DPP per plane -/// @param DPPPerSurface Number of DPP for each plane -static void CalculateDPPCLK( - dml_uint_t NumberOfActiveSurfaces, - dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, - dml_float_t DISPCLKDPPCLKVCOSpeed, - dml_float_t DPPCLKUsingSingleDPP[], - dml_uint_t DPPPerSurface[], - - // Output - dml_float_t *GlobalDPPCLK, - dml_float_t Dppclk[]) -{ - *GlobalDPPCLK = 0; - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); - *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]); - } - *GlobalDPPCLK = RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed); - - dml_print("DML::%s: GlobalDPPCLK = %f\n", __func__, *GlobalDPPCLK); - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - Dppclk[k] = *GlobalDPPCLK / 255.0 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0); - dml_print("DML::%s: Dppclk[%0d] = %f\n", __func__, k, Dppclk[k]); - } -} - -static void CalculateMALLUseForStaticScreen( - dml_uint_t NumberOfActiveSurfaces, - dml_uint_t MALLAllocatedForDCNFinal, - enum 
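/*
 * Illustrative sketch: the global-DPPCLK quantization at the end of the
 * deleted CalculateDPPCLK() above.  All planes share one DPPCLK source;
 * the /255 granularity suggests a 255-step divider, so each per-plane
 * request is rounded up to the next achievable step of the shared clock.
 * Names are simplified stand-ins for the DML types.
 */
#include <math.h>

static void quantize_dppclk(double global_dppclk_mhz, double dppclk_mhz[],
			    unsigned int num_planes)
{
	for (unsigned int k = 0; k < num_planes; ++k)
		dppclk_mhz[k] = global_dppclk_mhz / 255.0 *
				ceil(dppclk_mhz[k] * 255.0 / global_dppclk_mhz);
}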
dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
-		dml_uint_t SurfaceSizeInMALL[],
-		dml_bool_t one_row_per_frame_fits_in_buffer[],
-
-		// Output
-		dml_bool_t UsesMALLForStaticScreen[])
-{
-
-	dml_uint_t SurfaceToAddToMALL;
-	dml_bool_t CanAddAnotherSurfaceToMALL;
-	dml_uint_t TotalSurfaceSizeInMALL;
-
-	TotalSurfaceSizeInMALL = 0;
-	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
-		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable);
-		if (UsesMALLForStaticScreen[k])
-			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
-#ifdef __DML_VBA_DEBUG__
-		dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, UsesMALLForStaticScreen[k]);
-		dml_print("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL);
-#endif
-	}
-
-	SurfaceToAddToMALL = 0;
-	CanAddAnotherSurfaceToMALL = true;
-	while (CanAddAnotherSurfaceToMALL) {
-		CanAddAnotherSurfaceToMALL = false;
-		for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
-			if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
-				!UsesMALLForStaticScreen[k] && UseMALLForStaticScreen[k] != dml_use_mall_static_screen_disable && one_row_per_frame_fits_in_buffer[k] &&
-				(!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
-				CanAddAnotherSurfaceToMALL = true;
-				SurfaceToAddToMALL = k;
-				dml_print("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, UseMALLForStaticScreen[k]);
-			}
-		}
-		if (CanAddAnotherSurfaceToMALL) {
-			UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
-			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
-
-#ifdef __DML_VBA_DEBUG__
-			dml_print("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL);
-			dml_print("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL);
-#endif
-		}
-	}
-}
-
-// @brief Calculate return bw for VM only traffic
-dml_float_t dml_get_return_bw_mbps_vm_only(
-		const struct soc_bounding_box_st *soc,
-		dml_bool_t use_ideal_dram_bw_strobe,
-		dml_bool_t HostVMEnable,
-		dml_float_t DCFCLK,
-		dml_float_t FabricClock,
-		dml_float_t DRAMSpeed)
-{
-	dml_float_t VMDataOnlyReturnBW =
-		dml_min3(soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
-			FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
-			DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes *
-			((use_ideal_dram_bw_strobe && !HostVMEnable) ?
soc->pct_ideal_dram_bw_after_urgent_strobe : soc->pct_ideal_dram_bw_after_urgent_vm_only) / 100.0); -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe); - dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable); - dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); - dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); - dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); - dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); -#endif - return VMDataOnlyReturnBW; -} - -// Function: dml_get_return_bw_mbps -// Megabyte per second -dml_float_t dml_get_return_bw_mbps( - const struct soc_bounding_box_st *soc, - dml_bool_t use_ideal_dram_bw_strobe, - dml_bool_t HostVMEnable, - dml_float_t DCFCLK, - dml_float_t FabricClock, - dml_float_t DRAMSpeed) -{ - dml_float_t ReturnBW = 0.; - dml_float_t IdealSDPPortBandwidth = soc->return_bus_width_bytes * DCFCLK; - dml_float_t IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes; - dml_float_t IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; - dml_float_t PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, - IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, - IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : - soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100); - dml_float_t PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, - IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, - IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : - soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100); - - if (HostVMEnable != true) { - ReturnBW = PixelDataOnlyReturnBW; - } else { - ReturnBW = PixelMixedWithVMDataReturnBW; - } - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe); - dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable); - dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); - dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); - dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); - dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth); - dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth); - dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); - dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); - dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); - dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW); -#endif - return ReturnBW; -} - -// Function: dml_get_return_dram_bw_mbps -// Megabyte per second -static dml_float_t dml_get_return_dram_bw_mbps( - const struct soc_bounding_box_st *soc, - dml_bool_t use_ideal_dram_bw_strobe, - dml_bool_t HostVMEnable, - dml_float_t DRAMSpeed) -{ - dml_float_t ReturnDRAMBW = 0.; - dml_float_t IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; - dml_float_t PixelDataOnlyReturnBW = IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? 
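/*
 * Illustrative sketch: the return-bandwidth model used by
 * dml_get_return_bw_mbps() above.  The achievable return bandwidth is the
 * minimum of the SDP port, fabric, and DRAM paths, each derated by its
 * "pct ideal after urgent" efficiency; the caller picks pct_dram according
 * to HostVMEnable and the strobe flag, as the code above does.  Plain
 * doubles stand in for the soc_bounding_box_st fields.
 */
static double return_bw_mbps(double dcfclk_mhz, double fabric_clk_mhz,
			     double dram_speed, unsigned int num_chans,
			     double ret_bus_bytes, double fabric_bytes,
			     double chan_width_bytes,
			     double pct_sdp, double pct_fabric, double pct_dram)
{
	double sdp = ret_bus_bytes * dcfclk_mhz * pct_sdp / 100.0;
	double fab = fabric_clk_mhz * fabric_bytes * pct_fabric / 100.0;
	double dram = dram_speed * num_chans * chan_width_bytes * pct_dram / 100.0;
	double m = sdp < fab ? sdp : fab;

	return m < dram ? m : dram;	/* the slowest stage wins */
}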
soc->pct_ideal_dram_bw_after_urgent_strobe : - soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100; - dml_float_t PixelMixedWithVMDataReturnBW = IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : - soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100; - - if (HostVMEnable != true) { - ReturnDRAMBW = PixelDataOnlyReturnBW; - } else { - ReturnDRAMBW = PixelMixedWithVMDataReturnBW; - } - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe); - dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable); - dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); - dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); - dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); - dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); - dml_print("DML::%s: ReturnDRAMBW = %f MBps\n", __func__, ReturnDRAMBW); -#endif - return ReturnDRAMBW; -} - -/// @brief BACKEND -static dml_uint_t DSCDelayRequirement( - dml_bool_t DSCEnabled, - enum dml_odm_mode ODMMode, - dml_uint_t DSCInputBitPerComponent, - dml_float_t OutputBpp, - dml_uint_t HActive, - dml_uint_t HTotal, - dml_uint_t NumberOfDSCSlices, - enum dml_output_format_class OutputFormat, - enum dml_output_encoder_class Output, - dml_float_t PixelClock, - dml_float_t PixelClockBackEnd) -{ - dml_uint_t DSCDelayRequirement_val = 0; - - if (DSCEnabled == true && OutputBpp != 0) { - if (ODMMode == dml_odm_mode_combine_4to1) { - DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)(dml_ceil((dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, 1.0)), - (dml_uint_t) (NumberOfDSCSlices / 4.0), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output); - } else if (ODMMode == dml_odm_mode_combine_2to1) { - DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)(dml_ceil((dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, 1.0)), - (dml_uint_t) (NumberOfDSCSlices / 2.0), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output); - } else { - DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)((dml_float_t) dml_ceil(HActive / (dml_float_t) NumberOfDSCSlices, 1.0)), - NumberOfDSCSlices, OutputFormat, Output) + dscComputeDelay(OutputFormat, Output); - } - DSCDelayRequirement_val = (dml_uint_t)(DSCDelayRequirement_val + (HTotal - HActive) * dml_ceil((dml_float_t) DSCDelayRequirement_val / (dml_float_t) HActive, 1.0)); - DSCDelayRequirement_val = (dml_uint_t)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd); - - } else { - DSCDelayRequirement_val = 0; - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: DSCEnabled = %u\n", __func__, DSCEnabled); - dml_print("DML::%s: ODMMode = %u\n", __func__, ODMMode); - dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); - dml_print("DML::%s: HActive = %u\n", __func__, HActive); - dml_print("DML::%s: HTotal = %u\n", __func__, HTotal); - dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock); - dml_print("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); - dml_print("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); - dml_print("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); - dml_print("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); - dml_print("DML::%s: 
DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); -#endif - - return DSCDelayRequirement_val; -} - -static noinline_for_stack dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces, - dml_float_t ReturnBW, - dml_bool_t NotUrgentLatencyHiding[], - dml_float_t ReadBandwidthLuma[], - dml_float_t ReadBandwidthChroma[], - dml_float_t cursor_bw[], - dml_float_t meta_row_bandwidth[], - dml_float_t dpte_row_bandwidth[], - dml_uint_t NumberOfDPP[], - dml_float_t UrgentBurstFactorLuma[], - dml_float_t UrgentBurstFactorChroma[], - dml_float_t UrgentBurstFactorCursor[]) -{ - dml_bool_t NotEnoughUrgentLatencyHiding = false; - dml_bool_t CalculateVActiveBandwithSupport_val = false; - dml_float_t VActiveBandwith = 0; - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - if (NotUrgentLatencyHiding[k]) { - NotEnoughUrgentLatencyHiding = true; - } - } - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; - } - - CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %u\n", __func__, NotEnoughUrgentLatencyHiding); - dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); - dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); - dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %u\n", __func__, CalculateVActiveBandwithSupport_val); -#endif - return CalculateVActiveBandwithSupport_val; -} - -static void CalculatePrefetchBandwithSupport( - dml_uint_t NumberOfActiveSurfaces, - dml_float_t ReturnBW, - enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], - dml_bool_t NotUrgentLatencyHiding[], - dml_float_t ReadBandwidthLuma[], - dml_float_t ReadBandwidthChroma[], - dml_float_t PrefetchBandwidthLuma[], - dml_float_t PrefetchBandwidthChroma[], - dml_float_t cursor_bw[], - dml_float_t meta_row_bandwidth[], - dml_float_t dpte_row_bandwidth[], - dml_float_t cursor_bw_pre[], - dml_float_t prefetch_vmrow_bw[], - dml_uint_t NumberOfDPP[], - dml_float_t UrgentBurstFactorLuma[], - dml_float_t UrgentBurstFactorChroma[], - dml_float_t UrgentBurstFactorCursor[], - dml_float_t UrgentBurstFactorLumaPre[], - dml_float_t UrgentBurstFactorChromaPre[], - dml_float_t UrgentBurstFactorCursorPre[], - - // Output - dml_float_t *PrefetchBandwidth, - dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch, - dml_float_t *FractionOfUrgentBandwidth, - dml_bool_t *PrefetchBandwidthSupport) -{ - dml_bool_t NotEnoughUrgentLatencyHiding = false; - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - if (NotUrgentLatencyHiding[k]) { - NotEnoughUrgentLatencyHiding = true; - } - } - - *PrefetchBandwidth = 0; - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], - ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]), - NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * 
UrgentBurstFactorCursorPre[k]); - } - - *PrefetchBandwidthNotIncludingMALLPrefetch = 0; - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) - *PrefetchBandwidthNotIncludingMALLPrefetch = *PrefetchBandwidthNotIncludingMALLPrefetch - + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], - ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] - + cursor_bw[k] * UrgentBurstFactorCursor[k] - + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]), - NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] - + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) - + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); - } - - *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding; - *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); - dml_print("DML::%s: PrefetchBandwidth = %f\n", __func__, *PrefetchBandwidth); - dml_print("DML::%s: FractionOfUrgentBandwidth = %f\n", __func__, *FractionOfUrgentBandwidth); - dml_print("DML::%s: PrefetchBandwidthSupport = %u\n", __func__, *PrefetchBandwidthSupport); -#endif -} - -static noinline_for_stack dml_float_t CalculateBandwidthAvailableForImmediateFlip( - dml_uint_t NumberOfActiveSurfaces, - dml_float_t ReturnBW, - dml_float_t ReadBandwidthLuma[], - dml_float_t ReadBandwidthChroma[], - dml_float_t PrefetchBandwidthLuma[], - dml_float_t PrefetchBandwidthChroma[], - dml_float_t cursor_bw[], - dml_float_t cursor_bw_pre[], - dml_uint_t NumberOfDPP[], - dml_float_t UrgentBurstFactorLuma[], - dml_float_t UrgentBurstFactorChroma[], - dml_float_t UrgentBurstFactorCursor[], - dml_float_t UrgentBurstFactorLumaPre[], - dml_float_t UrgentBurstFactorChromaPre[], - dml_float_t UrgentBurstFactorCursorPre[]) -{ - dml_float_t ret_val = ReturnBW; - - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - ret_val = ret_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], - NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + - cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k=%u\n", __func__, k); - dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]); - dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]); - dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]); - dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]); - dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]); - dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]); - dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]); - - dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]); - dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]); - dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]); - dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]); - dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]); - dml_print("DML::%s: 
UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]); - dml_print("DML::%s: ret_val = %f\n", __func__, ret_val); -#endif - } - - return ret_val; -} - -static void CalculateImmediateFlipBandwithSupport( - dml_uint_t NumberOfActiveSurfaces, - dml_float_t ReturnBW, - enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], - enum dml_immediate_flip_requirement ImmediateFlipRequirement[], - dml_float_t final_flip_bw[], - dml_float_t ReadBandwidthLuma[], - dml_float_t ReadBandwidthChroma[], - dml_float_t PrefetchBandwidthLuma[], - dml_float_t PrefetchBandwidthChroma[], - dml_float_t cursor_bw[], - dml_float_t meta_row_bandwidth[], - dml_float_t dpte_row_bandwidth[], - dml_float_t cursor_bw_pre[], - dml_float_t prefetch_vmrow_bw[], - dml_uint_t NumberOfDPP[], - dml_float_t UrgentBurstFactorLuma[], - dml_float_t UrgentBurstFactorChroma[], - dml_float_t UrgentBurstFactorCursor[], - dml_float_t UrgentBurstFactorLumaPre[], - dml_float_t UrgentBurstFactorChromaPre[], - dml_float_t UrgentBurstFactorCursorPre[], - - // Output - dml_float_t *TotalBandwidth, - dml_float_t *TotalBandwidthNotIncludingMALLPrefetch, - dml_float_t *FractionOfUrgentBandwidth, - dml_bool_t *ImmediateFlipBandwidthSupport) -{ - *TotalBandwidth = 0; - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) { - - - - *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], - NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], - NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); - } else { - *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], - NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], - NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); - } -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: k = %u\n", __func__, k); - dml_print("DML::%s: ImmediateFlipRequirement = %u\n", __func__, ImmediateFlipRequirement[k]); - dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth); - dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]); - dml_print("DML::%s: prefetch_vmrow_bw = %f\n", __func__, prefetch_vmrow_bw[k]); - dml_print("DML::%s: final_flip_bw = %f\n", __func__, final_flip_bw[k]); - dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]); - dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]); - dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]); - dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]); - dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]); - dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]); - dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]); - dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]); 
- dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]); - dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]); - dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]); - dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]); - dml_print("DML::%s: meta_row_bandwidth = %f\n", __func__, meta_row_bandwidth[k]); - dml_print("DML::%s: dpte_row_bandwidth = %f\n", __func__, dpte_row_bandwidth[k]); -#endif - } - - *TotalBandwidthNotIncludingMALLPrefetch = 0; - for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { - if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) { - if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) - *TotalBandwidthNotIncludingMALLPrefetch = *TotalBandwidthNotIncludingMALLPrefetch + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], - NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], - NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) - + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); - else - *TotalBandwidthNotIncludingMALLPrefetch = *TotalBandwidthNotIncludingMALLPrefetch + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], - NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) - + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], - NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) - + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); - } - } - - *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); - *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); - dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth); - dml_print("DML::%s: ImmediateFlipBandwidthSupport = %u\n", __func__, *ImmediateFlipBandwidthSupport); -#endif -} - -static dml_uint_t MicroSecToVertLines(dml_uint_t num_us, dml_uint_t h_total, dml_float_t pixel_clock) -{ - dml_uint_t lines_time_in_ns = 1000.0 * (h_total * 1000.0) / (pixel_clock * 1000.0); - - return dml_ceil(1000.0 * num_us / lines_time_in_ns, 1.0); -} - -/// @brief Calculate the maximum vstartup for mode support and mode programming consideration -/// Bounded by min of actual vblank and input vblank_nom, dont want vstartup/ready to start too early if actual vbllank is huge -static dml_uint_t CalculateMaxVStartup( - dml_uint_t plane_idx, - dml_bool_t ptoi_supported, - dml_uint_t vblank_nom_default_us, - struct dml_timing_cfg_st *timing, - dml_float_t write_back_delay_us) -{ - dml_uint_t vblank_size = 0; - dml_uint_t max_vstartup_lines = 0; - const dml_uint_t max_allowed_vblank_nom = 1023; - - dml_float_t line_time_us = (dml_float_t) timing->HTotal[plane_idx] / timing->PixelClock[plane_idx]; - dml_uint_t vblank_actual = timing->VTotal[plane_idx] - timing->VActive[plane_idx]; - - dml_uint_t vblank_nom_default_in_line = MicroSecToVertLines(vblank_nom_default_us, timing->HTotal[plane_idx], - timing->PixelClock[plane_idx]); - dml_uint_t vblank_nom_input = (dml_uint_t)dml_min(vblank_actual, vblank_nom_default_in_line); - - // vblank_nom 
should not be smaller than (VSync (VTotal - VActive - VFrontPorch) + 2) - // + 2 is because - // 1 -> VStartup_start should be 1 line before VSync - // 1 -> always reserve 1 line between start of VBlank to VStartup signal - dml_uint_t vblank_nom_vsync_capped = dml_max(vblank_nom_input, - timing->VTotal[plane_idx] - timing->VActive[plane_idx] - timing->VFrontPorch[plane_idx] + 2); - dml_uint_t vblank_nom_max_allowed_capped = dml_min(vblank_nom_vsync_capped, max_allowed_vblank_nom); - dml_uint_t vblank_avail = (vblank_nom_max_allowed_capped == 0) ? - vblank_nom_default_in_line : vblank_nom_max_allowed_capped; - - vblank_size = (dml_uint_t) dml_min(vblank_actual, vblank_avail); - - if (timing->Interlace[plane_idx] && !ptoi_supported) - max_vstartup_lines = (dml_uint_t) (dml_floor(vblank_size/2.0, 1.0)); - else - max_vstartup_lines = vblank_size - (dml_uint_t) dml_max(1.0, dml_ceil(write_back_delay_us/line_time_us, 1.0)); -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: plane_idx = %u\n", __func__, plane_idx); - dml_print("DML::%s: VBlankNom = %u\n", __func__, timing->VBlankNom[plane_idx]); - dml_print("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); - dml_print("DML::%s: line_time_us = %f\n", __func__, line_time_us); - dml_print("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); - dml_print("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); - dml_print("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); -#endif - max_vstartup_lines = (dml_uint_t) dml_min(max_vstartup_lines, DML_MAX_VSTARTUP_START); - return max_vstartup_lines; -} - -static noinline_for_stack void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib, - struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params, - dml_uint_t j, - dml_uint_t k) -{ - CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelayPerState[k]; - CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal; - CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter; - CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl; - CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only; - CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor; - CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal; - CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]); - CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k]; - CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters; - CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; - CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; - CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; - 
CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; - CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; - CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; - CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k]; - CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; - CalculatePrefetchSchedule_params->UrgentExtraLatency = mode_lib->ms.ExtraLatency; - CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; - CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k]; - CalculatePrefetchSchedule_params->MetaRowByte = mode_lib->ms.MetaRowBytes[j][k]; - CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[j][k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[j][k]; - CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; - CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY[k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[j][k]; - CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; - CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC[k]; - CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state[k]; - CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state[k]; - CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState[k]; - CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState[k]; - CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait; - CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &mode_lib->ms.LineTimesForPrefetch[k]; - CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &mode_lib->ms.LinesForMetaPTE[k]; - CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &mode_lib->ms.LinesForMetaAndDPTERow[k]; - CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[j][k]; - CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[j][k]; - CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; - CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; - CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.support.NoTimeForDynamicMetadata[j][k]; - CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; -} - -static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mode_lib) -{ - struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals; - struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; - struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; - struct DmlPipe *myPipe; - dml_uint_t j, k; - - for (j = 0; j < 2; ++j) { - mode_lib->ms.TimeCalc = 24 / 
mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k]; - mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k]; - mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k]; - mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k]; - mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k]; - mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k]; - mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k]; - mode_lib->ms.UnboundedRequestEnabledThisState = mode_lib->ms.UnboundedRequestEnabledAllStates[j]; - mode_lib->ms.CompressedBufferSizeInkByteThisState = mode_lib->ms.CompressedBufferSizeInkByteAllStates[j]; - mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k]; - mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k]; - mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k]; - } - - mode_lib->ms.support.VActiveBandwithSupport[j] = CalculateVActiveBandwithSupport( - mode_lib->ms.num_active_planes, - mode_lib->ms.ReturnBWPerState[j], - mode_lib->ms.NotUrgentLatencyHiding, - mode_lib->ms.ReadBandwidthLuma, - mode_lib->ms.ReadBandwidthChroma, - mode_lib->ms.cursor_bw, - mode_lib->ms.meta_row_bandwidth_this_state, - mode_lib->ms.dpte_row_bandwidth_this_state, - mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma[j], - mode_lib->ms.UrgentBurstFactorChroma[j], - mode_lib->ms.UrgentBurstFactorCursor[j]); - - s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only( - &mode_lib->ms.soc, - mode_lib->ms.state.use_ideal_dram_bw_strobe, - mode_lib->ms.cache_display_cfg.plane.HostVMEnable, - mode_lib->ms.DCFCLKState[j], - mode_lib->ms.state.fabricclk_mhz, - mode_lib->ms.state.dram_speed_mts); - - s->HostVMInefficiencyFactor = 1; - if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable) - s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBWPerState[j] / s->VMDataOnlyReturnBWPerState; - - mode_lib->ms.ExtraLatency = CalculateExtraLatency( - mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles, - s->ReorderingBytes, - mode_lib->ms.DCFCLKState[j], - mode_lib->ms.TotalNumberOfActiveDPP[j], - mode_lib->ms.ip.pixel_chunk_size_kbytes, - mode_lib->ms.TotalNumberOfDCCActiveDPP[j], - mode_lib->ms.ip.meta_chunk_size_kbytes, - mode_lib->ms.ReturnBWPerState[j], - mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.cache_display_cfg.plane.HostVMEnable, - mode_lib->ms.num_active_planes, - mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.dpte_group_bytes, - s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, - mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); - - s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; - s->MaxVStartup = 0; - s->AllPrefetchModeTested = true; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]); - s->NextPrefetchMode[k] = s->MinPrefetchMode[k]; - } - - do { - s->MaxVStartup = s->NextMaxVStartup; - s->AllPrefetchModeTested = true; - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - mode_lib->ms.PrefetchMode[k] = 
s->NextPrefetchMode[k]; - mode_lib->ms.TWait = CalculateTWait( - mode_lib->ms.PrefetchMode[k], - mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], - mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k], - mode_lib->ms.state.dram_clock_change_latency_us, - mode_lib->ms.state.fclk_change_latency_us, - mode_lib->ms.UrgLatency, - mode_lib->ms.state.sr_enter_plus_exit_time_us); - - myPipe = &s->myPipe; - myPipe->Dppclk = mode_lib->ms.RequiredDPPCLKPerSurface[j][k]; - myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK[j]; - myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k]; - myPipe->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; - myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[j][k]; - myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k]; - myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k]; - myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; - myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; - myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; - myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; - myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k]; - myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k]; - myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]; - myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k]; - myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k]; - myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k]; - myPipe->ODMMode = mode_lib->ms.ODMModePerState[k]; - myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k]; - myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; - myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; - myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Calling CalculatePrefetchSchedule for j=%u, k=%u\n", __func__, j, k); - dml_print("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[j][k]); - dml_print("DML::%s: MaxVStartup = %u\n", __func__, s->MaxVStartup); - dml_print("DML::%s: NextPrefetchMode = %u\n", __func__, s->NextPrefetchMode[k]); - dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]); - dml_print("DML::%s: PrefetchMode = %u\n", __func__, mode_lib->ms.PrefetchMode[k]); -#endif - - CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor; - CalculatePrefetchSchedule_params->myPipe = myPipe; - CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->MaxVStartup, s->MaximumVStartup[j][k])); - CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[j][k]; - CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; - CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; - CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; - CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; - CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; - CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; - CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; - 
CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; - CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; - - set_calculate_prefetch_schedule_params(mode_lib, CalculatePrefetchSchedule_params, j, k); - - mode_lib->ms.support.NoTimeForPrefetch[j][k] = - CalculatePrefetchSchedule(&mode_lib->scratch, - CalculatePrefetchSchedule_params); - } - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - CalculateUrgentBurstFactor( - mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], - mode_lib->ms.swath_width_luma_ub_this_state[k], - mode_lib->ms.swath_width_chroma_ub_this_state[k], - mode_lib->ms.SwathHeightYThisState[k], - mode_lib->ms.SwathHeightCThisState[k], - mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k], - mode_lib->ms.UrgLatency, - mode_lib->ms.ip.cursor_buffer_size, - mode_lib->ms.cache_display_cfg.plane.CursorWidth[k], - mode_lib->ms.cache_display_cfg.plane.CursorBPP[k], - mode_lib->ms.VRatioPreY[j][k], - mode_lib->ms.VRatioPreC[j][k], - mode_lib->ms.BytePerPixelInDETY[k], - mode_lib->ms.BytePerPixelInDETC[k], - mode_lib->ms.DETBufferSizeYThisState[k], - mode_lib->ms.DETBufferSizeCThisState[k], - /* Output */ - &mode_lib->ms.UrgentBurstFactorCursorPre[k], - &mode_lib->ms.UrgentBurstFactorLumaPre[k], - &mode_lib->ms.UrgentBurstFactorChromaPre[k], - &mode_lib->ms.NotUrgentLatencyHidingPre[k]); - - mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * - mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / - mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.VRatioPreY[j][k]; - } - - { - CalculatePrefetchBandwithSupport( - mode_lib->ms.num_active_planes, - mode_lib->ms.ReturnBWPerState[j], - mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, - mode_lib->ms.NotUrgentLatencyHidingPre, - mode_lib->ms.ReadBandwidthLuma, - mode_lib->ms.ReadBandwidthChroma, - mode_lib->ms.RequiredPrefetchPixelDataBWLuma, - mode_lib->ms.RequiredPrefetchPixelDataBWChroma, - mode_lib->ms.cursor_bw, - mode_lib->ms.meta_row_bandwidth_this_state, - mode_lib->ms.dpte_row_bandwidth_this_state, - mode_lib->ms.cursor_bw_pre, - mode_lib->ms.prefetch_vmrow_bw, - mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma[j], - mode_lib->ms.UrgentBurstFactorChroma[j], - mode_lib->ms.UrgentBurstFactorCursor[j], - mode_lib->ms.UrgentBurstFactorLumaPre, - mode_lib->ms.UrgentBurstFactorChromaPre, - mode_lib->ms.UrgentBurstFactorCursorPre, - - /* output */ - &s->dummy_single[0], // dml_float_t *PrefetchBandwidth - &s->dummy_single[1], // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch - &mode_lib->mp.FractionOfUrgentBandwidth, // dml_float_t *FractionOfUrgentBandwidth - &mode_lib->ms.support.PrefetchSupported[j]); - } - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (mode_lib->ms.LineTimesForPrefetch[k] < 2.0 - || mode_lib->ms.LinesForMetaPTE[k] >= 32.0 - || mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16.0 - || mode_lib->ms.support.NoTimeForPrefetch[j][k] == true) { - mode_lib->ms.support.PrefetchSupported[j] = false; - } - } - - mode_lib->ms.support.DynamicMetadataSupported[j] = true; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.support.NoTimeForDynamicMetadata[j][k] == true) { - mode_lib->ms.support.DynamicMetadataSupported[j] = false; - } - } - - 
mode_lib->ms.support.VRatioInPrefetchSupported[j] = true; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (mode_lib->ms.support.NoTimeForPrefetch[j][k] == true || - mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - ((s->MaxVStartup < s->MaximumVStartup[j][k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) && - (mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE__))) { - mode_lib->ms.support.VRatioInPrefetchSupported[j] = false; - } - } - - s->AnyLinesForVMOrRowTooLarge = false; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16 || mode_lib->ms.LinesForMetaPTE[k] >= 32) { - s->AnyLinesForVMOrRowTooLarge = true; - } - } - - if (mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true) { - mode_lib->ms.BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip( - mode_lib->ms.num_active_planes, - mode_lib->ms.ReturnBWPerState[j], - mode_lib->ms.ReadBandwidthLuma, - mode_lib->ms.ReadBandwidthChroma, - mode_lib->ms.RequiredPrefetchPixelDataBWLuma, - mode_lib->ms.RequiredPrefetchPixelDataBWChroma, - mode_lib->ms.cursor_bw, - mode_lib->ms.cursor_bw_pre, - mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma[j], - mode_lib->ms.UrgentBurstFactorChroma[j], - mode_lib->ms.UrgentBurstFactorCursor[j], - mode_lib->ms.UrgentBurstFactorLumaPre, - mode_lib->ms.UrgentBurstFactorChromaPre, - mode_lib->ms.UrgentBurstFactorCursorPre); - - mode_lib->ms.TotImmediateFlipBytes = 0; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) { - mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]); - if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) { - mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]); - } else { - mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.DPTEBytesPerRow[j][k]; - } - } - } - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - CalculateFlipSchedule( - s->HostVMInefficiencyFactor, - mode_lib->ms.ExtraLatency, - mode_lib->ms.UrgLatency, - mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels, - mode_lib->ms.cache_display_cfg.plane.HostVMEnable, - mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, - mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, - mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], - mode_lib->ms.MetaRowBytes[j][k], - mode_lib->ms.DPTEBytesPerRow[j][k], - mode_lib->ms.BandwidthAvailableForImmediateFlip, - mode_lib->ms.TotImmediateFlipBytes, - mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], - (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), - mode_lib->ms.cache_display_cfg.plane.VRatio[k], - mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], - mode_lib->ms.Tno_bw[k], - mode_lib->ms.cache_display_cfg.surface.DCCEnable[k], - mode_lib->ms.dpte_row_height[k], - 
mode_lib->ms.meta_row_height[k], - mode_lib->ms.dpte_row_height_chroma[k], - mode_lib->ms.meta_row_height_chroma[k], - mode_lib->ms.use_one_row_for_frame_flip[j][k], // 24 - - /* Output */ - &mode_lib->ms.DestinationLinesToRequestVMInImmediateFlip[k], - &mode_lib->ms.DestinationLinesToRequestRowInImmediateFlip[k], - &mode_lib->ms.final_flip_bw[k], - &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); - } - - { - CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes, - mode_lib->ms.ReturnBWPerState[j], - mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, - mode_lib->ms.policy.ImmediateFlipRequirement, - mode_lib->ms.final_flip_bw, - mode_lib->ms.ReadBandwidthLuma, - mode_lib->ms.ReadBandwidthChroma, - mode_lib->ms.RequiredPrefetchPixelDataBWLuma, - mode_lib->ms.RequiredPrefetchPixelDataBWChroma, - mode_lib->ms.cursor_bw, - mode_lib->ms.meta_row_bandwidth_this_state, - mode_lib->ms.dpte_row_bandwidth_this_state, - mode_lib->ms.cursor_bw_pre, - mode_lib->ms.prefetch_vmrow_bw, - mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here - mode_lib->ms.UrgentBurstFactorLuma[j], - mode_lib->ms.UrgentBurstFactorChroma[j], - mode_lib->ms.UrgentBurstFactorCursor[j], - mode_lib->ms.UrgentBurstFactorLumaPre, - mode_lib->ms.UrgentBurstFactorChromaPre, - mode_lib->ms.UrgentBurstFactorCursorPre, - - /* output */ - &s->dummy_single[0], // dml_float_t *TotalBandwidth - &s->dummy_single[1], // dml_float_t *TotalBandwidthNotIncludingMALLPrefetch - &s->dummy_single[2], // dml_float_t *FractionOfUrgentBandwidth - &mode_lib->ms.support.ImmediateFlipSupportedForState[j]); // dml_bool_t *ImmediateFlipBandwidthSupport - } - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required) && (mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)) - mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false; - } - - } else { // if prefetch not support, assume iflip not supported - mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false; - } - - if (s->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || s->AnyLinesForVMOrRowTooLarge == false) { - s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1; - - if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k]) - s->AllPrefetchModeTested = false; - } - } else { - s->NextMaxVStartup = s->NextMaxVStartup - 1; - } - } while (!((mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.DynamicMetadataSupported[j] == true && - mode_lib->ms.support.VRatioInPrefetchSupported[j] == true && - // consider flip support is okay if when there is no hostvm and the user does't require a iflip OR the flip bw is ok - // If there is hostvm, DCN needs to support iflip for invalidation - ((s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j] == true)) || - (s->NextMaxVStartup == s->MaxVStartupAllPlanes[j] && s->AllPrefetchModeTested))); - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - mode_lib->ms.use_one_row_for_frame_this_state[k] = mode_lib->ms.use_one_row_for_frame[j][k]; - } - - s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; - s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; - s->mSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us; - s->mSOCParameters.DRAMClockChangeLatency = 
mode_lib->ms.state.dram_clock_change_latency_us; - s->mSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us; - s->mSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us; - s->mSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us; - s->mSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us; - s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us; - s->mSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us; - s->mSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us; - - CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal; - CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; - CalculateWatermarks_params->PrefetchMode = mode_lib->ms.PrefetchMode; - CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines; - CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits; - CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes; - CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLKState[j]; - CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBWPerState[j]; - CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal; - CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal; - CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay; - CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; - CalculateWatermarks_params->meta_row_height = mode_lib->ms.meta_row_height; - CalculateWatermarks_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma; - CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; - CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes; - CalculateWatermarks_params->SOCCLK = mode_lib->ms.state.socclk_mhz; - CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; - CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState; - CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState; - CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState; - CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState; - CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel; - CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState; - CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState; - CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio; - CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma; - CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps; - CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma; - CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio; - CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma; - CalculateWatermarks_params->HTotal = 
mode_lib->ms.cache_display_cfg.timing.HTotal; - CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal; - CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive; - CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock; - CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; - CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState; - CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; - CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; - CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; - CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; - CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable; - CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat; - CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth; - CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight; - CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight; - CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledThisState; - CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteThisState; - - // Output - CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark - CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j]; - CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] - CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[] - CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j]; - CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported - CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j]; - CalculateWatermarks_params->ActiveDRAMClockChangeLatencyMargin = mode_lib->ms.support.ActiveDRAMClockChangeLatencyMargin; - - CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, - CalculateWatermarks_params); - - } // for j -} - -/// @brief The Mode Support function. 
-dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
-{
-	struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
-	struct UseMinimumDCFCLK_params_st *UseMinimumDCFCLK_params = &mode_lib->scratch.UseMinimumDCFCLK_params;
-	struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
-	struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
-
-	dml_uint_t j, k, m;
-
-	mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg);
-	dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
-
-	CalculateMaxDETAndMinCompressedBufferSize(
-		mode_lib->ms.ip.config_return_buffer_size_in_kbytes,
-		mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes,
-		mode_lib->ms.ip.rob_buffer_size_kbytes,
-		mode_lib->ms.ip.max_num_dpp,
-		mode_lib->ms.policy.NomDETInKByteOverrideEnable, // VBA_DELTA
-		mode_lib->ms.policy.NomDETInKByteOverrideValue, // VBA_DELTA
-
-		/* Output */
-		&mode_lib->ms.MaxTotalDETInKByte,
-		&mode_lib->ms.NomDETInKByte,
-		&mode_lib->ms.MinCompressedBufferSizeInKByte);
-
-	PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported);
-
-
-	/*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
-
-	/*Scale Ratio, taps Support Check*/
-	mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		if (mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k] == false
-			&& ((mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
-				&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
-				&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
-				&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
-				&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8
-				&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
-				&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha)
-				|| mode_lib->ms.cache_display_cfg.plane.HRatio[k] != 1.0
-				|| mode_lib->ms.cache_display_cfg.plane.HTaps[k] != 1.0
-				|| mode_lib->ms.cache_display_cfg.plane.VRatio[k] != 1.0
-				|| mode_lib->ms.cache_display_cfg.plane.VTaps[k] != 1.0)) {
-			mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
-		} else if (mode_lib->ms.cache_display_cfg.plane.VTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.VTaps[k] > 8.0
-			|| mode_lib->ms.cache_display_cfg.plane.HTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 8.0
-			|| (mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 1.0 && (mode_lib->ms.cache_display_cfg.plane.HTaps[k] % 2) == 1)
-			|| mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.ip.max_hscl_ratio
-			|| mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.ip.max_vscl_ratio
-			|| mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.cache_display_cfg.plane.HTaps[k]
-			|| mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.cache_display_cfg.plane.VTaps[k]
-			|| (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
-				&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
-				&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
-				&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
-				&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8
-				&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
-				&& (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] > 8 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 8 ||
-				(mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 1 && mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] % 2 == 1) ||
-				mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.ip.max_hscl_ratio ||
-				mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.ip.max_vscl_ratio ||
-				mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] ||
-				mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]))) {
-			mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
-		}
-	}
-
-	/*Source Format, Pixel Format and Scan Support Check*/
-	mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear && (!(!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) || mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true)) {
-			mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
-		}
-	}
-
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		CalculateBytePerPixelAndBlockSizes(
-			mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
-			mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
-
-			/* Output */
-			&mode_lib->ms.BytePerPixelY[k],
-			&mode_lib->ms.BytePerPixelC[k],
-			&mode_lib->ms.BytePerPixelInDETY[k],
-			&mode_lib->ms.BytePerPixelInDETC[k],
-			&mode_lib->ms.Read256BlockHeightY[k],
-			&mode_lib->ms.Read256BlockHeightC[k],
-			&mode_lib->ms.Read256BlockWidthY[k],
-			&mode_lib->ms.Read256BlockWidthC[k],
-			&mode_lib->ms.MacroTileHeightY[k],
-			&mode_lib->ms.MacroTileHeightC[k],
-			&mode_lib->ms.MacroTileWidthY[k],
-			&mode_lib->ms.MacroTileWidthC[k]);
-	}
-
-	/*Bandwidth Support Check*/
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) {
-			mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k];
-			mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k];
-		} else {
-			mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
-			mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
-		}
-	}
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		mode_lib->ms.ReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * dml_ceil(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
-		mode_lib->ms.ReadBandwidthChroma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * dml_ceil(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k] / 2.0;
-	}
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true
-			&& mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_64) {
-			mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k]
-				* mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k]
-				/ (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k]
-				* mode_lib->ms.cache_display_cfg.timing.HTotal[k]
-				/ mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8.0;
-		} else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
-			mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k]
-				* mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k]
-				/ (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k]
-				* mode_lib->ms.cache_display_cfg.timing.HTotal[k]
-				/ mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4.0;
-		} else {
-			mode_lib->ms.WriteBandwidth[k] = 0.0;
-		}
-	}
-
-	/*Writeback Latency support check*/
-	mode_lib->ms.support.WritebackLatencySupport = true;
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true &&
-			(mode_lib->ms.WriteBandwidth[k] > mode_lib->ms.ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->ms.state.writeback_latency_us)) {
-			mode_lib->ms.support.WritebackLatencySupport = false;
-		}
-	}
-
-	/*Writeback Mode Support Check*/
-	s->TotalNumberOfActiveWriteback = 0;
-	for (k = 0; k <= (dml_uint_t) mode_lib->ms.num_active_planes - 1; k++) {
-		if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
-			s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
-		}
-	}
-
-	mode_lib->ms.support.EnoughWritebackUnits = 1;
-	if (s->TotalNumberOfActiveWriteback > (dml_uint_t) mode_lib->ms.ip.max_num_wb) {
-		mode_lib->ms.support.EnoughWritebackUnits = false;
-	}
-
-	/*Writeback Scale Ratio and Taps Support Check*/
-	mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
-			if (mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > mode_lib->ms.ip.writeback_max_hscl_ratio
-				|| mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > mode_lib->ms.ip.writeback_max_vscl_ratio
-				|| mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] < mode_lib->ms.ip.writeback_min_hscl_ratio
-				|| mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] < mode_lib->ms.ip.writeback_min_vscl_ratio
-				|| mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_hscl_taps
-				|| mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_vscl_taps
-				|| mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k]
-				|| mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k]
-				|| (mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > 2.0 && ((mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] % 2) == 1))) {
-				mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
-			}
-			if (2.0 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * (mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] - 1) * 57 > mode_lib->ms.ip.writeback_line_buffer_buffer_size) {
-				mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
-			}
-		}
-	}
-
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		CalculateSinglePipeDPPCLKAndSCLThroughput(
-			mode_lib->ms.cache_display_cfg.plane.HRatio[k],
-			mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k],
-			mode_lib->ms.cache_display_cfg.plane.VRatio[k],
-			mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
-			mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk,
-			mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk,
-			mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
-			mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
-			mode_lib->ms.cache_display_cfg.plane.HTaps[k],
-			mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k],
-			mode_lib->ms.cache_display_cfg.plane.VTaps[k],
-			mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k],
-			/* Output */
-			&mode_lib->ms.PSCL_FACTOR[k],
-			&mode_lib->ms.PSCL_FACTOR_CHROMA[k],
-			&mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
-	}
-
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) {
-			s->MaximumSwathWidthSupportLuma = 8192;
-		} else if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) {
-			s->MaximumSwathWidthSupportLuma = 7680;
-		} else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) {
-			s->MaximumSwathWidthSupportLuma = 4320;
-		} else if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha) {
-			s->MaximumSwathWidthSupportLuma = 3840;
-		} else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelY[k] == 8 && mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
-			s->MaximumSwathWidthSupportLuma = 3072;
-		} else {
-			s->MaximumSwathWidthSupportLuma = 6144;
-		}
-
-		if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12) {
-			s->MaximumSwathWidthSupportChroma = (dml_uint_t)(s->MaximumSwathWidthSupportLuma / 2.0);
-		} else {
-			s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
-		}
-		mode_lib->ms.MaximumSwathWidthInLineBufferLuma = mode_lib->ms.ip.line_buffer_size_bits * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatio[k], 1.0) / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k] /
-			(mode_lib->ms.cache_display_cfg.plane.VTaps[k] + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatio[k], 1.0) - 2, 0.0));
-		if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
-			mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
-		} else {
-			mode_lib->ms.MaximumSwathWidthInLineBufferChroma =
-				mode_lib->ms.ip.line_buffer_size_bits
-				* dml_max(mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k], 1.0)
-				/ mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k]
-				/ (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]
-				+ dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], 1.0) - 2, 0.0));
-		}
-		mode_lib->ms.MaximumSwathWidthLuma[k] = dml_min(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
-		mode_lib->ms.MaximumSwathWidthChroma[k] = dml_min(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
-	}
-
-	/*Number Of DSC Slices*/
-	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-		if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k &&
-			mode_lib->ms.cache_display_cfg.output.DSCEnable[k] != dml_dsc_disable) {
-			mode_lib->ms.support.NumberOfDSCSlices[k] = mode_lib->ms.cache_display_cfg.output.DSCSlices[k];
-
-			if (mode_lib->ms.support.NumberOfDSCSlices[k] == 0) {
-				if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 4800) {
-					mode_lib->ms.support.NumberOfDSCSlices[k] = (dml_uint_t)(dml_ceil(mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 600, 4));
-				} else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 2400) {
-					mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
-				} else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 1200) {
-					mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
-				} else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 340) {
-					mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
-				} else {
-					mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
-				}
-			}
-		} else {
-			mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
-		}
-	}
-
-	CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
-	CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
-	CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
-	CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
-	CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
-	CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
-	CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
-	CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
-	CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
-	CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
-	CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
-	CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
-	CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
-	CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder;
-	CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
-	CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
-	CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
-	CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
-	CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
-	CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
-
- CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
- CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
- CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
- CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
- CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
- CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
- CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
- CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
- CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
- CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
- CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
- CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
- CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
- CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
- CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
- CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
- CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
- CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
- CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
- CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
- CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
- CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
- CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
- CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
- CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
- CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[0];
- CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[1];
- CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[2];
- CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[3];
- CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[4];
- CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[5];
- CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[6];
- CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[7];
- CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
- CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
- CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
- CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2];
- CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1];
- CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
- CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
- CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
-
- CalculateSwathAndDETConfiguration(&mode_lib->scratch,
- CalculateSwathAndDETConfiguration_params); /* dml_bool_t *ViewportSizeSupport */
-
- s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = false;
- s->MPCCombineMethodAsPossible = false;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_needed_for_pstate_and_voltage)
- s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = true;
- if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_possible)
- s->MPCCombineMethodAsPossible = true;
- }
- mode_lib->ms.support.MPCCombineMethodIncompatible = s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && s->MPCCombineMethodAsPossible;
-
- for (j = 0; j < 2; j++) {
- mode_lib->ms.TotalNumberOfActiveDPP[j] = 0;
- mode_lib->ms.support.TotalAvailablePipesSupport[j] = true;
-
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- CalculateODMMode(
- mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit,
- mode_lib->ms.cache_display_cfg.timing.HActive[k],
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
- mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
- mode_lib->ms.policy.ODMUse[k],
- mode_lib->ms.state.dispclk_mhz,
- mode_lib->ms.max_state.dispclk_mhz,
- false, // DSCEnable
- mode_lib->ms.TotalNumberOfActiveDPP[j],
- mode_lib->ms.ip.max_num_dpp,
- mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
- mode_lib->ms.soc.dcn_downspread_percent,
- mode_lib->ms.ip.dispclk_ramp_margin_percent,
- mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
- mode_lib->ms.support.NumberOfDSCSlices[k],
-
- /* Output */
- &s->TotalAvailablePipesSupportNoDSC,
- &s->NumberOfDPPNoDSC,
- &s->ODMModeNoDSC,
- &s->RequiredDISPCLKPerSurfaceNoDSC);
-
- CalculateODMMode(
- mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit,
- mode_lib->ms.cache_display_cfg.timing.HActive[k],
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
- mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
- mode_lib->ms.policy.ODMUse[k],
- mode_lib->ms.state.dispclk_mhz,
- mode_lib->ms.max_state.dispclk_mhz,
- true, // DSCEnable
- mode_lib->ms.TotalNumberOfActiveDPP[j],
- mode_lib->ms.ip.max_num_dpp,
- mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
- mode_lib->ms.soc.dcn_downspread_percent,
- mode_lib->ms.ip.dispclk_ramp_margin_percent,
- mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
- mode_lib->ms.support.NumberOfDSCSlices[k],
-
- /* Output */
- &s->TotalAvailablePipesSupportDSC,
- &s->NumberOfDPPDSC,
- &s->ODMModeDSC,
- &s->RequiredDISPCLKPerSurfaceDSC);
-
- CalculateOutputLink(
- mode_lib->ms.state.phyclk_mhz,
- mode_lib->ms.state.phyclk_d18_mhz,
- mode_lib->ms.state.phyclk_d32_mhz,
- mode_lib->ms.soc.phy_downspread_percent,
- (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k),
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
- mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
- mode_lib->ms.cache_display_cfg.timing.HTotal[k],
- mode_lib->ms.cache_display_cfg.timing.HActive[k],
- mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k],
- mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k],
- mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
- mode_lib->ms.support.NumberOfDSCSlices[k],
- mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k],
- mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k],
- s->ODMModeNoDSC,
- s->ODMModeDSC,
- mode_lib->ms.cache_display_cfg.output.DSCEnable[k],
- mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k],
- mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k],
-
- /* Output */
- &mode_lib->ms.RequiresDSC[k],
- &mode_lib->ms.RequiresFEC[k],
- &mode_lib->ms.OutputBppPerState[k],
- &mode_lib->ms.OutputTypePerState[k], // VBA_DELTA, VBA uses a string to represent type and rate, but DML uses enum, don't want to rely on strng
- &mode_lib->ms.OutputRatePerState[k],
- &mode_lib->ms.RequiredSlots[k]);
-
- if (mode_lib->ms.RequiresDSC[k] == false) {
- mode_lib->ms.ODMModePerState[k] = s->ODMModeNoDSC;
- mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceNoDSC;
- if (!s->TotalAvailablePipesSupportNoDSC)
- mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
- mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPNoDSC;
- } else {
- mode_lib->ms.ODMModePerState[k] = s->ODMModeDSC;
- mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceDSC;
- if (!s->TotalAvailablePipesSupportDSC)
- mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
- mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPDSC;
- }
- }
-
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
- mode_lib->ms.MPCCombine[j][k] = false;
- mode_lib->ms.NoOfDPP[j][k] = 4;
- } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
- mode_lib->ms.MPCCombine[j][k] = false;
- mode_lib->ms.NoOfDPP[j][k] = 2;
- } else if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_disabled) {
- mode_lib->ms.MPCCombine[j][k] = false;
- mode_lib->ms.NoOfDPP[j][k] = 1;
- } else if (RoundToDFSGranularity(mode_lib->ms.MinDPPCLKUsingSingleDPP[k] * (1 + mode_lib->ms.soc.dcn_downspread_percent / 100),
- 1, mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz) <= mode_lib->ms.state.dppclk_mhz &&
- mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k] == true) {
- mode_lib->ms.MPCCombine[j][k] = false;
- mode_lib->ms.NoOfDPP[j][k] = 1;
- } else if (mode_lib->ms.TotalNumberOfActiveDPP[j] < (dml_uint_t) mode_lib->ms.ip.max_num_dpp) {
- mode_lib->ms.MPCCombine[j][k] = true;
- mode_lib->ms.NoOfDPP[j][k] = 2;
- mode_lib->ms.TotalNumberOfActiveDPP[j] = (dml_uint_t) mode_lib->ms.TotalNumberOfActiveDPP[j] + 1;
- } else {
- mode_lib->ms.MPCCombine[j][k] = false;
- mode_lib->ms.NoOfDPP[j][k] = 1;
- mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
- }
- }
-
- mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = 0;
- s->NoChromaOrLinear = true;
- for (k = 0; k < (dml_uint_t) mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.NoOfDPP[j][k] == 1)
- mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] + 1;
- if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8
- || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10
- || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12
- || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha
- || mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) {
- s->NoChromaOrLinear = false;
- }
- }
-
- if (j == 1 && !UnboundedRequest(mode_lib->ms.policy.UseUnboundedRequesting,
- mode_lib->ms.TotalNumberOfActiveDPP[j], s->NoChromaOrLinear,
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[0])) {
- while (!(mode_lib->ms.TotalNumberOfActiveDPP[j] >= (dml_uint_t) mode_lib->ms.ip.max_num_dpp || mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] == 0)) {
- s->BWOfNonCombinedSurfaceOfMaximumBandwidth = 0;
- s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_disabled && mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_as_needed_for_voltage &&
- mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k] > s->BWOfNonCombinedSurfaceOfMaximumBandwidth &&
- (mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_2to1 && mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_4to1) &&
- mode_lib->ms.MPCCombine[j][k] == false) {
- s->BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k];
- s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = k;
- }
- }
- mode_lib->ms.MPCCombine[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = true;
- mode_lib->ms.NoOfDPP[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = 2;
- mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + 1;
- mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] - 1;
- }
- }
-
- //DISPCLK/DPPCLK
- mode_lib->ms.WritebackRequiredDISPCLK = 0;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) {
- mode_lib->ms.WritebackRequiredDISPCLK = dml_max(mode_lib->ms.WritebackRequiredDISPCLK,
- CalculateWriteBackDISPCLK(mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
- mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
- mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
- mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
- mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k],
- mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
- mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k],
- mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
- mode_lib->ms.cache_display_cfg.timing.HTotal[k],
- mode_lib->ms.ip.writeback_line_buffer_buffer_size,
- mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz));
- }
- }
-
- mode_lib->ms.RequiredDISPCLK[j] = mode_lib->ms.WritebackRequiredDISPCLK;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- mode_lib->ms.RequiredDISPCLK[j] = dml_max(mode_lib->ms.RequiredDISPCLK[j], mode_lib->ms.RequiredDISPCLKPerSurface[j][k]);
- }
-
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
- }
-
- CalculateDPPCLK(mode_lib->ms.num_active_planes,
- mode_lib->ms.soc.dcn_downspread_percent,
- mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
- mode_lib->ms.MinDPPCLKUsingSingleDPP,
- mode_lib->ms.NoOfDPPThisState,
- /* Output */
- &mode_lib->ms.GlobalDPPCLK,
- mode_lib->ms.RequiredDPPCLKThisState);
-
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- mode_lib->ms.RequiredDPPCLKPerSurface[j][k] = mode_lib->ms.RequiredDPPCLKThisState[k];
- }
-
- mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] = !((mode_lib->ms.RequiredDISPCLK[j] > mode_lib->ms.state.dispclk_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.state.dppclk_mhz));
-
- if (mode_lib->ms.TotalNumberOfActiveDPP[j] > (dml_uint_t) mode_lib->ms.ip.max_num_dpp) {
- mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
- }
- } // j
-
- /* Total Available OTG, HDMIFRL, DP Support Check */
- s->TotalNumberOfActiveOTG = 0;
- s->TotalNumberOfActiveHDMIFRL = 0;
- s->TotalNumberOfActiveDP2p0 = 0;
- s->TotalNumberOfActiveDP2p0Outputs = 0;
-
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
- s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
- if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
- s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
- if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0) {
- s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
- if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k || mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == false) {
- s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
- }
- }
- }
- }
-
- mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (dml_uint_t) mode_lib->ms.ip.max_num_otg);
- mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (dml_uint_t) mode_lib->ms.ip.max_num_hdmi_frl_outputs);
- mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_outputs);
-
- /* Display IO and DSC Support Check */
- mode_lib->ms.support.NonsupportedDSCInputBPC = false;
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false &&
- !(mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 12.0
- || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 10.0
- || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 8.0
- || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] > (dml_uint_t) mode_lib->ms.ip.maximum_dsc_bits_per_component
- )) {
- mode_lib->ms.support.NonsupportedDSCInputBPC = true;
- }
- }
-
- mode_lib->ms.support.ExceededMultistreamSlots = false;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k) {
- s->TotalSlots = mode_lib->ms.RequiredSlots[k];
- for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
- if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[j] == k)
- s->TotalSlots = s->TotalSlots + mode_lib->ms.RequiredSlots[j];
- }
- if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp && s->TotalSlots > 63)
- mode_lib->ms.support.ExceededMultistreamSlots = true;
- if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 && s->TotalSlots > 64)
- mode_lib->ms.support.ExceededMultistreamSlots = true;
- }
- }
- mode_lib->ms.support.LinkCapacitySupport = true;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false &&
- mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) && mode_lib->ms.OutputBppPerState[k] == 0) {
- mode_lib->ms.support.LinkCapacitySupport = false;
- }
- }
-
- mode_lib->ms.support.P2IWith420 = false;
- mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
- mode_lib->ms.support.DSC422NativeNotSupported = false;
- mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
- mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
- mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
- mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
- mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
- mode_lib->ms.support.NotEnoughLanesForMSO = false;
-
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) {
- if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420 && mode_lib->ms.cache_display_cfg.timing.Interlace[k] == 1 && mode_lib->ms.ip.ptoi_supported == true)
- mode_lib->ms.support.P2IWith420 = true;
-
- if (mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] != 0)
- mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true;
- if ((mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable || mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary) && mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 && !mode_lib->ms.ip.dsc422_native_support)
- mode_lib->ms.support.DSC422NativeNotSupported = true;
-
- if (((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr2 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr3) &&
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_edp) ||
- ((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr10 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr13p5 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr20) &&
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp2p0))
- mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
-
- if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1) {
- if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_na)
- mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
- if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0)
- mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
- for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
- if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0)
- mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
- }
- }
-
- if ((mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) {
- if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k)
- mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
- for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
- if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j)
- mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
- }
- }
- if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_split_1to2 ||
- mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 || mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4))
- mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
-
- if ((mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 2) ||
- (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 4))
- mode_lib->ms.support.NotEnoughLanesForMSO = true;
- }
- }
-
- mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k &&
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl &&
- RequiredDTBCLK(
- mode_lib->ms.RequiresDSC[k],
- mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k],
- mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
- mode_lib->ms.OutputBppPerState[k],
- mode_lib->ms.support.NumberOfDSCSlices[k],
- mode_lib->ms.cache_display_cfg.timing.HTotal[k],
- mode_lib->ms.cache_display_cfg.timing.HActive[k],
- mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k],
- mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k]) > mode_lib->ms.state.dtbclk_mhz) {
- mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
- }
- }
-
- mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = true;
- mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = true;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1 && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi) {
- mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = false;
- }
- if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1 && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp ||
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi)) {
- mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = false;
- }
- }
-
- mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
- if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp ||
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 ||
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) {
- if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420) {
- s->DSCFormatFactor = 2;
- } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444) {
- s->DSCFormatFactor = 1;
- } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) {
- s->DSCFormatFactor = 2;
- } else {
- s->DSCFormatFactor = 1;
- }
-#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
-#endif
- if (mode_lib->ms.RequiresDSC[k] == true) {
- if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
- if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
-#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
- dml_print("DML::%s: k=%u, DSCCLKPerState = %f\n", __func__, k, mode_lib->ms.state.dscclk_mhz);
- dml_print("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
-#endif
- mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
- }
- } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
- if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
- mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
- }
- } else {
- if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
- mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
- }
- }
- }
- }
- }
- }
-#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: DSCCLKRequiredMoreThanSupported = %u\n", __func__, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
-#endif
-
- /* Check DSC Unit and Slices Support */
- mode_lib->ms.support.NotEnoughDSCUnits = false;
- mode_lib->ms.support.NotEnoughDSCSlices = false;
- s->TotalDSCUnitsRequired = 0;
- mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.RequiresDSC[k] == true) {
- if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
- if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 4 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
- mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
- s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 4;
- if (mode_lib->ms.support.NumberOfDSCSlices[k] > 16)
- mode_lib->ms.support.NotEnoughDSCSlices = true;
- } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
- if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 2 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
- mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
- s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 2;
- if (mode_lib->ms.support.NumberOfDSCSlices[k] > 8)
- mode_lib->ms.support.NotEnoughDSCSlices = true;
- } else {
- if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
- mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
- s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 1;
- if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4)
- mode_lib->ms.support.NotEnoughDSCSlices = true;
- }
- }
- }
- if (s->TotalDSCUnitsRequired > (dml_uint_t) mode_lib->ms.ip.num_dsc) {
- mode_lib->ms.support.NotEnoughDSCUnits = true;
- }
-
- /*DSC Delay per state*/
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- mode_lib->ms.DSCDelayPerState[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
- mode_lib->ms.ODMModePerState[k],
- mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
- mode_lib->ms.OutputBppPerState[k],
- mode_lib->ms.cache_display_cfg.timing.HActive[k],
- mode_lib->ms.cache_display_cfg.timing.HTotal[k],
- mode_lib->ms.support.NumberOfDSCSlices[k],
- mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
- mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
- mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
- mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
- }
-
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
- for (j = 0; j <= mode_lib->ms.num_active_planes - 1; j++) {
- if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m && mode_lib->ms.RequiresDSC[m] == true) {
- mode_lib->ms.DSCDelayPerState[k] = mode_lib->ms.DSCDelayPerState[m];
- }
- }
- }
- }
-
- //Calculate Swath, DET Configuration, DCFCLKDeepSleep
- //
- for (j = 0; j < 2; ++j) {
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
- mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
- mode_lib->ms.ODMModeThisState[k] = mode_lib->ms.ODMModePerState[k];
- }
-
- CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
- CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
- CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
- CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
- CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
- CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
- CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
- CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
- CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
- CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
- CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
- CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
- CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
- CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder;
- CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
- CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
- CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
- CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
- CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
- CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
- CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
- CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
- CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
- CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
- CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
- CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
- CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
- CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
- CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
- CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
- CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
- CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
- CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
- CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
- CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
- CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
- CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMModeThisState;
- CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
- CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
- CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
- CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
- CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
- CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
- CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
- CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
- CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState;
- CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state;
- CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state;
- CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthYThisState;
- CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthCThisState;
- CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState;
- CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState;
- CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByteThisState;
- CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState;
- CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState;
- CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabledThisState;
- CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2];
- CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1];
- CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByteThisState;
- CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
- CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport[j];
-
- CalculateSwathAndDETConfiguration(&mode_lib->scratch,
- CalculateSwathAndDETConfiguration_params);
-
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- mode_lib->ms.swath_width_luma_ub_all_states[j][k] = mode_lib->ms.swath_width_luma_ub_this_state[k];
- mode_lib->ms.swath_width_chroma_ub_all_states[j][k] = mode_lib->ms.swath_width_chroma_ub_this_state[k];
- mode_lib->ms.SwathWidthYAllStates[j][k] = mode_lib->ms.SwathWidthYThisState[k];
- mode_lib->ms.SwathWidthCAllStates[j][k] = mode_lib->ms.SwathWidthCThisState[k];
- mode_lib->ms.SwathHeightYAllStates[j][k] = mode_lib->ms.SwathHeightYThisState[k];
- mode_lib->ms.SwathHeightCAllStates[j][k] = mode_lib->ms.SwathHeightCThisState[k];
- mode_lib->ms.UnboundedRequestEnabledAllStates[j] = mode_lib->ms.UnboundedRequestEnabledThisState;
- mode_lib->ms.CompressedBufferSizeInkByteAllStates[j] = mode_lib->ms.CompressedBufferSizeInkByteThisState;
- mode_lib->ms.DETBufferSizeInKByteAllStates[j][k] = mode_lib->ms.DETBufferSizeInKByteThisState[k];
- mode_lib->ms.DETBufferSizeYAllStates[j][k] = mode_lib->ms.DETBufferSizeYThisState[k];
- mode_lib->ms.DETBufferSizeCAllStates[j][k] = mode_lib->ms.DETBufferSizeCThisState[k];
- }
- }
-
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- mode_lib->ms.cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
- }
-
- CalculateSurfaceSizeInMall(
- mode_lib->ms.num_active_planes,
- mode_lib->ms.soc.mall_allocated_for_dcn_mbytes,
- mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen,
- mode_lib->ms.cache_display_cfg.surface.DCCEnable,
- mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
- mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
- mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
- mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
- mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
- mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
- mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
- mode_lib->ms.BytePerPixelY,
- mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma,
- mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma,
- mode_lib->ms.BytePerPixelC,
- mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
- mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
- mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
- mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
- mode_lib->ms.Read256BlockWidthY,
- mode_lib->ms.Read256BlockWidthC,
- mode_lib->ms.Read256BlockHeightY,
- mode_lib->ms.Read256BlockHeightC,
- mode_lib->ms.MacroTileWidthY,
- mode_lib->ms.MacroTileWidthC,
- mode_lib->ms.MacroTileHeightY,
- mode_lib->ms.MacroTileHeightC,
-
- /* Output */
- mode_lib->ms.SurfaceSizeInMALL,
- &mode_lib->ms.support.ExceededMALLSize);
-
- for (j = 0; j < 2; j++) {
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k];
- mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k];
- mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k];
- mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k];
- mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k];
- mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k];
- mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k];
- mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k];
- mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k];
- mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
- mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
- }
-
- mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = 0;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
- mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = mode_lib->ms.TotalNumberOfDCCActiveDPP[j] + mode_lib->ms.NoOfDPP[j][k];
- }
- }
-
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- s->SurfParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
- s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[j][k];
- s->SurfParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
- s->SurfParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
- s->SurfParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
- s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
- s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
- s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
- s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
- s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
- s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
- s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
- s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
- s->SurfParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
- s->SurfParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
- s->SurfParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
- s->SurfParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
- s->SurfParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k];
- s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
- s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
- s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
- s->SurfParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k];
- s->SurfParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
- s->SurfParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k];
- s->SurfParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k];
- s->SurfParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k];
- s->SurfParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
- s->SurfParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
- s->SurfParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
- s->SurfParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
- s->SurfParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
- s->SurfParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
- s->SurfParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
- s->SurfParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
- s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
- s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightYThisState[k];
- s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightCThisState[k];
- }
-
- CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
- CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
- CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
- CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
- CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
- CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
- CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
- CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
- CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
- CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState;
- CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState;
- CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
- CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
- CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
- CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
- CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
- CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
- CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
- CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
- CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState;
- CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceededPerState;
- CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[0];
- CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[1];
- CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
- CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
- CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[2]; // VBA_DELTA
- CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[3]; // VBA_DELTA
- CalculateVMRowAndSwath_params->meta_req_width = s->dummy_integer_array[4];
- CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[5];
- CalculateVMRowAndSwath_params->meta_req_height = s->dummy_integer_array[6];
- CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[7];
- CalculateVMRowAndSwath_params->meta_row_width = s->dummy_integer_array[8];
- CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[9];
- CalculateVMRowAndSwath_params->meta_row_height = mode_lib->ms.meta_row_height;
- CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma;
- CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[10];
- CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
- CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[11];
- CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[12];
- CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[13];
- CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[14];
- CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[15];
- CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[16];
- CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[17];
- CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[18];
- CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[19];
- CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[20];
- CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesYThisState;
- CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesCThisState;
- CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
- CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
- CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY;
- CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC;
- CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bandwidth_this_state;
- CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bandwidth_this_state;
- CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRowThisState;
- CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState;
- CalculateVMRowAndSwath_params->MetaRowByte = mode_lib->ms.MetaRowBytesThisState;
- CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame_this_state;
- CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip_this_state;
- CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = s->dummy_boolean_array[0];
- CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
- CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[21];
-
- CalculateVMRowAndSwath(&mode_lib->scratch,
- CalculateVMRowAndSwath_params);
-
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- mode_lib->ms.PrefetchLinesY[j][k] = mode_lib->ms.PrefetchLinesYThisState[k];
- mode_lib->ms.PrefetchLinesC[j][k] = mode_lib->ms.PrefetchLinesCThisState[k];
- mode_lib->ms.meta_row_bandwidth[j][k] = mode_lib->ms.meta_row_bandwidth_this_state[k];
- mode_lib->ms.dpte_row_bandwidth[j][k] = mode_lib->ms.dpte_row_bandwidth_this_state[k];
- mode_lib->ms.DPTEBytesPerRow[j][k] = mode_lib->ms.DPTEBytesPerRowThisState[k];
- mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState[k];
- mode_lib->ms.MetaRowBytes[j][k] = mode_lib->ms.MetaRowBytesThisState[k];
- mode_lib->ms.use_one_row_for_frame[j][k] = mode_lib->ms.use_one_row_for_frame_this_state[k];
- mode_lib->ms.use_one_row_for_frame_flip[j][k] = mode_lib->ms.use_one_row_for_frame_flip_this_state[k];
- }
-
- mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = true;
-
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.PTEBufferSizeNotExceededPerState[k] == false)
- mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = false;
-#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: j=%u k=%u, PTEBufferSizeNotExceededPerState[%u] = %u\n", __func__, j, k, k, mode_lib->ms.PTEBufferSizeNotExceededPerState[k]);
-#endif
- }
-#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: PTEBufferSizeNotExceeded[%u] = %u\n", __func__, j, mode_lib->ms.support.PTEBufferSizeNotExceeded[j]);
-#endif
-
- mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = true;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (mode_lib->ms.DCCMetaBufferSizeNotExceededPerState[k] == false)
- mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = false;
- }
-
- mode_lib->ms.UrgLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us,
- mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
- mode_lib->ms.state.urgent_latency_vm_data_only_us,
- mode_lib->ms.soc.do_urgent_latency_adjustment,
- mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
- mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
- mode_lib->ms.state.fabricclk_mhz);
-
- /* Getter functions work at mp interface so copy the urgent latency to mp*/
- mode_lib->mp.UrgentLatency = mode_lib->ms.UrgLatency;
-
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- CalculateUrgentBurstFactor(
- mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
- mode_lib->ms.swath_width_luma_ub_this_state[k],
- mode_lib->ms.swath_width_chroma_ub_this_state[k],
- mode_lib->ms.SwathHeightYThisState[k],
- mode_lib->ms.SwathHeightCThisState[k],
- (dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
- mode_lib->ms.UrgLatency,
- mode_lib->ms.ip.cursor_buffer_size,
- mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
- mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
- mode_lib->ms.cache_display_cfg.plane.VRatio[k],
- mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
- mode_lib->ms.BytePerPixelInDETY[k],
- mode_lib->ms.BytePerPixelInDETC[k],
- mode_lib->ms.DETBufferSizeYThisState[k],
- mode_lib->ms.DETBufferSizeCThisState[k],
- /* Output */
- &mode_lib->ms.UrgentBurstFactorCursor[j][k],
- &mode_lib->ms.UrgentBurstFactorLuma[j][k],
- &mode_lib->ms.UrgentBurstFactorChroma[j][k],
- &mode_lib->ms.NotUrgentLatencyHiding[k]);
- }
-
- CalculateDCFCLKDeepSleep(
- mode_lib->ms.num_active_planes,
- mode_lib->ms.BytePerPixelY,
- mode_lib->ms.BytePerPixelC,
- mode_lib->ms.cache_display_cfg.plane.VRatio,
- mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
- mode_lib->ms.SwathWidthYThisState,
- mode_lib->ms.SwathWidthCThisState,
- mode_lib->ms.NoOfDPPThisState,
- mode_lib->ms.cache_display_cfg.plane.HRatio,
- mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
- mode_lib->ms.cache_display_cfg.timing.PixelClock,
- mode_lib->ms.PSCL_FACTOR,
- mode_lib->ms.PSCL_FACTOR_CHROMA,
- mode_lib->ms.RequiredDPPCLKThisState,
- mode_lib->ms.ReadBandwidthLuma,
- mode_lib->ms.ReadBandwidthChroma,
- mode_lib->ms.soc.return_bus_width_bytes,
-
- /* Output */
- &mode_lib->ms.ProjectedDCFCLKDeepSleep[j]);
- }
-
- //Calculate Return BW
- for (j = 0; j < 2; ++j) {
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
- if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
- mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay(
- mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
- mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
- mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
- mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
- mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
- mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
- mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
- mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / mode_lib->ms.RequiredDISPCLK[j];
- } else {
- mode_lib->ms.WritebackDelayTime[k] = 0.0;
- }
- for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
- if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[m] == k && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[m] == true) {
- mode_lib->ms.WritebackDelayTime[k] = dml_max(mode_lib->ms.WritebackDelayTime[k],
- mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay(
- mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[m],
- mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[m],
- mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[m],
- mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[m],
- mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[m],
- mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[m],
- mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[m],
- mode_lib->ms.cache_display_cfg.timing.HTotal[m]) / mode_lib->ms.RequiredDISPCLK[j]);
- }
- }
- }
- }
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
- if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m) {
- mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.WritebackDelayTime[m];
- }
- }
- }
- s->MaxVStartupAllPlanes[j] = 0; // max vstartup among all planes
-
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- s->MaximumVStartup[j][k] = CalculateMaxVStartup(k,
- mode_lib->ms.ip.ptoi_supported,
- mode_lib->ms.ip.vblank_nom_default_us,
- &mode_lib->ms.cache_display_cfg.timing,
- mode_lib->ms.WritebackDelayTime[k]);
-
- s->MaxVStartupAllPlanes[j] = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes[j], s->MaximumVStartup[j][k]));
-#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: k=%u, MaxVStartupAllPlanes[%u] = %u\n", __func__, k, j, s->MaxVStartupAllPlanes[j]);
- dml_print("DML::%s: k=%u, MaximumVStartup[%u][%u] = %u\n", __func__, k, j, k, s->MaximumVStartup[j][k]);
-#endif
- }
- }
-
- s->ReorderingBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
- mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
- mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
-
- for (j = 0; j < 2; ++j) {
- mode_lib->ms.DCFCLKState[j] = mode_lib->ms.state.dcfclk_mhz;
- }
-
- /* Immediate Flip and MALL parameters */
- s->ImmediateFlipRequiredFinal = false;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- s->ImmediateFlipRequiredFinal = s->ImmediateFlipRequiredFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
- }
-
- mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified ||
- ((mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_required) &&
- (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required));
- }
- mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified && s->ImmediateFlipRequiredFinal;
-
- mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
- mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == true || mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) &&
- (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame || mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe));
- }
-
- mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
- for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
- ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) &&
(mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)) || - ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_disable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)); - } - - s->FullFrameMALLPStateMethod = false; - s->SubViewportMALLPStateMethod = false; - s->PhantomPipeMALLPStateMethod = false; - s->SubViewportMALLRefreshGreaterThan120Hz = false; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame) - s->FullFrameMALLPStateMethod = true; - if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) { - s->SubViewportMALLPStateMethod = true; - if (mode_lib->ms.cache_display_cfg.timing.RefreshRate[k] > 120) - s->SubViewportMALLRefreshGreaterThan120Hz = true; - } - if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) - s->PhantomPipeMALLPStateMethod = true; - } - mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) - || (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz; - - if (mode_lib->ms.policy.UseMinimumRequiredDCFCLK == true) { - UseMinimumDCFCLK_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; - UseMinimumDCFCLK_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay; - UseMinimumDCFCLK_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal; - UseMinimumDCFCLK_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters; - UseMinimumDCFCLK_params->MaxPrefetchMode = dml_prefetch_support_stutter; - UseMinimumDCFCLK_params->DRAMClockChangeLatencyFinal = mode_lib->ms.state.dram_clock_change_latency_us; - UseMinimumDCFCLK_params->FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us; - UseMinimumDCFCLK_params->SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us; - UseMinimumDCFCLK_params->ReturnBusWidth = mode_lib->ms.soc.return_bus_width_bytes; - UseMinimumDCFCLK_params->RoundTripPingLatencyCycles = mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles; - UseMinimumDCFCLK_params->ReorderingBytes = s->ReorderingBytes; - UseMinimumDCFCLK_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes; - UseMinimumDCFCLK_params->MetaChunkSize = mode_lib->ms.ip.meta_chunk_size_kbytes; - UseMinimumDCFCLK_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; - UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; - UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; - UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; - UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; - 
UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal; - UseMinimumDCFCLK_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; - UseMinimumDCFCLK_params->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation = mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent; - UseMinimumDCFCLK_params->PercentOfIdealSDPPortBWReceivedAfterUrgLatency = mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent; - UseMinimumDCFCLK_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal; - UseMinimumDCFCLK_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive; - UseMinimumDCFCLK_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes; - UseMinimumDCFCLK_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired; - UseMinimumDCFCLK_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace; - UseMinimumDCFCLK_params->RequiredDPPCLKPerSurface = mode_lib->ms.RequiredDPPCLKPerSurface; - UseMinimumDCFCLK_params->RequiredDISPCLK = mode_lib->ms.RequiredDISPCLK; - UseMinimumDCFCLK_params->UrgLatency = mode_lib->ms.UrgLatency; - UseMinimumDCFCLK_params->NoOfDPP = mode_lib->ms.NoOfDPP; - UseMinimumDCFCLK_params->ProjectedDCFCLKDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep; - UseMinimumDCFCLK_params->MaximumVStartup = s->MaximumVStartup; - UseMinimumDCFCLK_params->TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP; - UseMinimumDCFCLK_params->TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP; - UseMinimumDCFCLK_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; - UseMinimumDCFCLK_params->PrefetchLinesY = mode_lib->ms.PrefetchLinesY; - UseMinimumDCFCLK_params->PrefetchLinesC = mode_lib->ms.PrefetchLinesC; - UseMinimumDCFCLK_params->swath_width_luma_ub_all_states = mode_lib->ms.swath_width_luma_ub_all_states; - UseMinimumDCFCLK_params->swath_width_chroma_ub_all_states = mode_lib->ms.swath_width_chroma_ub_all_states; - UseMinimumDCFCLK_params->BytePerPixelY = mode_lib->ms.BytePerPixelY; - UseMinimumDCFCLK_params->BytePerPixelC = mode_lib->ms.BytePerPixelC; - UseMinimumDCFCLK_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal; - UseMinimumDCFCLK_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock; - UseMinimumDCFCLK_params->PDEAndMetaPTEBytesPerFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame; - UseMinimumDCFCLK_params->DPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow; - UseMinimumDCFCLK_params->MetaRowBytes = mode_lib->ms.MetaRowBytes; - UseMinimumDCFCLK_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable; - UseMinimumDCFCLK_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma; - UseMinimumDCFCLK_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma; - UseMinimumDCFCLK_params->DCFCLKPerState = mode_lib->ms.state.dcfclk_mhz; - UseMinimumDCFCLK_params->DCFCLKState = mode_lib->ms.DCFCLKState; - - UseMinimumDCFCLK(&mode_lib->scratch, - UseMinimumDCFCLK_params); - - } // UseMinimumRequiredDCFCLK == true - - for (j = 0; j < 2; ++j) { - mode_lib->ms.ReturnBWPerState[j] = dml_get_return_bw_mbps(&mode_lib->ms.soc, mode_lib->ms.state.use_ideal_dram_bw_strobe, - mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.DCFCLKState[j], mode_lib->ms.state.fabricclk_mhz, - mode_lib->ms.state.dram_speed_mts); - mode_lib->ms.ReturnDRAMBWPerState[j] = dml_get_return_dram_bw_mbps(&mode_lib->ms.soc, 
mode_lib->ms.state.use_ideal_dram_bw_strobe, - mode_lib->ms.cache_display_cfg.plane.HostVMEnable, - mode_lib->ms.state.dram_speed_mts); - } - - //Re-ordering Buffer Support Check - for (j = 0; j < 2; ++j) { - if ((mode_lib->ms.ip.rob_buffer_size_kbytes - mode_lib->ms.ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->ms.ReturnBWPerState[j] > - (mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles + 32) / mode_lib->ms.DCFCLKState[j] + s->ReorderingBytes / mode_lib->ms.ReturnBWPerState[j]) { - mode_lib->ms.support.ROBSupport[j] = true; - } else { - mode_lib->ms.support.ROBSupport[j] = false; - } - dml_print("DML::%s: DEBUG ROBSupport[%u] = %u (%u)\n", __func__, j, mode_lib->ms.support.ROBSupport[j], __LINE__); - } - - //Vertical Active BW support check - s->MaxTotalVActiveRDBandwidth = 0; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - s->MaxTotalVActiveRDBandwidth = s->MaxTotalVActiveRDBandwidth + mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k]; - } - - for (j = 0; j < 2; ++j) { - mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j] = dml_min3(mode_lib->ms.soc.return_bus_width_bytes * mode_lib->ms.DCFCLKState[j] * mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent / 100.0, - mode_lib->ms.state.fabricclk_mhz * mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes * mode_lib->ms.soc.max_avg_fabric_bw_use_normal_percent / 100.0, - mode_lib->ms.state.dram_speed_mts * mode_lib->ms.soc.num_chans * mode_lib->ms.soc.dram_channel_width_bytes * - ((mode_lib->ms.state.use_ideal_dram_bw_strobe && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable) ? - mode_lib->ms.soc.max_avg_dram_bw_use_normal_strobe_percent : mode_lib->ms.soc.max_avg_dram_bw_use_normal_percent) / 100.0); - - if (s->MaxTotalVActiveRDBandwidth <= mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j]) { - mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = true; - } else { - mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = false; - } - } - - /* Prefetch Check */ - dml_prefetch_check(mode_lib); - - // End of Prefetch Check - dml_print("DML::%s: Done prefetch calculation\n", __func__); - - /*Cursor Support Check*/ - mode_lib->ms.support.CursorSupport = true; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] > 0.0) { - if (mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] == 64 && mode_lib->ms.ip.cursor_64bpp_support == false) { - mode_lib->ms.support.CursorSupport = false; - } - } - } - - /*Valid Pitch Check*/ - mode_lib->ms.support.PitchSupport = true; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - mode_lib->ms.support.AlignedYPitch[k] = dml_ceil( - dml_max(mode_lib->ms.cache_display_cfg.surface.PitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]), - mode_lib->ms.MacroTileWidthY[k]); - if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) { - mode_lib->ms.support.AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]), 64.0 * mode_lib->ms.Read256BlockWidthY[k]); - } else { - mode_lib->ms.support.AlignedDCCMetaPitchY[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k]; - } - if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 - && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 - && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 - && 
mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16 - && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe - && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8) { - mode_lib->ms.support.AlignedCPitch[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.PitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), mode_lib->ms.MacroTileWidthC[k]); - if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) { - mode_lib->ms.support.AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), 64.0 * mode_lib->ms.Read256BlockWidthC[k]); - } else { - mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]; - } - } else { - mode_lib->ms.support.AlignedCPitch[k] = mode_lib->ms.cache_display_cfg.surface.PitchC[k]; - mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]; - } - if (mode_lib->ms.support.AlignedYPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchY[k] || mode_lib->ms.support.AlignedCPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchC[k] || - mode_lib->ms.support.AlignedDCCMetaPitchY[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k] || mode_lib->ms.support.AlignedDCCMetaPitchC[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]) { - mode_lib->ms.support.PitchSupport = false; - } - } - - mode_lib->ms.support.ViewportExceedsSurface = false; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k]) { - mode_lib->ms.support.ViewportExceedsSurface = true; - if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 && - mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_8 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe) { - if (mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k]) { - mode_lib->ms.support.ViewportExceedsSurface = true; - } - } - } - } - - /*Mode Support, Voltage State and SOC Configuration*/ - for (j = 0; j < 2; j++) { // j iterator is for the combine mode off or on - dml_print("DML::%s: checking support for j=%u\n", __func__, j); - dml_print("DML::%s: state_idx=%0d max_state_idx=%0d\n", __func__, mode_lib->ms.state_idx, mode_lib->ms.max_state_idx); - - s->is_max_pwr_state = (mode_lib->ms.max_state_idx == mode_lib->ms.state_idx); - s->is_max_dram_pwr_state = (mode_lib->ms.max_state.dram_speed_mts == mode_lib->ms.state.dram_speed_mts); - - s->dram_clock_change_support = (!mode_lib->ms.policy.DRAMClockChangeRequirementFinal || - (s->is_max_dram_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported) || - mode_lib->ms.support.DRAMClockChangeSupport[j] != dml_dram_clock_change_unsupported); - s->f_clock_change_support = (!mode_lib->ms.policy.FCLKChangeRequirementFinal || - (s->is_max_pwr_state && 
mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported) || - mode_lib->ms.support.FCLKChangeSupport[j] != dml_fclock_change_unsupported); - - if (mode_lib->ms.support.ScaleRatioAndTapsSupport == true - && mode_lib->ms.support.SourceFormatPixelAndScanSupport == true - && mode_lib->ms.support.ViewportSizeSupport[j] == true - && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion - && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated - && !mode_lib->ms.support.BPPForMultistreamNotIndicated - && !mode_lib->ms.support.MultistreamWithHDMIOreDP - && !mode_lib->ms.support.ExceededMultistreamSlots - && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink - && !mode_lib->ms.support.NotEnoughLanesForMSO - && mode_lib->ms.support.LinkCapacitySupport == true - && !mode_lib->ms.support.P2IWith420 - && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP - && !mode_lib->ms.support.DSC422NativeNotSupported - && !mode_lib->ms.support.MPCCombineMethodIncompatible - && mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true - && mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true - && mode_lib->ms.support.NotEnoughDSCUnits == false - && !mode_lib->ms.support.NotEnoughDSCSlices - && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe - && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen - && mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false - && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport - && mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false - && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState - && !mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified - && mode_lib->ms.support.ROBSupport[j] == true - && mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true - && mode_lib->ms.support.TotalAvailablePipesSupport[j] == true - && mode_lib->ms.support.NumberOfOTGSupport == true - && mode_lib->ms.support.NumberOfHDMIFRLSupport == true - && mode_lib->ms.support.NumberOfDP2p0Support == true - && mode_lib->ms.support.EnoughWritebackUnits == true - && mode_lib->ms.support.WritebackLatencySupport == true - && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true - && mode_lib->ms.support.CursorSupport == true - && mode_lib->ms.support.PitchSupport == true - && mode_lib->ms.support.ViewportExceedsSurface == false - && mode_lib->ms.support.PrefetchSupported[j] == true - && mode_lib->ms.support.VActiveBandwithSupport[j] == true - && mode_lib->ms.support.DynamicMetadataSupported[j] == true - && mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true - && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true - && mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true - && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true - && mode_lib->ms.support.NonsupportedDSCInputBPC == false - && !mode_lib->ms.support.ExceededMALLSize - && ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j]) - && s->dram_clock_change_support == true - && s->f_clock_change_support == true - && (!mode_lib->ms.policy.USRRetrainingRequiredFinal || mode_lib->ms.support.USRRetrainingSupport[j])) { - dml_print("DML::%s: mode is supported\n", __func__); - mode_lib->ms.support.ModeSupport[j] = true; - } else { - dml_print("DML::%s: mode is NOT supported\n", __func__); - mode_lib->ms.support.ModeSupport[j] = false; - dml_print_mode_support(mode_lib, j); - } - 
} - - mode_lib->ms.support.MaximumMPCCombine = 0; - mode_lib->ms.support.ModeIsSupported = 0; - if (mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true) { // if the mode is supported by either no combine or mpccombine - mode_lib->ms.support.ModeIsSupported = mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true; - - // Determine if MPC combine is necessary, depends on if using MPC combine will help dram clock change or fclk change, etc. - if ((mode_lib->ms.support.ModeSupport[0] == false && mode_lib->ms.support.ModeSupport[1] == true) || s->MPCCombineMethodAsPossible || - (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.DRAMClockChangeRequirementFinal && - (((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_sub_vp) && - !(mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_sub_vp)) || - ((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr - || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_full_frame - || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_sub_vp || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_sub_vp - ) && - mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_unsupported))) - || (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.FCLKChangeRequirementFinal && - ((mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vactive && mode_lib->ms.support.FCLKChangeSupport[0] != dml_fclock_change_vactive) || - (mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vblank && mode_lib->ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)))) { - mode_lib->ms.support.MaximumMPCCombine = 1; - } else { - mode_lib->ms.support.MaximumMPCCombine = 0; - } - } - - // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0). 
- mode_lib->ms.support.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupportedForState[mode_lib->ms.support.MaximumMPCCombine]; // Consider flip support if max combine support imm flip - mode_lib->ms.support.UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational - mode_lib->ms.support.CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational - - dml_print("DML::%s: ModeIsSupported = %u\n", __func__, mode_lib->ms.support.ModeIsSupported); - dml_print("DML::%s: MaximumMPCCombine = %u\n", __func__, mode_lib->ms.support.MaximumMPCCombine); - dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); - dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, mode_lib->ms.support.UnboundedRequestEnabled); - dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, mode_lib->ms.support.CompressedBufferSizeInkByte); - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[mode_lib->ms.support.MaximumMPCCombine][k]; - mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[mode_lib->ms.support.MaximumMPCCombine][k]; - mode_lib->ms.SwathHeightY[k] = mode_lib->ms.SwathHeightYAllStates[mode_lib->ms.support.MaximumMPCCombine][k]; - mode_lib->ms.SwathHeightC[k] = mode_lib->ms.SwathHeightCAllStates[mode_lib->ms.support.MaximumMPCCombine][k]; - mode_lib->ms.DETBufferSizeInKByte[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[mode_lib->ms.support.MaximumMPCCombine][k]; - mode_lib->ms.DETBufferSizeY[k] = mode_lib->ms.DETBufferSizeYAllStates[mode_lib->ms.support.MaximumMPCCombine][k]; - mode_lib->ms.DETBufferSizeC[k] = mode_lib->ms.DETBufferSizeCAllStates[mode_lib->ms.support.MaximumMPCCombine][k]; - } - - mode_lib->ms.DRAMSpeed = mode_lib->ms.state.dram_speed_mts; - mode_lib->ms.FabricClock = mode_lib->ms.state.fabricclk_mhz; - mode_lib->ms.SOCCLK = mode_lib->ms.state.socclk_mhz; - mode_lib->ms.DCFCLK = mode_lib->ms.DCFCLKState[mode_lib->ms.support.MaximumMPCCombine]; - mode_lib->ms.ReturnBW = mode_lib->ms.ReturnBWPerState[mode_lib->ms.support.MaximumMPCCombine]; - mode_lib->ms.ReturnDRAMBW = mode_lib->ms.ReturnDRAMBWPerState[mode_lib->ms.support.MaximumMPCCombine]; - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) { - mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMModePerState[k]; - } else { - mode_lib->ms.support.ODMMode[k] = dml_odm_mode_bypass; - } - - mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k]; - mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k]; - mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBppPerState[k]; - mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputTypePerState[k]; - mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRatePerState[k]; - mode_lib->ms.support.SubViewportLinesNeededInMALL[k] = mode_lib->ms.SubViewportLinesNeededInMALL[k]; - } - - return mode_lib->ms.support.ModeIsSupported; -} // dml_core_mode_support - -/// @brief This function calculates some parameters thats are needed ahead of the mode programming function all -void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib) -{ - CalculateMaxDETAndMinCompressedBufferSize( - mode_lib->ms.ip.config_return_buffer_size_in_kbytes, - 
mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes, - mode_lib->ms.ip.rob_buffer_size_kbytes, - mode_lib->ms.ip.max_num_dpp, - mode_lib->ms.policy.NomDETInKByteOverrideEnable, - mode_lib->ms.policy.NomDETInKByteOverrideValue, - - /* Output */ - &mode_lib->ms.MaxTotalDETInKByte, - &mode_lib->ms.NomDETInKByte, - &mode_lib->ms.MinCompressedBufferSizeInKByte); - - PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported); - - mode_lib->ms.ReturnBW = dml_get_return_bw_mbps(&mode_lib->ms.soc, - mode_lib->ms.state.use_ideal_dram_bw_strobe, - mode_lib->ms.cache_display_cfg.plane.HostVMEnable, - mode_lib->ms.DCFCLK, - mode_lib->ms.FabricClock, - mode_lib->ms.DRAMSpeed); - dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW); - -} // dml_core_mode_support_partial - -/// @brief This is the mode programming function. It is assumed the display cfg is support at the given power state -void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg) -{ - struct dml_core_mode_programming_locals_st *s = &mode_lib->scratch.dml_core_mode_programming_locals; - struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; - struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; - struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; - struct CalculateStutterEfficiency_params_st *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params; - struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; - - struct mode_program_st *locals = &mode_lib->mp; - struct DmlPipe *myPipe; - dml_uint_t j = 0, k = 0; - dml_float_t TWait; - dml_bool_t isInterlaceTiming; - - mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg); - mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(&mode_lib->ms.cache_display_cfg); - dml_calc_pipe_plane_mapping(&mode_lib->ms.cache_display_cfg.hw, mode_lib->mp.pipe_plane); - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: --- START --- \n", __func__); - dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); - dml_print("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); -#endif - - s->DSCFormatFactor = 0; - - // Unlike dppclk and dispclk which can be calculated in mode_programming - // DCFCLK is calculated in mode_support (which is the state bbox dcfclk or min dcfclk if min dcfclk option is used in mode support calculation) - if (clk_cfg->dcfclk_option != dml_use_override_freq) - locals->Dcfclk = mode_lib->ms.DCFCLK; - else - locals->Dcfclk = clk_cfg->dcfclk_mhz; - -#ifdef __DML_VBA_DEBUG__ - dml_print_dml_policy(&mode_lib->ms.policy); - dml_print_soc_state_bounding_box(&mode_lib->ms.state); - dml_print_soc_bounding_box(&mode_lib->ms.soc); - dml_print_clk_cfg(clk_cfg); - - dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); - dml_print("DML::%s: Using DCFCLK = %f\n", __func__, locals->Dcfclk); - dml_print("DML::%s: Using SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK); -#endif - - locals->WritebackDISPCLK = 0.0; - locals->GlobalDPPCLK = 
0.0; - - // DISPCLK and DPPCLK Calculation - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) { - locals->WritebackDISPCLK = - dml_max( - locals->WritebackDISPCLK, - CalculateWriteBackDISPCLK( - mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k], - mode_lib->ms.cache_display_cfg.timing.PixelClock[k], - mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k], - mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k], - mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k], - mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k], - mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k], - mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k], - mode_lib->ms.cache_display_cfg.timing.HTotal[k], - mode_lib->ms.ip.writeback_line_buffer_buffer_size, - mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz)); - } - } - - locals->Dispclk_calculated = locals->WritebackDISPCLK; - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) { - locals->Dispclk_calculated = dml_max(locals->Dispclk_calculated, CalculateRequiredDispclk( - mode_lib->ms.cache_display_cfg.hw.ODMMode[k], - mode_lib->ms.cache_display_cfg.timing.PixelClock[k], - mode_lib->ms.soc.dcn_downspread_percent, - mode_lib->ms.ip.dispclk_ramp_margin_percent, - mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz, - mode_lib->ms.max_state.dispclk_mhz)); - } - } - if (clk_cfg->dispclk_option == dml_use_required_freq) - locals->Dispclk = locals->Dispclk_calculated; - else if (clk_cfg->dispclk_option == dml_use_override_freq) - locals->Dispclk = clk_cfg->dispclk_mhz; - else - locals->Dispclk = mode_lib->ms.state.dispclk_mhz; -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Using Dispclk = %f\n", __func__, locals->Dispclk); -#endif - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - CalculateSinglePipeDPPCLKAndSCLThroughput( - mode_lib->ms.cache_display_cfg.plane.HRatio[k], - mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k], - mode_lib->ms.cache_display_cfg.plane.VRatio[k], - mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], - mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk, - mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk, - mode_lib->ms.cache_display_cfg.timing.PixelClock[k], - mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], - mode_lib->ms.cache_display_cfg.plane.HTaps[k], - mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k], - mode_lib->ms.cache_display_cfg.plane.VTaps[k], - mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k], - - /* Output */ - &locals->PSCL_THROUGHPUT[k], - &locals->PSCL_THROUGHPUT_CHROMA[k], - &locals->DPPCLKUsingSingleDPP[k]); - } - - CalculateDPPCLK(mode_lib->ms.num_active_planes, - mode_lib->ms.soc.dcn_downspread_percent, - mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz, - locals->DPPCLKUsingSingleDPP, - mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, - /* Output */ - &locals->GlobalDPPCLK, - locals->Dppclk_calculated); - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (clk_cfg->dppclk_option[k] == dml_use_required_freq) - locals->Dppclk[k] = locals->Dppclk_calculated[k]; - else if (clk_cfg->dppclk_option[k] == dml_use_override_freq) - locals->Dppclk[k] = clk_cfg->dppclk_mhz[k]; - else - locals->Dppclk[k] = mode_lib->ms.state.dppclk_mhz; -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Using Dppclk[%0d] = %f\n", __func__, k, locals->Dppclk[k]); -#endif - } - - for (k = 0; k < 
mode_lib->ms.num_active_planes; ++k) { - CalculateBytePerPixelAndBlockSizes( - mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], - mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k], - - /* Output */ - &locals->BytePerPixelY[k], - &locals->BytePerPixelC[k], - &locals->BytePerPixelDETY[k], - &locals->BytePerPixelDETC[k], - &locals->BlockHeight256BytesY[k], - &locals->BlockHeight256BytesC[k], - &locals->BlockWidth256BytesY[k], - &locals->BlockWidth256BytesC[k], - &locals->BlockHeightY[k], - &locals->BlockHeightC[k], - &locals->BlockWidthY[k], - &locals->BlockWidthC[k]); - } - - - dml_print("DML::%s: %u\n", __func__, __LINE__); - CalculateSwathWidth( - false, // ForceSingleDPP - mode_lib->ms.num_active_planes, - mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat, - mode_lib->ms.cache_display_cfg.plane.SourceScan, - mode_lib->ms.cache_display_cfg.plane.ViewportStationary, - mode_lib->ms.cache_display_cfg.plane.ViewportWidth, - mode_lib->ms.cache_display_cfg.plane.ViewportHeight, - mode_lib->ms.cache_display_cfg.plane.ViewportXStart, - mode_lib->ms.cache_display_cfg.plane.ViewportYStart, - mode_lib->ms.cache_display_cfg.plane.ViewportXStartC, - mode_lib->ms.cache_display_cfg.plane.ViewportYStartC, - mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY, - mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC, - mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY, - mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC, - mode_lib->ms.cache_display_cfg.hw.ODMMode, - locals->BytePerPixelY, - locals->BytePerPixelC, - locals->BlockHeight256BytesY, - locals->BlockHeight256BytesC, - locals->BlockWidth256BytesY, - locals->BlockWidth256BytesC, - mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming, - mode_lib->ms.cache_display_cfg.timing.HActive, - mode_lib->ms.cache_display_cfg.plane.HRatio, - mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, - - /* Output */ - locals->SwathWidthSingleDPPY, - locals->SwathWidthSingleDPPC, - locals->SwathWidthY, - locals->SwathWidthC, - s->dummy_integer_array[0], // dml_uint_t MaximumSwathHeightY[] - s->dummy_integer_array[1], // dml_uint_t MaximumSwathHeightC[] - locals->swath_width_luma_ub, - locals->swath_width_chroma_ub); - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - locals->ReadBandwidthSurfaceLuma[k] = locals->SwathWidthSingleDPPY[k] * locals->BytePerPixelY[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k]; - locals->ReadBandwidthSurfaceChroma[k] = locals->SwathWidthSingleDPPC[k] * locals->BytePerPixelC[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k]; - dml_print("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]); - dml_print("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]); - } - - CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride; - CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; - CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes; - 
CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte; - CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte; - CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes; - CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; - CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; - CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting; - CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->Output = s->dummy_output_encoder_array; - CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = locals->ReadBandwidthSurfaceLuma; - CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = locals->ReadBandwidthSurfaceChroma; - CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0]; - CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1]; - CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan; - CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary; - CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat; - CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling; - CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth; - CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight; - CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart; - CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart; - CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC; - CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC; - CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY; - CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC; - CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY; - CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC; - CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = locals->BlockHeight256BytesY; - CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = locals->BlockHeight256BytesC; - CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = locals->BlockWidth256BytesY; - CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = locals->BlockWidth256BytesC; - CalculateSwathAndDETConfiguration_params->ODMMode = 
mode_lib->ms.cache_display_cfg.hw.ODMMode; - CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; - CalculateSwathAndDETConfiguration_params->BytePerPixY = locals->BytePerPixelY; - CalculateSwathAndDETConfiguration_params->BytePerPixC = locals->BytePerPixelC; - CalculateSwathAndDETConfiguration_params->BytePerPixDETY = locals->BytePerPixelDETY; - CalculateSwathAndDETConfiguration_params->BytePerPixDETC = locals->BytePerPixelDETC; - CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive; - CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio; - CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma; - CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface; - CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0]; - CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1]; - CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2]; - CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3]; - CalculateSwathAndDETConfiguration_params->SwathHeightY = locals->SwathHeightY; - CalculateSwathAndDETConfiguration_params->SwathHeightC = locals->SwathHeightC; - CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = locals->DETBufferSizeInKByte; - CalculateSwathAndDETConfiguration_params->DETBufferSizeY = locals->DETBufferSizeY; - CalculateSwathAndDETConfiguration_params->DETBufferSizeC = locals->DETBufferSizeC; - CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &locals->UnboundedRequestEnabled; - CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &locals->compbuf_reserved_space_64b; - CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &locals->compbuf_reserved_space_zs; - CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &locals->CompressedBufferSizeInkByte; - CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0]; - CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0]; - - // VBA_DELTA - // Calculate DET size, swath height here. 
In VBA, they are calculated in mode check stage - CalculateSwathAndDETConfiguration(&mode_lib->scratch, - CalculateSwathAndDETConfiguration_params); - - // DCFCLK Deep Sleep - CalculateDCFCLKDeepSleep( - mode_lib->ms.num_active_planes, - locals->BytePerPixelY, - locals->BytePerPixelC, - mode_lib->ms.cache_display_cfg.plane.VRatio, - mode_lib->ms.cache_display_cfg.plane.VRatioChroma, - locals->SwathWidthY, - locals->SwathWidthC, - mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, - mode_lib->ms.cache_display_cfg.plane.HRatio, - mode_lib->ms.cache_display_cfg.plane.HRatioChroma, - mode_lib->ms.cache_display_cfg.timing.PixelClock, - locals->PSCL_THROUGHPUT, - locals->PSCL_THROUGHPUT_CHROMA, - locals->Dppclk, - locals->ReadBandwidthSurfaceLuma, - locals->ReadBandwidthSurfaceChroma, - mode_lib->ms.soc.return_bus_width_bytes, - - /* Output */ - &locals->DCFCLKDeepSleep); - - // DSCCLK - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if ((mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] != k) || !mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k]) { - locals->DSCCLK_calculated[k] = 0.0; - } else { - if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420) - s->DSCFormatFactor = 2; - else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444) - s->DSCFormatFactor = 1; - else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) - s->DSCFormatFactor = 2; - else - s->DSCFormatFactor = 1; - if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_4to1) - locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100); - else if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_2to1) - locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100); - else - locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100); - } - } - - // DSC Delay - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - locals->DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k], - mode_lib->ms.cache_display_cfg.hw.ODMMode[k], - mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k], - mode_lib->ms.cache_display_cfg.output.OutputBpp[k], - mode_lib->ms.cache_display_cfg.timing.HActive[k], - mode_lib->ms.cache_display_cfg.timing.HTotal[k], - mode_lib->ms.cache_display_cfg.hw.NumberOfDSCSlices[k], - mode_lib->ms.cache_display_cfg.output.OutputFormat[k], - mode_lib->ms.cache_display_cfg.output.OutputEncoder[k], - mode_lib->ms.cache_display_cfg.timing.PixelClock[k], - mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]); - } - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) - for (j = 0; j < mode_lib->ms.num_active_planes; ++j) // NumberOfSurfaces - if (j != k && mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j && mode_lib->ms.cache_display_cfg.hw.DSCEnabled[j]) - locals->DSCDelay[k] = locals->DSCDelay[j]; - - // Prefetch - CalculateSurfaceSizeInMall( - mode_lib->ms.num_active_planes, - mode_lib->ms.soc.mall_allocated_for_dcn_mbytes, - mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen, - mode_lib->ms.cache_display_cfg.surface.DCCEnable, - 
mode_lib->ms.cache_display_cfg.plane.ViewportStationary, - mode_lib->ms.cache_display_cfg.plane.ViewportXStart, - mode_lib->ms.cache_display_cfg.plane.ViewportYStart, - mode_lib->ms.cache_display_cfg.plane.ViewportXStartC, - mode_lib->ms.cache_display_cfg.plane.ViewportYStartC, - mode_lib->ms.cache_display_cfg.plane.ViewportWidth, - mode_lib->ms.cache_display_cfg.plane.ViewportHeight, - locals->BytePerPixelY, - mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma, - mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma, - locals->BytePerPixelC, - mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY, - mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC, - mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY, - mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC, - locals->BlockWidth256BytesY, - locals->BlockWidth256BytesC, - locals->BlockHeight256BytesY, - locals->BlockHeight256BytesC, - locals->BlockWidthY, - locals->BlockWidthC, - locals->BlockHeightY, - locals->BlockHeightC, - - /* Output */ - locals->SurfaceSizeInTheMALL, - &s->dummy_boolean[0]); /* dml_bool_t *ExceededMALLSize */ - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - s->SurfaceParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k]; - s->SurfaceParameters[k].DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]; - s->SurfaceParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k]; - s->SurfaceParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k]; - s->SurfaceParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k]; - s->SurfaceParameters[k].BlockWidth256BytesY = locals->BlockWidth256BytesY[k]; - s->SurfaceParameters[k].BlockHeight256BytesY = locals->BlockHeight256BytesY[k]; - s->SurfaceParameters[k].BlockWidth256BytesC = locals->BlockWidth256BytesC[k]; - s->SurfaceParameters[k].BlockHeight256BytesC = locals->BlockHeight256BytesC[k]; - s->SurfaceParameters[k].BlockWidthY = locals->BlockWidthY[k]; - s->SurfaceParameters[k].BlockHeightY = locals->BlockHeightY[k]; - s->SurfaceParameters[k].BlockWidthC = locals->BlockWidthC[k]; - s->SurfaceParameters[k].BlockHeightC = locals->BlockHeightC[k]; - s->SurfaceParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k]; - s->SurfaceParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k]; - s->SurfaceParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k]; - s->SurfaceParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k]; - s->SurfaceParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k]; - s->SurfaceParameters[k].BytePerPixelY = locals->BytePerPixelY[k]; - s->SurfaceParameters[k].BytePerPixelC = locals->BytePerPixelC[k]; - s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; - s->SurfaceParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k]; - s->SurfaceParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k]; - s->SurfaceParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k]; - s->SurfaceParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]; - s->SurfaceParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k]; - s->SurfaceParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k]; - s->SurfaceParameters[k].PitchC = 
mode_lib->ms.cache_display_cfg.surface.PitchC[k];
-        s->SurfaceParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
-        s->SurfaceParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
-        s->SurfaceParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
-        s->SurfaceParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
-        s->SurfaceParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
-        s->SurfaceParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
-        s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
-        s->SurfaceParameters[k].SwathHeightY = locals->SwathHeightY[k];
-        s->SurfaceParameters[k].SwathHeightC = locals->SwathHeightC[k];
-    }
-
-    CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
-    CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
-    CalculateVMRowAndSwath_params->SurfaceSizeInMALL = locals->SurfaceSizeInTheMALL;
-    CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
-    CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
-    CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
-    CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
-    CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
-    CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
-    CalculateVMRowAndSwath_params->SwathWidthY = locals->SwathWidthY;
-    CalculateVMRowAndSwath_params->SwathWidthC = locals->SwathWidthC;
-    CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
-    CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
-    CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
-    CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
-    CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
-    CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
-    CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
-    CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
-    CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
-    CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
-    CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = locals->dpte_row_width_luma_ub;
-    CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = locals->dpte_row_width_chroma_ub;
-    CalculateVMRowAndSwath_params->dpte_row_height_luma = locals->dpte_row_height;
-    CalculateVMRowAndSwath_params->dpte_row_height_chroma = locals->dpte_row_height_chroma;
-    CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = locals->dpte_row_height_linear;
-    CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = locals->dpte_row_height_linear_chroma;
-    CalculateVMRowAndSwath_params->meta_req_width = locals->meta_req_width;
-    CalculateVMRowAndSwath_params->meta_req_width_chroma = locals->meta_req_width_chroma;
-    CalculateVMRowAndSwath_params->meta_req_height = locals->meta_req_height;
-    CalculateVMRowAndSwath_params->meta_req_height_chroma = locals->meta_req_height_chroma;
-    CalculateVMRowAndSwath_params->meta_row_width = locals->meta_row_width;
-    CalculateVMRowAndSwath_params->meta_row_width_chroma = locals->meta_row_width_chroma;
-    CalculateVMRowAndSwath_params->meta_row_height = locals->meta_row_height;
-    CalculateVMRowAndSwath_params->meta_row_height_chroma = locals->meta_row_height_chroma;
-    CalculateVMRowAndSwath_params->vm_group_bytes = locals->vm_group_bytes;
-    CalculateVMRowAndSwath_params->dpte_group_bytes = locals->dpte_group_bytes;
-    CalculateVMRowAndSwath_params->PixelPTEReqWidthY = locals->PixelPTEReqWidthY;
-    CalculateVMRowAndSwath_params->PixelPTEReqHeightY = locals->PixelPTEReqHeightY;
-    CalculateVMRowAndSwath_params->PTERequestSizeY = locals->PTERequestSizeY;
-    CalculateVMRowAndSwath_params->PixelPTEReqWidthC = locals->PixelPTEReqWidthC;
-    CalculateVMRowAndSwath_params->PixelPTEReqHeightC = locals->PixelPTEReqHeightC;
-    CalculateVMRowAndSwath_params->PTERequestSizeC = locals->PTERequestSizeC;
-    CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = locals->dpde0_bytes_per_frame_ub_l;
-    CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = locals->meta_pte_bytes_per_frame_ub_l;
-    CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = locals->dpde0_bytes_per_frame_ub_c;
-    CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = locals->meta_pte_bytes_per_frame_ub_c;
-    CalculateVMRowAndSwath_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY;
-    CalculateVMRowAndSwath_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC;
-    CalculateVMRowAndSwath_params->VInitPreFillY = locals->VInitPreFillY;
-    CalculateVMRowAndSwath_params->VInitPreFillC = locals->VInitPreFillC;
-    CalculateVMRowAndSwath_params->MaxNumSwathY = locals->MaxNumSwathY;
-    CalculateVMRowAndSwath_params->MaxNumSwathC = locals->MaxNumSwathC;
-    CalculateVMRowAndSwath_params->meta_row_bw = locals->meta_row_bw;
-    CalculateVMRowAndSwath_params->dpte_row_bw = locals->dpte_row_bw;
-    CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow;
-    CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame;
-    CalculateVMRowAndSwath_params->MetaRowByte = locals->MetaRowByte;
-    CalculateVMRowAndSwath_params->use_one_row_for_frame = locals->use_one_row_for_frame;
-    CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = locals->use_one_row_for_frame_flip;
-    CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = locals->UsesMALLForStaticScreen;
-    CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = locals->PTE_BUFFER_MODE;
-    CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = locals->BIGK_FRAGMENT_SIZE;
-
-    CalculateVMRowAndSwath(&mode_lib->scratch,
-        CalculateVMRowAndSwath_params);
-
-    s->ReorderBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(
-        mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
-        mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
-        mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
-
-    s->VMDataOnlyReturnBW = dml_get_return_bw_mbps_vm_only(&mode_lib->ms.soc,
-        mode_lib->ms.state.use_ideal_dram_bw_strobe,
-        mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
-        locals->Dcfclk,
-        mode_lib->ms.FabricClock,
-        mode_lib->ms.DRAMSpeed);
-
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: locals->Dcfclk = %f\n", __func__, locals->Dcfclk);
-    dml_print("DML::%s: mode_lib->ms.soc.return_bus_width_bytes = %u\n", __func__, mode_lib->ms.soc.return_bus_width_bytes);
-    dml_print("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
-    dml_print("DML::%s: mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes = %u\n", __func__, mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes);
-    dml_print("DML::%s: mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent = %f\n", __func__, mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent);
-    dml_print("DML::%s: mode_lib->ms.DRAMSpeed = %f\n", __func__, mode_lib->ms.DRAMSpeed);
-    dml_print("DML::%s: mode_lib->ms.soc.num_chans = %u\n", __func__, mode_lib->ms.soc.num_chans);
-    dml_print("DML::%s: mode_lib->ms.soc.dram_channel_width_bytes = %u\n", __func__, mode_lib->ms.soc.dram_channel_width_bytes);
-    dml_print("DML::%s: mode_lib->ms.state_idx = %u\n", __func__, mode_lib->ms.state_idx);
-    dml_print("DML::%s: mode_lib->ms.max_state_idx = %u\n", __func__, mode_lib->ms.max_state_idx);
-    dml_print("DML::%s: mode_lib->ms.state.use_ideal_dram_bw_strobe = %u\n", __func__, mode_lib->ms.state.use_ideal_dram_bw_strobe);
-    dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, s->VMDataOnlyReturnBW);
-    dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
-#endif
-
-    s->HostVMInefficiencyFactor = 1.0;
-    if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable)
-        s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBW / s->VMDataOnlyReturnBW;
-
-    s->TotalDCCActiveDPP = 0;
-    s->TotalActiveDPP = 0;
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-        s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
-        if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k])
-            s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
-    }
-
-    locals->UrgentExtraLatency = CalculateExtraLatency(
-        mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles,
-        s->ReorderBytes,
-        locals->Dcfclk,
-        s->TotalActiveDPP,
-        mode_lib->ms.ip.pixel_chunk_size_kbytes,
-        s->TotalDCCActiveDPP,
-        mode_lib->ms.ip.meta_chunk_size_kbytes,
-        mode_lib->ms.ReturnBW,
-        mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-        mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
-        mode_lib->ms.num_active_planes,
-        mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
-        locals->dpte_group_bytes,
-        s->HostVMInefficiencyFactor,
-        mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
-        mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
-
-    locals->TCalc = 24.0 / locals->DCFCLKDeepSleep;
-
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-        if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
-            if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
-                locals->WritebackDelay[k] =
-                    mode_lib->ms.state.writeback_latency_us
-                    + CalculateWriteBackDelay(
-                        mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
-                        mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
-                        mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
-                        mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
-                        mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
-                        mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
-                        mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
-                        mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk;
-            } else
-                locals->WritebackDelay[k] = 0;
-            for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
-                if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[j] == k
-                    && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[j] == true) {
-                    locals->WritebackDelay[k] =
-                        dml_max(
-                            locals->WritebackDelay[k],
-                            mode_lib->ms.state.writeback_latency_us
-                            + CalculateWriteBackDelay(
-                                mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[j],
-                                mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[j],
-                                mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[j],
-                                mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[j],
-                                mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[j],
-                                mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[j],
-                                mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[j],
-                                mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk);
-                }
-            }
-        }
-    }
-
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
-        for (j = 0; j < mode_lib->ms.num_active_planes; ++j)
-            if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j)
-                locals->WritebackDelay[k] = locals->WritebackDelay[j];
-
-    locals->UrgentLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us,
-        mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
-        mode_lib->ms.state.urgent_latency_vm_data_only_us,
-        mode_lib->ms.soc.do_urgent_latency_adjustment,
-        mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
-        mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
-        mode_lib->ms.FabricClock);
-
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-        CalculateUrgentBurstFactor(mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
-            locals->swath_width_luma_ub[k],
-            locals->swath_width_chroma_ub[k],
-            locals->SwathHeightY[k],
-            locals->SwathHeightC[k],
-            mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
-            locals->UrgentLatency,
-            mode_lib->ms.ip.cursor_buffer_size,
-            mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
-            mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
-            mode_lib->ms.cache_display_cfg.plane.VRatio[k],
-            mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
-            locals->BytePerPixelDETY[k],
-            locals->BytePerPixelDETC[k],
-            locals->DETBufferSizeY[k],
-            locals->DETBufferSizeC[k],
-
-            /* output */
-            &locals->UrgBurstFactorCursor[k],
-            &locals->UrgBurstFactorLuma[k],
-            &locals->UrgBurstFactorChroma[k],
-            &locals->NoUrgentLatencyHiding[k]);
-
-        locals->cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 /
-            ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
-    }
-
-    s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
-    s->MaxVStartupAllPlanes = 0;
-
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-        s->MaxVStartupLines[k] = CalculateMaxVStartup(k,
-            mode_lib->ms.ip.ptoi_supported,
-            mode_lib->ms.ip.vblank_nom_default_us,
-            &mode_lib->ms.cache_display_cfg.timing,
-            locals->WritebackDelay[k]);
-
-#ifdef __DML_VBA_DEBUG__
-        dml_print("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
-        dml_print("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, locals->WritebackDelay[k]);
-#endif
-    }
-
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
-        s->MaxVStartupAllPlanes = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes, s->MaxVStartupLines[k]));
-
-    s->ImmediateFlipRequirementFinal = false;
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-        s->ImmediateFlipRequirementFinal = s->ImmediateFlipRequirementFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
-    }
-#ifdef __DML_VBA_DEBUG__
-    dml_print("DML::%s: ImmediateFlipRequirementFinal = %u\n", __func__, s->ImmediateFlipRequirementFinal);
-#endif
-
-    // The prefetch scheduling should only be calculated once as per AllowForPStateChangeOrStutterInVBlank requirement
-    // If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving feature
-    // if possible, then will try to program for the best power saving features in order of difficulty (dram, fclk, stutter)
-    s->iteration = 0;
-    s->MaxTotalRDBandwidth = 0;
-    s->AllPrefetchModeTested = false;
-    for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-        CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]);
-        s->NextPrefetchMode[k] = s->MinPrefetchMode[k];
-    }
-
-    do {
-        s->MaxTotalRDBandwidthNoUrgentBurst = 0.0;
-        s->DestinationLineTimesForPrefetchLessThan2 = false;
-        s->VRatioPrefetchMoreThanMax = false;
-
-        dml_print("DML::%s: Start one iteration: VStartupLines = %u\n", __func__, s->VStartupLines);
-
-        s->AllPrefetchModeTested = true;
-        s->MaxTotalRDBandwidth = 0;
-        for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-            locals->PrefetchMode[k] = s->NextPrefetchMode[k];
-            TWait = CalculateTWait(
-                locals->PrefetchMode[k],
-                mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
-                mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
-                mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k],
-                mode_lib->ms.state.dram_clock_change_latency_us,
-                mode_lib->ms.state.fclk_change_latency_us,
-                locals->UrgentLatency,
-                mode_lib->ms.state.sr_enter_plus_exit_time_us);
-
-            myPipe = &s->myPipe;
-            myPipe->Dppclk = locals->Dppclk[k];
-            myPipe->Dispclk = locals->Dispclk;
-            myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
-            myPipe->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
-            myPipe->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
-            myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k];
-            myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
-            myPipe->BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
-            myPipe->BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
-            myPipe->BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
-            myPipe->BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
-            myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
-            myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k];
-            myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k];
-            myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
-            myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k];
-            myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
-            myPipe->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode[k];
-            myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
-            myPipe->BytePerPixelY = locals->BytePerPixelY[k];
-            myPipe->BytePerPixelC = locals->BytePerPixelC[k];
-            myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
-
-#ifdef __DML_VBA_DEBUG__
-            dml_print("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
-            dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]);
-            dml_print("DML::%s: PrefetchMode[k] = %u (Min=%u Max=%u)\n", __func__, locals->PrefetchMode[k], s->MinPrefetchMode[k], s->MaxPrefetchMode[k]);
-#endif
-
-            CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
-            CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor;
-            CalculatePrefetchSchedule_params->myPipe = myPipe;
-            CalculatePrefetchSchedule_params->DSCDelay = locals->DSCDelay[k];
-            CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
-            CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl;
-            CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only;
-            CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
-            CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal;
-            CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(locals->SwathWidthY[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
-            CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
-            CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
-            CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k]));
-            CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k];
-            CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
-            CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
-            CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
-            CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
-            CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
-            CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
-            CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
-            CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
-            CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
-            CalculatePrefetchSchedule_params->UrgentLatency = locals->UrgentLatency;
-            CalculatePrefetchSchedule_params->UrgentExtraLatency = locals->UrgentExtraLatency;
-            CalculatePrefetchSchedule_params->TCalc = locals->TCalc;
-            CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame[k];
-            CalculatePrefetchSchedule_params->MetaRowByte = locals->MetaRowByte[k];
-            CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow[k];
-            CalculatePrefetchSchedule_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY[k];
-            CalculatePrefetchSchedule_params->VInitPreFillY = locals->VInitPreFillY[k];
-            CalculatePrefetchSchedule_params->MaxNumSwathY = locals->MaxNumSwathY[k];
-            CalculatePrefetchSchedule_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC[k];
-            CalculatePrefetchSchedule_params->VInitPreFillC = locals->VInitPreFillC[k];
-            CalculatePrefetchSchedule_params->MaxNumSwathC = locals->MaxNumSwathC[k];
-            CalculatePrefetchSchedule_params->swath_width_luma_ub = locals->swath_width_luma_ub[k];
-            CalculatePrefetchSchedule_params->swath_width_chroma_ub = locals->swath_width_chroma_ub[k];
-            CalculatePrefetchSchedule_params->SwathHeightY = locals->SwathHeightY[k];
-            CalculatePrefetchSchedule_params->SwathHeightC = locals->SwathHeightC[k];
-            CalculatePrefetchSchedule_params->TWait = TWait;
-            CalculatePrefetchSchedule_params->DSTXAfterScaler = &locals->DSTXAfterScaler[k];
-            CalculatePrefetchSchedule_params->DSTYAfterScaler = &locals->DSTYAfterScaler[k];
-            CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &locals->DestinationLinesForPrefetch[k];
-            CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &locals->DestinationLinesToRequestVMInVBlank[k];
-            CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &locals->DestinationLinesToRequestRowInVBlank[k];
-            CalculatePrefetchSchedule_params->VRatioPrefetchY = &locals->VRatioPrefetchY[k];
-            CalculatePrefetchSchedule_params->VRatioPrefetchC = &locals->VRatioPrefetchC[k];
-            CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &locals->RequiredPrefetchPixDataBWLuma[k];
-            CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &locals->RequiredPrefetchPixDataBWChroma[k];
-            CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &locals->NotEnoughTimeForDynamicMetadata[k];
-            CalculatePrefetchSchedule_params->Tno_bw = &locals->Tno_bw[k];
-            CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &locals->prefetch_vmrow_bw[k];
-            CalculatePrefetchSchedule_params->Tdmdl_vm = &locals->Tdmdl_vm[k];
-            CalculatePrefetchSchedule_params->Tdmdl = &locals->Tdmdl[k];
-            CalculatePrefetchSchedule_params->TSetup = &locals->TSetup[k];
-            CalculatePrefetchSchedule_params->VUpdateOffsetPix = &locals->VUpdateOffsetPix[k];
-            CalculatePrefetchSchedule_params->VUpdateWidthPix = &locals->VUpdateWidthPix[k];
-            CalculatePrefetchSchedule_params->VReadyOffsetPix = &locals->VReadyOffsetPix[k];
-
-            locals->NoTimeToPrefetch[k] =
-                CalculatePrefetchSchedule(&mode_lib->scratch,
-                    CalculatePrefetchSchedule_params);
-
-#ifdef __DML_VBA_DEBUG__
-            dml_print("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
-#endif
-            locals->VStartup[k] = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k]));
-            locals->VStartupMin[k] = locals->VStartup[k];
-        }
-
-        for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-            CalculateUrgentBurstFactor(
-                mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
-                locals->swath_width_luma_ub[k],
-                locals->swath_width_chroma_ub[k],
-                locals->SwathHeightY[k],
-                locals->SwathHeightC[k],
-                mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
-                locals->UrgentLatency,
-                mode_lib->ms.ip.cursor_buffer_size,
-                mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
-                mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
-                locals->VRatioPrefetchY[k],
-                locals->VRatioPrefetchC[k],
-                locals->BytePerPixelDETY[k],
-                locals->BytePerPixelDETC[k],
-                locals->DETBufferSizeY[k],
-                locals->DETBufferSizeC[k],
-                /* Output */
-                &locals->UrgBurstFactorCursorPre[k],
-                &locals->UrgBurstFactorLumaPre[k],
-                &locals->UrgBurstFactorChromaPre[k],
-                &locals->NoUrgentLatencyHidingPre[k]);
-
-            locals->cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * locals->VRatioPrefetchY[k];
-
-#ifdef __DML_VBA_DEBUG__
-            dml_print("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
-            dml_print("DML::%s: k=%0u UrgBurstFactorLuma=%f\n", __func__, k, locals->UrgBurstFactorLuma[k]);
-            dml_print("DML::%s: k=%0u UrgBurstFactorChroma=%f\n", __func__, k, locals->UrgBurstFactorChroma[k]);
-            dml_print("DML::%s: k=%0u UrgBurstFactorLumaPre=%f\n", __func__, k, locals->UrgBurstFactorLumaPre[k]);
-            dml_print("DML::%s: k=%0u UrgBurstFactorChromaPre=%f\n", __func__, k, locals->UrgBurstFactorChromaPre[k]);
-
-            dml_print("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, locals->VRatioPrefetchY[k]);
-            dml_print("DML::%s: k=%0u VRatioY=%f\n", __func__, k, mode_lib->ms.cache_display_cfg.plane.VRatio[k]);
-
-            dml_print("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, locals->prefetch_vmrow_bw[k]);
-            dml_print("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
-            dml_print("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
-            dml_print("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, locals->cursor_bw[k]);
-            dml_print("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, locals->meta_row_bw[k]);
-            dml_print("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, locals->dpte_row_bw[k]);
-            dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWLuma[k]);
-            dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWChroma[k]);
-            dml_print("DML::%s: k=%0u cursor_bw_pre=%f\n", __func__, k, locals->cursor_bw_pre[k]);
-            dml_print("DML::%s: k=%0u MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, s->MaxTotalRDBandwidthNoUrgentBurst);
-#endif
-            if (locals->DestinationLinesForPrefetch[k] < 2)
-                s->DestinationLineTimesForPrefetchLessThan2 = true;
-
-            if (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
-                locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
-                ((s->VStartupLines < s->MaxVStartupLines[k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) &&
-                (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE__ || locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE__)))
-                s->VRatioPrefetchMoreThanMax = true;
-
-            //dml_bool_t DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = false;
-            //dml_bool_t DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = false;
-            //if (locals->DestinationLinesToRequestVMInVBlank[k] >= 32) {
-            //    DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = true;
-            //}
-
-            //if (locals->DestinationLinesToRequestRowInVBlank[k] >= 16) {
-            //    DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = true;
-            //}
-        }
-
-        locals->FractionOfUrgentBandwidth = s->MaxTotalRDBandwidthNoUrgentBurst / mode_lib->ms.ReturnBW;
-
-#ifdef __DML_VBA_DEBUG__
-        dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, s->MaxTotalRDBandwidthNoUrgentBurst);
-        dml_print("DML::%s: ReturnBW=%f \n", __func__, mode_lib->ms.ReturnBW);
-        dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, locals->FractionOfUrgentBandwidth);
-#endif
-
-        CalculatePrefetchBandwithSupport(
-            mode_lib->ms.num_active_planes,
-            mode_lib->ms.ReturnBW,
-            mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
-            locals->NoUrgentLatencyHidingPre,
-            locals->ReadBandwidthSurfaceLuma,
-            locals->ReadBandwidthSurfaceChroma,
-            locals->RequiredPrefetchPixDataBWLuma,
-            locals->RequiredPrefetchPixDataBWChroma,
-            locals->cursor_bw,
-            locals->meta_row_bw,
-            locals->dpte_row_bw,
-            locals->cursor_bw_pre,
-            locals->prefetch_vmrow_bw,
-            mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
-            locals->UrgBurstFactorLuma,
-            locals->UrgBurstFactorChroma,
-            locals->UrgBurstFactorCursor,
-            locals->UrgBurstFactorLumaPre,
-            locals->UrgBurstFactorChromaPre,
-            locals->UrgBurstFactorCursorPre,
-
-            /* output */
-            &s->MaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
-            &s->MaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
-            &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
-            &locals->PrefetchModeSupported);
-
-        for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
-            s->dummy_unit_vector[k] = 1.0;
-
-        CalculatePrefetchBandwithSupport(mode_lib->ms.num_active_planes,
-            mode_lib->ms.ReturnBW,
-            mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
-            locals->NoUrgentLatencyHidingPre,
-            locals->ReadBandwidthSurfaceLuma,
-            locals->ReadBandwidthSurfaceChroma,
-            locals->RequiredPrefetchPixDataBWLuma,
-            locals->RequiredPrefetchPixDataBWChroma,
-            locals->cursor_bw,
-            locals->meta_row_bw,
-            locals->dpte_row_bw,
-            locals->cursor_bw_pre,
-            locals->prefetch_vmrow_bw,
-            mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
-            s->dummy_unit_vector,
-            s->dummy_unit_vector,
-            s->dummy_unit_vector,
-            s->dummy_unit_vector,
-            s->dummy_unit_vector,
-            s->dummy_unit_vector,
-
-            /* output */
-            &s->NonUrgentMaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
-            &s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
-            &locals->FractionOfUrgentBandwidth,
-            &s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport
-
-
-
-        if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) {
-            dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
-            dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
-            locals->PrefetchModeSupported = false;
-        }
-
-        for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-            if (locals->NoTimeToPrefetch[k] == true || locals->NotEnoughTimeForDynamicMetadata[k]) {
-                dml_print("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
-                dml_print("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, locals->NotEnoughTimeForDynamicMetadata[k]);
-                locals->PrefetchModeSupported = false;
-            }
-        }
-
-
-        if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) {
-            locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
-                mode_lib->ms.num_active_planes,
-                mode_lib->ms.ReturnBW,
-                locals->ReadBandwidthSurfaceLuma,
-                locals->ReadBandwidthSurfaceChroma,
-                locals->RequiredPrefetchPixDataBWLuma,
-                locals->RequiredPrefetchPixDataBWChroma,
-                locals->cursor_bw,
-                locals->cursor_bw_pre,
-                mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
-                locals->UrgBurstFactorLuma,
-                locals->UrgBurstFactorChroma,
-                locals->UrgBurstFactorCursor,
-                locals->UrgBurstFactorLumaPre,
-                locals->UrgBurstFactorChromaPre,
-                locals->UrgBurstFactorCursorPre);
-
-            locals->TotImmediateFlipBytes = 0;
-            for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-                if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) {
-                    locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k]);
-                    if (locals->use_one_row_for_frame_flip[k]) {
-                        locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (2 * locals->PixelPTEBytesPerRow[k]);
-                    } else {
-                        locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * locals->PixelPTEBytesPerRow[k];
-                    }
-#ifdef __DML_VBA_DEBUG__
-                    dml_print("DML::%s: k = %u\n", __func__, k);
-                    dml_print("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
-                    dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, locals->PDEAndMetaPTEBytesFrame[k]);
-                    dml_print("DML::%s: MetaRowByte = %u\n", __func__, locals->MetaRowByte[k]);
-                    dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, locals->PixelPTEBytesPerRow[k]);
-                    dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, locals->TotImmediateFlipBytes);
-#endif
-                }
-            }
-            for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-                CalculateFlipSchedule(
-                    s->HostVMInefficiencyFactor,
-                    locals->UrgentExtraLatency,
-                    locals->UrgentLatency,
-                    mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
-                    mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
-                    mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
-                    mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-                    mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
-                    locals->PDEAndMetaPTEBytesFrame[k],
-                    locals->MetaRowByte[k],
-                    locals->PixelPTEBytesPerRow[k],
-                    locals->BandwidthAvailableForImmediateFlip,
-                    locals->TotImmediateFlipBytes,
-                    mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
-                    mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
-                    mode_lib->ms.cache_display_cfg.plane.VRatio[k],
-                    mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
-                    locals->Tno_bw[k],
-                    mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
-                    locals->dpte_row_height[k],
-                    locals->meta_row_height[k],
-                    locals->dpte_row_height_chroma[k],
-                    locals->meta_row_height_chroma[k],
-                    locals->use_one_row_for_frame_flip[k],
-
-                    /* Output */
-                    &locals->DestinationLinesToRequestVMInImmediateFlip[k],
-                    &locals->DestinationLinesToRequestRowInImmediateFlip[k],
-                    &locals->final_flip_bw[k],
-                    &locals->ImmediateFlipSupportedForPipe[k]);
-            }
-
-            CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
-                mode_lib->ms.ReturnBW,
-                mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
-                mode_lib->ms.policy.ImmediateFlipRequirement,
-                locals->final_flip_bw,
-                locals->ReadBandwidthSurfaceLuma,
-                locals->ReadBandwidthSurfaceChroma,
-                locals->RequiredPrefetchPixDataBWLuma,
-                locals->RequiredPrefetchPixDataBWChroma,
-                locals->cursor_bw,
-                locals->meta_row_bw,
-                locals->dpte_row_bw,
-                locals->cursor_bw_pre,
-                locals->prefetch_vmrow_bw,
-                mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
-                locals->UrgBurstFactorLuma,
-                locals->UrgBurstFactorChroma,
-                locals->UrgBurstFactorCursor,
-                locals->UrgBurstFactorLumaPre,
-                locals->UrgBurstFactorChromaPre,
-                locals->UrgBurstFactorCursorPre,
-
-                /* output */
-                &locals->total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
-                &locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
-                &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
-                &locals->ImmediateFlipSupported); // dml_bool_t *ImmediateFlipBandwidthSupport
-
-            CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
-                mode_lib->ms.ReturnBW,
-                mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
-                mode_lib->ms.policy.ImmediateFlipRequirement,
-                locals->final_flip_bw,
-                locals->ReadBandwidthSurfaceLuma,
-                locals->ReadBandwidthSurfaceChroma,
-                locals->RequiredPrefetchPixDataBWLuma,
-                locals->RequiredPrefetchPixDataBWChroma,
-                locals->cursor_bw,
-                locals->meta_row_bw,
-                locals->dpte_row_bw,
-                locals->cursor_bw_pre,
-                locals->prefetch_vmrow_bw,
-                mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
-                s->dummy_unit_vector,
-                s->dummy_unit_vector,
-                s->dummy_unit_vector,
-                s->dummy_unit_vector,
-                s->dummy_unit_vector,
-                s->dummy_unit_vector,
-
-                /* output */
-                &locals->non_urgent_total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
-                &locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
-                &locals->FractionOfUrgentBandwidthImmediateFlip, // dml_float_t *FractionOfUrgentBandwidth
-                &s->dummy_boolean[0]); // dml_bool_t *ImmediateFlipBandwidthSupport
-
-            for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-                if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required && locals->ImmediateFlipSupportedForPipe[k] == false) {
-                    locals->ImmediateFlipSupported = false;
-#ifdef __DML_VBA_DEBUG__
-                    dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
-#endif
-                }
-            }
-        } else {
-            locals->ImmediateFlipSupported = false;
-            locals->total_dcn_read_bw_with_flip = s->MaxTotalRDBandwidth;
-            locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->MaxTotalRDBandwidthNotIncludingMALLPrefetch;
-            locals->non_urgent_total_dcn_read_bw_with_flip = s->NonUrgentMaxTotalRDBandwidth;
-            locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch;
-        }
-
-        /* consider flip support is okay if the flip bw is ok or (when user does't require a iflip and there is no host vm) */
-        locals->PrefetchAndImmediateFlipSupported = (locals->PrefetchModeSupported == true &&
-            ((!mode_lib->ms.support.ImmediateFlipSupport && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable && !s->ImmediateFlipRequirementFinal) ||
-            locals->ImmediateFlipSupported)) ? true : false;
-
-#ifdef __DML_VBA_DEBUG__
-        dml_print("DML::%s: PrefetchModeSupported = %u\n", __func__, locals->PrefetchModeSupported);
-        for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
-            dml_print("DML::%s: ImmediateFlipRequirement[%u] = %u\n", __func__, k, mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
-        dml_print("DML::%s: HostVMEnable = %u\n", __func__, mode_lib->ms.cache_display_cfg.plane.HostVMEnable);
-        dml_print("DML::%s: ImmediateFlipSupport = %u (from mode_support)\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
-        dml_print("DML::%s: ImmediateFlipSupported = %u\n", __func__, locals->ImmediateFlipSupported);
-        dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, locals->PrefetchAndImmediateFlipSupported);
-#endif
-        dml_print("DML::%s: Done one iteration: VStartupLines=%u, MaxVStartupAllPlanes=%u\n", __func__, s->VStartupLines, s->MaxVStartupAllPlanes);
-
-        s->VStartupLines = s->VStartupLines + 1;
-
-        if (s->VStartupLines > s->MaxVStartupAllPlanes) {
-            s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
-
-            for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-                s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
-
-                if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
-                    s->AllPrefetchModeTested = false;
-                dml_print("DML::%s: VStartupLines=%u, reaches max vstartup, try next prefetch mode=%u\n", __func__, s->VStartupLines-1, s->AllPrefetchModeTested);
-            }
-        } else {
-            s->AllPrefetchModeTested = false;
-        }
-        s->iteration++;
-        if (s->iteration > 2500) {
-            dml_print("ERROR: DML::%s: Too many errors, exit now\n", __func__);
-            ASSERT(0);
-        }
-    } while (!(locals->PrefetchAndImmediateFlipSupported || s->AllPrefetchModeTested));
-
-    if (locals->PrefetchAndImmediateFlipSupported) {
-        dml_print("DML::%s: Good, Prefetch and flip scheduling solution found at VStartupLines=%u (MaxVStartupAllPlanes=%u)\n", __func__, s->VStartupLines-1, s->MaxVStartupAllPlanes);
-    } else {
-        dml_print("DML::%s: Bad, Prefetch and flip scheduling solution did NOT find solution! (MaxVStartupAllPlanes=%u)\n", __func__, s->MaxVStartupAllPlanes);
-    }
-
-    //Watermarks and NB P-State/DRAM Clock Change Support
-    {
-        s->mmSOCParameters.UrgentLatency = locals->UrgentLatency;
-        s->mmSOCParameters.ExtraLatency = locals->UrgentExtraLatency;
-        s->mmSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
-        s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us;
-        s->mmSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
-        s->mmSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us;
-        s->mmSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
-        s->mmSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
-        s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us;
-        s->mmSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us;
-        s->mmSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us;
-
-        CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal;
-        CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
-        CalculateWatermarks_params->PrefetchMode = locals->PrefetchMode;
-        CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
-        CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines;
-        CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits;
-        CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes;
-        CalculateWatermarks_params->DCFCLK = locals->Dcfclk;
-        CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBW;
-        CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
-        CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
-        CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
-        CalculateWatermarks_params->dpte_group_bytes = locals->dpte_group_bytes;
-        CalculateWatermarks_params->meta_row_height = locals->meta_row_height;
-        CalculateWatermarks_params->meta_row_height_chroma = locals->meta_row_height_chroma;
-        CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
-        CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes;
-        CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
-        CalculateWatermarks_params->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
-        CalculateWatermarks_params->DETBufferSizeY = locals->DETBufferSizeY;
-        CalculateWatermarks_params->DETBufferSizeC = locals->DETBufferSizeC;
-        CalculateWatermarks_params->SwathHeightY = locals->SwathHeightY;
-        CalculateWatermarks_params->SwathHeightC = locals->SwathHeightC;
-        CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel;
-        CalculateWatermarks_params->SwathWidthY = locals->SwathWidthY;
-        CalculateWatermarks_params->SwathWidthC = locals->SwathWidthC;
-        CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
-        CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
-        CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps;
-        CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma;
-        CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
-        CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma;
-        CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
-        CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
-        CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
-        CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
-        CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
-        CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
-        CalculateWatermarks_params->BytePerPixelDETY = locals->BytePerPixelDETY;
-        CalculateWatermarks_params->BytePerPixelDETC = locals->BytePerPixelDETC;
-        CalculateWatermarks_params->DSTXAfterScaler = locals->DSTXAfterScaler;
-        CalculateWatermarks_params->DSTYAfterScaler = locals->DSTYAfterScaler;
-        CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
-        CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat;
-        CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth;
-        CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight;
-        CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight;
-        CalculateWatermarks_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
-        CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
-
-        // Output
-        CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark
-        CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport;
-        CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
-        CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[]
-        CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport;
-        CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported
-        CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport;
-
-        CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
-            &mode_lib->scratch,
-            CalculateWatermarks_params);
-
-        /* Copy the calculated watermarks to mp.Watermark as the getter functions are
-         * implemented by the DML team to copy the calculated values from the mp.Watermark interface.
-         * &mode_lib->mp.Watermark and &locals->Watermark are the same address, memcpy may lead to
-         * unexpected behavior. memmove should be used.
-         */
-        memmove(&mode_lib->mp.Watermark, CalculateWatermarks_params->Watermark, sizeof(struct Watermarks));
-
-        for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-            if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
-                locals->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
-                    mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackDRAMClockChangeWatermark);
-                locals->WritebackAllowFCLKChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
-                    mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackFCLKChangeWatermark);
-            } else {
-                locals->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
-                locals->WritebackAllowFCLKChangeEndPosition[k] = 0;
-            }
-        }
-    }
-
-    //Display Pipeline Delivery Time in Prefetch, Groups
-    CalculatePixelDeliveryTimes(
-        mode_lib->ms.num_active_planes,
-        mode_lib->ms.cache_display_cfg.plane.VRatio,
-        mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
-        locals->VRatioPrefetchY,
-        locals->VRatioPrefetchC,
-        locals->swath_width_luma_ub,
-        locals->swath_width_chroma_ub,
-        mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
-        mode_lib->ms.cache_display_cfg.plane.HRatio,
-        mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
-        mode_lib->ms.cache_display_cfg.timing.PixelClock,
-        locals->PSCL_THROUGHPUT,
-        locals->PSCL_THROUGHPUT_CHROMA,
-        locals->Dppclk,
-        locals->BytePerPixelC,
-        mode_lib->ms.cache_display_cfg.plane.SourceScan,
-        mode_lib->ms.cache_display_cfg.plane.NumberOfCursors,
-        mode_lib->ms.cache_display_cfg.plane.CursorWidth,
-        mode_lib->ms.cache_display_cfg.plane.CursorBPP,
-        locals->BlockWidth256BytesY,
-        locals->BlockHeight256BytesY,
-        locals->BlockWidth256BytesC,
-        locals->BlockHeight256BytesC,
-
-        /* Output */
-        locals->DisplayPipeLineDeliveryTimeLuma,
-        locals->DisplayPipeLineDeliveryTimeChroma,
-        locals->DisplayPipeLineDeliveryTimeLumaPrefetch,
-        locals->DisplayPipeLineDeliveryTimeChromaPrefetch,
-        locals->DisplayPipeRequestDeliveryTimeLuma,
-        locals->DisplayPipeRequestDeliveryTimeChroma,
-        locals->DisplayPipeRequestDeliveryTimeLumaPrefetch,
-        locals->DisplayPipeRequestDeliveryTimeChromaPrefetch,
-        locals->CursorRequestDeliveryTime,
-        locals->CursorRequestDeliveryTimePrefetch);
-
-    CalculateMetaAndPTETimes(
-        locals->use_one_row_for_frame,
-        mode_lib->ms.num_active_planes,
-        mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-        mode_lib->ms.ip.meta_chunk_size_kbytes,
-        mode_lib->ms.ip.min_meta_chunk_size_bytes,
-        mode_lib->ms.cache_display_cfg.timing.HTotal,
-        mode_lib->ms.cache_display_cfg.plane.VRatio,
-        mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
-        locals->DestinationLinesToRequestRowInVBlank,
-        locals->DestinationLinesToRequestRowInImmediateFlip,
-        mode_lib->ms.cache_display_cfg.surface.DCCEnable,
-        mode_lib->ms.cache_display_cfg.timing.PixelClock,
-        locals->BytePerPixelY,
-        locals->BytePerPixelC,
-        mode_lib->ms.cache_display_cfg.plane.SourceScan,
-        locals->dpte_row_height,
-        locals->dpte_row_height_chroma,
-        locals->meta_row_width,
-        locals->meta_row_width_chroma,
-        locals->meta_row_height,
-        locals->meta_row_height_chroma,
-        locals->meta_req_width,
-        locals->meta_req_width_chroma,
-        locals->meta_req_height,
-        locals->meta_req_height_chroma,
-        locals->dpte_group_bytes,
-        locals->PTERequestSizeY,
-        locals->PTERequestSizeC,
-        locals->PixelPTEReqWidthY,
-        locals->PixelPTEReqHeightY,
-        locals->PixelPTEReqWidthC,
-        locals->PixelPTEReqHeightC,
-        locals->dpte_row_width_luma_ub,
-        locals->dpte_row_width_chroma_ub,
-
-        /* Output */
-        locals->DST_Y_PER_PTE_ROW_NOM_L,
-        locals->DST_Y_PER_PTE_ROW_NOM_C,
-        locals->DST_Y_PER_META_ROW_NOM_L,
-        locals->DST_Y_PER_META_ROW_NOM_C,
-        locals->TimePerMetaChunkNominal,
-        locals->TimePerChromaMetaChunkNominal,
-        locals->TimePerMetaChunkVBlank,
-        locals->TimePerChromaMetaChunkVBlank,
-        locals->TimePerMetaChunkFlip,
-        locals->TimePerChromaMetaChunkFlip,
-        locals->time_per_pte_group_nom_luma,
-        locals->time_per_pte_group_vblank_luma,
-        locals->time_per_pte_group_flip_luma,
-        locals->time_per_pte_group_nom_chroma,
-        locals->time_per_pte_group_vblank_chroma,
-        locals->time_per_pte_group_flip_chroma);
-
-    CalculateVMGroupAndRequestTimes(
-        mode_lib->ms.num_active_planes,
-        mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-        mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
-        mode_lib->ms.cache_display_cfg.timing.HTotal,
-        locals->BytePerPixelC,
-        locals->DestinationLinesToRequestVMInVBlank,
-        locals->DestinationLinesToRequestVMInImmediateFlip,
-        mode_lib->ms.cache_display_cfg.surface.DCCEnable,
-        mode_lib->ms.cache_display_cfg.timing.PixelClock,
-        locals->dpte_row_width_luma_ub,
-        locals->dpte_row_width_chroma_ub,
-        locals->vm_group_bytes,
-        locals->dpde0_bytes_per_frame_ub_l,
-        locals->dpde0_bytes_per_frame_ub_c,
-        locals->meta_pte_bytes_per_frame_ub_l,
-        locals->meta_pte_bytes_per_frame_ub_c,
-
-        /* Output */
-        locals->TimePerVMGroupVBlank,
-        locals->TimePerVMGroupFlip,
-        locals->TimePerVMRequestVBlank,
-        locals->TimePerVMRequestFlip);
-
-    // Min TTUVBlank
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-        if (locals->PrefetchMode[k] == 0) {
-            locals->MinTTUVBlank[k] = dml_max4(
-                locals->Watermark.DRAMClockChangeWatermark,
-                locals->Watermark.FCLKChangeWatermark,
-                locals->Watermark.StutterEnterPlusExitWatermark,
-                locals->Watermark.UrgentWatermark);
-        } else if (locals->PrefetchMode[k] == 1) {
-            locals->MinTTUVBlank[k] = dml_max3(
-                locals->Watermark.FCLKChangeWatermark,
-                locals->Watermark.StutterEnterPlusExitWatermark,
-                locals->Watermark.UrgentWatermark);
-        } else if (locals->PrefetchMode[k] == 2) {
-            locals->MinTTUVBlank[k] = dml_max(
-                locals->Watermark.StutterEnterPlusExitWatermark,
-                locals->Watermark.UrgentWatermark);
-        } else {
-            locals->MinTTUVBlank[k] = locals->Watermark.UrgentWatermark;
-        }
-        if (!mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k])
-            locals->MinTTUVBlank[k] = locals->TCalc + locals->MinTTUVBlank[k];
-    }
-
-    // DCC Configuration
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-#ifdef __DML_VBA_DEBUG__
-        dml_print("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
-#endif
-        CalculateDCCConfiguration(
-            mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
-            mode_lib->ms.policy.DCCProgrammingAssumesScanDirectionUnknownFinal,
-            mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
-            mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k],
-            mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k],
-            mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k],
-            mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k],
-            mode_lib->ms.NomDETInKByte,
-            locals->BlockHeight256BytesY[k],
-            locals->BlockHeight256BytesC[k],
-            mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
-            locals->BytePerPixelY[k],
-            locals->BytePerPixelC[k],
-            locals->BytePerPixelDETY[k],
-            locals->BytePerPixelDETC[k],
-            mode_lib->ms.cache_display_cfg.plane.SourceScan[k],
-            /* Output */
-            &locals->DCCYMaxUncompressedBlock[k],
-            &locals->DCCCMaxUncompressedBlock[k],
-            &locals->DCCYMaxCompressedBlock[k],
-            &locals->DCCCMaxCompressedBlock[k],
-            &locals->DCCYIndependentBlock[k],
-            &locals->DCCCIndependentBlock[k]);
-    }
-
-    // VStartup Adjustment
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-        s->Tvstartup_margin = (s->MaxVStartupLines[k] - locals->VStartupMin[k]) * mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
-#ifdef __DML_VBA_DEBUG__
-        dml_print("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, locals->MinTTUVBlank[k]);
-#endif
-
-        locals->MinTTUVBlank[k] = locals->MinTTUVBlank[k] + s->Tvstartup_margin;
-
-#ifdef __DML_VBA_DEBUG__
-        dml_print("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
-        dml_print("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
-        dml_print("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, locals->MinTTUVBlank[k]);
-#endif
-
-        locals->Tdmdl[k] = locals->Tdmdl[k] + s->Tvstartup_margin;
-        if (mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k] && mode_lib->ms.ip.dynamic_metadata_vm_enabled) {
-            locals->Tdmdl_vm[k] = locals->Tdmdl_vm[k] + s->Tvstartup_margin;
-        }
-
-        isInterlaceTiming = (mode_lib->ms.cache_display_cfg.timing.Interlace[k] && !mode_lib->ms.ip.ptoi_supported);
-
-        // The actual positioning of the vstartup
-        locals->VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
-
-        s->dlg_vblank_start = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) :
-            mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]);
-        s->LSetup = dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0;
-        s->blank_lines_remaining = (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]) - locals->VStartup[k];
-
-        if (s->blank_lines_remaining < 0) {
-            dml_print("ERROR: Vstartup is larger than vblank!?\n");
-            s->blank_lines_remaining = 0;
-            ASSERT(0);
-        }
-        locals->MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
-
-        // debug only
-        s->old_MIN_DST_Y_NEXT_START = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) :
-            mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k])
-            + dml_max(1.0, dml_ceil((dml_float_t) locals->WritebackDelay[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0))
-            + dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0;
-
-        if (((locals->VUpdateOffsetPix[k] + locals->VUpdateWidthPix[k] + locals->VReadyOffsetPix[k]) / (double) mode_lib->ms.cache_display_cfg.timing.HTotal[k]) <=
-            (isInterlaceTiming ?
-            dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]) / 2.0, 1.0) :
-            (int) (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]))) {
-            locals->VREADY_AT_OR_AFTER_VSYNC[k] = true;
-        } else {
-            locals->VREADY_AT_OR_AFTER_VSYNC[k] = false;
-        }
-#ifdef __DML_VBA_DEBUG__
-        dml_print("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, locals->VStartup[k]);
-        dml_print("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, locals->VStartupMin[k]);
-        dml_print("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, locals->VUpdateOffsetPix[k]);
-        dml_print("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, locals->VUpdateWidthPix[k]);
-        dml_print("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, locals->VReadyOffsetPix[k]);
-        dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.HTotal[k]);
-        dml_print("DML::%s: k=%u, VTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VTotal[k]);
-        dml_print("DML::%s: k=%u, VActive = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VActive[k]);
-        dml_print("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]);
-        dml_print("DML::%s: k=%u, TSetup = %f\n", __func__, k, locals->TSetup[k]);
-        dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, locals->MIN_DST_Y_NEXT_START[k]);
-        dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f (old)\n", __func__, k, s->old_MIN_DST_Y_NEXT_START);
-        dml_print("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, locals->VREADY_AT_OR_AFTER_VSYNC[k]);
-#endif
-    }
-
-    //Maximum Bandwidth Used
-    s->TotalWRBandwidth = 0;
-    s->WRBandwidth = 0;
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-        if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_32) {
-            s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] /
-                (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4;
-        } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
-            s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] /
-                (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8;
-        }
-        s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth;
-    }
-
-    locals->TotalDataReadBandwidth = 0;
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-        locals->TotalDataReadBandwidth = locals->TotalDataReadBandwidth + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k];
-
-#ifdef __DML_VBA_DEBUG__
-        dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, locals->TotalDataReadBandwidth);
-        dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
-        dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
-#endif
-    }
-
-    locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = 0;
-    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-        if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
-            locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = locals->TotalDataReadBandwidthNotIncludingMALLPrefetch
-                + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k];
-        }
-    }
-
-    CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
-    CalculateStutterEfficiency_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
-    CalculateStutterEfficiency_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
-    CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ms.ip.meta_fifo_size_in_kentries;
-    CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ms.ip.zero_size_buffer_entries;
-    CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
-    CalculateStutterEfficiency_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
-    CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
-    CalculateStutterEfficiency_params->TotalDataReadBandwidth = locals->TotalDataReadBandwidth;
-    CalculateStutterEfficiency_params->DCFCLK = locals->Dcfclk;
-    CalculateStutterEfficiency_params->ReturnBW = mode_lib->ms.ReturnBW;
-    CalculateStutterEfficiency_params->CompbufReservedSpace64B = locals->compbuf_reserved_space_64b;
-    CalculateStutterEfficiency_params->CompbufReservedSpaceZs = locals->compbuf_reserved_space_zs;
-    CalculateStutterEfficiency_params->SRExitTime = mode_lib->ms.state.sr_exit_time_us;
-    CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
-    CalculateStutterEfficiency_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
-    CalculateStutterEfficiency_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
-    CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = locals->Watermark.StutterEnterPlusExitWatermark;
-    CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = locals->Watermark.Z8StutterEnterPlusExitWatermark;
-    CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
-    CalculateStutterEfficiency_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace;
-    CalculateStutterEfficiency_params->MinTTUVBlank = locals->MinTTUVBlank;
-    CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
-    CalculateStutterEfficiency_params->DETBufferSizeY = locals->DETBufferSizeY;
-    CalculateStutterEfficiency_params->BytePerPixelY = locals->BytePerPixelY;
-    CalculateStutterEfficiency_params->BytePerPixelDETY = locals->BytePerPixelDETY;
-    CalculateStutterEfficiency_params->SwathWidthY = locals->SwathWidthY;
-    CalculateStutterEfficiency_params->SwathHeightY = locals->SwathHeightY;
-    CalculateStutterEfficiency_params->SwathHeightC = locals->SwathHeightC;
-    CalculateStutterEfficiency_params->NetDCCRateLuma = mode_lib->ms.cache_display_cfg.surface.DCCRateLuma;
-    CalculateStutterEfficiency_params->NetDCCRateChroma = mode_lib->ms.cache_display_cfg.surface.DCCRateChroma;
-    CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsLuma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma;
-    CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsChroma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma;
-    CalculateStutterEfficiency_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
-    CalculateStutterEfficiency_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
-    CalculateStutterEfficiency_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
-    CalculateStutterEfficiency_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
-    CalculateStutterEfficiency_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
-    CalculateStutterEfficiency_params->BlockHeight256BytesY = locals->BlockHeight256BytesY;
-    CalculateStutterEfficiency_params->BlockWidth256BytesY = locals->BlockWidth256BytesY;
-    CalculateStutterEfficiency_params->BlockHeight256BytesC = locals->BlockHeight256BytesC;
-    CalculateStutterEfficiency_params->BlockWidth256BytesC = locals->BlockWidth256BytesC;
-    CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = locals->DCCYMaxUncompressedBlock;
-    CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = locals->DCCCMaxUncompressedBlock;
-    CalculateStutterEfficiency_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
-    CalculateStutterEfficiency_params->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable;
-    CalculateStutterEfficiency_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
-    CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = locals->ReadBandwidthSurfaceLuma;
-    CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = locals->ReadBandwidthSurfaceChroma;
-    CalculateStutterEfficiency_params->meta_row_bw = locals->meta_row_bw;
-    CalculateStutterEfficiency_params->dpte_row_bw = locals->dpte_row_bw;
-    CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &locals->StutterEfficiencyNotIncludingVBlank;
-    CalculateStutterEfficiency_params->StutterEfficiency = &locals->StutterEfficiency;
-    CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &locals->NumberOfStutterBurstsPerFrame;
-    CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &locals->Z8StutterEfficiencyNotIncludingVBlank;
-    CalculateStutterEfficiency_params->Z8StutterEfficiency = &locals->Z8StutterEfficiency;
-    CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &locals->Z8NumberOfStutterBurstsPerFrame;
-    CalculateStutterEfficiency_params->StutterPeriod = &locals->StutterPeriod;
-    CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &locals->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
-
-    // Stutter Efficiency
-    CalculateStutterEfficiency(&mode_lib->scratch,
-        CalculateStutterEfficiency_params);
-
-#ifdef __DML_VBA_ALLOW_DELTA__
-    {
-        dml_float_t dummy_single[2];
-        dml_uint_t dummy_integer[1];
-        dml_bool_t dummy_boolean[1];
-
-        // Calculate z8 stutter eff assuming 0 reserved space
-        CalculateStutterEfficiency(
-            locals->CompressedBufferSizeInkByte,
-            mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
-            locals->UnboundedRequestEnabled,
-            mode_lib->ms.ip.meta_fifo_size_in_kentries,
-            mode_lib->ms.ip.zero_size_buffer_entries,
-            mode_lib->ms.ip.pixel_chunk_size_kbytes,
-            mode_lib->ms.num_active_planes,
-            mode_lib->ms.ip.rob_buffer_size_kbytes,
-            locals->TotalDataReadBandwidth,
-            locals->Dcfclk,
-            mode_lib->ms.ReturnBW,
-            0, //mode_lib->ms.ip.compbuf_reserved_space_64b,
-            0, 
//mode_lib->ms.ip.compbuf_reserved_space_zs, - mode_lib->ms.state.sr_exit_time_us, - mode_lib->ms.state.sr_exit_z8_time_us, - mode_lib->ms.policy.SynchronizeTimingsFinal, - mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming, - locals->Watermark.StutterEnterPlusExitWatermark, - locals->Watermark.Z8StutterEnterPlusExitWatermark, - mode_lib->ms.ip.ptoi_supported, - mode_lib->ms.cache_display_cfg.timing.Interlace, - locals->MinTTUVBlank, - mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, - mode_lib->ms.DETBufferSizeY, - locals->BytePerPixelY, - locals->BytePerPixelDETY, - locals->SwathWidthY, - mode_lib->ms.SwathHeightY, - mode_lib->ms.SwathHeightC, - mode_lib->ms.cache_display_cfg.surface.DCCRateLuma, - mode_lib->ms.cache_display_cfg.surface.DCCRateChroma, - mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma, - mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma, - mode_lib->ms.cache_display_cfg.timing.HTotal, - mode_lib->ms.cache_display_cfg.timing.VTotal, - mode_lib->ms.cache_display_cfg.timing.PixelClock, - mode_lib->ms.cache_display_cfg.plane.VRatio, - mode_lib->ms.cache_display_cfg.plane.SourceScan, - locals->BlockHeight256BytesY, - locals->BlockWidth256BytesY, - locals->BlockHeight256BytesC, - locals->BlockWidth256BytesC, - locals->DCCYMaxUncompressedBlock, - locals->DCCCMaxUncompressedBlock, - mode_lib->ms.cache_display_cfg.timing.VActive, - mode_lib->ms.cache_display_cfg.surface.DCCEnable, - mode_lib->ms.cache_display_cfg.writeback.WritebackEnable, - locals->ReadBandwidthSurfaceLuma, - locals->ReadBandwidthSurfaceChroma, - locals->meta_row_bw, - locals->dpte_row_bw, - - /* Output */ - &dummy_single[0], - &dummy_single[1], - &dummy_integer[0], - &locals->Z8StutterEfficiencyNotIncludingVBlankBestCase, - &locals->Z8StutterEfficiencyBestCase, - &locals->Z8NumberOfStutterBurstsPerFrameBestCase, - &locals->StutterPeriodBestCase, - &dummy_boolean[0]); - } -#else - locals->Z8StutterEfficiencyNotIncludingVBlankBestCase = locals->Z8StutterEfficiencyNotIncludingVBlank; - locals->Z8StutterEfficiencyBestCase = locals->Z8StutterEfficiency; - locals->Z8NumberOfStutterBurstsPerFrameBestCase = locals->Z8NumberOfStutterBurstsPerFrame; - locals->StutterPeriodBestCase = locals->StutterPeriod; -#endif - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: --- END --- \n", __func__); -#endif -} // dml_core_mode_programming - -/// Function: dml_core_get_row_heights -/// @brief Get row height for DPTE and META with minimal input. 
-void dml_core_get_row_heights( - dml_uint_t *dpte_row_height, - dml_uint_t *meta_row_height, - const struct display_mode_lib_st *mode_lib, - dml_bool_t is_plane1, - enum dml_source_format_class SourcePixelFormat, - enum dml_swizzle_mode SurfaceTiling, - enum dml_rotation_angle ScanDirection, - dml_uint_t pitch, - dml_uint_t GPUVMMinPageSizeKBytes) -{ - dml_uint_t BytePerPixelY; - dml_uint_t BytePerPixelC; - dml_float_t BytePerPixelInDETY; - dml_float_t BytePerPixelInDETC; - dml_uint_t BlockHeight256BytesY; - dml_uint_t BlockHeight256BytesC; - dml_uint_t BlockWidth256BytesY; - dml_uint_t BlockWidth256BytesC; - dml_uint_t MacroTileWidthY; - dml_uint_t MacroTileWidthC; - dml_uint_t MacroTileHeightY; - dml_uint_t MacroTileHeightC; - - dml_uint_t BytePerPixel; - dml_uint_t BlockHeight256Bytes; - dml_uint_t BlockWidth256Bytes; - dml_uint_t MacroTileWidth; - dml_uint_t MacroTileHeight; - dml_uint_t PTEBufferSizeInRequests; - - dml_uint_t dummy_integer[16]; - - CalculateBytePerPixelAndBlockSizes( - SourcePixelFormat, - SurfaceTiling, - - /* Output */ - &BytePerPixelY, - &BytePerPixelC, - &BytePerPixelInDETY, - &BytePerPixelInDETC, - &BlockHeight256BytesY, - &BlockHeight256BytesC, - &BlockWidth256BytesY, - &BlockWidth256BytesC, - &MacroTileHeightY, - &MacroTileHeightC, - &MacroTileWidthY, - &MacroTileWidthC); - - BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY; - BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY; - BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY; - MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY; - MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY; - PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; -#ifdef __DML_RQ_DLG_CALC_DEBUG__ - dml_print("DML_DLG: %s: is_plane1 = %u\n", __func__, is_plane1); - dml_print("DML_DLG: %s: BytePerPixel = %u\n", __func__, BytePerPixel); - dml_print("DML_DLG: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); - dml_print("DML_DLG: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); - dml_print("DML_DLG: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); - dml_print("DML_DLG: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); - dml_print("DML_DLG: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); - dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); - dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); - dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); -#endif - - // just supply with enough parameters to calculate meta and dte - CalculateVMAndRowBytes( - 0, // dml_bool_t ViewportStationary, - 1, // dml_bool_t DCCEnable, - 1, // dml_uint_t NumberOfDPPs, - BlockHeight256Bytes, - BlockWidth256Bytes, - SourcePixelFormat, - SurfaceTiling, - BytePerPixel, - ScanDirection, - 0, // dml_uint_t SwathWidth, - 0, // dml_uint_t ViewportHeight, (Note: DML calculates one_row_for_frame height regardless, would need test input if that height is useful) - 0, // dml_uint_t ViewportXStart, - 0, // dml_uint_t ViewportYStart, - 1, // dml_bool_t GPUVMEnable, - 4, // dml_uint_t GPUVMMaxPageTableLevels, - GPUVMMinPageSizeKBytes, - PTEBufferSizeInRequests, - pitch, - 0, // dml_uint_t DCCMetaPitch, - MacroTileWidth, - MacroTileHeight, - - // /* 
Output */ - &dummy_integer[0], // dml_uint_t *MetaRowByte, - &dummy_integer[1], // dml_uint_t *PixelPTEBytesPerRow, - &dummy_integer[2], // dml_uint_t *PixelPTEBytesPerRowStorage, - &dummy_integer[3], // dml_uint_t *dpte_row_width_ub, - dpte_row_height, - &dummy_integer[4], // dml_uint_t *dpte_row_height_linear - &dummy_integer[5], // dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame, - &dummy_integer[6], // dml_uint_t *dpte_row_width_ub_one_row_per_frame, - &dummy_integer[7], // dml_uint_t *dpte_row_height_one_row_per_frame, - &dummy_integer[8], // dml_uint_t *MetaRequestWidth, - &dummy_integer[9], // dml_uint_t *MetaRequestHeight, - &dummy_integer[10], // dml_uint_t *meta_row_width, - meta_row_height, - &dummy_integer[11], // dml_uint_t *PixelPTEReqWidth, - &dummy_integer[12], // dml_uint_t *PixelPTEReqHeight, - &dummy_integer[13], // dml_uint_t *PTERequestSize, - &dummy_integer[14], // dml_uint_t *DPDE0BytesFrame, - &dummy_integer[15]); // dml_uint_t *MetaPTEBytesFrame) - -#ifdef __DML_RQ_DLG_CALC_DEBUG__ - dml_print("DML_DLG: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); - dml_print("DML_DLG: %s: meta_row_height = %u\n", __func__, *meta_row_height); -#endif -} - -static struct soc_state_bounding_box_st dml_get_soc_state_bounding_box( - const struct soc_states_st *states, - dml_uint_t state_idx) -{ - dml_print("DML::%s: state_idx=%u (num_states=%u)\n", __func__, state_idx, states->num_states); - - if (state_idx >= (dml_uint_t)states->num_states) { - dml_print("DML::%s: ERROR: Invalid state_idx=%u! num_states=%u\n", __func__, state_idx, states->num_states); - ASSERT(0); - } - return (states->state_array[state_idx]); -} - -/// @brief Copy the parameters to a calculation struct, it actually only need when the DML needs to have -/// the intelligence to re-calculate when any of display cfg, bbox, or policy changes since last calculated. 
-/// -static void cache_ip_soc_cfg(struct display_mode_lib_st *mode_lib, - dml_uint_t state_idx) -{ - mode_lib->ms.state_idx = state_idx; - mode_lib->ms.max_state_idx = mode_lib->states.num_states - 1; - mode_lib->ms.soc = mode_lib->soc; - mode_lib->ms.ip = mode_lib->ip; - mode_lib->ms.policy = mode_lib->policy; - mode_lib->ms.state = dml_get_soc_state_bounding_box(&mode_lib->states, state_idx); - mode_lib->ms.max_state = dml_get_soc_state_bounding_box(&mode_lib->states, mode_lib->states.num_states - 1); -} - -static void cache_display_cfg(struct display_mode_lib_st *mode_lib, - const struct dml_display_cfg_st *display_cfg) -{ - mode_lib->ms.cache_display_cfg = *display_cfg; -} - -static void fetch_socbb_params(struct display_mode_lib_st *mode_lib) -{ - struct soc_state_bounding_box_st *state = &mode_lib->ms.state; - - // Default values, SOCCLK, DRAMSpeed, and FabricClock will be reassigned to the same state value in mode_check step - // If UseMinimumRequiredDCFCLK is used, the DCFCLK will be the min dcflk for the mode support - mode_lib->ms.SOCCLK = (dml_float_t)state->socclk_mhz; - mode_lib->ms.DRAMSpeed = (dml_float_t)state->dram_speed_mts; - mode_lib->ms.FabricClock = (dml_float_t)state->fabricclk_mhz; - mode_lib->ms.DCFCLK = (dml_float_t)state->dcfclk_mhz; -} - -/// @brief Use display_cfg directly for mode_support calculation -/// Calculated values and informational output are stored in mode_lib.vba data struct -/// The display configuration is described with pipes struct and num_pipes -/// This function is used when physical resource mapping is not finalized (for example, -/// don't know how many pipes to represent a surface) -/// @param mode_lib Contains the bounding box and policy setting. -/// @param state_idx Power state index -/// @param display_cfg Display configurations. A display -dml_bool_t dml_mode_support( - struct display_mode_lib_st *mode_lib, - dml_uint_t state_idx, - const struct dml_display_cfg_st *display_cfg) -{ - dml_bool_t is_mode_support; - - dml_print("DML::%s: ------------- START ----------\n", __func__); - cache_ip_soc_cfg(mode_lib, state_idx); - cache_display_cfg(mode_lib, display_cfg); - - fetch_socbb_params(mode_lib); - - dml_print("DML::%s: state_idx = %u\n", __func__, state_idx); - - is_mode_support = dml_core_mode_support(mode_lib); - - dml_print("DML::%s: is_mode_support = %u\n", __func__, is_mode_support); - dml_print("DML::%s: ------------- DONE ----------\n", __func__); - return is_mode_support; -} - -/// @Brief A function to calculate the programming values for DCN DCHUB (Assume mode is supported) -/// The output will be stored in the mode_lib.mp (mode_program_st) data struct and those can be accessed via the getter functions -/// Calculated values include: watermarks, dlg, rq reg, different clock frequency -/// This function returns 1 when there is no error. -/// Note: In this function, it is assumed that DCFCLK, SOCCLK freq are the state values, and mode_program will just use the DML calculated DPPCLK and DISPCLK -/// @param mode_lib mode_lib data struct that house all the input/output/bbox and calculation values. -/// @param state_idx Power state idx chosen -/// @param display_cfg Display Configuration -/// @param call_standalone Calling mode_programming without calling mode support. 
Some of the "support" struct member will be pre-calculated before doing mode programming -/// TODO: Add clk_cfg input, could be useful for standalone mode -dml_bool_t dml_mode_programming( - struct display_mode_lib_st *mode_lib, - dml_uint_t state_idx, - const struct dml_display_cfg_st *display_cfg, - bool call_standalone) -{ - struct dml_clk_cfg_st clk_cfg; - memset(&clk_cfg, 0, sizeof(clk_cfg)); - - clk_cfg.dcfclk_option = dml_use_required_freq; - clk_cfg.dispclk_option = dml_use_required_freq; - for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; ++k) - clk_cfg.dppclk_option[k] = dml_use_required_freq; - - dml_print("DML::%s: ------------- START ----------\n", __func__); - dml_print("DML::%s: state_idx = %u\n", __func__, state_idx); - dml_print("DML::%s: call_standalone = %u\n", __func__, call_standalone); - - cache_ip_soc_cfg(mode_lib, state_idx); - cache_display_cfg(mode_lib, display_cfg); - - fetch_socbb_params(mode_lib); - if (call_standalone) { - mode_lib->ms.support.ImmediateFlipSupport = 1; // assume mode support say immediate flip ok at max state/combine - dml_core_mode_support_partial(mode_lib); - } - - dml_core_mode_programming(mode_lib, &clk_cfg); - - dml_print("DML::%s: ------------- DONE ----------\n", __func__); - dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %0d\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); - return mode_lib->mp.PrefetchAndImmediateFlipSupported; -} - -static dml_uint_t mode_support_pwr_states( - dml_uint_t *lowest_state_idx, - struct display_mode_lib_st *mode_lib, - const struct dml_display_cfg_st *display_cfg, - dml_uint_t start_state_idx, - dml_uint_t end_state_idx) -{ - dml_uint_t state_idx = 0; - dml_bool_t mode_is_supported = 0; - *lowest_state_idx = end_state_idx; - - if (end_state_idx < start_state_idx) - ASSERT(0); - - if (end_state_idx >= mode_lib->states.num_states) // idx is 0-based - ASSERT(0); - - for (state_idx = start_state_idx; state_idx <= end_state_idx; state_idx++) { - if (dml_mode_support(mode_lib, state_idx, display_cfg)) { - dml_print("DML::%s: Mode is supported at power state_idx = %u\n", __func__, state_idx); - mode_is_supported = 1; - *lowest_state_idx = state_idx; - break; - } - } - - return mode_is_supported; -} - -dml_uint_t dml_mode_support_ex(struct dml_mode_support_ex_params_st *in_out_params) -{ - dml_uint_t result; - - result = mode_support_pwr_states(&in_out_params->out_lowest_state_idx, - in_out_params->mode_lib, - in_out_params->in_display_cfg, - in_out_params->in_start_state_idx, - in_out_params->mode_lib->states.num_states - 1); - - if (result) - *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; - - return result; -} - -dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx) -{ - dml_uint_t plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; - dml_print("DML::%s: pipe_idx=%d UseMALLForPStateChange=%0d\n", __func__, pipe_idx, mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx]); - return (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx] == dml_use_mall_pstate_change_phantom_pipe); -} - -#define dml_get_per_surface_var_func(variable, type, interval_var) type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx) \ -{ \ - dml_uint_t plane_idx; \ - plane_idx = mode_lib->mp.pipe_plane[surface_idx]; \ - return (type) interval_var[plane_idx]; \ -} - -#define dml_get_var_func(var, type, internal_var) type dml_get_##var(struct display_mode_lib_st *mode_lib) \ -{ \ - return (type) 
internal_var; \ -} - -dml_get_var_func(wm_urgent, dml_float_t, mode_lib->mp.Watermark.UrgentWatermark); -dml_get_var_func(wm_stutter_exit, dml_float_t, mode_lib->mp.Watermark.StutterExitWatermark); -dml_get_var_func(wm_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark); -dml_get_var_func(wm_memory_trip, dml_float_t, mode_lib->mp.UrgentLatency); -dml_get_var_func(wm_fclk_change, dml_float_t, mode_lib->mp.Watermark.FCLKChangeWatermark); -dml_get_var_func(wm_usr_retraining, dml_float_t, mode_lib->mp.Watermark.USRRetrainingWatermark); -dml_get_var_func(wm_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.DRAMClockChangeWatermark); -dml_get_var_func(wm_z8_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark); -dml_get_var_func(wm_z8_stutter, dml_float_t, mode_lib->mp.Watermark.Z8StutterExitWatermark); -dml_get_var_func(fraction_of_urgent_bandwidth, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidth); -dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip); -dml_get_var_func(urgent_latency, dml_float_t, mode_lib->mp.UrgentLatency); -dml_get_var_func(clk_dcf_deepsleep, dml_float_t, mode_lib->mp.DCFCLKDeepSleep); -dml_get_var_func(wm_writeback_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); -dml_get_var_func(wm_writeback_urgent, dml_float_t, mode_lib->mp.Watermark.WritebackUrgentWatermark); -dml_get_var_func(stutter_efficiency, dml_float_t, mode_lib->mp.StutterEfficiency); -dml_get_var_func(stutter_efficiency_no_vblank, dml_float_t, mode_lib->mp.StutterEfficiencyNotIncludingVBlank); -dml_get_var_func(stutter_efficiency_z8, dml_float_t, mode_lib->mp.Z8StutterEfficiency); -dml_get_var_func(stutter_num_bursts_z8, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame); -dml_get_var_func(stutter_period, dml_float_t, mode_lib->mp.StutterPeriod); -dml_get_var_func(stutter_efficiency_z8_bestcase, dml_float_t, mode_lib->mp.Z8StutterEfficiencyBestCase); -dml_get_var_func(stutter_num_bursts_z8_bestcase, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase); -dml_get_var_func(stutter_period_bestcase, dml_float_t, mode_lib->mp.StutterPeriodBestCase); -dml_get_var_func(urgent_extra_latency, dml_float_t, mode_lib->mp.UrgentExtraLatency); -dml_get_var_func(fclk_change_latency, dml_float_t, mode_lib->mp.MaxActiveFCLKChangeLatencySupported); -dml_get_var_func(dispclk_calculated, dml_float_t, mode_lib->mp.Dispclk_calculated); -dml_get_var_func(total_data_read_bw, dml_float_t, mode_lib->mp.TotalDataReadBandwidth); -dml_get_var_func(return_bw, dml_float_t, mode_lib->ms.ReturnBW); -dml_get_var_func(return_dram_bw, dml_float_t, mode_lib->ms.ReturnDRAMBW); -dml_get_var_func(tcalc, dml_float_t, mode_lib->mp.TCalc); -dml_get_var_func(comp_buffer_size_kbytes, dml_uint_t, mode_lib->mp.CompressedBufferSizeInkByte); -dml_get_var_func(pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.pixel_chunk_size_kbytes); -dml_get_var_func(alpha_pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.alpha_pixel_chunk_size_kbytes); -dml_get_var_func(meta_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.meta_chunk_size_kbytes); -dml_get_var_func(min_pixel_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_pixel_chunk_size_bytes); -dml_get_var_func(min_meta_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_meta_chunk_size_bytes); -dml_get_var_func(total_immediate_flip_bytes, dml_uint_t, mode_lib->mp.TotImmediateFlipBytes); - 
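The two macros above are what generate the whole column of getters that follows: each dml_get_var_func()/dml_get_per_surface_var_func() line expands into one typed accessor over an internal mode_lib field, and the per-surface variant first remaps the caller's surface index through mp.pipe_plane[] before indexing the per-plane array. A minimal standalone sketch of that pattern is below; every name in it (demo_mode_lib, demo_get_*, the field names) is invented for illustration and is not part of this patch or of DML.

	#include <stdio.h>

	typedef double dml_float_t;
	typedef unsigned int dml_uint_t;

	struct demo_mode_lib {
		dml_float_t urgent_watermark_us;
		dml_uint_t pipe_plane[4];         /* pipe index -> plane index */
		dml_float_t min_ttu_vblank_us[4]; /* indexed by plane */
	};

	/* One macro call stamps out one typed accessor function. */
	#define demo_get_var_func(var, type, internal_var) \
	type demo_get_##var(struct demo_mode_lib *lib) \
	{ \
		return (type)(internal_var); \
	}

	/* Per-surface variant: remap the surface index to a plane index
	 * first, mirroring the mp.pipe_plane[] lookup in the macro above. */
	#define demo_get_per_surface_var_func(var, type, internal_var) \
	type demo_get_##var(struct demo_mode_lib *lib, dml_uint_t surface_idx) \
	{ \
		dml_uint_t plane_idx = lib->pipe_plane[surface_idx]; \
		return (type)(internal_var)[plane_idx]; \
	}

	demo_get_var_func(wm_urgent, dml_float_t, lib->urgent_watermark_us)
	demo_get_per_surface_var_func(min_ttu_vblank_in_us, dml_float_t, lib->min_ttu_vblank_us)

	int main(void)
	{
		struct demo_mode_lib lib = {
			.urgent_watermark_us = 4.0,
			.pipe_plane = { 1, 0, 2, 3 },
			.min_ttu_vblank_us = { 10.0, 20.0, 30.0, 40.0 },
		};

		printf("wm_urgent = %.1f\n", demo_get_wm_urgent(&lib));
		/* surface 0 is driven by plane 1, so this prints 20.0 */
		printf("min_ttu(0) = %.1f\n", demo_get_min_ttu_vblank_in_us(&lib, 0));
		return 0;
	}

Generating the accessors from macros keeps the name-to-field mapping as a single table-like list, which is why the deleted file reads as a column of macro invocations rather than hundreds of hand-written functions.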
-dml_get_per_surface_var_func(dsc_delay, dml_uint_t, mode_lib->mp.DSCDelay); // this is the dsc latency -dml_get_per_surface_var_func(dppclk_calculated, dml_float_t, mode_lib->mp.Dppclk_calculated); -dml_get_per_surface_var_func(dscclk_calculated, dml_float_t, mode_lib->mp.DSCCLK_calculated); -dml_get_per_surface_var_func(min_ttu_vblank_in_us, dml_float_t, mode_lib->mp.MinTTUVBlank); -dml_get_per_surface_var_func(vratio_prefetch_l, dml_float_t, mode_lib->mp.VRatioPrefetchY); -dml_get_per_surface_var_func(vratio_prefetch_c, dml_float_t, mode_lib->mp.VRatioPrefetchC); -dml_get_per_surface_var_func(dst_x_after_scaler, dml_uint_t, mode_lib->mp.DSTXAfterScaler); -dml_get_per_surface_var_func(dst_y_after_scaler, dml_uint_t, mode_lib->mp.DSTYAfterScaler); -dml_get_per_surface_var_func(dst_y_per_vm_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInVBlank); -dml_get_per_surface_var_func(dst_y_per_row_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInVBlank); -dml_get_per_surface_var_func(dst_y_prefetch, dml_float_t, mode_lib->mp.DestinationLinesForPrefetch); -dml_get_per_surface_var_func(dst_y_per_vm_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInImmediateFlip); -dml_get_per_surface_var_func(dst_y_per_row_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInImmediateFlip); -dml_get_per_surface_var_func(dst_y_per_pte_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L); -dml_get_per_surface_var_func(dst_y_per_pte_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C); -dml_get_per_surface_var_func(dst_y_per_meta_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_L); -dml_get_per_surface_var_func(dst_y_per_meta_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_C); -dml_get_per_surface_var_func(refcyc_per_vm_group_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupVBlank); -dml_get_per_surface_var_func(refcyc_per_vm_group_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupFlip); -dml_get_per_surface_var_func(refcyc_per_vm_req_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestVBlank); -dml_get_per_surface_var_func(refcyc_per_vm_req_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestFlip); -dml_get_per_surface_var_func(refcyc_per_vm_dmdata_in_us, dml_float_t, mode_lib->mp.Tdmdl_vm); -dml_get_per_surface_var_func(dmdata_dl_delta_in_us, dml_float_t, mode_lib->mp.Tdmdl); -dml_get_per_surface_var_func(refcyc_per_line_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLuma); -dml_get_per_surface_var_func(refcyc_per_line_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChroma); -dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch); -dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch); -dml_get_per_surface_var_func(refcyc_per_req_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma); -dml_get_per_surface_var_func(refcyc_per_req_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma); -dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch); -dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch); -dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_in_us, dml_float_t, 
mode_lib->mp.CursorRequestDeliveryTime); -dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTimePrefetch); -dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkNominal); -dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkNominal); -dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkVBlank); -dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkVBlank); -dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkFlip); -dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkFlip); -dml_get_per_surface_var_func(refcyc_per_pte_group_nom_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_luma); -dml_get_per_surface_var_func(refcyc_per_pte_group_nom_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_chroma); -dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_luma); -dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_chroma); -dml_get_per_surface_var_func(refcyc_per_pte_group_flip_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_luma); -dml_get_per_surface_var_func(refcyc_per_pte_group_flip_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_chroma); -dml_get_per_surface_var_func(dpte_group_size_in_bytes, dml_uint_t, mode_lib->mp.dpte_group_bytes); -dml_get_per_surface_var_func(vm_group_size_in_bytes, dml_uint_t, mode_lib->mp.vm_group_bytes); -dml_get_per_surface_var_func(swath_height_l, dml_uint_t, mode_lib->ms.SwathHeightY); -dml_get_per_surface_var_func(swath_height_c, dml_uint_t, mode_lib->ms.SwathHeightC); -dml_get_per_surface_var_func(dpte_row_height_l, dml_uint_t, mode_lib->mp.dpte_row_height); -dml_get_per_surface_var_func(dpte_row_height_c, dml_uint_t, mode_lib->mp.dpte_row_height_chroma); -dml_get_per_surface_var_func(dpte_row_height_linear_l, dml_uint_t, mode_lib->mp.dpte_row_height_linear); -dml_get_per_surface_var_func(dpte_row_height_linear_c, dml_uint_t, mode_lib->mp.dpte_row_height_linear_chroma); -dml_get_per_surface_var_func(meta_row_height_l, dml_uint_t, mode_lib->mp.meta_row_height); -dml_get_per_surface_var_func(meta_row_height_c, dml_uint_t, mode_lib->mp.meta_row_height_chroma); - -dml_get_per_surface_var_func(vstartup_calculated, dml_uint_t, mode_lib->mp.VStartup); -dml_get_per_surface_var_func(vupdate_offset, dml_uint_t, mode_lib->mp.VUpdateOffsetPix); -dml_get_per_surface_var_func(vupdate_width, dml_uint_t, mode_lib->mp.VUpdateWidthPix); -dml_get_per_surface_var_func(vready_offset, dml_uint_t, mode_lib->mp.VReadyOffsetPix); -dml_get_per_surface_var_func(vready_at_or_after_vsync, dml_uint_t, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC); -dml_get_per_surface_var_func(min_dst_y_next_start, dml_uint_t, mode_lib->mp.MIN_DST_Y_NEXT_START); -dml_get_per_surface_var_func(det_stored_buffer_size_l_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeY); -dml_get_per_surface_var_func(det_stored_buffer_size_c_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeC); -dml_get_per_surface_var_func(use_mall_for_static_screen, dml_uint_t, mode_lib->mp.UsesMALLForStaticScreen); -dml_get_per_surface_var_func(surface_size_for_mall, dml_uint_t, 
mode_lib->mp.SurfaceSizeInTheMALL); -dml_get_per_surface_var_func(dcc_max_uncompressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxUncompressedBlock); -dml_get_per_surface_var_func(dcc_max_compressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxCompressedBlock); -dml_get_per_surface_var_func(dcc_independent_block_l, dml_uint_t, mode_lib->mp.DCCYIndependentBlock); -dml_get_per_surface_var_func(dcc_max_uncompressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxUncompressedBlock); -dml_get_per_surface_var_func(dcc_max_compressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxCompressedBlock); -dml_get_per_surface_var_func(dcc_independent_block_c, dml_uint_t, mode_lib->mp.DCCCIndependentBlock); -dml_get_per_surface_var_func(max_active_dram_clock_change_latency_supported, dml_uint_t, mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported); -dml_get_per_surface_var_func(pte_buffer_mode, dml_uint_t, mode_lib->mp.PTE_BUFFER_MODE); -dml_get_per_surface_var_func(bigk_fragment_size, dml_uint_t, mode_lib->mp.BIGK_FRAGMENT_SIZE); -dml_get_per_surface_var_func(dpte_bytes_per_row, dml_uint_t, mode_lib->mp.PixelPTEBytesPerRow); -dml_get_per_surface_var_func(meta_bytes_per_row, dml_uint_t, mode_lib->mp.MetaRowByte); -dml_get_per_surface_var_func(det_buffer_size_kbytes, dml_uint_t, mode_lib->ms.DETBufferSizeInKByte); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.h deleted file mode 100644 index a38ed89c47a9..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __DISPLAY_MODE_CORE_H__ -#define __DISPLAY_MODE_CORE_H__ - -#include "display_mode_core_structs.h" - -struct display_mode_lib_st; - -dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib); -void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib); -void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg); - -void dml_core_get_row_heights( - dml_uint_t *dpte_row_height, - dml_uint_t *meta_row_height, - const struct display_mode_lib_st *mode_lib, - dml_bool_t is_plane1, - enum dml_source_format_class SourcePixelFormat, - enum dml_swizzle_mode SurfaceTiling, - enum dml_rotation_angle ScanDirection, - dml_uint_t pitch, - dml_uint_t GPUVMMinPageSizeKBytes); - -dml_float_t dml_get_return_bw_mbps_vm_only( - const struct soc_bounding_box_st *soc, - dml_bool_t use_ideal_dram_bw_strobe, - dml_bool_t HostVMEnable, - dml_float_t DCFCLK, - dml_float_t FabricClock, - dml_float_t DRAMSpeed); - -dml_float_t dml_get_return_bw_mbps( - const struct soc_bounding_box_st *soc, - dml_bool_t use_ideal_dram_bw_strobe, - dml_bool_t HostVMEnable, - dml_float_t DCFCLK, - dml_float_t FabricClock, - dml_float_t DRAMSpeed); - -dml_bool_t dml_mode_support( - struct display_mode_lib_st *mode_lib, - dml_uint_t state_idx, - const struct dml_display_cfg_st *display_cfg); - -dml_bool_t dml_mode_programming( - struct display_mode_lib_st *mode_lib, - dml_uint_t state_idx, - const struct dml_display_cfg_st *display_cfg, - bool call_standalone); - -dml_uint_t dml_mode_support_ex( - struct dml_mode_support_ex_params_st *in_out_params); - -dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx); - -#define dml_get_per_surface_var_decl(variable, type) type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx) -#define dml_get_var_decl(var, type) type dml_get_##var(struct display_mode_lib_st *mode_lib) - -dml_get_var_decl(wm_urgent, dml_float_t); -dml_get_var_decl(wm_stutter_exit, dml_float_t); -dml_get_var_decl(wm_stutter_enter_exit, dml_float_t); -dml_get_var_decl(wm_memory_trip, dml_float_t); -dml_get_var_decl(wm_dram_clock_change, dml_float_t); -dml_get_var_decl(wm_z8_stutter_enter_exit, dml_float_t); -dml_get_var_decl(wm_z8_stutter, dml_float_t); -dml_get_var_decl(urgent_latency, dml_float_t); -dml_get_var_decl(clk_dcf_deepsleep, dml_float_t); -dml_get_var_decl(wm_fclk_change, dml_float_t); -dml_get_var_decl(wm_usr_retraining, dml_float_t); -dml_get_var_decl(urgent_latency, dml_float_t); - -dml_get_var_decl(wm_writeback_dram_clock_change, dml_float_t); -dml_get_var_decl(wm_writeback_urgent, dml_float_t); -dml_get_var_decl(stutter_efficiency_no_vblank, dml_float_t); -dml_get_var_decl(stutter_efficiency, dml_float_t); -dml_get_var_decl(stutter_efficiency_z8, dml_float_t); -dml_get_var_decl(stutter_num_bursts_z8, dml_float_t); -dml_get_var_decl(stutter_period, dml_float_t); -dml_get_var_decl(stutter_efficiency_z8_bestcase, dml_float_t); -dml_get_var_decl(stutter_num_bursts_z8_bestcase, dml_float_t); -dml_get_var_decl(stutter_period_bestcase, dml_float_t); -dml_get_var_decl(urgent_latency, dml_float_t); -dml_get_var_decl(urgent_extra_latency, dml_float_t); -dml_get_var_decl(fclk_change_latency, dml_float_t); -dml_get_var_decl(nonurgent_latency, dml_float_t); -dml_get_var_decl(dispclk_calculated, dml_float_t); -dml_get_var_decl(total_data_read_bw, dml_float_t); -dml_get_var_decl(return_bw, dml_float_t); -dml_get_var_decl(return_dram_bw, 
dml_float_t); -dml_get_var_decl(tcalc, dml_float_t); -dml_get_var_decl(fraction_of_urgent_bandwidth, dml_float_t); -dml_get_var_decl(fraction_of_urgent_bandwidth_imm_flip, dml_float_t); -dml_get_var_decl(comp_buffer_size_kbytes, dml_uint_t); -dml_get_var_decl(pixel_chunk_size_in_kbyte, dml_uint_t); -dml_get_var_decl(alpha_pixel_chunk_size_in_kbyte, dml_uint_t); -dml_get_var_decl(meta_chunk_size_in_kbyte, dml_uint_t); -dml_get_var_decl(min_pixel_chunk_size_in_byte, dml_uint_t); -dml_get_var_decl(min_meta_chunk_size_in_byte, dml_uint_t); -dml_get_var_decl(total_immediate_flip_bytes, dml_uint_t); - -dml_get_per_surface_var_decl(dsc_delay, dml_uint_t); -dml_get_per_surface_var_decl(dppclk_calculated, dml_float_t); -dml_get_per_surface_var_decl(dscclk_calculated, dml_float_t); -dml_get_per_surface_var_decl(min_ttu_vblank_in_us, dml_float_t); -dml_get_per_surface_var_decl(vratio_prefetch_l, dml_float_t); -dml_get_per_surface_var_decl(vratio_prefetch_c, dml_float_t); -dml_get_per_surface_var_decl(dst_x_after_scaler, dml_uint_t); -dml_get_per_surface_var_decl(dst_y_after_scaler, dml_uint_t); -dml_get_per_surface_var_decl(dst_y_per_vm_vblank, dml_float_t); -dml_get_per_surface_var_decl(dst_y_per_row_vblank, dml_float_t); -dml_get_per_surface_var_decl(dst_y_prefetch, dml_float_t); -dml_get_per_surface_var_decl(dst_y_per_vm_flip, dml_float_t); -dml_get_per_surface_var_decl(dst_y_per_row_flip, dml_float_t); -dml_get_per_surface_var_decl(dst_y_per_pte_row_nom_l, dml_float_t); -dml_get_per_surface_var_decl(dst_y_per_pte_row_nom_c, dml_float_t); -dml_get_per_surface_var_decl(dst_y_per_meta_row_nom_l, dml_float_t); -dml_get_per_surface_var_decl(dst_y_per_meta_row_nom_c, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_vm_group_vblank_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_vm_group_flip_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_vm_req_vblank_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_vm_req_flip_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_vm_dmdata_in_us, dml_float_t); -dml_get_per_surface_var_decl(dmdata_dl_delta_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_line_delivery_l_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_line_delivery_c_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_line_delivery_pre_l_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_line_delivery_pre_c_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_req_delivery_l_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_req_delivery_c_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_req_delivery_pre_l_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_req_delivery_pre_c_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_cursor_req_delivery_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_pte_group_nom_l_in_us, dml_float_t); 
-dml_get_per_surface_var_decl(refcyc_per_pte_group_nom_c_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_pte_group_vblank_l_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_pte_group_vblank_c_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_pte_group_flip_l_in_us, dml_float_t); -dml_get_per_surface_var_decl(refcyc_per_pte_group_flip_c_in_us, dml_float_t); - -dml_get_per_surface_var_decl(dpte_group_size_in_bytes, dml_uint_t); -dml_get_per_surface_var_decl(vm_group_size_in_bytes, dml_uint_t); -dml_get_per_surface_var_decl(swath_height_l, dml_uint_t); -dml_get_per_surface_var_decl(swath_height_c, dml_uint_t); -dml_get_per_surface_var_decl(dpte_row_height_l, dml_uint_t); -dml_get_per_surface_var_decl(dpte_row_height_c, dml_uint_t); -dml_get_per_surface_var_decl(dpte_row_height_linear_l, dml_uint_t); -dml_get_per_surface_var_decl(dpte_row_height_linear_c, dml_uint_t); -dml_get_per_surface_var_decl(meta_row_height_l, dml_uint_t); -dml_get_per_surface_var_decl(meta_row_height_c, dml_uint_t); -dml_get_per_surface_var_decl(vstartup_calculated, dml_uint_t); -dml_get_per_surface_var_decl(vupdate_offset, dml_uint_t); -dml_get_per_surface_var_decl(vupdate_width, dml_uint_t); -dml_get_per_surface_var_decl(vready_offset, dml_uint_t); -dml_get_per_surface_var_decl(vready_at_or_after_vsync, dml_uint_t); -dml_get_per_surface_var_decl(min_dst_y_next_start, dml_uint_t); -dml_get_per_surface_var_decl(det_stored_buffer_size_l_bytes, dml_uint_t); -dml_get_per_surface_var_decl(det_stored_buffer_size_c_bytes, dml_uint_t); -dml_get_per_surface_var_decl(use_mall_for_static_screen, dml_uint_t); -dml_get_per_surface_var_decl(surface_size_for_mall, dml_uint_t); -dml_get_per_surface_var_decl(dcc_max_uncompressed_block_l, dml_uint_t); -dml_get_per_surface_var_decl(dcc_max_uncompressed_block_c, dml_uint_t); -dml_get_per_surface_var_decl(dcc_max_compressed_block_l, dml_uint_t); -dml_get_per_surface_var_decl(dcc_max_compressed_block_c, dml_uint_t); -dml_get_per_surface_var_decl(dcc_independent_block_l, dml_uint_t); -dml_get_per_surface_var_decl(dcc_independent_block_c, dml_uint_t); -dml_get_per_surface_var_decl(max_active_dram_clock_change_latency_supported, dml_uint_t); -dml_get_per_surface_var_decl(pte_buffer_mode, dml_uint_t); -dml_get_per_surface_var_decl(bigk_fragment_size, dml_uint_t); -dml_get_per_surface_var_decl(dpte_bytes_per_row, dml_uint_t); -dml_get_per_surface_var_decl(meta_bytes_per_row, dml_uint_t); -dml_get_per_surface_var_decl(det_buffer_size_kbytes, dml_uint_t); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h deleted file mode 100644 index d57717b023eb..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h +++ /dev/null @@ -1,2033 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DISPLAY_MODE_CORE_STRUCT_H__ -#define __DISPLAY_MODE_CORE_STRUCT_H__ - -#include "display_mode_lib_defines.h" -#include "dml_top_display_cfg_types.h" - -enum dml_project_id { - dml_project_invalid = 0, - dml_project_default = 1, - dml_project_dcn32 = dml_project_default, - dml_project_dcn321 = 2, - dml_project_dcn35 = 3, - dml_project_dcn351 = 4, - dml_project_dcn401 = 5, - dml_project_dcn36 = 6, -}; -enum dml_prefetch_modes { - dml_prefetch_support_uclk_fclk_and_stutter_if_possible = 0, - dml_prefetch_support_uclk_fclk_and_stutter = 1, - dml_prefetch_support_fclk_and_stutter = 2, - dml_prefetch_support_stutter = 3, - dml_prefetch_support_none = 4 -}; -enum dml_use_mall_for_pstate_change_mode { - dml_use_mall_pstate_change_disable = 0, - dml_use_mall_pstate_change_full_frame = 1, - dml_use_mall_pstate_change_sub_viewport = 2, - dml_use_mall_pstate_change_phantom_pipe = 3, - dml_use_mall_pstate_change_phantom_pipe_no_data_return = 4, - dml_use_mall_pstate_change_imall = 5 -}; -enum dml_use_mall_for_static_screen_mode { - dml_use_mall_static_screen_disable = 0, - dml_use_mall_static_screen_enable = 1, - dml_use_mall_static_screen_optimize = 2 -}; -enum dml_output_encoder_class { - dml_dp = 0, - dml_edp = 1, - dml_dp2p0 = 2, - dml_hdmi = 3, - dml_hdmifrl = 4, - dml_none = 5 -}; -enum dml_output_link_dp_rate{ - dml_dp_rate_na = 0, - dml_dp_rate_hbr = 1, - dml_dp_rate_hbr2 = 2, - dml_dp_rate_hbr3 = 3, - dml_dp_rate_uhbr10 = 4, - dml_dp_rate_uhbr13p5 = 5, - dml_dp_rate_uhbr20 = 6 -}; -enum dml_output_type_and_rate__type{ - dml_output_type_unknown = 0, - dml_output_type_dp = 1, - dml_output_type_edp = 2, - dml_output_type_dp2p0 = 3, - dml_output_type_hdmi = 4, - dml_output_type_hdmifrl = 5 -}; -enum dml_output_type_and_rate__rate { - dml_output_rate_unknown = 0, - dml_output_rate_dp_rate_hbr = 1, - dml_output_rate_dp_rate_hbr2 = 2, - dml_output_rate_dp_rate_hbr3 = 3, - dml_output_rate_dp_rate_uhbr10 = 4, - dml_output_rate_dp_rate_uhbr13p5 = 5, - dml_output_rate_dp_rate_uhbr20 = 6, - dml_output_rate_hdmi_rate_3x3 = 7, - dml_output_rate_hdmi_rate_6x3 = 8, - dml_output_rate_hdmi_rate_6x4 = 9, - dml_output_rate_hdmi_rate_8x4 = 10, - dml_output_rate_hdmi_rate_10x4 = 11, - dml_output_rate_hdmi_rate_12x4 = 12 -}; -enum dml_output_format_class { - dml_444 = 0, - dml_s422 = 1, - dml_n422 = 2, - dml_420 = 3 -}; -enum dml_source_format_class { - dml_444_8 = 0, - dml_444_16 = 1, - dml_444_32 = 2, - dml_444_64 = 3, - dml_420_8 = 4, - dml_420_10 = 5, - dml_420_12 = 6, - dml_422_8 = 7, - dml_422_10 = 8, - dml_rgbe_alpha = 9, - dml_rgbe = 10, - dml_mono_8 = 11, - dml_mono_16 = 12 -}; -enum dml_output_bpc_class { - dml_out_6 = 0, - dml_out_8 = 1, - dml_out_10 = 2, - dml_out_12 = 3, - dml_out_16 = 4 -}; -enum dml_output_standard_class { - dml_std_cvt = 0, - dml_std_cea = 1, - dml_std_cvtr2 = 2 -}; -enum dml_rotation_angle { - dml_rotation_0 = 0, - dml_rotation_90 = 1, - dml_rotation_180 = 2, - dml_rotation_270 = 3, - dml_rotation_0m = 4, - dml_rotation_90m = 5, - dml_rotation_180m = 
6, - dml_rotation_270m = 7 -}; -enum dml_swizzle_mode { - dml_sw_linear = 0, - dml_sw_256b_s = 1, - dml_sw_256b_d = 2, - dml_sw_256b_r = 3, - dml_sw_4kb_z = 4, - dml_sw_4kb_s = 5, - dml_sw_4kb_d = 6, - dml_sw_4kb_r = 7, - dml_sw_64kb_z = 8, - dml_sw_64kb_s = 9, - dml_sw_64kb_d = 10, - dml_sw_64kb_r = 11, - dml_sw_256kb_z = 12, - dml_sw_256kb_s = 13, - dml_sw_256kb_d = 14, - dml_sw_256kb_r = 15, - dml_sw_64kb_z_t = 16, - dml_sw_64kb_s_t = 17, - dml_sw_64kb_d_t = 18, - dml_sw_64kb_r_t = 19, - dml_sw_4kb_z_x = 20, - dml_sw_4kb_s_x = 21, - dml_sw_4kb_d_x = 22, - dml_sw_4kb_r_x = 23, - dml_sw_64kb_z_x = 24, - dml_sw_64kb_s_x = 25, - dml_sw_64kb_d_x = 26, - dml_sw_64kb_r_x = 27, - dml_sw_256kb_z_x = 28, - dml_sw_256kb_s_x = 29, - dml_sw_256kb_d_x = 30, - dml_sw_256kb_r_x = 31, - dml_sw_256b_2d = 32, - dml_sw_4kb_2d = 33, - dml_sw_64kb_2d = 34, - dml_sw_256kb_2d = 35 -}; -enum dml_lb_depth { - dml_lb_6 = 0, - dml_lb_8 = 1, - dml_lb_10 = 2, - dml_lb_12 = 3, - dml_lb_16 = 4 -}; -enum dml_voltage_state { - dml_vmin_lv = 0, - dml_vmin = 1, - dml_vmid = 2, - dml_vnom = 3, - dml_vmax = 4 -}; -enum dml_source_macro_tile_size { - dml_4k_tile = 0, - dml_64k_tile = 1, - dml_256k_tile = 2 -}; -enum dml_cursor_bpp { - dml_cur_2bit = 0, - dml_cur_32bit = 1, - dml_cur_64bit = 2 -}; -enum dml_dram_clock_change_support { - dml_dram_clock_change_vactive = 0, - dml_dram_clock_change_vblank = 1, - dml_dram_clock_change_vblank_drr = 2, - dml_dram_clock_change_vactive_w_mall_full_frame = 3, - dml_dram_clock_change_vactive_w_mall_sub_vp = 4, - dml_dram_clock_change_vblank_w_mall_full_frame = 5, - dml_dram_clock_change_vblank_drr_w_mall_full_frame = 6, - dml_dram_clock_change_vblank_w_mall_sub_vp = 7, - dml_dram_clock_change_vblank_drr_w_mall_sub_vp = 8, - dml_dram_clock_change_unsupported = 9 -}; -enum dml_fclock_change_support { - dml_fclock_change_vactive = 0, - dml_fclock_change_vblank = 1, - dml_fclock_change_unsupported = 2 -}; -enum dml_dsc_enable { - dml_dsc_disable = 0, - dml_dsc_enable = 1, - dml_dsc_enable_if_necessary = 2 -}; -enum dml_mpc_use_policy { - dml_mpc_disabled = 0, - dml_mpc_as_possible = 1, - dml_mpc_as_needed_for_voltage = 2, - dml_mpc_as_needed_for_pstate_and_voltage = 3, - dml_mpc_as_needed = 4, - dml_mpc_2to1 = 5 -}; -enum dml_odm_use_policy { - dml_odm_use_policy_bypass = 0, - dml_odm_use_policy_combine_as_needed = 1, - dml_odm_use_policy_combine_2to1 = 2, - dml_odm_use_policy_combine_3to1 = 3, - dml_odm_use_policy_combine_4to1 = 4, - dml_odm_use_policy_split_1to2 = 5, - dml_odm_use_policy_mso_1to2 = 6, - dml_odm_use_policy_mso_1to4 = 7 -}; -enum dml_odm_mode { - dml_odm_mode_bypass = 0, - dml_odm_mode_combine_2to1 = 1, - dml_odm_mode_combine_3to1 = 2, - dml_odm_mode_combine_4to1 = 3, - dml_odm_mode_split_1to2 = 4, - dml_odm_mode_mso_1to2 = 5, - dml_odm_mode_mso_1to4 = 6 -}; -enum dml_writeback_configuration { - dml_whole_buffer_for_single_stream_no_interleave = 0, - dml_whole_buffer_for_single_stream_interleave = 1 -}; -enum dml_immediate_flip_requirement { - dml_immediate_flip_not_required = 0, - dml_immediate_flip_required = 1, - dml_immediate_flip_if_possible = 2 -}; -enum dml_unbounded_requesting_policy { - dml_unbounded_requesting_enable = 0, - dml_unbounded_requesting_edp_only = 1, - dml_unbounded_requesting_disable = 2 -}; -enum dml_clk_cfg_policy { - dml_use_required_freq = 0, - dml_use_override_freq = 1, - dml_use_state_freq = 2 -}; - - -struct soc_state_bounding_box_st { - dml_float_t socclk_mhz; - dml_float_t dscclk_mhz; - dml_float_t phyclk_mhz; - dml_float_t phyclk_d18_mhz; - 
-	dml_float_t phyclk_d32_mhz;
-	dml_float_t dtbclk_mhz;
-	dml_float_t fabricclk_mhz;
-	dml_float_t dcfclk_mhz;
-	dml_float_t dispclk_mhz;
-	dml_float_t dppclk_mhz;
-	dml_float_t dram_speed_mts;
-	dml_float_t urgent_latency_pixel_data_only_us;
-	dml_float_t urgent_latency_pixel_mixed_with_vm_data_us;
-	dml_float_t urgent_latency_vm_data_only_us;
-	dml_float_t writeback_latency_us;
-	dml_float_t urgent_latency_adjustment_fabric_clock_component_us;
-	dml_float_t urgent_latency_adjustment_fabric_clock_reference_mhz;
-	dml_float_t sr_exit_time_us;
-	dml_float_t sr_enter_plus_exit_time_us;
-	dml_float_t sr_exit_z8_time_us;
-	dml_float_t sr_enter_plus_exit_z8_time_us;
-	dml_float_t dram_clock_change_latency_us;
-	dml_float_t fclk_change_latency_us;
-	dml_float_t usr_retraining_latency_us;
-	dml_bool_t use_ideal_dram_bw_strobe;
-	dml_float_t g6_temp_read_blackout_us;
-
-	struct {
-		dml_uint_t urgent_ramp_uclk_cycles;
-		dml_uint_t trip_to_memory_uclk_cycles;
-		dml_uint_t meta_trip_to_memory_uclk_cycles;
-		dml_uint_t maximum_latency_when_urgent_uclk_cycles;
-		dml_uint_t average_latency_when_urgent_uclk_cycles;
-		dml_uint_t maximum_latency_when_non_urgent_uclk_cycles;
-		dml_uint_t average_latency_when_non_urgent_uclk_cycles;
-	} dml_dcn401_uclk_dpm_dependent_soc_qos_params;
-};
-
-struct soc_bounding_box_st {
-	dml_float_t dprefclk_mhz;
-	dml_float_t xtalclk_mhz;
-	dml_float_t pcierefclk_mhz;
-	dml_float_t refclk_mhz;
-	dml_float_t amclk_mhz;
-	dml_uint_t max_outstanding_reqs;
-	dml_float_t pct_ideal_sdp_bw_after_urgent;
-	dml_float_t pct_ideal_fabric_bw_after_urgent;
-	dml_float_t pct_ideal_dram_bw_after_urgent_pixel_only;
-	dml_float_t pct_ideal_dram_bw_after_urgent_pixel_and_vm;
-	dml_float_t pct_ideal_dram_bw_after_urgent_vm_only;
-	dml_float_t pct_ideal_dram_bw_after_urgent_strobe;
-	dml_float_t max_avg_sdp_bw_use_normal_percent;
-	dml_float_t max_avg_fabric_bw_use_normal_percent;
-	dml_float_t max_avg_dram_bw_use_normal_percent;
-	dml_float_t max_avg_dram_bw_use_normal_strobe_percent;
-
-	dml_float_t svp_prefetch_pct_ideal_sdp_bw_after_urgent;
-	dml_float_t svp_prefetch_pct_ideal_fabric_bw_after_urgent;
-	dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_only;
-	dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_and_vm;
-	dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_vm_only;
-	dml_float_t svp_prefetch_max_avg_sdp_bw_use_normal_percent;
-	dml_float_t svp_prefetch_max_avg_fabric_bw_use_normal_percent;
-	dml_float_t svp_prefetch_max_avg_dram_bw_use_normal_percent;
-
-	dml_uint_t round_trip_ping_latency_dcfclk_cycles;
-	dml_uint_t urgent_out_of_order_return_per_channel_pixel_only_bytes;
-	dml_uint_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
-	dml_uint_t urgent_out_of_order_return_per_channel_vm_only_bytes;
-	dml_uint_t num_chans;
-	dml_uint_t return_bus_width_bytes;
-	dml_uint_t dram_channel_width_bytes;
-	dml_uint_t fabric_datapath_to_dcn_data_return_bytes;
-	dml_uint_t hostvm_min_page_size_kbytes;
-	dml_uint_t gpuvm_min_page_size_kbytes;
-	dml_float_t phy_downspread_percent;
-	dml_float_t dcn_downspread_percent;
-	dml_float_t smn_latency_us;
-	dml_uint_t mall_allocated_for_dcn_mbytes;
-	dml_float_t dispclk_dppclk_vco_speed_mhz;
-	dml_bool_t do_urgent_latency_adjustment;
-
-	dml_uint_t mem_word_bytes;
-	dml_uint_t num_dcc_mcaches;
-	dml_uint_t mcache_size_bytes;
-	dml_uint_t mcache_line_size_bytes;
-
-	struct {
-		dml_bool_t UseNewDCN401SOCParameters;
-		dml_uint_t df_qos_response_time_fclk_cycles;
-		dml_uint_t max_round_trip_to_furthest_cs_fclk_cycles;
-		dml_uint_t mall_overhead_fclk_cycles;
-		dml_uint_t meta_trip_adder_fclk_cycles;
-		dml_uint_t average_transport_distance_fclk_cycles;
-		dml_float_t umc_urgent_ramp_latency_margin;
-		dml_float_t umc_max_latency_margin;
-		dml_float_t umc_average_latency_margin;
-		dml_float_t fabric_max_transport_latency_margin;
-		dml_float_t fabric_average_transport_latency_margin;
-	} dml_dcn401_soc_qos_params;
-
-};
-
-struct ip_params_st {
-	dml_uint_t vblank_nom_default_us;
-	dml_uint_t rob_buffer_size_kbytes;
-	dml_uint_t config_return_buffer_size_in_kbytes;
-	dml_uint_t config_return_buffer_segment_size_in_kbytes;
-	dml_uint_t compressed_buffer_segment_size_in_kbytes;
-	dml_uint_t meta_fifo_size_in_kentries;
-	dml_uint_t zero_size_buffer_entries;
-	dml_uint_t dpte_buffer_size_in_pte_reqs_luma;
-	dml_uint_t dpte_buffer_size_in_pte_reqs_chroma;
-	dml_uint_t dcc_meta_buffer_size_bytes;
-	dml_bool_t gpuvm_enable;
-	dml_bool_t hostvm_enable;
-	dml_uint_t gpuvm_max_page_table_levels;
-	dml_uint_t hostvm_max_page_table_levels;
-	dml_uint_t pixel_chunk_size_kbytes;
-	dml_uint_t alpha_pixel_chunk_size_kbytes;
-	dml_uint_t min_pixel_chunk_size_bytes;
-	dml_uint_t meta_chunk_size_kbytes;
-	dml_uint_t min_meta_chunk_size_bytes;
-	dml_uint_t writeback_chunk_size_kbytes;
-	dml_uint_t line_buffer_size_bits;
-	dml_uint_t max_line_buffer_lines;
-	dml_uint_t writeback_interface_buffer_size_kbytes;
-	dml_uint_t max_num_dpp;
-	dml_uint_t max_num_otg;
-	dml_uint_t max_num_wb;
-	dml_uint_t max_dchub_pscl_bw_pix_per_clk;
-	dml_uint_t max_pscl_lb_bw_pix_per_clk;
-	dml_uint_t max_lb_vscl_bw_pix_per_clk;
-	dml_uint_t max_vscl_hscl_bw_pix_per_clk;
-	dml_float_t max_hscl_ratio;
-	dml_float_t max_vscl_ratio;
-	dml_uint_t max_hscl_taps;
-	dml_uint_t max_vscl_taps;
-	dml_uint_t num_dsc;
-	dml_uint_t maximum_dsc_bits_per_component;
-	dml_uint_t maximum_pixels_per_line_per_dsc_unit;
-	dml_bool_t dsc422_native_support;
-	dml_bool_t cursor_64bpp_support;
-	dml_float_t dispclk_ramp_margin_percent;
-	dml_uint_t dppclk_delay_subtotal;
-	dml_uint_t dppclk_delay_scl;
-	dml_uint_t dppclk_delay_scl_lb_only;
-	dml_uint_t dppclk_delay_cnvc_formatter;
-	dml_uint_t dppclk_delay_cnvc_cursor;
-	dml_uint_t cursor_buffer_size;
-	dml_uint_t cursor_chunk_size;
-	dml_uint_t dispclk_delay_subtotal;
-	dml_bool_t dynamic_metadata_vm_enabled;
-	dml_uint_t max_inter_dcn_tile_repeaters;
-	dml_uint_t max_num_hdmi_frl_outputs;
-	dml_uint_t max_num_dp2p0_outputs;
-	dml_uint_t max_num_dp2p0_streams;
-	dml_bool_t dcc_supported;
-	dml_bool_t ptoi_supported;
-	dml_float_t writeback_max_hscl_ratio;
-	dml_float_t writeback_max_vscl_ratio;
-	dml_float_t writeback_min_hscl_ratio;
-	dml_float_t writeback_min_vscl_ratio;
-	dml_uint_t writeback_max_hscl_taps;
-	dml_uint_t writeback_max_vscl_taps;
-	dml_uint_t writeback_line_buffer_buffer_size;
-};
-
-struct DmlPipe {
-	dml_float_t Dppclk;
-	dml_float_t Dispclk;
-	dml_float_t PixelClock;
-	dml_float_t DCFClkDeepSleep;
-	dml_uint_t DPPPerSurface;
-	dml_bool_t ScalerEnabled;
-	enum dml_rotation_angle SourceScan;
-	dml_uint_t ViewportHeight;
-	dml_uint_t ViewportHeightChroma;
-	dml_uint_t BlockWidth256BytesY;
-	dml_uint_t BlockHeight256BytesY;
-	dml_uint_t BlockWidth256BytesC;
-	dml_uint_t BlockHeight256BytesC;
-	dml_uint_t BlockWidthY;
-	dml_uint_t BlockHeightY;
-	dml_uint_t BlockWidthC;
-	dml_uint_t BlockHeightC;
-	dml_uint_t InterlaceEnable;
-	dml_uint_t NumberOfCursors;
-	dml_uint_t VBlank;
-	dml_uint_t HTotal;
-	dml_uint_t HActive;
-	dml_bool_t DCCEnable;
-	enum dml_odm_mode ODMMode;
-	enum dml_source_format_class SourcePixelFormat;
-	enum dml_swizzle_mode SurfaceTiling;
-	dml_uint_t BytePerPixelY;
-	dml_uint_t BytePerPixelC;
-	dml_bool_t ProgressiveToInterlaceUnitInOPP;
-	dml_float_t VRatio;
-	dml_float_t VRatioChroma;
-	dml_uint_t VTaps;
-	dml_uint_t VTapsChroma;
-	dml_uint_t PitchY;
-	dml_uint_t DCCMetaPitchY;
-	dml_uint_t PitchC;
-	dml_uint_t DCCMetaPitchC;
-	dml_bool_t ViewportStationary;
-	dml_uint_t ViewportXStart;
-	dml_uint_t ViewportYStart;
-	dml_uint_t ViewportXStartC;
-	dml_uint_t ViewportYStartC;
-	dml_bool_t FORCE_ONE_ROW_FOR_FRAME;
-	dml_uint_t SwathHeightY;
-	dml_uint_t SwathHeightC;
-};
-
-struct Watermarks {
-	dml_float_t UrgentWatermark;
-	dml_float_t WritebackUrgentWatermark;
-	dml_float_t DRAMClockChangeWatermark;
-	dml_float_t FCLKChangeWatermark;
-	dml_float_t WritebackDRAMClockChangeWatermark;
-	dml_float_t WritebackFCLKChangeWatermark;
-	dml_float_t StutterExitWatermark;
-	dml_float_t StutterEnterPlusExitWatermark;
-	dml_float_t Z8StutterExitWatermark;
-	dml_float_t Z8StutterEnterPlusExitWatermark;
-	dml_float_t USRRetrainingWatermark;
-};
-
-struct SOCParametersList {
-	dml_float_t UrgentLatency;
-	dml_float_t ExtraLatency;
-	dml_float_t WritebackLatency;
-	dml_float_t DRAMClockChangeLatency;
-	dml_float_t FCLKChangeLatency;
-	dml_float_t SRExitTime;
-	dml_float_t SREnterPlusExitTime;
-	dml_float_t SRExitZ8Time;
-	dml_float_t SREnterPlusExitZ8Time;
-	dml_float_t USRRetrainingLatency;
-	dml_float_t SMNLatency;
-};
-
-/// @brief Struct that represent Plane configration of a display cfg
-struct dml_plane_cfg_st {
-	//
-	// Pipe/Surface Parameters
-	//
-	dml_bool_t GPUVMEnable; ///<
-
-static float _log(float in)
-{
-	int *exp_ptr = (int *)(&in);
-	int x = *exp_ptr;
-	const int log_2 = ((x >> 23) & 255) - 128;
-
-	x &= ~(255 << 23);
-	x += 127 << 23;
-	*exp_ptr = x;
-
-	in = ((-1.0f / 3) * in + 2) * in - 2.0f / 3;
-
-	return (in + log_2);
-}
-
-dml_bool_t dml_util_is_420(enum dml_source_format_class source_format)
-{
-	dml_bool_t val = false;
-
-	switch (source_format) {
-	case dml_444_16:
-		val = 0;
-		break;
-	case dml_444_32:
-		val = 0;
-		break;
-	case dml_444_64:
-		val = 0;
-		break;
-	case dml_420_8:
-		val = 1;
-		break;
-	case dml_420_10:
-		val = 1;
-		break;
-	case dml_422_8:
-		val = 0;
-		break;
-	case dml_422_10:
-		val = 0;
-		break;
-	default:
-		ASSERT(0);
-		break;
-	}
-	return val;
-}
-
-static inline float dcn_bw_pow(float a, float exp)
-{
-	float temp;
-	/*ASSERT(exp == (int)exp);*/
-	if ((int)exp == 0)
-		return 1;
-	temp = dcn_bw_pow(a, (int)(exp / 2));
-	if (((int)exp % 2) == 0) {
-		return temp * temp;
-	} else {
-		if ((int)exp > 0)
-			return a * temp * temp;
-		else
-			return (temp * temp) / a;
-	}
-}
-
-static inline float dcn_bw_ceil2(const float arg, const float significance)
-{
-	ASSERT(significance != 0);
-
-	return ((int)(arg / significance + 0.99999)) * significance;
-}
-
-static inline float dcn_bw_floor2(const float arg, const float significance)
-{
-	ASSERT(significance != 0);
-
-	return ((int)(arg / significance)) * significance;
-}
-
-dml_float_t dml_ceil(dml_float_t x, dml_float_t granularity)
-{
-	if (granularity == 0)
-		return 0;
-	//return (dml_float_t) (ceil(x / granularity) * granularity);
-	return (dml_float_t)dcn_bw_ceil2(x, granularity);
-}
-
-dml_float_t dml_floor(dml_float_t x, dml_float_t granularity)
-{
-	if (granularity == 0)
-		return 0;
-	//return (dml_float_t) (floor(x / granularity) * granularity);
-	return (dml_float_t)dcn_bw_floor2(x, granularity);
-}
-
-dml_float_t dml_min(dml_float_t x, dml_float_t y)
-{
-	if (x != x)
-		return y;
-	if (y != y)
-		return x;
-	if (x < y)
-		return x;
-	else
-		return y;
-}
-
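As an aside, the deleted _log() above is the classic bit-twiddling log2 approximation: it splits the IEEE-754 float into exponent and mantissa, re-biases the mantissa into [1, 2), and refines it with a quadratic. The "- 128" (rather than "- 127") cancels the +1 built into the polynomial, so the sum is approximately log2(x); dml_log2(x) then divides by _log(2) == 1. The following standalone sketch (illustrative only, not part of the patch; it uses the same type-punning as the driver, so build without strict-aliasing optimizations or swap in memcpy) reproduces the trick and compares it against log2f:

	#include <math.h>
	#include <stdio.h>

	static float fast_log2(float in)
	{
		int *exp_ptr = (int *)(&in);	/* pun as in the original; memcpy is the portable form */
		int x = *exp_ptr;
		const int log_2 = ((x >> 23) & 255) - 128;	/* biased exponent minus 128 */

		x &= ~(255 << 23);	/* clear the exponent field */
		x += 127 << 23;		/* re-bias so 'in' lands in [1, 2) */
		*exp_ptr = x;

		/* quadratic fit of log2(m) + 1 on [1, 2); the -128 above absorbs the +1 */
		in = ((-1.0f / 3) * in + 2) * in - 2.0f / 3;

		return in + log_2;	/* ~= log2 of the original input */
	}

	int main(void)
	{
		for (float v = 0.5f; v <= 1024.0f; v *= 7.3f)
			printf("%10.3f: approx %8.5f  exact %8.5f\n", v, fast_log2(v), log2f(v));
		return 0;
	}

The maximum error of the quadratic on [1, 2) is a few hundredths of a bit, which is plenty for the bandwidth heuristics this library computes.
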
-dml_float_t dml_min3(dml_float_t x, dml_float_t y, dml_float_t z)
-{
-	return dml_min(dml_min(x, y), z);
-}
-
-dml_float_t dml_min4(dml_float_t x, dml_float_t y, dml_float_t z, dml_float_t w)
-{
-	return dml_min(dml_min(dml_min(x, y), z), w);
-}
-
-dml_float_t dml_max(dml_float_t x, dml_float_t y)
-{
-	if (x != x)
-		return y;
-	if (y != y)
-		return x;
-	if (x > y)
-		return x;
-	else
-		return y;
-}
-dml_float_t dml_max3(dml_float_t x, dml_float_t y, dml_float_t z)
-{
-	return dml_max(dml_max(x, y), z);
-}
-dml_float_t dml_max4(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d)
-{
-	return dml_max(dml_max(a, b), dml_max(c, d));
-}
-dml_float_t dml_max5(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d, dml_float_t e)
-{
-	return dml_max(dml_max4(a, b, c, d), e);
-}
-dml_float_t dml_log(dml_float_t x, dml_float_t base)
-{
-	return (dml_float_t) (_log(x) / _log(base));
-}
-
-dml_float_t dml_log2(dml_float_t x)
-{
-	return (dml_float_t) (_log(x) / _log(2));
-}
-
-dml_float_t dml_round(dml_float_t val, dml_bool_t bankers_rounding)
-{
-//	if (bankers_rounding)
-//		return (dml_float_t) lrint(val);
-//	else {
-//		return round(val);
-	double round_pt = 0.5;
-	double ceil = dml_ceil(val, 1);
-	double floor = dml_floor(val, 1);
-
-	if (val - floor >= round_pt)
-		return ceil;
-	else
-		return floor;
-//	}
-}
-
-dml_float_t dml_pow(dml_float_t base, int exp)
-{
-	return (dml_float_t) dcn_bw_pow(base, exp);
-}
-
-dml_uint_t dml_round_to_multiple(dml_uint_t num, dml_uint_t multiple, dml_bool_t up)
-{
-	dml_uint_t remainder;
-
-	if (multiple == 0)
-		return num;
-
-	remainder = num % multiple;
-	if (remainder == 0)
-		return num;
-
-	if (up)
-		return (num + multiple - remainder);
-	else
-		return (num - remainder);
-}
-
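dml_round_to_multiple() is the alignment workhorse of this file: it snaps a value to the nearest multiple of a granularity, rounding up or down on request, and passes zero and exact multiples through unchanged. A minimal standalone check of the same logic (names are illustrative, not from the patch):

	#include <stdio.h>

	typedef unsigned int dml_uint_t;

	/* same logic as the deleted helper above */
	static dml_uint_t round_to_multiple(dml_uint_t num, dml_uint_t multiple, int up)
	{
		dml_uint_t remainder;

		if (multiple == 0)
			return num;
		remainder = num % multiple;
		if (remainder == 0)
			return num;
		return up ? num + multiple - remainder : num - remainder;
	}

	int main(void)
	{
		/* e.g. aligning a 1366-pixel width to a 64-pixel granularity */
		printf("%u\n", round_to_multiple(1366, 64, 1));	/* 1408 */
		printf("%u\n", round_to_multiple(1366, 64, 0));	/* 1344 */
		printf("%u\n", round_to_multiple(1344, 64, 1));	/* 1344, already aligned */
		return 0;
	}
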
dml_print("DML: refcyc_h_blank_end = 0x%x\n", dlg_regs->refcyc_h_blank_end); - dml_print("DML: dlg_vblank_end = 0x%x\n", dlg_regs->dlg_vblank_end); - dml_print("DML: min_dst_y_next_start = 0x%x\n", dlg_regs->min_dst_y_next_start); - dml_print("DML: refcyc_per_htotal = 0x%x\n", dlg_regs->refcyc_per_htotal); - dml_print("DML: refcyc_x_after_scaler = 0x%x\n", dlg_regs->refcyc_x_after_scaler); - dml_print("DML: dst_y_after_scaler = 0x%x\n", dlg_regs->dst_y_after_scaler); - dml_print("DML: dst_y_prefetch = 0x%x\n", dlg_regs->dst_y_prefetch); - dml_print("DML: dst_y_per_vm_vblank = 0x%x\n", dlg_regs->dst_y_per_vm_vblank); - dml_print("DML: dst_y_per_row_vblank = 0x%x\n", dlg_regs->dst_y_per_row_vblank); - dml_print("DML: dst_y_per_vm_flip = 0x%x\n", dlg_regs->dst_y_per_vm_flip); - dml_print("DML: dst_y_per_row_flip = 0x%x\n", dlg_regs->dst_y_per_row_flip); - dml_print("DML: ref_freq_to_pix_freq = 0x%x\n", dlg_regs->ref_freq_to_pix_freq); - dml_print("DML: vratio_prefetch = 0x%x\n", dlg_regs->vratio_prefetch); - dml_print("DML: vratio_prefetch_c = 0x%x\n", dlg_regs->vratio_prefetch_c); - dml_print("DML: refcyc_per_pte_group_vblank_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_vblank_l); - dml_print("DML: refcyc_per_pte_group_vblank_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_vblank_c); - dml_print("DML: refcyc_per_meta_chunk_vblank_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_vblank_l); - dml_print("DML: refcyc_per_meta_chunk_vblank_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_vblank_c); - dml_print("DML: refcyc_per_pte_group_flip_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_flip_l); - dml_print("DML: refcyc_per_pte_group_flip_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_flip_c); - dml_print("DML: refcyc_per_meta_chunk_flip_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_flip_l); - dml_print("DML: refcyc_per_meta_chunk_flip_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_flip_c); - dml_print("DML: dst_y_per_pte_row_nom_l = 0x%x\n", dlg_regs->dst_y_per_pte_row_nom_l); - dml_print("DML: dst_y_per_pte_row_nom_c = 0x%x\n", dlg_regs->dst_y_per_pte_row_nom_c); - dml_print("DML: refcyc_per_pte_group_nom_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_nom_l); - dml_print("DML: refcyc_per_pte_group_nom_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_nom_c); - dml_print("DML: dst_y_per_meta_row_nom_l = 0x%x\n", dlg_regs->dst_y_per_meta_row_nom_l); - dml_print("DML: dst_y_per_meta_row_nom_c = 0x%x\n", dlg_regs->dst_y_per_meta_row_nom_c); - dml_print("DML: refcyc_per_meta_chunk_nom_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_nom_l); - dml_print("DML: refcyc_per_meta_chunk_nom_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_nom_c); - dml_print("DML: refcyc_per_line_delivery_pre_l = 0x%x\n", dlg_regs->refcyc_per_line_delivery_pre_l); - dml_print("DML: refcyc_per_line_delivery_pre_c = 0x%x\n", dlg_regs->refcyc_per_line_delivery_pre_c); - dml_print("DML: refcyc_per_line_delivery_l = 0x%x\n", dlg_regs->refcyc_per_line_delivery_l); - dml_print("DML: refcyc_per_line_delivery_c = 0x%x\n", dlg_regs->refcyc_per_line_delivery_c); - dml_print("DML: refcyc_per_vm_group_vblank = 0x%x\n", dlg_regs->refcyc_per_vm_group_vblank); - dml_print("DML: refcyc_per_vm_group_flip = 0x%x\n", dlg_regs->refcyc_per_vm_group_flip); - dml_print("DML: refcyc_per_vm_req_vblank = 0x%x\n", dlg_regs->refcyc_per_vm_req_vblank); - dml_print("DML: refcyc_per_vm_req_flip = 0x%x\n", dlg_regs->refcyc_per_vm_req_flip); - dml_print("DML: chunk_hdl_adjust_cur0 = 0x%x\n", dlg_regs->chunk_hdl_adjust_cur0); - dml_print("DML: dst_y_offset_cur1 = 0x%x\n", 
dlg_regs->dst_y_offset_cur1); - dml_print("DML: chunk_hdl_adjust_cur1 = 0x%x\n", dlg_regs->chunk_hdl_adjust_cur1); - dml_print("DML: vready_after_vcount0 = 0x%x\n", dlg_regs->vready_after_vcount0); - dml_print("DML: dst_y_delta_drq_limit = 0x%x\n", dlg_regs->dst_y_delta_drq_limit); - dml_print("DML: refcyc_per_vm_dmdata = 0x%x\n", dlg_regs->refcyc_per_vm_dmdata); - dml_print("DML: ===================================== \n"); -} - -void dml_print_ttu_regs_st(const dml_display_ttu_regs_st *ttu_regs) -{ - dml_print("DML: ===================================== \n"); - dml_print("DML: DISPLAY_TTU_REGS_ST \n"); - dml_print("DML: qos_level_low_wm = 0x%x\n", ttu_regs->qos_level_low_wm); - dml_print("DML: qos_level_high_wm = 0x%x\n", ttu_regs->qos_level_high_wm); - dml_print("DML: min_ttu_vblank = 0x%x\n", ttu_regs->min_ttu_vblank); - dml_print("DML: qos_level_flip = 0x%x\n", ttu_regs->qos_level_flip); - dml_print("DML: refcyc_per_req_delivery_pre_l = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_l); - dml_print("DML: refcyc_per_req_delivery_l = 0x%x\n", ttu_regs->refcyc_per_req_delivery_l); - dml_print("DML: refcyc_per_req_delivery_pre_c = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_c); - dml_print("DML: refcyc_per_req_delivery_c = 0x%x\n", ttu_regs->refcyc_per_req_delivery_c); - dml_print("DML: refcyc_per_req_delivery_cur0 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_cur0); - dml_print("DML: refcyc_per_req_delivery_pre_cur0 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_cur0); - dml_print("DML: refcyc_per_req_delivery_cur1 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_cur1); - dml_print("DML: refcyc_per_req_delivery_pre_cur1 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_cur1); - dml_print("DML: qos_level_fixed_l = 0x%x\n", ttu_regs->qos_level_fixed_l); - dml_print("DML: qos_ramp_disable_l = 0x%x\n", ttu_regs->qos_ramp_disable_l); - dml_print("DML: qos_level_fixed_c = 0x%x\n", ttu_regs->qos_level_fixed_c); - dml_print("DML: qos_ramp_disable_c = 0x%x\n", ttu_regs->qos_ramp_disable_c); - dml_print("DML: qos_level_fixed_cur0 = 0x%x\n", ttu_regs->qos_level_fixed_cur0); - dml_print("DML: qos_ramp_disable_cur0 = 0x%x\n", ttu_regs->qos_ramp_disable_cur0); - dml_print("DML: qos_level_fixed_cur1 = 0x%x\n", ttu_regs->qos_level_fixed_cur1); - dml_print("DML: qos_ramp_disable_cur1 = 0x%x\n", ttu_regs->qos_ramp_disable_cur1); - dml_print("DML: ===================================== \n"); -} - -void dml_print_dml_policy(const struct dml_mode_eval_policy_st *policy) -{ - dml_print("DML: ===================================== \n"); - dml_print("DML: DML_MODE_EVAL_POLICY_ST\n"); - dml_print("DML: Policy: UseUnboundedRequesting = 0x%x\n", policy->UseUnboundedRequesting); - dml_print("DML: Policy: UseMinimumRequiredDCFCLK = 0x%x\n", policy->UseMinimumRequiredDCFCLK); - dml_print("DML: Policy: DRAMClockChangeRequirementFinal = 0x%x\n", policy->DRAMClockChangeRequirementFinal); - dml_print("DML: Policy: FCLKChangeRequirementFinal = 0x%x\n", policy->FCLKChangeRequirementFinal); - dml_print("DML: Policy: USRRetrainingRequiredFinal = 0x%x\n", policy->USRRetrainingRequiredFinal); - dml_print("DML: Policy: EnhancedPrefetchScheduleAccelerationFinal = 0x%x\n", policy->EnhancedPrefetchScheduleAccelerationFinal); - dml_print("DML: Policy: NomDETInKByteOverrideEnable = 0x%x\n", policy->NomDETInKByteOverrideEnable); - dml_print("DML: Policy: NomDETInKByteOverrideValue = 0x%x\n", policy->NomDETInKByteOverrideValue); - dml_print("DML: Policy: DCCProgrammingAssumesScanDirectionUnknownFinal = 0x%x\n", 
policy->DCCProgrammingAssumesScanDirectionUnknownFinal); - dml_print("DML: Policy: SynchronizeTimingsFinal = 0x%x\n", policy->SynchronizeTimingsFinal); - dml_print("DML: Policy: SynchronizeDRRDisplaysForUCLKPStateChangeFinal = 0x%x\n", policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal); - dml_print("DML: Policy: AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = 0x%x\n", policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported); - dml_print("DML: Policy: AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = 0x%x\n", policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported); - - for (dml_uint_t i = 0; i < DCN_DML__NUM_PLANE; i++) { - dml_print("DML: i=%0d, Policy: MPCCombineUse = 0x%x\n", i, policy->MPCCombineUse[i]); - dml_print("DML: i=%0d, Policy: ODMUse = 0x%x\n", i, policy->ODMUse[i]); - dml_print("DML: i=%0d, Policy: ImmediateFlipRequirement = 0x%x\n", i, policy->ImmediateFlipRequirement[i]); - dml_print("DML: i=%0d, Policy: AllowForPStateChangeOrStutterInVBlank = 0x%x\n", i, policy->AllowForPStateChangeOrStutterInVBlank[i]); - } - dml_print("DML: ===================================== \n"); -} - -void dml_print_mode_support(struct display_mode_lib_st *mode_lib, dml_uint_t j) -{ - dml_print("DML: MODE SUPPORT: ===============================================\n"); - dml_print("DML: MODE SUPPORT: Voltage State %d\n", j); - dml_print("DML: MODE SUPPORT: Mode Supported : %s\n", mode_lib->ms.support.ModeSupport[j] == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Scale Ratio And Taps : %s\n", mode_lib->ms.support.ScaleRatioAndTapsSupport == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Source Format Pixel And Scan : %s\n", mode_lib->ms.support.SourceFormatPixelAndScanSupport == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Viewport Size : %s\n", mode_lib->ms.support.ViewportSizeSupport[j] == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Link Rate Does Not Match DP Version : %s\n", mode_lib->ms.support.LinkRateDoesNotMatchDPVersion == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Link Rate For Multistream Not Indicated : %s\n", mode_lib->ms.support.LinkRateForMultistreamNotIndicated == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: BPP For Multi stream Not Indicated : %s\n", mode_lib->ms.support.BPPForMultistreamNotIndicated == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Multistream With HDMI Or eDP : %s\n", mode_lib->ms.support.MultistreamWithHDMIOreDP == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Exceeded Multistream Slots : %s\n", mode_lib->ms.support.ExceededMultistreamSlots == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: MSO Or ODM Split With Non DP Link : %s\n", mode_lib->ms.support.MSOOrODMSplitWithNonDPLink == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Not Enough Lanes For MSO : %s\n", mode_lib->ms.support.NotEnoughLanesForMSO == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: LinkCapacitySupport : %s\n", mode_lib->ms.support.LinkCapacitySupport == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: P2IWith420 : %s\n", mode_lib->ms.support.P2IWith420 == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: DSCOnlyIfNecessaryWithBPP : %s\n", mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP == false ? 
"Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: DSC422NativeNotSupported : %s\n", mode_lib->ms.support.DSC422NativeNotSupported == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: MPCCombineMethodIncompatible : %s\n", mode_lib->ms.support.MPCCombineMethodIncompatible == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: ODMCombineTwoToOneSupportCheckOK : %s\n", mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: ODMCombineFourToOneSupportCheckOK : %s\n", mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: NotEnoughDSCUnits : %s\n", mode_lib->ms.support.NotEnoughDSCUnits == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: NotEnoughDSCSlices : %s\n", mode_lib->ms.support.NotEnoughDSCSlices == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe : %s\n", mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: InvalidCombinationOfMALLUseForPStateAndStaticScreen : %s\n", mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: DSCCLKRequiredMoreThanSupported : %s\n", mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: PixelsPerLinePerDSCUnitSupport : %s\n", mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: DTBCLKRequiredMoreThanSupported : %s\n", mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: InvalidCombinationOfMALLUseForPState : %s\n", mode_lib->ms.support.InvalidCombinationOfMALLUseForPState == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified : %s\n", mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: ROB Support : %s\n", mode_lib->ms.support.ROBSupport[j] == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: DISPCLK DPPCLK Support : %s\n", mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Total Available Pipes Support : %s\n", mode_lib->ms.support.TotalAvailablePipesSupport[j] == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Number Of OTG Support : %s\n", mode_lib->ms.support.NumberOfOTGSupport == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Number Of DP2p0 Support : %s\n", mode_lib->ms.support.NumberOfDP2p0Support == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Writeback Latency Support : %s\n", mode_lib->ms.support.WritebackLatencySupport == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Writeback Scale Ratio And Taps Support : %s\n", mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Cursor Support : %s\n", mode_lib->ms.support.CursorSupport == true ? 
"Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Pitch Support : %s\n", mode_lib->ms.support.PitchSupport == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Viewport Exceeds Surface : %s\n", mode_lib->ms.support.ViewportExceedsSurface == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Prefetch Supported : %s\n", mode_lib->ms.support.PrefetchSupported[j] == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: VActive Bandwith Support : %s\n", mode_lib->ms.support.VActiveBandwithSupport[j] == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Dynamic Metadata Supported : %s\n", mode_lib->ms.support.DynamicMetadataSupported[j] == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Total Vertical Active Bandwidth Support : %s\n", mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: VRatio In Prefetch Supported : %s\n", mode_lib->ms.support.VRatioInPrefetchSupported[j] == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: PTE Buffer Size Not Exceeded : %s\n", mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: DCC Meta Buffer Size Not Exceeded : %s\n", mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Non supported DSC Input BPC : %s\n", mode_lib->ms.support.NonsupportedDSCInputBPC == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Exceeded MALL Size : %s\n", mode_lib->ms.support.ExceededMALLSize == false ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: Host VM or Immediate Flip Supported : %s\n", ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !mode_lib->scratch.dml_core_mode_support_locals.ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j]) ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: dram clock change support : %s\n", mode_lib->scratch.dml_core_mode_support_locals.dram_clock_change_support == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: f_clock change support : %s\n", mode_lib->scratch.dml_core_mode_support_locals.f_clock_change_support == true ? "Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: USR Retraining Support : %s\n", (!mode_lib->ms.policy.USRRetrainingRequiredFinal || &mode_lib->ms.support.USRRetrainingSupport[j]) ? 
"Supported" : "NOT Supported"); - dml_print("DML: MODE SUPPORT: ===============================================\n"); -} - -void dml_print_dml_mode_support_info(const struct dml_mode_support_info_st *support, dml_bool_t fail_only) -{ - dml_print("DML: ===================================== \n"); - dml_print("DML: DML_MODE_SUPPORT_INFO_ST\n"); - if (!fail_only || support->ModeIsSupported == 0) - dml_print("DML: support: ModeIsSupported = 0x%x\n", support->ModeIsSupported); - if (!fail_only || support->ImmediateFlipSupport == 0) - dml_print("DML: support: ImmediateFlipSupport = 0x%x\n", support->ImmediateFlipSupport); - if (!fail_only || support->WritebackLatencySupport == 0) - dml_print("DML: support: WritebackLatencySupport = 0x%x\n", support->WritebackLatencySupport); - if (!fail_only || support->ScaleRatioAndTapsSupport == 0) - dml_print("DML: support: ScaleRatioAndTapsSupport = 0x%x\n", support->ScaleRatioAndTapsSupport); - if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) - dml_print("DML: support: SourceFormatPixelAndScanSupport = 0x%x\n", support->SourceFormatPixelAndScanSupport); - if (!fail_only || support->MPCCombineMethodIncompatible == 1) - dml_print("DML: support: MPCCombineMethodIncompatible = 0x%x\n", support->MPCCombineMethodIncompatible); - if (!fail_only || support->P2IWith420 == 1) - dml_print("DML: support: P2IWith420 = 0x%x\n", support->P2IWith420); - if (!fail_only || support->DSCOnlyIfNecessaryWithBPP == 1) - dml_print("DML: support: DSCOnlyIfNecessaryWithBPP = 0x%x\n", support->DSCOnlyIfNecessaryWithBPP); - if (!fail_only || support->DSC422NativeNotSupported == 1) - dml_print("DML: support: DSC422NativeNotSupported = 0x%x\n", support->DSC422NativeNotSupported); - if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) - dml_print("DML: support: LinkRateDoesNotMatchDPVersion = 0x%x\n", support->LinkRateDoesNotMatchDPVersion); - if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) - dml_print("DML: support: LinkRateForMultistreamNotIndicated = 0x%x\n", support->LinkRateForMultistreamNotIndicated); - if (!fail_only || support->BPPForMultistreamNotIndicated == 1) - dml_print("DML: support: BPPForMultistreamNotIndicated = 0x%x\n", support->BPPForMultistreamNotIndicated); - if (!fail_only || support->MultistreamWithHDMIOreDP == 1) - dml_print("DML: support: MultistreamWithHDMIOreDP = 0x%x\n", support->MultistreamWithHDMIOreDP); - if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) - dml_print("DML: support: MSOOrODMSplitWithNonDPLink = 0x%x\n", support->MSOOrODMSplitWithNonDPLink); - if (!fail_only || support->NotEnoughLanesForMSO == 1) - dml_print("DML: support: NotEnoughLanesForMSO = 0x%x\n", support->NotEnoughLanesForMSO); - if (!fail_only || support->NumberOfOTGSupport == 0) - dml_print("DML: support: NumberOfOTGSupport = 0x%x\n", support->NumberOfOTGSupport); - if (!fail_only || support->NumberOfDP2p0Support == 0) - dml_print("DML: support: NumberOfDP2p0Support = 0x%x\n", support->NumberOfDP2p0Support); - if (!fail_only || support->NonsupportedDSCInputBPC == 1) - dml_print("DML: support: NonsupportedDSCInputBPC = 0x%x\n", support->NonsupportedDSCInputBPC); - if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - dml_print("DML: support: WritebackScaleRatioAndTapsSupport = 0x%x\n", support->WritebackScaleRatioAndTapsSupport); - if (!fail_only || support->CursorSupport == 0) - dml_print("DML: support: CursorSupport = 0x%x\n", support->CursorSupport); - if (!fail_only || support->PitchSupport == 0) - 
dml_print("DML: support: PitchSupport = 0x%x\n", support->PitchSupport); - if (!fail_only || support->ViewportExceedsSurface == 1) - dml_print("DML: support: ViewportExceedsSurface = 0x%x\n", support->ViewportExceedsSurface); - if (!fail_only || support->ExceededMALLSize == 1) - dml_print("DML: support: ExceededMALLSize = 0x%x\n", support->ExceededMALLSize); - if (!fail_only || support->EnoughWritebackUnits == 0) - dml_print("DML: support: EnoughWritebackUnits = 0x%x\n", support->EnoughWritebackUnits); - if (!fail_only || support->ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified == 1) - dml_print("DML: support: ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = 0x%x\n", support->ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified); - if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - dml_print("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = 0x%x\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); - if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - dml_print("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = 0x%x\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); - if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - dml_print("DML: support: InvalidCombinationOfMALLUseForPState = 0x%x\n", support->InvalidCombinationOfMALLUseForPState); - - if (!fail_only || support->ExceededMultistreamSlots == 1) - dml_print("DML: support: ExceededMultistreamSlots = 0x%x\n", support->ExceededMultistreamSlots); - if (!fail_only || support->ODMCombineTwoToOneSupportCheckOK == 0) - dml_print("DML: support: ODMCombineTwoToOneSupportCheckOK = 0x%x\n", support->ODMCombineTwoToOneSupportCheckOK); - if (!fail_only || support->ODMCombineFourToOneSupportCheckOK == 0) - dml_print("DML: support: ODMCombineFourToOneSupportCheckOK = 0x%x\n", support->ODMCombineFourToOneSupportCheckOK); - if (!fail_only || support->NotEnoughDSCUnits == 1) - dml_print("DML: support: NotEnoughDSCUnits = 0x%x\n", support->NotEnoughDSCUnits); - if (!fail_only || support->NotEnoughDSCSlices == 1) - dml_print("DML: support: NotEnoughDSCSlices = 0x%x\n", support->NotEnoughDSCSlices); - if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - dml_print("DML: support: PixelsPerLinePerDSCUnitSupport = 0x%x\n", support->PixelsPerLinePerDSCUnitSupport); - if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) - dml_print("DML: support: DSCCLKRequiredMoreThanSupported = 0x%x\n", support->DSCCLKRequiredMoreThanSupported); - if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) - dml_print("DML: support: DTBCLKRequiredMoreThanSupported = 0x%x\n", support->DTBCLKRequiredMoreThanSupported); - if (!fail_only || support->LinkCapacitySupport == 0) - dml_print("DML: support: LinkCapacitySupport = 0x%x\n", support->LinkCapacitySupport); - - for (dml_uint_t j = 0; j < 2; j++) { - if (!fail_only || support->DRAMClockChangeSupport[j] == dml_dram_clock_change_unsupported) - dml_print("DML: support: combine=%d, DRAMClockChangeSupport = %d\n", j, support->DRAMClockChangeSupport[j]); - if (!fail_only || support->FCLKChangeSupport[j] == dml_fclock_change_unsupported) - dml_print("DML: support: combine=%d, FCLKChangeSupport = %d\n", j, support->FCLKChangeSupport[j]); - if (!fail_only || support->ROBSupport[j] == 0) - dml_print("DML: support: combine=%d, ROBSupport = %d\n", j, support->ROBSupport[j]); - 
if (!fail_only || support->PTEBufferSizeNotExceeded[j] == 0) - dml_print("DML: support: combine=%d, PTEBufferSizeNotExceeded = %d\n", j, support->PTEBufferSizeNotExceeded[j]); - if (!fail_only || support->DCCMetaBufferSizeNotExceeded[j] == 0) - dml_print("DML: support: combine=%d, DCCMetaBufferSizeNotExceeded = %d\n", j, support->DCCMetaBufferSizeNotExceeded[j]); - if (!fail_only || support->TotalVerticalActiveBandwidthSupport[j] == 0) - dml_print("DML: support: combine=%d, TotalVerticalActiveBandwidthSupport = %d\n", j, support->TotalVerticalActiveBandwidthSupport[j]); - if (!fail_only || support->USRRetrainingSupport[j] == 0) - dml_print("DML: support: combine=%d, USRRetrainingSupport = %d\n", j, support->USRRetrainingSupport[j]); - if (!fail_only || support->VActiveBandwithSupport[j] == 0) - dml_print("DML: support: combine=%d, VActiveBandwithSupport = %d\n", j, support->VActiveBandwithSupport[j]); - if (!fail_only || support->PrefetchSupported[j] == 0) - dml_print("DML: support: combine=%d, PrefetchSupported = %d\n", j, support->PrefetchSupported[j]); - if (!fail_only || support->DynamicMetadataSupported[j] == 0) - dml_print("DML: support: combine=%d, DynamicMetadataSupported = %d\n", j, support->DynamicMetadataSupported[j]); - if (!fail_only || support->VRatioInPrefetchSupported[j] == 0) - dml_print("DML: support: combine=%d, VRatioInPrefetchSupported = %d\n", j, support->VRatioInPrefetchSupported[j]); - if (!fail_only || support->DISPCLK_DPPCLK_Support[j] == 0) - dml_print("DML: support: combine=%d, DISPCLK_DPPCLK_Support = %d\n", j, support->DISPCLK_DPPCLK_Support[j]); - if (!fail_only || support->TotalAvailablePipesSupport[j] == 0) - dml_print("DML: support: combine=%d, TotalAvailablePipesSupport = %d\n", j, support->TotalAvailablePipesSupport[j]); - if (!fail_only || support->ModeSupport[j] == 0) - dml_print("DML: support: combine=%d, ModeSupport = %d\n", j, support->ModeSupport[j]); - if (!fail_only || support->ViewportSizeSupport[j] == 0) - dml_print("DML: support: combine=%d, ViewportSizeSupport = %d\n", j, support->ViewportSizeSupport[j]); - if (!fail_only || support->ImmediateFlipSupportedForState[j] == 0) - dml_print("DML: support: combine=%d, ImmediateFlipSupportedForState = %d\n", j, support->ImmediateFlipSupportedForState[j]); - } -} - -void dml_print_dml_display_cfg_timing(const struct dml_timing_cfg_st *timing, dml_uint_t num_plane) -{ - for (dml_uint_t i = 0; i < num_plane; i++) { - dml_print("DML: timing_cfg: plane=%d, HTotal = %d\n", i, timing->HTotal[i]); - dml_print("DML: timing_cfg: plane=%d, VTotal = %d\n", i, timing->VTotal[i]); - dml_print("DML: timing_cfg: plane=%d, HActive = %d\n", i, timing->HActive[i]); - dml_print("DML: timing_cfg: plane=%d, VActive = %d\n", i, timing->VActive[i]); - dml_print("DML: timing_cfg: plane=%d, VFrontPorch = %d\n", i, timing->VFrontPorch[i]); - dml_print("DML: timing_cfg: plane=%d, VBlankNom = %d\n", i, timing->VBlankNom[i]); - dml_print("DML: timing_cfg: plane=%d, RefreshRate = %d\n", i, timing->RefreshRate[i]); - dml_print("DML: timing_cfg: plane=%d, PixelClock = %f\n", i, timing->PixelClock[i]); - dml_print("DML: timing_cfg: plane=%d, Interlace = %d\n", i, timing->Interlace[i]); - dml_print("DML: timing_cfg: plane=%d, DRRDisplay = %d\n", i, timing->DRRDisplay[i]); - } -} - -void dml_print_dml_display_cfg_plane(const struct dml_plane_cfg_st *plane, dml_uint_t num_plane) -{ - dml_print("DML: plane_cfg: num_plane = %d\n", num_plane); - dml_print("DML: plane_cfg: GPUVMEnable = %d\n", plane->GPUVMEnable); - dml_print("DML: 
-	dml_print("DML: plane_cfg: GPUVMMaxPageTableLevels = %d\n", plane->GPUVMMaxPageTableLevels);
-	dml_print("DML: plane_cfg: HostVMMaxPageTableLevels = %d\n", plane->HostVMMaxPageTableLevels);
-
-	for (dml_uint_t i = 0; i < num_plane; i++) {
-		dml_print("DML: plane_cfg: plane=%d, GPUVMMinPageSizeKBytes = %d\n", i, plane->GPUVMMinPageSizeKBytes[i]);
-		dml_print("DML: plane_cfg: plane=%d, ForceOneRowForFrame = %d\n", i, plane->ForceOneRowForFrame[i]);
-		dml_print("DML: plane_cfg: plane=%d, PTEBufferModeOverrideEn = %d\n", i, plane->PTEBufferModeOverrideEn[i]);
-		dml_print("DML: plane_cfg: plane=%d, PTEBufferMode = %d\n", i, plane->PTEBufferMode[i]);
-		dml_print("DML: plane_cfg: plane=%d, DETSizeOverride = %d\n", i, plane->DETSizeOverride[i]);
-		dml_print("DML: plane_cfg: plane=%d, UseMALLForStaticScreen = %d\n", i, plane->UseMALLForStaticScreen[i]);
-		dml_print("DML: plane_cfg: plane=%d, UseMALLForPStateChange = %d\n", i, plane->UseMALLForPStateChange[i]);
-		dml_print("DML: plane_cfg: plane=%d, BlendingAndTiming = %d\n", i, plane->BlendingAndTiming[i]);
-		dml_print("DML: plane_cfg: plane=%d, ViewportWidth = %d\n", i, plane->ViewportWidth[i]);
-		dml_print("DML: plane_cfg: plane=%d, ViewportHeight = %d\n", i, plane->ViewportHeight[i]);
-		dml_print("DML: plane_cfg: plane=%d, ViewportWidthChroma = %d\n", i, plane->ViewportWidthChroma[i]);
-		dml_print("DML: plane_cfg: plane=%d, ViewportHeightChroma = %d\n", i, plane->ViewportHeightChroma[i]);
-		dml_print("DML: plane_cfg: plane=%d, ViewportXStart = %d\n", i, plane->ViewportXStart[i]);
-		dml_print("DML: plane_cfg: plane=%d, ViewportXStartC = %d\n", i, plane->ViewportXStartC[i]);
-		dml_print("DML: plane_cfg: plane=%d, ViewportYStart = %d\n", i, plane->ViewportYStart[i]);
-		dml_print("DML: plane_cfg: plane=%d, ViewportYStartC = %d\n", i, plane->ViewportYStartC[i]);
-		dml_print("DML: plane_cfg: plane=%d, ViewportStationary = %d\n", i, plane->ViewportStationary[i]);
-		dml_print("DML: plane_cfg: plane=%d, ScalerEnabled = %d\n", i, plane->ScalerEnabled[i]);
-		dml_print("DML: plane_cfg: plane=%d, HRatio = %3.2f\n", i, plane->HRatio[i]);
-		dml_print("DML: plane_cfg: plane=%d, VRatio = %3.2f\n", i, plane->VRatio[i]);
-		dml_print("DML: plane_cfg: plane=%d, HRatioChroma = %3.2f\n", i, plane->HRatioChroma[i]);
-		dml_print("DML: plane_cfg: plane=%d, VRatioChroma = %3.2f\n", i, plane->VRatioChroma[i]);
-		dml_print("DML: plane_cfg: plane=%d, HTaps = %d\n", i, plane->HTaps[i]);
-		dml_print("DML: plane_cfg: plane=%d, VTaps = %d\n", i, plane->VTaps[i]);
-		dml_print("DML: plane_cfg: plane=%d, HTapsChroma = %d\n", i, plane->HTapsChroma[i]);
-		dml_print("DML: plane_cfg: plane=%d, VTapsChroma = %d\n", i, plane->VTapsChroma[i]);
-		dml_print("DML: plane_cfg: plane=%d, LBBitPerPixel = %d\n", i, plane->LBBitPerPixel[i]);
-		dml_print("DML: plane_cfg: plane=%d, SourceScan = %d\n", i, plane->SourceScan[i]);
-		dml_print("DML: plane_cfg: plane=%d, ScalerRecoutWidth = %d\n", i, plane->ScalerRecoutWidth[i]);
-		dml_print("DML: plane_cfg: plane=%d, NumberOfCursors = %d\n", i, plane->NumberOfCursors[i]);
-		dml_print("DML: plane_cfg: plane=%d, CursorWidth = %d\n", i, plane->CursorWidth[i]);
-		dml_print("DML: plane_cfg: plane=%d, CursorBPP = %d\n", i, plane->CursorBPP[i]);
-
-		dml_print("DML: plane_cfg: plane=%d, DynamicMetadataEnable = %d\n", i, plane->DynamicMetadataEnable[i]);
-		dml_print("DML: plane_cfg: plane=%d, DynamicMetadataLinesBeforeActiveRequired = %d\n", i, plane->DynamicMetadataLinesBeforeActiveRequired[i]);
-		dml_print("DML: plane_cfg: plane=%d, DynamicMetadataTransmittedBytes = %d\n", i, plane->DynamicMetadataTransmittedBytes[i]);
-	}
-}
-
-void dml_print_dml_display_cfg_surface(const struct dml_surface_cfg_st *surface, dml_uint_t num_plane)
-{
-	for (dml_uint_t i = 0; i < num_plane; i++) {
-		dml_print("DML: surface_cfg: plane=%d, PitchY = %d\n", i, surface->PitchY[i]);
-		dml_print("DML: surface_cfg: plane=%d, SurfaceWidthY = %d\n", i, surface->SurfaceWidthY[i]);
-		dml_print("DML: surface_cfg: plane=%d, SurfaceHeightY = %d\n", i, surface->SurfaceHeightY[i]);
-		dml_print("DML: surface_cfg: plane=%d, PitchC = %d\n", i, surface->PitchC[i]);
-		dml_print("DML: surface_cfg: plane=%d, SurfaceWidthC = %d\n", i, surface->SurfaceWidthC[i]);
-		dml_print("DML: surface_cfg: plane=%d, SurfaceHeightC = %d\n", i, surface->SurfaceHeightC[i]);
-		dml_print("DML: surface_cfg: plane=%d, DCCEnable = %d\n", i, surface->DCCEnable[i]);
-		dml_print("DML: surface_cfg: plane=%d, DCCMetaPitchY = %d\n", i, surface->DCCMetaPitchY[i]);
-		dml_print("DML: surface_cfg: plane=%d, DCCMetaPitchC = %d\n", i, surface->DCCMetaPitchC[i]);
-		dml_print("DML: surface_cfg: plane=%d, DCCRateLuma = %f\n", i, surface->DCCRateLuma[i]);
-		dml_print("DML: surface_cfg: plane=%d, DCCRateChroma = %f\n", i, surface->DCCRateChroma[i]);
-		dml_print("DML: surface_cfg: plane=%d, DCCFractionOfZeroSizeRequestsLuma = %f\n", i, surface->DCCFractionOfZeroSizeRequestsLuma[i]);
-		dml_print("DML: surface_cfg: plane=%d, DCCFractionOfZeroSizeRequestsChroma= %f\n", i, surface->DCCFractionOfZeroSizeRequestsChroma[i]);
-	}
-}
-
-void dml_print_dml_display_cfg_hw_resource(const struct dml_hw_resource_st *hw, dml_uint_t num_plane)
-{
-	for (dml_uint_t i = 0; i < num_plane; i++) {
-		dml_print("DML: hw_resource: plane=%d, ODMMode = %d\n", i, hw->ODMMode[i]);
-		dml_print("DML: hw_resource: plane=%d, DPPPerSurface = %d\n", i, hw->DPPPerSurface[i]);
-		dml_print("DML: hw_resource: plane=%d, DSCEnabled = %d\n", i, hw->DSCEnabled[i]);
-		dml_print("DML: hw_resource: plane=%d, NumberOfDSCSlices = %d\n", i, hw->NumberOfDSCSlices[i]);
-	}
-	dml_print("DML: hw_resource: DLGRefClkFreqMHz = %f\n", hw->DLGRefClkFreqMHz);
-}
-
-__DML_DLL_EXPORT__ void dml_print_soc_state_bounding_box(const struct soc_state_bounding_box_st *state)
-{
-	dml_print("DML: state_bbox: socclk_mhz = %f\n", state->socclk_mhz);
-	dml_print("DML: state_bbox: dscclk_mhz = %f\n", state->dscclk_mhz);
-	dml_print("DML: state_bbox: phyclk_mhz = %f\n", state->phyclk_mhz);
-	dml_print("DML: state_bbox: phyclk_d18_mhz = %f\n", state->phyclk_d18_mhz);
-	dml_print("DML: state_bbox: phyclk_d32_mhz = %f\n", state->phyclk_d32_mhz);
-	dml_print("DML: state_bbox: dtbclk_mhz = %f\n", state->dtbclk_mhz);
-	dml_print("DML: state_bbox: dispclk_mhz = %f\n", state->dispclk_mhz);
-	dml_print("DML: state_bbox: dppclk_mhz = %f\n", state->dppclk_mhz);
-	dml_print("DML: state_bbox: fabricclk_mhz = %f\n", state->fabricclk_mhz);
-	dml_print("DML: state_bbox: dcfclk_mhz = %f\n", state->dcfclk_mhz);
-	dml_print("DML: state_bbox: dram_speed_mts = %f\n", state->dram_speed_mts);
-	dml_print("DML: state_bbox: urgent_latency_pixel_data_only_us = %f\n", state->urgent_latency_pixel_data_only_us);
-	dml_print("DML: state_bbox: urgent_latency_pixel_mixed_with_vm_data_us = %f\n", state->urgent_latency_pixel_mixed_with_vm_data_us);
-	dml_print("DML: state_bbox: urgent_latency_vm_data_only_us = %f\n", state->urgent_latency_vm_data_only_us);
-	dml_print("DML: state_bbox: writeback_latency_us = %f\n", state->writeback_latency_us);
-	dml_print("DML: state_bbox: urgent_latency_adjustment_fabric_clock_component_us = %f\n", state->urgent_latency_adjustment_fabric_clock_component_us);
-	dml_print("DML: state_bbox: urgent_latency_adjustment_fabric_clock_reference_mhz= %f\n", state->urgent_latency_adjustment_fabric_clock_reference_mhz);
-	dml_print("DML: state_bbox: sr_exit_time_us = %f\n", state->sr_exit_time_us);
-	dml_print("DML: state_bbox: sr_enter_plus_exit_time_us = %f\n", state->sr_enter_plus_exit_time_us);
-	dml_print("DML: state_bbox: sr_exit_z8_time_us = %f\n", state->sr_exit_z8_time_us);
-	dml_print("DML: state_bbox: sr_enter_plus_exit_z8_time_us = %f\n", state->sr_enter_plus_exit_z8_time_us);
-	dml_print("DML: state_bbox: dram_clock_change_latency_us = %f\n", state->dram_clock_change_latency_us);
-	dml_print("DML: state_bbox: fclk_change_latency_us = %f\n", state->fclk_change_latency_us);
-	dml_print("DML: state_bbox: usr_retraining_latency_us = %f\n", state->usr_retraining_latency_us);
-	dml_print("DML: state_bbox: use_ideal_dram_bw_strobe = %d\n", state->use_ideal_dram_bw_strobe);
-}
-
-__DML_DLL_EXPORT__ void dml_print_soc_bounding_box(const struct soc_bounding_box_st *soc)
-{
-	dml_print("DML: soc_bbox: dprefclk_mhz = %f\n", soc->dprefclk_mhz);
-	dml_print("DML: soc_bbox: xtalclk_mhz = %f\n", soc->xtalclk_mhz);
-	dml_print("DML: soc_bbox: pcierefclk_mhz = %f\n", soc->pcierefclk_mhz);
-	dml_print("DML: soc_bbox: refclk_mhz = %f\n", soc->refclk_mhz);
-	dml_print("DML: soc_bbox: amclk_mhz = %f\n", soc->amclk_mhz);
-
-	dml_print("DML: soc_bbox: max_outstanding_reqs = %f\n", soc->max_outstanding_reqs);
-	dml_print("DML: soc_bbox: pct_ideal_sdp_bw_after_urgent = %f\n", soc->pct_ideal_sdp_bw_after_urgent);
-	dml_print("DML: soc_bbox: pct_ideal_fabric_bw_after_urgent = %f\n", soc->pct_ideal_fabric_bw_after_urgent);
-	dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_pixel_only = %f\n", soc->pct_ideal_dram_bw_after_urgent_pixel_only);
-	dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_pixel_and_vm = %f\n", soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm);
-	dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_vm_only = %f\n", soc->pct_ideal_dram_bw_after_urgent_vm_only);
-	dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_strobe = %f\n", soc->pct_ideal_dram_bw_after_urgent_strobe);
-	dml_print("DML: soc_bbox: max_avg_sdp_bw_use_normal_percent = %f\n", soc->max_avg_sdp_bw_use_normal_percent);
-	dml_print("DML: soc_bbox: max_avg_fabric_bw_use_normal_percent = %f\n", soc->max_avg_fabric_bw_use_normal_percent);
-	dml_print("DML: soc_bbox: max_avg_dram_bw_use_normal_percent = %f\n", soc->max_avg_dram_bw_use_normal_percent);
-	dml_print("DML: soc_bbox: max_avg_dram_bw_use_normal_strobe_percent = %f\n", soc->max_avg_dram_bw_use_normal_strobe_percent);
-	dml_print("DML: soc_bbox: round_trip_ping_latency_dcfclk_cycles = %d\n", soc->round_trip_ping_latency_dcfclk_cycles);
-	dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_pixel_only_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_pixel_only_bytes);
-	dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes);
-	dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_vm_only_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_vm_only_bytes);
-	dml_print("DML: soc_bbox: num_chans = %d\n", soc->num_chans);
-	dml_print("DML: soc_bbox: return_bus_width_bytes = %d\n", soc->return_bus_width_bytes);
-	dml_print("DML: soc_bbox: dram_channel_width_bytes = %d\n", soc->dram_channel_width_bytes);
-	dml_print("DML: soc_bbox: fabric_datapath_to_dcn_data_return_bytes = %d\n", soc->fabric_datapath_to_dcn_data_return_bytes);
-	dml_print("DML: soc_bbox: hostvm_min_page_size_kbytes = %d\n", soc->hostvm_min_page_size_kbytes);
-	dml_print("DML: soc_bbox: gpuvm_min_page_size_kbytes = %d\n", soc->gpuvm_min_page_size_kbytes);
-	dml_print("DML: soc_bbox: phy_downspread_percent = %f\n", soc->phy_downspread_percent);
-	dml_print("DML: soc_bbox: dcn_downspread_percent = %f\n", soc->dcn_downspread_percent);
-	dml_print("DML: soc_bbox: smn_latency_us = %f\n", soc->smn_latency_us);
-	dml_print("DML: soc_bbox: mall_allocated_for_dcn_mbytes = %d\n", soc->mall_allocated_for_dcn_mbytes);
-	dml_print("DML: soc_bbox: dispclk_dppclk_vco_speed_mhz = %f\n", soc->dispclk_dppclk_vco_speed_mhz);
-	dml_print("DML: soc_bbox: do_urgent_latency_adjustment = %d\n", soc->do_urgent_latency_adjustment);
-}
-
-__DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg)
-{
-	dml_print("DML: clk_cfg: 0-use_required, 1-use pipe.clks_cfg, 2-use state bbox\n");
-	dml_print("DML: clk_cfg: dcfclk_option = %d\n", clk_cfg->dcfclk_option);
-	dml_print("DML: clk_cfg: dispclk_option = %d\n", clk_cfg->dispclk_option);
-
-	dml_print("DML: clk_cfg: dcfclk_mhz = %f\n", clk_cfg->dcfclk_mhz);
-	dml_print("DML: clk_cfg: dispclk_mhz = %f\n", clk_cfg->dispclk_mhz);
-
-	for (dml_uint_t i = 0; i < DCN_DML__NUM_PLANE; i++) {
-		dml_print("DML: clk_cfg: i=%d, dppclk_option = %d\n", i, clk_cfg->dppclk_option[i]);
-		dml_print("DML: clk_cfg: i=%d, dppclk_mhz = %f\n", i, clk_cfg->dppclk_mhz[i]);
-	}
-}
-
-dml_bool_t dml_is_vertical_rotation(enum dml_rotation_angle Scan)
-{
-	dml_bool_t is_vert = false;
-	if (Scan == dml_rotation_90 || Scan == dml_rotation_90m || Scan == dml_rotation_270 || Scan == dml_rotation_270m) {
-		is_vert = true;
-	} else {
-		is_vert = false;
-	}
-	return is_vert;
-} // dml_is_vertical_rotation
-
-dml_uint_t dml_get_cursor_bit_per_pixel(enum dml_cursor_bpp ebpp)
-{
-	switch (ebpp) {
-	case dml_cur_2bit:
-		return 2;
-	case dml_cur_32bit:
-		return 32;
-	case dml_cur_64bit:
-		return 64;
-	default:
-		return 0;
-	}
-}
-
-/// @brief Determine the physical pipe to logical plane mapping using the display_cfg
-dml_uint_t dml_get_num_active_planes(const struct dml_display_cfg_st *display_cfg)
-{
-	dml_uint_t num_active_planes = 0;
-
-	for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; k++) {
-		if (display_cfg->plane.ViewportWidth[k] > 0)
-			num_active_planes = num_active_planes + 1;
-	}
-#ifdef __DML_VBA_DEBUG__
-	dml_print("DML::%s: num_active_planes = %d\n", __func__, num_active_planes);
-#endif
-	return num_active_planes;
-}
-
-/// @brief Determine the physical pipe to logical plane mapping using the display_cfg
-dml_uint_t dml_get_num_active_pipes(const struct dml_display_cfg_st *display_cfg)
-{
-	dml_uint_t num_active_pipes = 0;
-
-	for (dml_uint_t j = 0; j < dml_get_num_active_planes(display_cfg); j++) {
-		num_active_pipes = num_active_pipes + display_cfg->hw.DPPPerSurface[j];
-	}
-
-#ifdef __DML_VBA_DEBUG__
-	dml_print("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes);
-#endif
-	return num_active_pipes;
-}
-
-dml_uint_t dml_get_plane_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx)
-{
-	dml_uint_t plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
-	return plane_idx;
-}
-
-dml_uint_t dml_get_pipe_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t plane_idx)
-{
-	dml_uint_t pipe_idx = 0;
-	dml_bool_t pipe_found = 0;
-
-	ASSERT(plane_idx < __DML_NUM_PLANES__);
-
-	for (dml_uint_t i = 0; i < __DML_NUM_PLANES__; i++) {
-		if (plane_idx == mode_lib->mp.pipe_plane[i]) {
-			pipe_idx = i;
-			pipe_found = 1;
-			break;
-		}
-	}
-	ASSERT(pipe_found != 0);
-
-	return pipe_idx;
-}
-
-void dml_calc_pipe_plane_mapping(const struct dml_hw_resource_st *hw, dml_uint_t *pipe_plane)
-{
-	dml_uint_t pipe_idx = 0;
-
-	for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; ++k) {
-		pipe_plane[k] = __DML_PIPE_NO_PLANE__;
-	}
-
-	for (dml_uint_t plane_idx = 0; plane_idx < __DML_NUM_PLANES__; plane_idx++) {
-		for (dml_uint_t i = 0; i < hw->DPPPerSurface[plane_idx]; i++) {
-			pipe_plane[pipe_idx] = plane_idx;
-			pipe_idx++;
-		}
-	}
-}
-
-
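dml_calc_pipe_plane_mapping() is the inverse of the per-pipe lookups above: it expands the per-plane DPP counts into a flat pipe-to-plane table, with unclaimed pipes marked as having no plane. A small standalone walkthrough of the same expansion (array sizes and names are illustrative, not the driver's):

	#include <stdio.h>

	#define NUM_PLANES 4
	#define PIPE_NO_PLANE 0xffffffffu

	/* mirrors the deleted helper: planes claim pipes in order,
	 * one pipe per DPP assigned to that plane */
	static void calc_pipe_plane_mapping(const unsigned int *dpp_per_surface,
					    unsigned int *pipe_plane)
	{
		unsigned int pipe_idx = 0;

		for (unsigned int k = 0; k < NUM_PLANES; k++)
			pipe_plane[k] = PIPE_NO_PLANE;

		for (unsigned int plane = 0; plane < NUM_PLANES; plane++)
			for (unsigned int i = 0; i < dpp_per_surface[plane]; i++)
				pipe_plane[pipe_idx++] = plane;
	}

	int main(void)
	{
		/* plane 0 uses 2 DPPs (e.g. MPC combine), plane 1 uses 1 */
		unsigned int dpp_per_surface[NUM_PLANES] = { 2, 1, 0, 0 };
		unsigned int pipe_plane[NUM_PLANES];

		calc_pipe_plane_mapping(dpp_per_surface, pipe_plane);
		for (unsigned int p = 0; p < NUM_PLANES; p++)
			printf("pipe %u -> plane %d\n", p, (int)pipe_plane[p]);
		/* pipes 0,1 -> plane 0; pipe 2 -> plane 1; pipe 3 prints -1 (unused) */
		return 0;
	}
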
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.h
deleted file mode 100644
index 113b0265e1d1..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright 2023 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef __DISPLAY_MODE_UTIL_H__
-#define __DISPLAY_MODE_UTIL_H__
-
-#include "display_mode_core_structs.h"
-#include "cmntypes.h"
-
-
-#include "dml_assert.h"
-#include "dml_logging.h"
-
-__DML_DLL_EXPORT__ dml_bool_t dml_util_is_420(enum dml_source_format_class source_format);
-__DML_DLL_EXPORT__ dml_float_t dml_ceil(dml_float_t x, dml_float_t granularity);
-__DML_DLL_EXPORT__ dml_float_t dml_floor(dml_float_t x, dml_float_t granularity);
-__DML_DLL_EXPORT__ dml_float_t dml_min(dml_float_t x, dml_float_t y);
-__DML_DLL_EXPORT__ dml_float_t dml_min3(dml_float_t x, dml_float_t y, dml_float_t z);
-__DML_DLL_EXPORT__ dml_float_t dml_min4(dml_float_t x, dml_float_t y, dml_float_t z, dml_float_t w);
-__DML_DLL_EXPORT__ dml_float_t dml_max(dml_float_t x, dml_float_t y);
-__DML_DLL_EXPORT__ dml_float_t dml_max3(dml_float_t x, dml_float_t y, dml_float_t z);
-__DML_DLL_EXPORT__ dml_float_t dml_max4(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d);
-__DML_DLL_EXPORT__ dml_float_t dml_max5(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d, dml_float_t e);
-__DML_DLL_EXPORT__ dml_float_t dml_log(dml_float_t x, dml_float_t base);
-__DML_DLL_EXPORT__ dml_float_t dml_log2(dml_float_t x);
-__DML_DLL_EXPORT__ dml_float_t dml_round(dml_float_t val, dml_bool_t bankers_rounding);
-__DML_DLL_EXPORT__ dml_float_t dml_pow(dml_float_t base, int exp);
-__DML_DLL_EXPORT__ dml_uint_t dml_round_to_multiple(dml_uint_t num, dml_uint_t multiple, dml_bool_t up);
-__DML_DLL_EXPORT__ dml_bool_t dml_is_vertical_rotation(enum dml_rotation_angle scan);
-__DML_DLL_EXPORT__ dml_uint_t dml_get_cursor_bit_per_pixel(enum dml_cursor_bpp ebpp);
-__DML_DLL_EXPORT__ void dml_print_data_rq_regs_st(const dml_display_plane_rq_regs_st *data_rq_regs);
-__DML_DLL_EXPORT__ void dml_print_rq_regs_st(const dml_display_rq_regs_st *rq_regs);
-__DML_DLL_EXPORT__ void dml_print_dlg_regs_st(const dml_display_dlg_regs_st *dlg_regs);
-__DML_DLL_EXPORT__ void dml_print_ttu_regs_st(const dml_display_ttu_regs_st *ttu_regs);
-__DML_DLL_EXPORT__ void dml_print_dml_policy(const struct dml_mode_eval_policy_st *policy);
-__DML_DLL_EXPORT__ void dml_print_mode_support(struct display_mode_lib_st *mode_lib, dml_uint_t j);
-__DML_DLL_EXPORT__ void dml_print_dml_mode_support_info(const struct dml_mode_support_info_st *support, dml_bool_t fail_only);
-__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_timing(const struct dml_timing_cfg_st *timing, dml_uint_t num_plane);
-__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_plane(const struct dml_plane_cfg_st *plane, dml_uint_t num_plane);
-__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_surface(const struct dml_surface_cfg_st *surface, dml_uint_t num_plane);
-__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_hw_resource(const struct dml_hw_resource_st *hw, dml_uint_t num_plane);
-__DML_DLL_EXPORT__ void dml_print_soc_state_bounding_box(const struct soc_state_bounding_box_st *state);
-__DML_DLL_EXPORT__ void dml_print_soc_bounding_box(const struct soc_bounding_box_st *soc);
-__DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg);
-
-__DML_DLL_EXPORT__ dml_uint_t dml_get_num_active_planes(const struct dml_display_cfg_st *display_cfg);
-__DML_DLL_EXPORT__ dml_uint_t dml_get_num_active_pipes(const struct dml_display_cfg_st *display_cfg);
-__DML_DLL_EXPORT__ dml_uint_t dml_get_plane_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx);
-__DML_DLL_EXPORT__ dml_uint_t dml_get_pipe_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t plane_idx);
-__DML_DLL_EXPORT__ void dml_calc_pipe_plane_mapping(const struct dml_hw_resource_st *hw, dml_uint_t *pipe_plane);
-
-
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
deleted file mode 100644
index bf5e7f4e0416..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ /dev/null
@@ -1,929 +0,0 @@
-// SPDX-License-Identifier: MIT
-//
-// Copyright 2024 Advanced Micro Devices, Inc.
-
-#include "dml21_wrapper.h"
-#include "dml2_core_dcn4_calcs.h"
-#include "dml2_internal_shared_types.h"
-#include "dml2_internal_types.h"
-#include "dml21_utils.h"
-#include "dml21_translation_helper.h"
-#include "soc_and_ip_translator.h"
-
-static void dml21_populate_pmo_options(struct dml2_pmo_options *pmo_options,
-		const struct dc *in_dc,
-		const struct dml2_configuration_options *config)
-{
-	bool disable_fams2 = !in_dc->debug.fams2_config.bits.enable;
-
-	/* ODM options */
-	pmo_options->disable_dyn_odm = !config->minimize_dispclk_using_odm;
-	pmo_options->disable_dyn_odm_for_multi_stream = true;
-	pmo_options->disable_dyn_odm_for_stream_with_svp = true;
-
-	pmo_options->disable_vblank = ((in_dc->debug.dml21_disable_pstate_method_mask >> 1) & 1);
-
-	/* NOTE: DRR and SubVP Require FAMS2 */
-	pmo_options->disable_svp = ((in_dc->debug.dml21_disable_pstate_method_mask >> 2) & 1) ||
-			in_dc->debug.force_disable_subvp ||
-			disable_fams2;
-	pmo_options->disable_drr_clamped = ((in_dc->debug.dml21_disable_pstate_method_mask >> 3) & 1) ||
-			disable_fams2;
-	pmo_options->disable_drr_var = ((in_dc->debug.dml21_disable_pstate_method_mask >> 4) & 1) ||
-			disable_fams2;
-	pmo_options->disable_fams2 = disable_fams2;
-
-	pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE ||
-			in_dc->debug.disable_fams_gaming == INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY;
-	pmo_options->disable_drr_clamped_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE;
-}
-
-static enum dml2_project_id dml21_dcn_revision_to_dml2_project_id(enum dce_version dcn_version)
-{
-	enum dml2_project_id project_id;
-	switch (dcn_version) {
-	case DCN_VERSION_4_01:
-		project_id = dml2_project_dcn4x_stage2_auto_drr_svp;
-		break;
-	default:
-		project_id = dml2_project_invalid;
-		DC_ERR("unsupported dcn version for DML21!");
-		break;
-	}
-
-	return project_id;
-}
-
-void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init,
-		const struct dml2_configuration_options *config,
-		const struct dc *in_dc)
-{
-	dml_init->options.project_id = dml21_dcn_revision_to_dml2_project_id(in_dc->ctx->dce_version);
-
-	if (config->use_native_soc_bb_construction) {
-		in_dc->soc_and_ip_translator->translator_funcs->get_soc_bb(&dml_init->soc_bb, in_dc, config);
-		in_dc->soc_and_ip_translator->translator_funcs->get_ip_caps(&dml_init->ip_caps);
-	} else {
-		dml_init->soc_bb = config->external_socbb_ip_params->soc_bb;
-		dml_init->ip_caps = config->external_socbb_ip_params->ip_params;
-	}
-
-	dml21_populate_pmo_options(&dml_init->options.pmo_options, in_dc, config);
-}
-
-static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream)
-{
-	unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total;
-
-	if (stream->ctx->dc->caps.vtotal_limited_by_fp2) {
-		max_hw_v_total -= stream->timing.v_front_porch + 1;
-	}
-
-	return max_hw_v_total;
-}
-
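The deleted dml21_populate_pmo_options() decodes the dml21_disable_pstate_method_mask debug knob one bit at a time (bit 1 vblank, bit 2 SubVP, bit 3 clamped DRR, bit 4 variable DRR, as they appear in the code above), and additionally forces the FAMS2-dependent methods off when FAMS2 itself is disabled. A condensed sketch of just that decode, with hypothetical names standing in for the driver's structs:

	#include <stdbool.h>
	#include <stdio.h>

	/* bit positions as used by the deleted helper:
	 * bit 1 = vblank, bit 2 = subvp, bit 3 = drr clamped, bit 4 = drr variable */
	struct pmo_disables {
		bool vblank, svp, drr_clamped, drr_var;
	};

	static struct pmo_disables decode_pstate_mask(unsigned int mask, bool fams2_enabled)
	{
		struct pmo_disables d;

		d.vblank = (mask >> 1) & 1;
		/* SubVP and both DRR flavors additionally require FAMS2 */
		d.svp = ((mask >> 2) & 1) || !fams2_enabled;
		d.drr_clamped = ((mask >> 3) & 1) || !fams2_enabled;
		d.drr_var = ((mask >> 4) & 1) || !fams2_enabled;
		return d;
	}

	int main(void)
	{
		struct pmo_disables d = decode_pstate_mask(0x4, true); /* disable SubVP only */

		printf("vblank=%d svp=%d drr_clamped=%d drr_var=%d\n",
		       d.vblank, d.svp, d.drr_clamped, d.drr_var);
		return 0;
	}
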
populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing, - struct dc_stream_state *stream, - struct pipe_ctx *pipe_ctx, - struct dml2_context *dml_ctx) -{ - unsigned int hblank_start, vblank_start, min_hardware_refresh_in_uhz; - uint32_t pix_clk_100hz; - - timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding; - timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; - timing->h_front_porch = stream->timing.h_front_porch; - timing->v_front_porch = stream->timing.v_front_porch; - timing->pixel_clock_khz = stream->timing.pix_clk_100hz / 10; - if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) - timing->pixel_clock_khz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz / 10; - if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - timing->pixel_clock_khz *= 2; - timing->h_total = stream->timing.h_total + pipe_ctx->dsc_padding_params.dsc_htotal_padding; - timing->v_total = stream->timing.v_total; - timing->h_sync_width = stream->timing.h_sync_width; - timing->interlaced = stream->timing.flags.INTERLACE; - - hblank_start = stream->timing.h_total - stream->timing.h_front_porch; - - timing->h_blank_end = hblank_start - stream->timing.h_addressable - pipe_ctx->dsc_padding_params.dsc_hactive_padding - - stream->timing.h_border_left - stream->timing.h_border_right; - - if (hblank_start < stream->timing.h_addressable) - timing->h_blank_end = 0; - - vblank_start = stream->timing.v_total - stream->timing.v_front_porch; - - timing->v_blank_end = vblank_start - stream->timing.v_addressable - - stream->timing.v_border_top - stream->timing.v_border_bottom; - - timing->drr_config.enabled = stream->ignore_msa_timing_param; - timing->drr_config.drr_active_variable = stream->vrr_active_variable; - timing->drr_config.drr_active_fixed = stream->vrr_active_fixed; - timing->drr_config.disallowed = !stream->allow_freesync; - - /* limit min refresh rate to DC cap */ - min_hardware_refresh_in_uhz = stream->timing.min_refresh_in_uhz; - if (stream->ctx->dc->caps.max_v_total != 0) { - if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) { - pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz; - } else { - pix_clk_100hz = stream->timing.pix_clk_100hz; - } - min_hardware_refresh_in_uhz = div64_u64((pix_clk_100hz * 100000000ULL), - (timing->h_total * (long long)calc_max_hardware_v_total(stream))); - } - - timing->drr_config.min_refresh_uhz = max(stream->timing.min_refresh_in_uhz, min_hardware_refresh_in_uhz); - - if (dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase && - stream->ctx->dc->config.enable_fpo_flicker_detection == 1) - timing->drr_config.max_instant_vtotal_delta = dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase(stream, false); - else - timing->drr_config.max_instant_vtotal_delta = 0; - - if (stream->timing.flags.DSC) { - timing->dsc.enable = dml2_dsc_enable; - timing->dsc.overrides.num_slices = stream->timing.dsc_cfg.num_slices_h; - timing->dsc.dsc_compressed_bpp_x16 = stream->timing.dsc_cfg.bits_per_pixel; - } else - timing->dsc.enable = dml2_dsc_disable; - - switch (stream->timing.display_color_depth) { - case COLOR_DEPTH_666: - timing->bpc = 6; - break; - case COLOR_DEPTH_888: - timing->bpc = 8; - break; - case COLOR_DEPTH_101010: - timing->bpc = 10; - break; - case COLOR_DEPTH_121212: - timing->bpc = 12; - break; - case COLOR_DEPTH_141414: 
- timing->bpc = 14; - break; - case COLOR_DEPTH_161616: - timing->bpc = 16; - break; - case COLOR_DEPTH_999: - timing->bpc = 9; - break; - case COLOR_DEPTH_111111: - timing->bpc = 11; - break; - default: - timing->bpc = 8; - break; - } - - timing->vblank_nom = timing->v_total - timing->v_active; -} - -static void populate_dml21_output_config_from_stream_state(struct dml2_link_output_cfg *output, - struct dc_stream_state *stream, const struct pipe_ctx *pipe) -{ - output->output_dp_lane_count = 4; - - switch (stream->signal) { - case SIGNAL_TYPE_DISPLAY_PORT_MST: - case SIGNAL_TYPE_DISPLAY_PORT: - output->output_encoder = dml2_dp; - if (check_dp2p0_output_encoder(pipe)) - output->output_encoder = dml2_dp2p0; - break; - case SIGNAL_TYPE_EDP: - output->output_encoder = dml2_edp; - break; - case SIGNAL_TYPE_HDMI_TYPE_A: - case SIGNAL_TYPE_DVI_SINGLE_LINK: - case SIGNAL_TYPE_DVI_DUAL_LINK: - output->output_encoder = dml2_hdmi; - break; - default: - output->output_encoder = dml2_dp; - } - - switch (stream->timing.pixel_encoding) { - case PIXEL_ENCODING_RGB: - case PIXEL_ENCODING_YCBCR444: - output->output_format = dml2_444; - break; - case PIXEL_ENCODING_YCBCR420: - output->output_format = dml2_420; - break; - case PIXEL_ENCODING_YCBCR422: - if (stream->timing.flags.DSC && !stream->timing.dsc_cfg.ycbcr422_simple) - output->output_format = dml2_n422; - else - output->output_format = dml2_s422; - break; - default: - output->output_format = dml2_444; - break; - } - - switch (stream->signal) { - case SIGNAL_TYPE_NONE: - case SIGNAL_TYPE_DVI_SINGLE_LINK: - case SIGNAL_TYPE_DVI_DUAL_LINK: - case SIGNAL_TYPE_HDMI_TYPE_A: - case SIGNAL_TYPE_LVDS: - case SIGNAL_TYPE_RGB: - case SIGNAL_TYPE_DISPLAY_PORT: - case SIGNAL_TYPE_DISPLAY_PORT_MST: - case SIGNAL_TYPE_EDP: - case SIGNAL_TYPE_VIRTUAL: - default: - output->output_dp_link_rate = dml2_dp_rate_na; - break; - } - - output->audio_sample_layout = stream->audio_info.modes->sample_size; - output->audio_sample_rate = stream->audio_info.modes->max_bit_rate; - output->output_disabled = true; - - //TODO : New to DML2.1. How do we populate this ? 
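
The min-refresh clamp in populate_dml21_timing_config_from_stream_state() earlier in this removed file is plain unit arithmetic: the refresh floor in micro-Hz is the pixel clock divided by the frame size in pixels, with the clock carried in 100 Hz units (hence the 100000000ULL factor and div64_u64() for the 64-bit divide). A minimal standalone sketch of the same arithmetic, using hypothetical example values:

	unsigned long long pix_clk_100hz = 5940000ULL;	/* 594 MHz in 100 Hz units */
	unsigned long long h_total = 4400ULL, max_v_total = 2250ULL;
	/* 100 Hz units * 1e8 -> micro-Hz */
	unsigned long long refresh_uhz =
		(pix_clk_100hz * 100000000ULL) / (h_total * max_v_total);
	/* 5.94e14 / 9.9e6 = 60000000 uHz, i.e. a 60 Hz floor */
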
- // output->validate_output -} - -static void populate_dml21_stream_overrides_from_stream_state( - struct dml2_stream_parameters *stream_desc, - struct dc_stream_state *stream, - struct dc_stream_status *stream_status) -{ - switch (stream->debug.force_odm_combine_segments) { - case 0: - stream_desc->overrides.odm_mode = dml2_odm_mode_auto; - break; - case 1: - stream_desc->overrides.odm_mode = dml2_odm_mode_bypass; - break; - case 2: - stream_desc->overrides.odm_mode = dml2_odm_mode_combine_2to1; - break; - case 3: - stream_desc->overrides.odm_mode = dml2_odm_mode_combine_3to1; - break; - case 4: - stream_desc->overrides.odm_mode = dml2_odm_mode_combine_4to1; - break; - default: - stream_desc->overrides.odm_mode = dml2_odm_mode_auto; - break; - } - if (!stream->ctx->dc->debug.enable_single_display_2to1_odm_policy || - stream->debug.force_odm_combine_segments > 0) - stream_desc->overrides.disable_dynamic_odm = true; - stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp || - stream->hw_cursor_req || - stream_status->mall_stream_config.cursor_size_limit_subvp; -} - -static enum dml2_swizzle_mode gfx_addr3_to_dml2_swizzle_mode(enum swizzle_mode_addr3_values addr3_mode) -{ - enum dml2_swizzle_mode dml2_mode = dml2_sw_linear; - - switch (addr3_mode) { - case DC_ADDR3_SW_LINEAR: - dml2_mode = dml2_sw_linear; - break; - case DC_ADDR3_SW_256B_2D: - dml2_mode = dml2_sw_256b_2d; - break; - case DC_ADDR3_SW_4KB_2D: - dml2_mode = dml2_sw_4kb_2d; - break; - case DC_ADDR3_SW_64KB_2D: - dml2_mode = dml2_sw_64kb_2d; - break; - case DC_ADDR3_SW_256KB_2D: - dml2_mode = dml2_sw_256kb_2d; - break; - default: - /* invalid swizzle mode for DML2.1 */ - ASSERT(false); - dml2_mode = dml2_sw_linear; - } - - return dml2_mode; -} - -static enum dml2_swizzle_mode gfx9_to_dml2_swizzle_mode(enum swizzle_mode_values gfx9_mode) -{ - enum dml2_swizzle_mode dml2_mode = dml2_sw_64kb_2d; - - switch (gfx9_mode) { - case DC_SW_LINEAR: - dml2_mode = dml2_sw_linear; - break; - case DC_SW_256_D: - case DC_SW_256_R: - dml2_mode = dml2_sw_256b_2d; - break; - case DC_SW_4KB_D: - case DC_SW_4KB_R: - case DC_SW_4KB_R_X: - dml2_mode = dml2_sw_4kb_2d; - break; - case DC_SW_64KB_D: - case DC_SW_64KB_D_X: - case DC_SW_64KB_R: - case DC_SW_64KB_R_X: - dml2_mode = dml2_sw_64kb_2d; - break; - case DC_SW_256B_S: - case DC_SW_4KB_S: - case DC_SW_64KB_S: - case DC_SW_VAR_S: - case DC_SW_VAR_D: - case DC_SW_VAR_R: - case DC_SW_64KB_S_T: - case DC_SW_64KB_D_T: - case DC_SW_4KB_S_X: - case DC_SW_4KB_D_X: - case DC_SW_64KB_S_X: - case DC_SW_VAR_S_X: - case DC_SW_VAR_D_X: - case DC_SW_VAR_R_X: - default: - /* - * invalid swizzle mode for DML2.1. This could happen because - * DML21 is not intended to be used by N-1 in production. To - * properly filter out unsupported swizzle modes, we will need - * to fix capability reporting when DML2.1 is used for N-1 in - * dc. So DML will only receive DML21 supported swizzle modes. - * This implementation is not added and has a low value because - * the supported swizzle modes should already cover most of our - * N-1 test cases. 
- */ - return dml2_sw_64kb_2d; - } - - return dml2_mode; -} - -static void populate_dml21_dummy_surface_cfg(struct dml2_surface_cfg *surface, const struct dc_stream_state *stream) -{ - surface->plane0.width = stream->timing.h_addressable; - surface->plane0.height = stream->timing.v_addressable; - surface->plane1.width = stream->timing.h_addressable; - surface->plane1.height = stream->timing.v_addressable; - surface->plane0.pitch = ((surface->plane0.width + 127) / 128) * 128; - surface->plane1.pitch = 0; - surface->dcc.enable = false; - surface->dcc.informative.dcc_rate_plane0 = 1.0; - surface->dcc.informative.dcc_rate_plane1 = 1.0; - surface->dcc.informative.fraction_of_zero_size_request_plane0 = 0; - surface->dcc.informative.fraction_of_zero_size_request_plane1 = 0; - surface->tiling = dml2_sw_64kb_2d; -} - -static void populate_dml21_dummy_plane_cfg(struct dml2_plane_parameters *plane, const struct dc_stream_state *stream) -{ - unsigned int width, height; - - if (stream->timing.h_addressable > 3840) - width = 3840; - else - width = stream->timing.h_addressable; // 4K max - - if (stream->timing.v_addressable > 2160) - height = 2160; - else - height = stream->timing.v_addressable; // 4K max - - plane->cursor.cursor_bpp = 32; - - plane->cursor.cursor_width = 256; - plane->cursor.num_cursors = 1; - - plane->composition.viewport.plane0.width = width; - plane->composition.viewport.plane0.height = height; - plane->composition.viewport.plane1.width = 0; - plane->composition.viewport.plane1.height = 0; - - plane->composition.viewport.stationary = false; - plane->composition.viewport.plane0.x_start = 0; - plane->composition.viewport.plane0.y_start = 0; - plane->composition.viewport.plane1.x_start = 0; - plane->composition.viewport.plane1.y_start = 0; - - plane->composition.scaler_info.enabled = false; - plane->composition.rotation_angle = dml2_rotation_0; - plane->composition.scaler_info.plane0.h_ratio = 1.0; - plane->composition.scaler_info.plane0.v_ratio = 1.0; - plane->composition.scaler_info.plane1.h_ratio = 0; - plane->composition.scaler_info.plane1.v_ratio = 0; - plane->composition.scaler_info.plane0.h_taps = 1; - plane->composition.scaler_info.plane0.v_taps = 1; - plane->composition.scaler_info.plane1.h_taps = 0; - plane->composition.scaler_info.plane1.v_taps = 0; - plane->composition.scaler_info.rect_out_width = width; - plane->pixel_format = dml2_444_32; - - plane->dynamic_meta_data.enable = false; - plane->overrides.gpuvm_min_page_size_kbytes = 256; -} - -static void populate_dml21_surface_config_from_plane_state( - const struct dc *in_dc, - struct dml2_surface_cfg *surface, - const struct dc_plane_state *plane_state) -{ - surface->plane0.pitch = plane_state->plane_size.surface_pitch; - surface->plane1.pitch = plane_state->plane_size.chroma_pitch; - surface->plane0.height = plane_state->plane_size.surface_size.height; - surface->plane0.width = plane_state->plane_size.surface_size.width; - surface->plane1.height = plane_state->plane_size.chroma_size.height; - surface->plane1.width = plane_state->plane_size.chroma_size.width; - surface->dcc.enable = plane_state->dcc.enable; - surface->dcc.informative.dcc_rate_plane0 = 1.0; - surface->dcc.informative.dcc_rate_plane1 = 1.0; - surface->dcc.informative.fraction_of_zero_size_request_plane0 = plane_state->dcc.independent_64b_blks; - surface->dcc.informative.fraction_of_zero_size_request_plane1 = plane_state->dcc.independent_64b_blks_c; - surface->dcc.plane0.pitch = plane_state->dcc.meta_pitch; - surface->dcc.plane1.pitch = 
plane_state->dcc.meta_pitch_c; - - // Update swizzle / array mode based on the gfx_format - switch (plane_state->tiling_info.gfxversion) { - case DcGfxVersion7: - case DcGfxVersion8: - break; - case DcGfxVersion9: - case DcGfxVersion10: - case DcGfxVersion11: - surface->tiling = gfx9_to_dml2_swizzle_mode(plane_state->tiling_info.gfx9.swizzle); - break; - case DcGfxAddr3: - surface->tiling = gfx_addr3_to_dml2_swizzle_mode(plane_state->tiling_info.gfx_addr3.swizzle); - break; - } -} - -static const struct scaler_data *get_scaler_data_for_plane( - struct dml2_context *dml_ctx, - const struct dc_plane_state *in, - const struct dc_state *context) -{ - int i; - struct pipe_ctx *temp_pipe = &dml_ctx->v21.scratch.temp_pipe; - - memset(temp_pipe, 0, sizeof(struct pipe_ctx)); - - for (i = 0; i < MAX_PIPES; i++) { - const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state == in && !pipe->prev_odm_pipe) { - temp_pipe->stream = pipe->stream; - temp_pipe->plane_state = pipe->plane_state; - temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; - temp_pipe->stream_res = pipe->stream_res; - temp_pipe->dsc_padding_params.dsc_hactive_padding = pipe->dsc_padding_params.dsc_hactive_padding; - temp_pipe->dsc_padding_params.dsc_htotal_padding = pipe->dsc_padding_params.dsc_htotal_padding; - temp_pipe->dsc_padding_params.dsc_pix_clk_100hz = pipe->dsc_padding_params.dsc_pix_clk_100hz; - dml_ctx->config.callbacks.build_scaling_params(temp_pipe); - break; - } - } - - ASSERT(i < MAX_PIPES); - return &temp_pipe->plane_res.scl_data; -} - -static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dml_ctx, - struct dml2_plane_parameters *plane, const struct dc_plane_state *plane_state, - const struct dc_state *context, unsigned int stream_index) -{ - const struct scaler_data *scaler_data = get_scaler_data_for_plane(dml_ctx, plane_state, context); - struct dc_stream_state *stream = context->streams[stream_index]; - - plane->cursor.cursor_bpp = 32; - plane->cursor.cursor_width = 256; - plane->cursor.num_cursors = 1; - - switch (plane_state->format) { - case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: - case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: - plane->pixel_format = dml2_420_8; - break; - case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: - case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: - plane->pixel_format = dml2_420_10; - break; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: - plane->pixel_format = dml2_444_64; - break; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: - case SURFACE_PIXEL_FORMAT_GRPH_RGB565: - plane->pixel_format = dml2_444_16; - break; - case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: - plane->pixel_format = dml2_444_8; - break; - case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: - plane->pixel_format = dml2_rgbe_alpha; - break; - default: - plane->pixel_format = dml2_444_32; - break; - } - - plane->composition.viewport.plane0.height = scaler_data->viewport.height; - plane->composition.viewport.plane0.width = scaler_data->viewport.width; - plane->composition.viewport.plane1.height = scaler_data->viewport_c.height; - plane->composition.viewport.plane1.width = scaler_data->viewport_c.width; - plane->composition.viewport.plane0.x_start = scaler_data->viewport.x; - plane->composition.viewport.plane0.y_start = scaler_data->viewport.y; - plane->composition.viewport.plane1.x_start = scaler_data->viewport_c.x; - 
plane->composition.viewport.plane1.y_start = scaler_data->viewport_c.y;
-	plane->composition.viewport.stationary = false;
-	plane->composition.scaler_info.enabled = scaler_data->ratios.horz.value != dc_fixpt_one.value ||
-		scaler_data->ratios.horz_c.value != dc_fixpt_one.value ||
-		scaler_data->ratios.vert.value != dc_fixpt_one.value ||
-		scaler_data->ratios.vert_c.value != dc_fixpt_one.value;
-
-	if (!scaler_data->taps.h_taps) {
-		/* Above logic determines scaling should be enabled even when there are no taps for
-		 * certain cases. Hence take corrective action and disable scaling.
-		 */
-		plane->composition.scaler_info.enabled = false;
-	} else if ((plane_state->ctx->dc->config.use_spl == true) &&
-		(plane->composition.scaler_info.enabled == false)) {
-		/* To enable sharpener for 1:1, scaler must be enabled. If use_spl is set, then
-		 * allow case where ratio is 1 but taps > 1
-		 */
-		if ((scaler_data->taps.h_taps > 1) || (scaler_data->taps.v_taps > 1) ||
-			(scaler_data->taps.h_taps_c > 1) || (scaler_data->taps.v_taps_c > 1))
-			plane->composition.scaler_info.enabled = true;
-	}
-
-	/* always_scale is only used for debug purposes, not in production, but it has to be
-	 * maintained for certain compliances. */
-	if (plane_state->ctx->dc->debug.always_scale == true) {
-		plane->composition.scaler_info.enabled = true;
-	}
-
-	if (plane->composition.scaler_info.enabled == false) {
-		plane->composition.scaler_info.plane0.h_ratio = 1.0;
-		plane->composition.scaler_info.plane0.v_ratio = 1.0;
-		plane->composition.scaler_info.plane1.h_ratio = 1.0;
-		plane->composition.scaler_info.plane1.v_ratio = 1.0;
-	} else {
-		plane->composition.scaler_info.plane0.h_ratio = (double)scaler_data->ratios.horz.value / (1ULL << 32);
-		plane->composition.scaler_info.plane0.v_ratio = (double)scaler_data->ratios.vert.value / (1ULL << 32);
-		plane->composition.scaler_info.plane1.h_ratio = (double)scaler_data->ratios.horz_c.value / (1ULL << 32);
-		plane->composition.scaler_info.plane1.v_ratio = (double)scaler_data->ratios.vert_c.value / (1ULL << 32);
-	}
-
-	if (!scaler_data->taps.h_taps) {
-		plane->composition.scaler_info.plane0.h_taps = 1;
-		plane->composition.scaler_info.plane1.h_taps = 1;
-	} else {
-		plane->composition.scaler_info.plane0.h_taps = scaler_data->taps.h_taps;
-		plane->composition.scaler_info.plane1.h_taps = scaler_data->taps.h_taps_c;
-	}
-	if (!scaler_data->taps.v_taps) {
-		plane->composition.scaler_info.plane0.v_taps = 1;
-		plane->composition.scaler_info.plane1.v_taps = 1;
-	} else {
-		plane->composition.scaler_info.plane0.v_taps = scaler_data->taps.v_taps;
-		plane->composition.scaler_info.plane1.v_taps = scaler_data->taps.v_taps_c;
-	}
-
-	plane->composition.viewport.stationary = false;
-
-	if (plane_state->mcm_luts.lut3d_data.lut3d_src == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) {
-		plane->tdlut.setup_for_tdlut = true;
-
-		switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.layout) {
-		case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB:
-		case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR:
-			plane->tdlut.tdlut_addressing_mode = dml2_tdlut_sw_linear;
-			break;
-		case DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR:
-			plane->tdlut.tdlut_addressing_mode = dml2_tdlut_simple_linear;
-			break;
-		}
-
-		switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.size) {
-		case DC_CM2_GPU_MEM_SIZE_171717:
-			plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube;
-			break;
-		case DC_CM2_GPU_MEM_SIZE_TRANSFORMED:
-		default:
-			//plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined
-			break;
-		}
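
The ratio conversion above divides dc's fixed-point scale ratios by 2^32, i.e. it treats them as values with 32 fractional bits; dc_fixpt_one.value is 1.0 at the same scale, which is what the scaler-enable test compares each ratio against. A minimal sketch of that conversion, assuming the 32-fractional-bit layout:

	long long horz_value = 0x180000000LL;			/* 1.5 in 32.32 fixed point */
	double h_ratio = (double)horz_value / (1ULL << 32);	/* == 1.5 */
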
- } - plane->tdlut.setup_for_tdlut |= dml_ctx->config.force_tdlut_enable; - - plane->dynamic_meta_data.enable = false; - plane->dynamic_meta_data.lines_before_active_required = 0; - plane->dynamic_meta_data.transmitted_bytes = 0; - - plane->composition.scaler_info.rect_out_width = plane_state->dst_rect.width; - plane->composition.rotation_angle = (enum dml2_rotation_angle) plane_state->rotation; - plane->stream_index = stream_index; - - plane->overrides.gpuvm_min_page_size_kbytes = 256; - - plane->immediate_flip = plane_state->flip_immediate; - - plane->composition.rect_out_height_spans_vactive = - plane_state->dst_rect.height >= stream->src.height && - stream->dst.height >= stream->timing.v_addressable; -} - -//TODO : Could be possibly moved to a common helper layer. -static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id) -{ - int i, j; - - if (!plane_id) - return false; - - for (i = 0; i < context->stream_count; i++) { - if (context->streams[i]->stream_id == stream_id) { - for (j = 0; j < context->stream_status[i].plane_count; j++) { - if (context->stream_status[i].plane_states[j] == plane) { - *plane_id = (i << 16) | j; - return true; - } - } - } - } - - return false; -} - -static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *dml_ctx, const struct dc_stream_state *stream) -{ - int i = 0; - int location = -1; - - for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { - if (dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] && dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i] == stream->stream_id) { - location = i; - break; - } - } - - return location; -} - -unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, - const struct dc_plane_state *plane, const struct dc_state *context) -{ - unsigned int plane_id; - int i = 0; - int location = -1; - - if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &plane_id)) { - ASSERT(false); - return -1; - } - - for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { - if (dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[i] && dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i] == plane_id) { - location = i; - break; - } - } - - return location; -} - -static enum dml2_uclk_pstate_change_strategy dml21_force_pstate_method_to_uclk_state_change_strategy(enum dml2_force_pstate_methods force_pstate_method) -{ - enum dml2_uclk_pstate_change_strategy val = dml2_uclk_pstate_change_strategy_auto; - - switch (force_pstate_method) { - case dml2_force_pstate_method_vactive: - val = dml2_uclk_pstate_change_strategy_force_vactive; - break; - case dml2_force_pstate_method_vblank: - val = dml2_uclk_pstate_change_strategy_force_vblank; - break; - case dml2_force_pstate_method_drr: - val = dml2_uclk_pstate_change_strategy_force_drr; - break; - case dml2_force_pstate_method_subvp: - val = dml2_uclk_pstate_change_strategy_force_mall_svp; - break; - case dml2_force_pstate_method_auto: - default: - val = dml2_uclk_pstate_change_strategy_auto; - } - - return val; -} - -bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx) -{ - int stream_index, plane_index; - int disp_cfg_stream_location, disp_cfg_plane_location; - struct dml2_display_cfg *dml_dispcfg = &dml_ctx->v21.display_config; - unsigned int plane_count = 0; - - memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, 
sizeof(struct dml2_dml_to_dc_pipe_mapping)); - - dml_dispcfg->gpuvm_enable = dml_ctx->config.gpuvm_enable; - dml_dispcfg->gpuvm_max_page_table_levels = 4; - dml_dispcfg->hostvm_enable = false; - dml_dispcfg->minimize_det_reallocation = true; - dml_dispcfg->overrides.enable_subvp_implicit_pmo = true; - - if (in_dc->debug.disable_unbounded_requesting) { - dml_dispcfg->overrides.hw.force_unbounded_requesting.enable = true; - dml_dispcfg->overrides.hw.force_unbounded_requesting.value = false; - } - - for (stream_index = 0; stream_index < context->stream_count; stream_index++) { - disp_cfg_stream_location = map_stream_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]); - - if (disp_cfg_stream_location < 0) - disp_cfg_stream_location = dml_dispcfg->num_streams++; - - ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); - populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index], dml_ctx); - populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); - populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index], &context->stream_status[stream_index]); - - dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.fclk_pstate = dml2_twait_budgeting_setting_if_needed; - dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.uclk_pstate = dml2_twait_budgeting_setting_if_needed; - dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.stutter_enter_exit = dml2_twait_budgeting_setting_if_needed; - - dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_stream_location] = context->streams[stream_index]->stream_id; - dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_stream_location] = true; - - if (context->stream_status[stream_index].plane_count == 0) { - disp_cfg_plane_location = dml_dispcfg->num_planes++; - populate_dml21_dummy_surface_cfg(&dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->streams[stream_index]); - populate_dml21_dummy_plane_cfg(&dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->streams[stream_index]); - dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; - } else { - for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) { - disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], context); - - if (disp_cfg_plane_location < 0) - disp_cfg_plane_location = dml_dispcfg->num_planes++; - - ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); - - populate_dml21_surface_config_from_plane_state(in_dc, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->stream_status[stream_index].plane_states[plane_index]); - populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); - 
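
dml21_wrapper_get_plane_id() above packs a (stream index, plane index) pair into a single id as (i << 16) | j; the low 16 bits are decoded again by dml21_get_dc_plane_idx_from_plane_id() in the removed dml21_utils.c below. A minimal sketch of the encode/decode pair, with hypothetical helper names:

	static unsigned int pack_plane_id(unsigned int stream_idx, unsigned int plane_idx)
	{
		return (stream_idx << 16) | plane_idx;	/* mirrors (i << 16) | j */
	}

	static unsigned int plane_idx_from_plane_id(unsigned int plane_id)
	{
		return plane_id & 0xffff;	/* low 16 bits: plane index */
	}
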
dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location;
-
-				if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location]))
-					dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true;
-
-				/* apply forced pstate policy */
-				if (dml_ctx->config.pmo.force_pstate_method_enable) {
-					dml_dispcfg->plane_descriptors[disp_cfg_plane_location].overrides.uclk_pstate_change_strategy =
-						dml21_force_pstate_method_to_uclk_state_change_strategy(dml_ctx->config.pmo.force_pstate_method_values[stream_index]);
-				}
-
-				plane_count++;
-			}
-		}
-	}
-
-	if (plane_count == 0) {
-		dml_dispcfg->overrides.all_streams_blanked = true;
-	}
-
-	return true;
-}
-
-void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context)
-{
-	/* TODO: these should be the max of active and svp prefetch; idle should be tracked separately */
-	context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dispclk_khz;
-	context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.dcfclk_khz;
-	context->bw_ctx.bw.dcn.clk.dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.uclk_khz;
-	context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.fclk_khz;
-	context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.uclk_khz;
-	context->bw_ctx.bw.dcn.clk.idle_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.fclk_khz;
-	context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.deepsleep_dcfclk_khz;
-	context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = in_ctx->v21.mode_programming.programming->fclk_pstate_supported;
-	context->bw_ctx.bw.dcn.clk.p_state_change_support = in_ctx->v21.mode_programming.programming->uclk_pstate_supported;
-	context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0;
-	context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz;
-	context->bw_ctx.bw.dcn.clk.socclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.socclk_khz;
-	context->bw_ctx.bw.dcn.clk.subvp_prefetch_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz;
-	context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz;
-	context->bw_ctx.bw.dcn.clk.stutter_efficiency.base_efficiency = in_ctx->v21.mode_programming.programming->stutter.base_percent_efficiency;
-	context->bw_ctx.bw.dcn.clk.stutter_efficiency.low_power_efficiency = in_ctx->v21.mode_programming.programming->stutter.low_power_percent_efficiency;
-}
-
-static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_watermark_set *watermarks, const enum dml2_dchub_watermark_reg_set_index wm_index)
-{
-	struct dml2_dchub_watermark_regs *wm_regs = NULL;
-
-	switch (wm_index) {
-	case DML2_DCHUB_WATERMARK_SET_A:
-		wm_regs = &watermarks->dcn4x.a;
-		break;
-	case DML2_DCHUB_WATERMARK_SET_B:
-		wm_regs = &watermarks->dcn4x.b;
-		break;
-	case DML2_DCHUB_WATERMARK_SET_C:
-
wm_regs = &watermarks->dcn4x.c; - break; - case DML2_DCHUB_WATERMARK_SET_D: - wm_regs = &watermarks->dcn4x.d; - break; - case DML2_DCHUB_WATERMARK_SET_NUM: - default: - /* invalid wm set index */ - wm_regs = NULL; - } - - return wm_regs; -} - -void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx) -{ - const struct dml2_display_cfg_programming *programming = in_ctx->v21.mode_programming.programming; - - unsigned int wm_index; - - /* copy watermark sets from DML */ - for (wm_index = 0; wm_index < programming->global_regs.num_watermark_sets; wm_index++) { - struct dml2_dchub_watermark_regs *wm_regs = wm_set_index_to_dc_wm_set(watermarks, wm_index); - - if (wm_regs) - memcpy(wm_regs, - &programming->global_regs.wm_regs[wm_index], - sizeof(struct dml2_dchub_watermark_regs)); - } -} - -void dml21_map_hw_resources(struct dml2_context *dml_ctx) -{ - unsigned int i = 0; - - for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { - dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[i] = dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i]; - dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] = true; - dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[i] = dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i]; - dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] = true; - } - -} - -void dml21_get_pipe_mcache_config( - struct dc_state *context, - struct pipe_ctx *pipe_ctx, - struct dml2_per_plane_programming *pln_prog, - struct dml2_pipe_configuration_descriptor *mcache_pipe_config) -{ - mcache_pipe_config->plane0.viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x; - mcache_pipe_config->plane0.viewport_width = pipe_ctx->plane_res.scl_data.viewport.width; - - mcache_pipe_config->plane1.viewport_x_start = pipe_ctx->plane_res.scl_data.viewport_c.x; - mcache_pipe_config->plane1.viewport_width = pipe_ctx->plane_res.scl_data.viewport_c.width; - - mcache_pipe_config->plane1_enabled = - dml21_is_plane1_enabled(pln_prog->plane_descriptor->pixel_format); -} - -void dml21_set_dc_p_state_type( - struct pipe_ctx *pipe_ctx, - struct dml2_per_stream_programming *stream_programming, - bool sub_vp_enabled) -{ - switch (stream_programming->uclk_pstate_method) { - case dml2_pstate_method_vactive: - case dml2_pstate_method_fw_vactive_drr: - pipe_ctx->p_state_type = P_STATE_V_ACTIVE; - break; - case dml2_pstate_method_vblank: - case dml2_pstate_method_fw_vblank_drr: - if (sub_vp_enabled) - pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP; - else - pipe_ctx->p_state_type = P_STATE_V_BLANK; - break; - case dml2_pstate_method_fw_svp: - case dml2_pstate_method_fw_svp_drr: - pipe_ctx->p_state_type = P_STATE_SUB_VP; - break; - case dml2_pstate_method_fw_drr: - if (sub_vp_enabled) - pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP; - else - pipe_ctx->p_state_type = P_STATE_FPO; - break; - default: - pipe_ctx->p_state_type = P_STATE_UNKNOWN; - break; - } -} - diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h deleted file mode 100644 index 9880d3e0398e..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
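
wm_set_index_to_dc_wm_set() in the removed dml21_translation_helper.c above maps each DML2 watermark-set index 1:1 onto the dcn4x a/b/c/d register blocks. An equivalent (hypothetical) table form, assuming the enum values run contiguously from DML2_DCHUB_WATERMARK_SET_A:

	static struct dml2_dchub_watermark_regs *
	wm_set_lookup(union dcn_watermark_set *wm, enum dml2_dchub_watermark_reg_set_index i)
	{
		struct dml2_dchub_watermark_regs *sets[] = {
			&wm->dcn4x.a, &wm->dcn4x.b, &wm->dcn4x.c, &wm->dcn4x.d,
		};

		return i < DML2_DCHUB_WATERMARK_SET_NUM ? sets[i] : NULL;
	}
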
- - -#ifndef _DML21_TRANSLATION_HELPER_H_ -#define _DML21_TRANSLATION_HELPER_H_ - -struct dc; -struct dc_state; -struct dcn_watermarks; -union dcn_watermark_set; -struct pipe_ctx; -struct dc_plane_state; - -struct dml2_context; -struct dml2_configuration_options; -struct dml2_initialize_instance_in_out; - -void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc); -bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); -void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context); -void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx); -void dml21_map_hw_resources(struct dml2_context *dml_ctx); -void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config); -void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming, bool sub_vp_enabled); -unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context); -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c deleted file mode 100644 index ee721606b883..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ /dev/null @@ -1,516 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - - -#include "dml2_internal_shared_types.h" -#include "dml21_translation_helper.h" -#include "dml2_internal_types.h" -#include "dml21_utils.h" -#include "dml2_dc_resource_mgmt.h" - -#include "dml2_core_dcn4_calcs.h" - -int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id) -{ - int i; - for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { - if (ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] && ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[i] == stream_id) - return i; - } - - return -1; -} - -int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id) -{ - int i; - for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { - if (ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] && ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[i] == plane_id) - return i; - } - - return -1; -} - -bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id) -{ - int i, j; - - if (!plane_id) - return false; - - for (i = 0; i < state->stream_count; i++) { - for (j = 0; j < state->stream_status[i].plane_count; j++) { - if (state->stream_status[i].plane_states[j] == plane) { - *plane_id = (i << 16) | j; - return true; - } - } - } - - return false; -} - -unsigned int dml21_get_dc_plane_idx_from_plane_id(unsigned int plane_id) -{ - return 0xffff & plane_id; -} - -void find_valid_pipe_idx_for_stream_index(const struct dml2_context *dml_ctx, unsigned int *dml_pipe_idx, unsigned int stream_index) -{ - unsigned int i = 0; - - for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { - if (dml_ctx->v21.mode_programming.programming->plane_programming[i].plane_descriptor->stream_index == stream_index) { - 
*dml_pipe_idx = i; - return; - } - } -} - -void find_pipe_regs_idx(const struct dml2_context *dml_ctx, - struct pipe_ctx *pipe, unsigned int *pipe_regs_idx) -{ - struct pipe_ctx *opp_head = dml_ctx->config.callbacks.get_opp_head(pipe); - - *pipe_regs_idx = dml_ctx->config.callbacks.get_odm_slice_index(opp_head); - - if (pipe->plane_state) - *pipe_regs_idx += dml_ctx->config.callbacks.get_mpc_slice_index(pipe); -} - -/* places pipe references into pipes arrays and returns number of pipes */ -int dml21_find_dc_pipes_for_plane(const struct dc *in_dc, - struct dc_state *context, - struct dml2_context *dml_ctx, - struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], - struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], - int dml_plane_idx) -{ - unsigned int dml_stream_index; - unsigned int main_stream_id; - unsigned int dc_plane_index; - struct dc_stream_state *dc_main_stream; - struct dc_stream_status *dc_main_stream_status; - struct dc_plane_state *dc_main_plane; - struct dc_stream_state *dc_phantom_stream; - struct dc_stream_status *dc_phantom_stream_status; - struct dc_plane_state *dc_phantom_plane; - int num_pipes = 0; - - memset(dc_main_pipes, 0, sizeof(struct pipe_ctx *) * __DML2_WRAPPER_MAX_STREAMS_PLANES__); - memset(dc_phantom_pipes, 0, sizeof(struct pipe_ctx *) * __DML2_WRAPPER_MAX_STREAMS_PLANES__); - - dml_stream_index = dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_idx].plane_descriptor->stream_index; - main_stream_id = dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_stream_index]; - - dc_main_stream = dml_ctx->config.callbacks.get_stream_from_id(context, main_stream_id); - dc_main_stream_status = dml_ctx->config.callbacks.get_stream_status(context, dc_main_stream); - if (!dc_main_stream_status) - return num_pipes; - - /* find main plane based on id */ - dc_plane_index = dml21_get_dc_plane_idx_from_plane_id(dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[dml_plane_idx]); - dc_main_plane = dc_main_stream_status->plane_states[dc_plane_index]; - - if (dc_main_plane) { - num_pipes = dml_ctx->config.callbacks.get_dpp_pipes_for_plane(dc_main_plane, &context->res_ctx, dc_main_pipes); - } else { - /* stream was configured with dummy plane, so get pipes from opp head */ - struct pipe_ctx *otg_master_pipe = dml_ctx->config.callbacks.get_otg_master_for_stream(&context->res_ctx, dc_main_stream); - if (otg_master_pipe != NULL) - num_pipes = dml_ctx->config.callbacks.get_opp_heads_for_otg_master(otg_master_pipe, &context->res_ctx, dc_main_pipes); - } - - /* if phantom exists, find associated pipes */ - dc_phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, dc_main_stream); - if (dc_phantom_stream && num_pipes > 0) { - dc_phantom_stream_status = dml_ctx->config.callbacks.get_stream_status(context, dc_phantom_stream); - - if (dc_phantom_stream_status) { - /* phantom plane will have same index as main */ - dc_phantom_plane = dc_phantom_stream_status->plane_states[dc_plane_index]; - - if (dc_phantom_plane) { - /* only care about phantom pipes if they contain the phantom plane */ - dml_ctx->config.callbacks.get_dpp_pipes_for_plane(dc_phantom_plane, &context->res_ctx, dc_phantom_pipes); - } - } - } - - return num_pipes; -} - -void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx, - struct dc_state *context, - struct pipe_ctx *pipe_ctx, - struct dml2_per_stream_programming *stream_programming) -{ - union dml2_global_sync_programming *global_sync = 
&stream_programming->global_sync; - - if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { - /* phantom has its own global sync */ - global_sync = &stream_programming->phantom_stream.global_sync; - } - - memcpy(&pipe_ctx->global_sync, - global_sync, - sizeof(union dml2_global_sync_programming)); -} - -void dml21_populate_mall_allocation_size(struct dc_state *context, - struct dml2_context *in_ctx, - struct dml2_per_plane_programming *pln_prog, - struct pipe_ctx *dc_pipe) -{ - - /* Reuse MALL Allocation Sizes logic from dcn32_fpu.c */ - /* Count from active, top pipes per plane only. Only add mall_ss_size_bytes for each unique plane. */ - if (dc_pipe->stream && dc_pipe->plane_state && - (dc_pipe->top_pipe == NULL || - dc_pipe->plane_state != dc_pipe->top_pipe->plane_state) && - dc_pipe->prev_odm_pipe == NULL) { - /* SS: all active surfaces stored in MALL */ - if (in_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, dc_pipe) != SUBVP_PHANTOM) { - dc_pipe->surface_size_in_mall_bytes = pln_prog->surface_size_mall_bytes; - context->bw_ctx.bw.dcn.mall_ss_size_bytes += dc_pipe->surface_size_in_mall_bytes; - } else { - /* SUBVP: phantom surfaces only stored in MALL */ - dc_pipe->surface_size_in_mall_bytes = pln_prog->svp_size_mall_bytes; - context->bw_ctx.bw.dcn.mall_subvp_size_bytes += dc_pipe->surface_size_in_mall_bytes; - } - } -} - -bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) -{ - /* If this assert is hit then we have a link encoder dynamic management issue */ - ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); - return (pipe_ctx->stream_res.hpo_dp_stream_enc && - pipe_ctx->link_res.hpo_dp_link_enc && - dc_is_dp_signal(pipe_ctx->stream->signal)); -} - - -static bool is_sub_vp_enabled(struct dc *dc, struct dc_state *context) -{ - int i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->stream && dc_state_get_paired_subvp_stream(context, pipe_ctx->stream) && - dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { - return true; - } - } - return false; -} - - -void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, - struct dml2_per_stream_programming *stream_prog) -{ - unsigned int pipe_reg_index = 0; - - dml21_pipe_populate_global_sync(dml_ctx, context, pipe_ctx, stream_prog); - find_pipe_regs_idx(dml_ctx, pipe_ctx, &pipe_reg_index); - - if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { - memcpy(&pipe_ctx->hubp_regs, pln_prog->phantom_plane.pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set)); - pipe_ctx->unbounded_req = false; - pipe_ctx->det_buffer_size_kb = 0; - } else { - memcpy(&pipe_ctx->hubp_regs, pln_prog->pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set)); - pipe_ctx->unbounded_req = pln_prog->pipe_regs[pipe_reg_index]->rq_regs.unbounded_request_enabled; - pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64; - } - - pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4x.dppclk_khz; - if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipe_ctx->plane_res.bw.dppclk_khz) - context->bw_ctx.bw.dcn.clk.dppclk_khz = pipe_ctx->plane_res.bw.dppclk_khz; - - dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx); - - bool 
sub_vp_enabled = is_sub_vp_enabled(pipe_ctx->stream->ctx->dc, context); - - dml21_set_dc_p_state_type(pipe_ctx, stream_prog, sub_vp_enabled); -} - -static struct dc_stream_state *dml21_add_phantom_stream(struct dml2_context *dml_ctx, - const struct dc *dc, - struct dc_state *context, - struct dc_stream_state *main_stream, - struct dml2_per_stream_programming *stream_programming) -{ - struct dc_stream_state *phantom_stream; - struct dml2_stream_parameters *phantom_stream_descriptor = &stream_programming->phantom_stream.descriptor; - - phantom_stream = dml_ctx->config.svp_pstate.callbacks.create_phantom_stream(dc, context, main_stream); - if (!phantom_stream) - return NULL; - - /* copy details of phantom stream from main */ - memcpy(&phantom_stream->timing, &main_stream->timing, sizeof(phantom_stream->timing)); - memcpy(&phantom_stream->src, &main_stream->src, sizeof(phantom_stream->src)); - memcpy(&phantom_stream->dst, &main_stream->dst, sizeof(phantom_stream->dst)); - - /* modify timing for phantom */ - phantom_stream->timing.v_front_porch = phantom_stream_descriptor->timing.v_front_porch; - phantom_stream->timing.v_addressable = phantom_stream_descriptor->timing.v_active; - phantom_stream->timing.v_total = phantom_stream_descriptor->timing.v_total; - phantom_stream->timing.flags.DSC = 0; // phantom always has DSC disabled - - phantom_stream->dst.y = 0; - phantom_stream->dst.height = stream_programming->phantom_stream.descriptor.timing.v_active; - - phantom_stream->src.y = 0; - phantom_stream->src.height = (double)phantom_stream_descriptor->timing.v_active * (double)main_stream->src.height / (double)main_stream->dst.height; - - phantom_stream->use_dynamic_meta = false; - - dml_ctx->config.svp_pstate.callbacks.add_phantom_stream(dc, context, phantom_stream, main_stream); - - return phantom_stream; -} - -static struct dc_plane_state *dml21_add_phantom_plane(struct dml2_context *dml_ctx, - const struct dc *dc, - struct dc_state *context, - struct dc_stream_state *phantom_stream, - struct dc_plane_state *main_plane, - struct dml2_per_plane_programming *plane_programming) -{ - struct dc_plane_state *phantom_plane; - - phantom_plane = dml_ctx->config.svp_pstate.callbacks.create_phantom_plane(dc, context, main_plane); - if (!phantom_plane) - return NULL; - - phantom_plane->format = main_plane->format; - phantom_plane->rotation = main_plane->rotation; - phantom_plane->visible = main_plane->visible; - - memcpy(&phantom_plane->address, &main_plane->address, sizeof(phantom_plane->address)); - memcpy(&phantom_plane->scaling_quality, &main_plane->scaling_quality, - sizeof(phantom_plane->scaling_quality)); - memcpy(&phantom_plane->src_rect, &main_plane->src_rect, sizeof(phantom_plane->src_rect)); - memcpy(&phantom_plane->dst_rect, &main_plane->dst_rect, sizeof(phantom_plane->dst_rect)); - memcpy(&phantom_plane->clip_rect, &main_plane->clip_rect, sizeof(phantom_plane->clip_rect)); - memcpy(&phantom_plane->plane_size, &main_plane->plane_size, - sizeof(phantom_plane->plane_size)); - memcpy(&phantom_plane->tiling_info, &main_plane->tiling_info, - sizeof(phantom_plane->tiling_info)); - memcpy(&phantom_plane->dcc, &main_plane->dcc, sizeof(phantom_plane->dcc)); - - phantom_plane->format = main_plane->format; - phantom_plane->rotation = main_plane->rotation; - phantom_plane->visible = main_plane->visible; - - /* Shadow pipe has small viewport. 
*/ - phantom_plane->clip_rect.y = 0; - phantom_plane->clip_rect.height = phantom_stream->src.height; - - dml_ctx->config.svp_pstate.callbacks.add_phantom_plane(dc, phantom_stream, phantom_plane, context); - - return phantom_plane; -} - -void dml21_handle_phantom_streams_planes(const struct dc *dc, struct dc_state *context, struct dml2_context *dml_ctx) -{ - unsigned int dml_stream_index, dml_plane_index, dc_plane_index; - struct dc_stream_state *main_stream; - struct dc_stream_status *main_stream_status; - struct dc_stream_state *phantom_stream; - struct dc_plane_state *main_plane; - bool phantoms_added = false; - - /* create phantom streams and planes and add to context */ - for (dml_stream_index = 0; dml_stream_index < dml_ctx->v21.mode_programming.programming->display_config.num_streams; dml_stream_index++) { - /* iterate through DML streams looking for phantoms */ - if (dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_index].phantom_stream.enabled) { - /* find associated dc stream */ - main_stream = dml_ctx->config.callbacks.get_stream_from_id(context, - dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_stream_index]); - - main_stream_status = dml_ctx->config.callbacks.get_stream_status(context, main_stream); - - if (!main_stream_status || main_stream_status->plane_count == 0) - continue; - - /* create phantom stream for subvp enabled stream */ - phantom_stream = dml21_add_phantom_stream(dml_ctx, - dc, - context, - main_stream, - &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_index]); - - if (!phantom_stream) - continue; - - /* iterate through DML planes associated with this stream */ - for (dml_plane_index = 0; dml_plane_index < dml_ctx->v21.mode_programming.programming->display_config.num_planes; dml_plane_index++) { - if (dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_index].plane_descriptor->stream_index == dml_stream_index) { - /* find associated dc plane */ - dc_plane_index = dml21_get_dc_plane_idx_from_plane_id(dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[dml_plane_index]); - main_plane = main_stream_status->plane_states[dc_plane_index]; - - /* create phantom planes for subvp enabled plane */ - dml21_add_phantom_plane(dml_ctx, - dc, - context, - phantom_stream, - main_plane, - &dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_index]); - - phantoms_added = true; - } - } - } - } - - if (phantoms_added) - dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, dc->current_state); -} - -void dml21_build_fams2_programming(const struct dc *dc, - struct dc_state *context, - struct dml2_context *dml_ctx) -{ - int i, j, k; - unsigned int num_fams2_streams = 0; - - /* reset fams2 data */ - memset(&context->bw_ctx.bw.dcn.fams2_stream_base_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES); - memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES); - memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2, 0, sizeof(union dmub_fams2_stream_static_sub_state_v2) * DML2_MAX_PLANES); - memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config)); - - if (dml_ctx->v21.mode_programming.programming->fams2_required) { - for (i = 0; i < context->stream_count; i++) { - int dml_stream_idx; - struct dc_stream_state *phantom_stream; - struct dc_stream_status *phantom_status; - enum fams2_stream_type type = 0; - - union dmub_cmd_fams2_config 
*static_base_state = &context->bw_ctx.bw.dcn.fams2_stream_base_params[num_fams2_streams]; - union dmub_cmd_fams2_config *static_sub_state = &context->bw_ctx.bw.dcn.fams2_stream_sub_params[num_fams2_streams]; - - struct dc_stream_state *stream = context->streams[i]; - - if (context->stream_status[i].plane_count == 0 || - dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) { - /* can ignore blanked or phantom streams */ - continue; - } - - dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id); - if (dml_stream_idx < 0) { - ASSERT(dml_stream_idx >= 0); - continue; - } - - /* copy static state from PMO */ - memcpy(static_base_state, - &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_base_params, - sizeof(union dmub_cmd_fams2_config)); - - if (dc->debug.fams_version.major == 3) { - memcpy(&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2[num_fams2_streams], - &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params_v2, - sizeof(union dmub_fams2_stream_static_sub_state_v2)); - } else { - memcpy(static_sub_state, - &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params, - sizeof(union dmub_cmd_fams2_config)); - } - - switch (dc->debug.fams_version.minor) { - case 1: - default: - type = static_base_state->stream_v1.base.type; - - /* get information from context */ - static_base_state->stream_v1.base.num_planes = context->stream_status[i].plane_count; - static_base_state->stream_v1.base.otg_inst = context->stream_status[i].primary_otg_inst; - - /* populate pipe masks for planes */ - for (j = 0; j < context->stream_status[i].plane_count; j++) { - for (k = 0; k < dc->res_pool->pipe_count; k++) { - if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { - static_base_state->stream_v1.base.pipe_mask |= (1 << k); - static_base_state->stream_v1.base.plane_pipe_masks[j] |= (1 << k); - } - } - } - } - - - /* get per method programming */ - switch (type) { - case FAMS2_STREAM_TYPE_VBLANK: - case FAMS2_STREAM_TYPE_VACTIVE: - case FAMS2_STREAM_TYPE_DRR: - break; - case FAMS2_STREAM_TYPE_SUBVP: - phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream); - if (!phantom_stream) - break; - - phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream); - - /* phantom status should always be present */ - ASSERT(phantom_status); - if (!phantom_status) - break; - - switch (dc->debug.fams_version.minor) { - case 1: - default: - static_sub_state->stream_v1.sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; - - /* populate pipe masks for phantom planes */ - for (j = 0; j < phantom_status->plane_count; j++) { - for (k = 0; k < dc->res_pool->pipe_count; k++) { - if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { - switch (dc->debug.fams_version.minor) { - case 1: - default: - static_sub_state->stream_v1.sub_state.subvp.phantom_pipe_mask |= (1 << k); - static_sub_state->stream_v1.sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); - } - } - } - } - } - break; - default: - ASSERT(false); - break; - } - - num_fams2_streams++; - } - } - - if 
(num_fams2_streams > 0) { - /* copy FAMS2 configuration */ - memcpy(&context->bw_ctx.bw.dcn.fams2_global_config, - &dml_ctx->v21.mode_programming.programming->fams2_global_config, - sizeof(struct dmub_cmd_fams2_global_config)); - - context->bw_ctx.bw.dcn.fams2_global_config.num_streams = num_fams2_streams; - } - - context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; -} - -bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format) -{ - return source_format >= dml2_420_8 && source_format <= dml2_rgbe_alpha; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h deleted file mode 100644 index 4bff52eaaef8..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h +++ /dev/null @@ -1,50 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - - -#ifndef _DML21_UTILS_H_ -#define _DML21_UTILS_H_ - -struct dc_state; -struct dc_plane_state; -struct pipe_ctx; - -struct dml2_context; -struct dml2_display_rq_regs; -struct dml2_display_dlg_regs; -struct dml2_display_ttu_regs; - -int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id); -int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id); -bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id); -void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx, - struct dc_state *context, - struct pipe_ctx *pipe_ctx, - struct dml2_per_stream_programming *stream_programming); -void dml21_populate_mall_allocation_size(struct dc_state *context, - struct dml2_context *in_ctx, - struct dml2_per_plane_programming *pln_prog, - struct pipe_ctx *dc_pipe); -bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx); -void find_valid_pipe_idx_for_stream_index(const struct dml2_context *dml_ctx, unsigned int *dml_pipe_idx, unsigned int stream_index); -void find_pipe_regs_idx(const struct dml2_context *dml_ctx, - struct pipe_ctx *pipe, unsigned int *pipe_regs_idx); -int dml21_find_dc_pipes_for_plane(const struct dc *in_dc, - struct dc_state *context, - struct dml2_context *dml_ctx, - struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], - struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], - int dml_plane_idx); -void dml21_program_dc_pipe(struct dml2_context *dml_ctx, - struct dc_state *context, - struct pipe_ctx *pipe_ctx, - struct dml2_per_plane_programming *pln_prog, - struct dml2_per_stream_programming *stream_prog); -void dml21_handle_phantom_streams_planes(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); -unsigned int dml21_get_dc_plane_idx_from_plane_id(unsigned int plane_id); -void dml21_build_fams2_programming(const struct dc *dc, - struct dc_state *context, - struct dml2_context *dml_ctx); -bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format); -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c deleted file mode 100644 index 43d333d6608e..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ /dev/null @@ -1,468 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
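
dml21_is_plane1_enabled() at the end of the removed dml21_utils.c above classifies a pixel format as dual-plane with a single range test, relying on the dml2_source_format_class enum keeping the two-plane formats (the 4:2:0 variants through dml2_rgbe_alpha) contiguous. A usage sketch, with a hypothetical callee:

	if (dml21_is_plane1_enabled(dml2_420_10))
		setup_chroma_plane();	/* hypothetical: program the separate chroma (plane1) surface */
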
- -#include "dml2_internal_types.h" -#include "dml_top.h" -#include "dml2_core_dcn4_calcs.h" -#include "dml2_internal_shared_types.h" -#include "dml21_utils.h" -#include "dml21_translation_helper.h" -#include "dml2_dc_resource_mgmt.h" - -#define INVALID -1 - -static bool dml21_allocate_memory(struct dml2_context **dml_ctx) -{ - *dml_ctx = vzalloc(sizeof(struct dml2_context)); - if (!(*dml_ctx)) - return false; - - (*dml_ctx)->v21.dml_init.dml2_instance = vzalloc(sizeof(struct dml2_instance)); - if (!((*dml_ctx)->v21.dml_init.dml2_instance)) - return false; - - (*dml_ctx)->v21.mode_support.dml2_instance = (*dml_ctx)->v21.dml_init.dml2_instance; - (*dml_ctx)->v21.mode_programming.dml2_instance = (*dml_ctx)->v21.dml_init.dml2_instance; - - (*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config; - (*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config; - - (*dml_ctx)->v21.mode_programming.programming = vzalloc(sizeof(struct dml2_display_cfg_programming)); - if (!((*dml_ctx)->v21.mode_programming.programming)) - return false; - - return true; -} - -static void dml21_populate_configuration_options(const struct dc *in_dc, - struct dml2_context *dml_ctx, - const struct dml2_configuration_options *config) -{ - dml_ctx->config = *config; - - /* UCLK P-State options */ - if (in_dc->debug.dml21_force_pstate_method) { - dml_ctx->config.pmo.force_pstate_method_enable = true; - for (int i = 0; i < MAX_PIPES; i++) - dml_ctx->config.pmo.force_pstate_method_values[i] = in_dc->debug.dml21_force_pstate_method_values[i]; - } else { - dml_ctx->config.pmo.force_pstate_method_enable = false; - } -} - -static void dml21_init(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config) -{ - - dml_ctx->architecture = dml2_architecture_21; - - dml21_populate_configuration_options(in_dc, dml_ctx, config); - - DC_FP_START(); - - dml21_populate_dml_init_params(&dml_ctx->v21.dml_init, &dml_ctx->config, in_dc); - - dml2_initialize_instance(&dml_ctx->v21.dml_init); - - DC_FP_END(); -} - -bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) -{ - /* Allocate memory for initializing DML21 instance */ - if (!dml21_allocate_memory(dml_ctx)) - return false; - - dml21_init(in_dc, *dml_ctx, config); - - return true; -} - -void dml21_destroy(struct dml2_context *dml2) -{ - vfree(dml2->v21.dml_init.dml2_instance); - vfree(dml2->v21.mode_programming.programming); -} - -static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, - struct dml2_context *in_ctx, unsigned int pipe_cnt) -{ - unsigned int dml_prog_idx = 0, dc_pipe_index = 0, num_dpps_required = 0; - struct dml2_per_plane_programming *pln_prog = NULL; - struct dml2_per_stream_programming *stream_prog = NULL; - struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; - struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}; - int num_pipes; - unsigned int dml_phantom_prog_idx; - - context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - - /* copy global DCHUBBUB arbiter registers */ - memcpy(&context->bw_ctx.bw.dcn.arb_regs, &in_ctx->v21.mode_programming.programming->global_regs.arb_regs, sizeof(struct dml2_display_arb_regs)); - - /* legacy only */ - context->bw_ctx.bw.dcn.compbuf_size_kb = (int)in_ctx->v21.mode_programming.programming->global_regs.arb_regs.compbuf_size * 64; - - 
context->bw_ctx.bw.dcn.mall_ss_size_bytes = 0;
-	context->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes = 0;
-	context->bw_ctx.bw.dcn.mall_subvp_size_bytes = 0;
-
-	/* phantoms start after main planes */
-	dml_phantom_prog_idx = in_ctx->v21.mode_programming.programming->display_config.num_planes;
-
-	for (dml_prog_idx = 0; dml_prog_idx < DML2_MAX_PLANES; dml_prog_idx++) {
-		pln_prog = &in_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx];
-
-		if (!pln_prog->plane_descriptor)
-			continue;
-
-		stream_prog = &in_ctx->v21.mode_programming.programming->stream_programming[pln_prog->plane_descriptor->stream_index];
-		num_dpps_required = pln_prog->num_dpps_required;
-
-		if (num_dpps_required == 0) {
-			continue;
-		}
-		num_pipes = dml21_find_dc_pipes_for_plane(dc, context, in_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
-
-		if (num_pipes <= 0)
-			continue;
-
-		/* program each pipe */
-		for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
-			dml21_program_dc_pipe(in_ctx, context, dc_main_pipes[dc_pipe_index], pln_prog, stream_prog);
-
-			if (pln_prog->phantom_plane.valid && dc_phantom_pipes[dc_pipe_index]) {
-				dml21_program_dc_pipe(in_ctx, context, dc_phantom_pipes[dc_pipe_index], pln_prog, stream_prog);
-			}
-		}
-
-		/* copy per plane mcache allocation */
-		memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[dml_prog_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation));
-		if (pln_prog->phantom_plane.valid) {
-			memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[dml_phantom_prog_idx],
-				&pln_prog->phantom_plane.mcache_allocation,
-				sizeof(struct dml2_mcache_surface_allocation));
-
-			dml_phantom_prog_idx++;
-		}
-	}
-
-	/* assign global clocks */
-	context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
-	context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
-	if (in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.num_clk_values > 1) {
-		context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz =
-			in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.clk_values_khz[in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.num_clk_values] * 1000;
-	} else {
-		context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.clk_values_khz[0] * 1000;
-	}
-
-	if (in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.num_clk_values > 1) {
-		context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz =
-			in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.clk_values_khz[in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.num_clk_values] * 1000;
-	} else {
-		context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.clk_values_khz[0] * 1000;
-	}
-
-	/* get global mall allocation */
-	if (dc->res_pool->funcs->calculate_mall_ways_from_bytes) {
-		context->bw_ctx.bw.dcn.clk.num_ways = dc->res_pool->funcs->calculate_mall_ways_from_bytes(dc, context->bw_ctx.bw.dcn.mall_subvp_size_bytes);
-	} else {
-		context->bw_ctx.bw.dcn.clk.num_ways = 0;
-	}
-}
-
-static void dml21_prepare_mcache_params(struct dml2_context *dml_ctx, struct dc_state *context, struct dc_mcache_params *mcache_params)
-{
-	int dc_plane_idx = 0;
-	int dml_prog_idx, stream_idx, plane_idx;
-	struct dml2_per_plane_programming *pln_prog = NULL;
-
-	for (stream_idx = 0; stream_idx < context->stream_count; stream_idx++) {
-		for (plane_idx = 0; plane_idx < context->stream_status[stream_idx].plane_count; plane_idx++) {
-			dml_prog_idx = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_idx]->stream_id,
-					context->stream_status[stream_idx].plane_states[plane_idx], context);
-			if (dml_prog_idx == INVALID) {
-				continue;
-			}
-			pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx];
-			mcache_params[dc_plane_idx].valid = pln_prog->mcache_allocation.valid;
-			mcache_params[dc_plane_idx].num_mcaches_plane0 = pln_prog->mcache_allocation.num_mcaches_plane0;
-			mcache_params[dc_plane_idx].num_mcaches_plane1 = pln_prog->mcache_allocation.num_mcaches_plane1;
-			mcache_params[dc_plane_idx].requires_dedicated_mall_mcache = pln_prog->mcache_allocation.requires_dedicated_mall_mcache;
-			mcache_params[dc_plane_idx].last_slice_sharing.plane0_plane1 = pln_prog->mcache_allocation.last_slice_sharing.plane0_plane1;
-			memcpy(mcache_params[dc_plane_idx].mcache_x_offsets_plane0,
-				pln_prog->mcache_allocation.mcache_x_offsets_plane0,
-				sizeof(int) * (DML2_MAX_MCACHES + 1));
-			memcpy(mcache_params[dc_plane_idx].mcache_x_offsets_plane1,
-				pln_prog->mcache_allocation.mcache_x_offsets_plane1,
-				sizeof(int) * (DML2_MAX_MCACHES + 1));
-			dc_plane_idx++;
-		}
-	}
-}
-
-static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
-{
-	bool result = false;
-	struct dml2_build_mode_programming_in_out *mode_programming = &dml_ctx->v21.mode_programming;
-	struct dc_mcache_params mcache_params[MAX_PLANES] = {0};
-
-	memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg));
-	memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
-	memset(&dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params, 0, sizeof(struct dml2_core_mode_programming_in_out));
-
-	if (!context)
-		return true;
-
-	if (context->stream_count == 0) {
-		dml21_build_fams2_programming(in_dc, context, dml_ctx);
-		return true;
-	}
-
-	/* scrub phantoms from current dc_state */
-	dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context);
-	dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context);
-
-	/* Populate stream, plane mappings and other fields in display config. */
-	DC_FP_START();
-	result = dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx);
-	DC_FP_END();
-	if (!result)
-		return false;
-
-	DC_FP_START();
-	result = dml2_build_mode_programming(mode_programming);
-	DC_FP_END();
-	if (!result)
-		return false;
-
-	/* Check and map HW resources */
-	if (result && !dml_ctx->config.skip_hw_state_mapping) {
-		dml21_map_hw_resources(dml_ctx);
-		dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, in_dc->current_state);
-		/* if subvp phantoms are present, expand them into dc context */
-		dml21_handle_phantom_streams_planes(in_dc, context, dml_ctx);
-
-		if (in_dc->res_pool->funcs->program_mcache_pipe_config) {
-			//Prepare mcache params for each plane based on mcache output from DML
-			dml21_prepare_mcache_params(dml_ctx, context, mcache_params);
-
-			//populate mcache regs to each pipe
-			dml_ctx->config.callbacks.allocate_mcache(context, mcache_params);
-		}
-	}
-
-	/* Copy DML CLK, WM and REG outputs to bandwidth context */
-	if (result && !dml_ctx->config.skip_hw_state_mapping) {
-		dml21_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml_ctx, in_dc->res_pool->pipe_count);
-		dml21_copy_clocks_to_dc_state(dml_ctx, context);
-		dml21_extract_watermark_sets(in_dc, &context->bw_ctx.bw.dcn.watermarks, dml_ctx);
-		dml21_build_fams2_programming(in_dc, context, dml_ctx);
-	}
-
-	return true;
-}
-
-static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
-{
-	bool is_supported = false;
-	struct dml2_initialize_instance_in_out *dml_init = &dml_ctx->v21.dml_init;
-	struct dml2_check_mode_supported_in_out *mode_support = &dml_ctx->v21.mode_support;
-
-	memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg));
-	memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
-	memset(&dml_ctx->v21.mode_programming.dml2_instance->scratch.check_mode_supported_locals.mode_support_params, 0, sizeof(struct dml2_core_mode_support_in_out));
-
-	if (!context || context->stream_count == 0)
-		return true;
-
-	/* Scrub phantoms from current dc_state */
-	dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context);
-	dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context);
-
-	mode_support->dml2_instance = dml_init->dml2_instance;
-	DC_FP_START();
-	dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx);
-	dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming;
-	is_supported = dml2_check_mode_supported(mode_support);
-	DC_FP_END();
-	if (!is_supported)
-		return false;
-
-	return true;
-}
-
-bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx,
-	enum dc_validate_mode validate_mode)
-{
-	bool out = false;
-
-	/* Use dml21_check_mode_support for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX path */
-	if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING)
-		out = dml21_check_mode_support(in_dc, context, dml_ctx);
-	else
-		out = dml21_mode_check_and_programming(in_dc, context, dml_ctx);
-
-	return out;
-}
-
-void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
-{
-	unsigned int dml_prog_idx, dml_phantom_prog_idx, dc_pipe_index;
-	int num_pipes;
-	struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
-	struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0};
-
-	struct dml2_per_plane_programming *pln_prog = NULL;
-	struct dml2_plane_mcache_configuration_descriptor *mcache_config = NULL;
-	struct prepare_mcache_programming_locals *l = &dml_ctx->v21.scratch.prepare_mcache_locals;
-
-	if (context->stream_count == 0) {
-		return;
-	}
-
-	memset(&l->build_mcache_programming_params, 0, sizeof(struct dml2_build_mcache_programming_in_out));
-	l->build_mcache_programming_params.dml2_instance = dml_ctx->v21.dml_init.dml2_instance;
-
-	/* phantoms start after main planes */
-	dml_phantom_prog_idx = dml_ctx->v21.mode_programming.programming->display_config.num_planes;
-
-	/* Build mcache programming parameters per plane per pipe */
-	for (dml_prog_idx = 0; dml_prog_idx < dml_ctx->v21.mode_programming.programming->display_config.num_planes; dml_prog_idx++) {
-		pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx];
-
-		mcache_config = &l->build_mcache_programming_params.mcache_configurations[dml_prog_idx];
-		memset(mcache_config, 0, sizeof(struct dml2_plane_mcache_configuration_descriptor));
-		mcache_config->plane_descriptor = pln_prog->plane_descriptor;
-		mcache_config->mcache_allocation = &context->bw_ctx.bw.dcn.mcache_allocations[dml_prog_idx];
-		mcache_config->num_pipes = pln_prog->num_dpps_required;
-		l->build_mcache_programming_params.num_configurations++;
-
-		if (pln_prog->num_dpps_required == 0) {
-			continue;
-		}
-
-		num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
-		if (num_pipes <= 0 || dc_main_pipes[0]->stream == NULL ||
-				dc_main_pipes[0]->plane_state == NULL)
-			continue;
-
-		/* get config for each pipe */
-		for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
-			ASSERT(dc_main_pipes[dc_pipe_index]);
-			dml21_get_pipe_mcache_config(context, dc_main_pipes[dc_pipe_index], pln_prog, &mcache_config->pipe_configurations[dc_pipe_index]);
-		}
-
-		/* get config for each phantom pipe */
-		if (pln_prog->phantom_plane.valid &&
-				dc_phantom_pipes[0] &&
-				dc_main_pipes[0]->stream &&
-				dc_phantom_pipes[0]->plane_state) {
-			mcache_config = &l->build_mcache_programming_params.mcache_configurations[dml_phantom_prog_idx];
-			memset(mcache_config, 0, sizeof(struct dml2_plane_mcache_configuration_descriptor));
-			mcache_config->plane_descriptor = pln_prog->plane_descriptor;
-			mcache_config->mcache_allocation = &context->bw_ctx.bw.dcn.mcache_allocations[dml_phantom_prog_idx];
-			mcache_config->num_pipes = pln_prog->num_dpps_required;
-			l->build_mcache_programming_params.num_configurations++;
-
-			for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
-				ASSERT(dc_phantom_pipes[dc_pipe_index]);
-				dml21_get_pipe_mcache_config(context, dc_phantom_pipes[dc_pipe_index], pln_prog, &mcache_config->pipe_configurations[dc_pipe_index]);
-			}
-
-			/* increment phantom index */
-			dml_phantom_prog_idx++;
-		}
-	}
-
-	/* Call to generate mcache programming per plane per pipe for the given display configuration */
-	dml2_build_mcache_programming(&l->build_mcache_programming_params);
-
-	/* get per plane per pipe mcache programming */
-	for (dml_prog_idx = 0; dml_prog_idx < dml_ctx->v21.mode_programming.programming->display_config.num_planes; dml_prog_idx++) {
-		pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx];
-
-		num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
-		if (num_pipes <= 0 ||
dc_main_pipes[0]->stream == NULL || - dc_main_pipes[0]->plane_state == NULL) - continue; - - /* get config for each pipe */ - for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { - ASSERT(dc_main_pipes[dc_pipe_index]); - if (l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_prog_idx][dc_pipe_index]) { - memcpy(&dc_main_pipes[dc_pipe_index]->mcache_regs, - l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_prog_idx][dc_pipe_index], - sizeof(struct dml2_hubp_pipe_mcache_regs)); - } - } - - /* get config for each phantom pipe */ - if (pln_prog->phantom_plane.valid && - dc_phantom_pipes[0] && - dc_main_pipes[0]->stream && - dc_phantom_pipes[0]->plane_state) { - for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { - ASSERT(dc_phantom_pipes[dc_pipe_index]); - if (l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_phantom_prog_idx][dc_pipe_index]) { - memcpy(&dc_phantom_pipes[dc_pipe_index]->mcache_regs, - l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_phantom_prog_idx][dc_pipe_index], - sizeof(struct dml2_hubp_pipe_mcache_regs)); - } - } - /* increment phantom index */ - dml_phantom_prog_idx++; - } - } -} - -void dml21_copy(struct dml2_context *dst_dml_ctx, - struct dml2_context *src_dml_ctx) -{ - /* Preserve references to internals */ - struct dml2_instance *dst_dml2_instance = dst_dml_ctx->v21.dml_init.dml2_instance; - struct dml2_display_cfg_programming *dst_dml2_programming = dst_dml_ctx->v21.mode_programming.programming; - - /* Copy context */ - memcpy(dst_dml_ctx, src_dml_ctx, sizeof(struct dml2_context)); - - /* Copy Internals */ - memcpy(dst_dml2_instance, src_dml_ctx->v21.dml_init.dml2_instance, sizeof(struct dml2_instance)); - memcpy(dst_dml2_programming, src_dml_ctx->v21.mode_programming.programming, sizeof(struct dml2_display_cfg_programming)); - - /* Restore references to internals */ - dst_dml_ctx->v21.dml_init.dml2_instance = dst_dml2_instance; - - dst_dml_ctx->v21.mode_support.dml2_instance = dst_dml2_instance; - dst_dml_ctx->v21.mode_programming.dml2_instance = dst_dml2_instance; - - dst_dml_ctx->v21.mode_support.display_config = &dst_dml_ctx->v21.display_config; - dst_dml_ctx->v21.mode_programming.display_config = dst_dml_ctx->v21.mode_support.display_config; - - dst_dml_ctx->v21.mode_programming.programming = dst_dml2_programming; - - DC_FP_START(); - - /* need to initialize copied instance for internal references to be correct */ - dml2_initialize_instance(&dst_dml_ctx->v21.dml_init); - - DC_FP_END(); -} - -bool dml21_create_copy(struct dml2_context **dst_dml_ctx, - struct dml2_context *src_dml_ctx) -{ - /* Allocate memory for initializing DML21 instance */ - if (!dml21_allocate_memory(dst_dml_ctx)) - return false; - - dml21_copy(*dst_dml_ctx, src_dml_ctx); - - return true; -} - -void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config) -{ - dml21_init(in_dc, dml_ctx, config); -} - diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h deleted file mode 100644 index 15f92029d2e5..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
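[Editor's note — a minimal sketch of the pointer-preserving deep copy that dml21_copy() above performs: save the destination's own allocations, shallow-copy the top-level struct, copy the pointees, then restore the pointers. The struct names ctx/inst are hypothetical stand-ins for dml2_context and its owned internals, not patch code.]

struct inst { int state; };
struct ctx { struct inst *inst; int config; };

static void ctx_deep_copy(struct ctx *dst, const struct ctx *src)
{
	struct inst *saved = dst->inst;	/* preserve dst's own allocation */

	*dst = *src;			/* shallow copy clobbers the pointer */
	*saved = *src->inst;		/* copy the pointed-to internals */
	dst->inst = saved;		/* restore the reference */
}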
-
-
-#ifndef _DML21_WRAPPER_H_
-#define _DML21_WRAPPER_H_
-
-#include "os_types.h"
-#include "dml_top_soc_parameter_types.h"
-#include "dml_top_display_cfg_types.h"
-
-struct dc;
-struct dc_state;
-struct dml2_configuration_options;
-struct dml2_context;
-enum dc_validate_mode;
-
-/**
- * dml21_create - Creates dml21 context.
- * @in_dc: dc.
- * @dml_ctx: Created dml21 context.
- * @config: dml21 configuration options.
- *
- * Creation of DML21 is done as part of dc_state creation.
- * DML21 IP, SOC and STATES are initialized at
- * creation time.
- *
- * Return: True if dml2 is successfully created, false otherwise.
- */
-bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config);
-void dml21_destroy(struct dml2_context *dml2);
-void dml21_copy(struct dml2_context *dst_dml_ctx,
-	struct dml2_context *src_dml_ctx);
-bool dml21_create_copy(struct dml2_context **dst_dml_ctx,
-	struct dml2_context *src_dml_ctx);
-void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config);
-
-/**
- * dml21_validate - Determines if a display configuration is supported or not.
- * @in_dc: dc.
- * @context: dc_state to be validated.
- * @validate_mode: DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX
- *	will not populate context.res_ctx.
- *
- * Based on the validate_mode option, internally calls:
- *
- * -dml21_mode_check_and_programming - for DC_VALIDATE_MODE_AND_PROGRAMMING option
- *	Calculates if dc_state can be supported on the input display
- *	configuration. If supported, generates the necessary HW
- *	programming for the new dc_state.
- *
- * -dml21_check_mode_support - for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX option
- *	Calculates if dc_state can be supported for the input display
- *	config.
- *
- * Context: Two threads may not invoke this function concurrently unless they reference
- *	separate dc_states for validation.
- * Return: True if mode is supported, false otherwise.
- */
-bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx,
-	enum dc_validate_mode validate_mode);
-
-/* Prepare hubp mcache_regs for hubp mcache ID and split coordinate programming */
-void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx);
-
-/* Structure for inputting external SOCBB and DCNIP values for tool based debugging. */
-struct socbb_ip_params_external {
-	struct dml2_ip_capabilities ip_params;
-	struct dml2_soc_bb soc_bb;
-};
-
-/* mcache parameters decided by dml */
-struct dc_mcache_params {
-	bool valid;
-	/*
-	 * For iMALL, dedicated mall mcaches are required (sharing of last
-	 * slice possible); for legacy phantom or phantom without return,
-	 * only the mall mcaches need to be valid.
-	 */
-	bool requires_dedicated_mall_mcache;
-	unsigned int num_mcaches_plane0;
-	unsigned int num_mcaches_plane1;
-	/*
-	 * Generally, plane0/1 slices must use a disjoint set of caches,
-	 * but in some cases the final segment of the two planes can
-	 * use the same cache. If plane0_plane1 is set, then this is
-	 * allowed.
-	 *
-	 * Similarly, the caches allocated to MALL prefetcher are generally
-	 * disjoint, but if mall_prefetch is set, then the final segment
-	 * between the main and the mall pixel requestor can use the same
-	 * cache.
-	 *
-	 * Note that both bits may be set at the same time.
-	 */
-	struct {
-		bool mall_comb_mcache_p0;
-		bool mall_comb_mcache_p1;
-		bool plane0_plane1;
-	} last_slice_sharing;
-	/*
-	 * A plane is divided into vertical slices of mcaches,
-	 * which wrap on the surface width.
-	 *
-	 * For example, if the surface width is 7680, and split into
-	 * three slices of equal width, the boundary array would contain
-	 * [2560, 5120, 7680]
-	 *
-	 * The assignments are
-	 * 0 = [0 .. 2559]
-	 * 1 = [2560 .. 5119]
-	 * 2 = [5120 .. 7679]
-	 * 0 = [7680 .. INF]
-	 * The final element implicitly is the same as the first, and
-	 * at first seems invalid since it is never referenced, as
-	 * it lies outside the surface. However, it's useful when shifting
-	 * (see below).
-	 *
-	 * For any given valid mcache assignment, a shifted version, wrapped
-	 * on the surface width boundary, is also assumed to be valid.
-	 *
-	 * For example, shifting [2560, 5120, 7680] by -50 results in
-	 * [2510, 5070, 7630].
-	 *
-	 * The assignments are now:
-	 * 0 = [0 .. 2509]
-	 * 1 = [2510 .. 5069]
-	 * 2 = [5070 .. 7629]
-	 * 0 = [7630 .. INF]
-	 */
-	int mcache_x_offsets_plane0[DML2_MAX_MCACHES + 1];
-	int mcache_x_offsets_plane1[DML2_MAX_MCACHES + 1];
-};
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h
deleted file mode 100644
index 793e1c038efd..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h
+++ /dev/null
@@ -1,373 +0,0 @@
-// SPDX-License-Identifier: MIT
-//
-// Copyright 2024 Advanced Micro Devices, Inc.
-
-
-#ifndef __DML_DML_DCN4_SOC_BB__
-#define __DML_DML_DCN4_SOC_BB__
-
-#include "dml_top_soc_parameter_types.h"
-
-static const struct dml2_soc_qos_parameters dml_dcn4_variant_a_soc_qos_params = {
-	.derate_table = {
-		.system_active_urgent = {
-			.dram_derate_percent_pixel = 22,
-			.dram_derate_percent_vm = 0,
-			.dram_derate_percent_pixel_and_vm = 0,
-			.fclk_derate_percent = 76,
-			.dcfclk_derate_percent = 100,
-		},
-		.system_active_average = {
-			.dram_derate_percent_pixel = 17,
-			.dram_derate_percent_vm = 0,
-			.dram_derate_percent_pixel_and_vm = 0,
-			.fclk_derate_percent = 57,
-			.dcfclk_derate_percent = 75,
-		},
-		.dcn_mall_prefetch_urgent = {
-			.dram_derate_percent_pixel = 40,
-			.dram_derate_percent_vm = 0,
-			.dram_derate_percent_pixel_and_vm = 0,
-			.fclk_derate_percent = 83,
-			.dcfclk_derate_percent = 100,
-		},
-		.dcn_mall_prefetch_average = {
-			.dram_derate_percent_pixel = 33,
-			.dram_derate_percent_vm = 0,
-			.dram_derate_percent_pixel_and_vm = 0,
-			.fclk_derate_percent = 62,
-			.dcfclk_derate_percent = 83,
-		},
-		.system_idle_average = {
-			.dram_derate_percent_pixel = 70,
-			.dram_derate_percent_vm = 0,
-			.dram_derate_percent_pixel_and_vm = 0,
-			.fclk_derate_percent = 83,
-			.dcfclk_derate_percent = 100,
-		},
-	},
-	.writeback = {
-		.base_latency_us = 12,
-		.scaling_factor_us = 0,
-		.scaling_factor_mhz = 0,
-	},
-	.qos_params = {
-		.dcn4x = {
-			.df_qos_response_time_fclk_cycles = 300,
-			.max_round_trip_to_furthest_cs_fclk_cycles = 350,
-			.mall_overhead_fclk_cycles = 50,
-			.meta_trip_adder_fclk_cycles = 36,
-			.average_transport_distance_fclk_cycles = 257,
-			.umc_urgent_ramp_latency_margin = 50,
-			.umc_max_latency_margin = 30,
-			.umc_average_latency_margin = 20,
-			.fabric_max_transport_latency_margin = 20,
-			.fabric_average_transport_latency_margin = 10,
-
-			.per_uclk_dpm_params = {
-				{
-					.minimum_uclk_khz = 97 * 1000,
-					.urgent_ramp_uclk_cycles = 472,
-					.trip_to_memory_uclk_cycles = 827,
-					.meta_trip_to_memory_uclk_cycles = 827,
.maximum_latency_when_urgent_uclk_cycles = 72, - .average_latency_when_urgent_uclk_cycles = 61, - .maximum_latency_when_non_urgent_uclk_cycles = 827, - .average_latency_when_non_urgent_uclk_cycles = 118, - }, - }, - }, - }, - .qos_type = dml2_qos_param_type_dcn4x, -}; - -static const struct dml2_soc_bb dml2_socbb_dcn401 = { - .clk_table = { - .uclk = { - .clk_values_khz = {97000}, - .num_clk_values = 1, - }, - .fclk = { - .clk_values_khz = {300000, 2500000}, - .num_clk_values = 2, - }, - .dcfclk = { - .clk_values_khz = {200000, 1564000}, - .num_clk_values = 2, - }, - .dispclk = { - .clk_values_khz = {100000, 2000000}, - .num_clk_values = 2, - }, - .dppclk = { - .clk_values_khz = {100000, 2000000}, - .num_clk_values = 2, - }, - .dtbclk = { - .clk_values_khz = {100000, 1564000}, - .num_clk_values = 2, - }, - .phyclk = { - .clk_values_khz = {810000, 810000}, - .num_clk_values = 2, - }, - .socclk = { - .clk_values_khz = {300000, 1200000}, - .num_clk_values = 2, - }, - .dscclk = { - .clk_values_khz = {666667, 666667}, - .num_clk_values = 2, - }, - .phyclk_d18 = { - .clk_values_khz = {625000, 625000}, - .num_clk_values = 2, - }, - .phyclk_d32 = { - .clk_values_khz = {625000, 625000}, - .num_clk_values = 2, - }, - .dram_config = { - .channel_width_bytes = 2, - .channel_count = 16, - .transactions_per_clock = 16, - }, - }, - - .qos_parameters = { - .derate_table = { - .system_active_urgent = { - .dram_derate_percent_pixel = 22, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 76, - .dcfclk_derate_percent = 100, - }, - .system_active_average = { - .dram_derate_percent_pixel = 15, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 57, - .dcfclk_derate_percent = 75, - }, - .dcn_mall_prefetch_urgent = { - .dram_derate_percent_pixel = 40, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 83, - .dcfclk_derate_percent = 100, - }, - .dcn_mall_prefetch_average = { - .dram_derate_percent_pixel = 30, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 62, - .dcfclk_derate_percent = 83, - }, - .system_idle_average = { - .dram_derate_percent_pixel = 70, - .dram_derate_percent_vm = 0, - .dram_derate_percent_pixel_and_vm = 0, - .fclk_derate_percent = 83, - .dcfclk_derate_percent = 100, - }, - }, - .writeback = { - .base_latency_us = 0, - .scaling_factor_us = 0, - .scaling_factor_mhz = 0, - }, - .qos_params = { - .dcn4x = { - .df_qos_response_time_fclk_cycles = 300, - .max_round_trip_to_furthest_cs_fclk_cycles = 350, - .mall_overhead_fclk_cycles = 50, - .meta_trip_adder_fclk_cycles = 36, - .average_transport_distance_fclk_cycles = 260, - .umc_urgent_ramp_latency_margin = 50, - .umc_max_latency_margin = 30, - .umc_average_latency_margin = 20, - .fabric_max_transport_latency_margin = 20, - .fabric_average_transport_latency_margin = 10, - - .per_uclk_dpm_params = { - { - // State 1 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 472, - .trip_to_memory_uclk_cycles = 827, - .meta_trip_to_memory_uclk_cycles = 827, - .maximum_latency_when_urgent_uclk_cycles = 72, - .average_latency_when_urgent_uclk_cycles = 72, - .maximum_latency_when_non_urgent_uclk_cycles = 827, - .average_latency_when_non_urgent_uclk_cycles = 117, - }, - { - // State 2 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 546, - .trip_to_memory_uclk_cycles = 848, - .meta_trip_to_memory_uclk_cycles = 848, - .maximum_latency_when_urgent_uclk_cycles = 146, - 
.average_latency_when_urgent_uclk_cycles = 146, - .maximum_latency_when_non_urgent_uclk_cycles = 848, - .average_latency_when_non_urgent_uclk_cycles = 133, - }, - { - // State 3 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 564, - .trip_to_memory_uclk_cycles = 853, - .meta_trip_to_memory_uclk_cycles = 853, - .maximum_latency_when_urgent_uclk_cycles = 164, - .average_latency_when_urgent_uclk_cycles = 164, - .maximum_latency_when_non_urgent_uclk_cycles = 853, - .average_latency_when_non_urgent_uclk_cycles = 136, - }, - { - // State 4 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 613, - .trip_to_memory_uclk_cycles = 869, - .meta_trip_to_memory_uclk_cycles = 869, - .maximum_latency_when_urgent_uclk_cycles = 213, - .average_latency_when_urgent_uclk_cycles = 213, - .maximum_latency_when_non_urgent_uclk_cycles = 869, - .average_latency_when_non_urgent_uclk_cycles = 149, - }, - { - // State 5 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 632, - .trip_to_memory_uclk_cycles = 874, - .meta_trip_to_memory_uclk_cycles = 874, - .maximum_latency_when_urgent_uclk_cycles = 232, - .average_latency_when_urgent_uclk_cycles = 232, - .maximum_latency_when_non_urgent_uclk_cycles = 874, - .average_latency_when_non_urgent_uclk_cycles = 153, - }, - { - // State 6 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 665, - .trip_to_memory_uclk_cycles = 885, - .meta_trip_to_memory_uclk_cycles = 885, - .maximum_latency_when_urgent_uclk_cycles = 265, - .average_latency_when_urgent_uclk_cycles = 265, - .maximum_latency_when_non_urgent_uclk_cycles = 885, - .average_latency_when_non_urgent_uclk_cycles = 161, - }, - { - // State 7 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 689, - .trip_to_memory_uclk_cycles = 895, - .meta_trip_to_memory_uclk_cycles = 895, - .maximum_latency_when_urgent_uclk_cycles = 289, - .average_latency_when_urgent_uclk_cycles = 289, - .maximum_latency_when_non_urgent_uclk_cycles = 895, - .average_latency_when_non_urgent_uclk_cycles = 167, - }, - { - // State 8 - .minimum_uclk_khz = 0, - .urgent_ramp_uclk_cycles = 716, - .trip_to_memory_uclk_cycles = 902, - .meta_trip_to_memory_uclk_cycles = 902, - .maximum_latency_when_urgent_uclk_cycles = 316, - .average_latency_when_urgent_uclk_cycles = 316, - .maximum_latency_when_non_urgent_uclk_cycles = 902, - .average_latency_when_non_urgent_uclk_cycles = 174, - }, - }, - }, - }, - .qos_type = dml2_qos_param_type_dcn4x, - }, - - .power_management_parameters = { - .dram_clk_change_blackout_us = 400, - .fclk_change_blackout_us = 0, - .g7_ppt_blackout_us = 0, - .stutter_enter_plus_exit_latency_us = 54, - .stutter_exit_latency_us = 41, - .z8_stutter_enter_plus_exit_latency_us = 0, - .z8_stutter_exit_latency_us = 0, - /* - .g6_temp_read_blackout_us = { - 23.00, - 10.00, - 10.00, - 8.00, - 8.00, - 5.00, - 5.00, - 5.00, - }, - */ - }, - - .vmin_limit = { - .dispclk_khz = 600 * 1000, - }, - - .dprefclk_mhz = 720, - .xtalclk_mhz = 100, - .pcie_refclk_mhz = 100, - .dchub_refclk_mhz = 50, - .mall_allocated_for_dcn_mbytes = 64, - .max_outstanding_reqs = 512, - .fabric_datapath_to_dcn_data_return_bytes = 64, - .return_bus_width_bytes = 64, - .hostvm_min_page_size_kbytes = 0, - .gpuvm_min_page_size_kbytes = 256, - .phy_downspread_percent = 0.38, - .dcn_downspread_percent = 0.38, - .dispclk_dppclk_vco_speed_mhz = 4500, - .do_urgent_latency_adjustment = 0, - .mem_word_bytes = 32, - .num_dcc_mcaches = 8, - .mcache_size_bytes = 2048, - .mcache_line_size_bytes = 32, - .max_fclk_for_uclk_dpm_khz = 1250 * 1000, -}; - -static const struct 
dml2_ip_capabilities dml2_dcn401_max_ip_caps = {
-	.pipe_count = 4,
-	.otg_count = 4,
-	.num_dsc = 4,
-	.max_num_dp2p0_streams = 4,
-	.max_num_hdmi_frl_outputs = 1,
-	.max_num_dp2p0_outputs = 4,
-	.rob_buffer_size_kbytes = 192,
-	.config_return_buffer_size_in_kbytes = 1344,
-	.config_return_buffer_segment_size_in_kbytes = 64,
-	.meta_fifo_size_in_kentries = 22,
-	.compressed_buffer_segment_size_in_kbytes = 64,
-	.cursor_buffer_size = 24,
-	.max_flip_time_us = 80,
-	.max_flip_time_lines = 32,
-	.hostvm_mode = 0,
-	.subvp_drr_scheduling_margin_us = 100,
-	.subvp_prefetch_end_to_mall_start_us = 15,
-	.subvp_fw_processing_delay = 15,
-	.max_vactive_det_fill_delay_us = 400,
-
-	.fams2 = {
-		.max_allow_delay_us = 100 * 1000,
-		.scheduling_delay_us = 550,
-		.vertical_interrupt_ack_delay_us = 40,
-		.allow_programming_delay_us = 18,
-		.min_allow_width_us = 20,
-		.subvp_df_throttle_delay_us = 100,
-		.subvp_programming_delay_us = 200,
-		.subvp_prefetch_to_mall_delay_us = 18,
-		.drr_programming_delay_us = 35,
-
-		.lock_timeout_us = 5000,
-		.recovery_timeout_us = 5000,
-		.flip_programming_delay_us = 300,
-	},
-};
-
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml2_external_lib_deps.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml2_external_lib_deps.h
deleted file mode 100644
index 281d7ad230d8..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml2_external_lib_deps.h
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: MIT
-//
-// Copyright 2024 Advanced Micro Devices, Inc.
-
-#ifndef __DML2_EXTERNAL_LIB_DEPS__
-#define __DML2_EXTERNAL_LIB_DEPS__
-
-#include "os_types.h"
-
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h
deleted file mode 100644
index a64ec4dcf11a..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: MIT
-//
-// Copyright 2024 Advanced Micro Devices, Inc.
-
-#ifndef __DML_TOP_H__
-#define __DML_TOP_H__
-
-#include "dml_top_types.h"
-
-/*
- * Top Level Interface for DML2
- */
-
-/*
- * Returns the size of the DML instance for the caller to allocate
- */
-unsigned int dml2_get_instance_size_bytes(void);
-
-/*
- * Initializes the DML instance (i.e. with configuration, soc BB, IP params, etc...)
- */
-bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out);
-
-/*
- * Determines if the input mode is supported (boolean) on the SoC at all. Does not return
- * information on how mode should be programmed.
- */
-bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out);
-
-/*
- * Determines the full (optimized) programming for the input mode. Returns minimum
- * clocks as well as dchub register programming values for all pipes, and additional meta
- * such as ODM or MPCC combine factors.
- */
-bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out);
-
-/*
- * Determines the correct per pipe mcache register programming for a valid mode.
- * The mcache allocation must have been calculated (successfully) in a previous
- * call to dml2_build_mode_programming.
- * The actual hubp viewport dimensions must be what the actual registers will be
- * programmed to (i.e. based on scaler setup).
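- *
- * Editor's note (an assumed call order, inferred from the comments here,
- * not stated by the patch itself):
- *   dml2_initialize_instance()      once, with SoC BB and IP caps
- *   dml2_build_mode_programming()   computes the mcache allocation
- *   dml2_build_mcache_programming() then derives per pipe mcache registers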
- */ -bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h deleted file mode 100644 index 8e5a30287220..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ /dev/null @@ -1,189 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __dml2_TOP_DCHUB_REGISTERS_H__ -#define __dml2_TOP_DCHUB_REGISTERS_H__ - -#include "dml2_external_lib_deps.h" -// These types are uint32_t as they represent actual calculated register values for HW - -struct dml2_display_dlg_regs { - uint32_t refcyc_h_blank_end; - uint32_t dlg_vblank_end; - uint32_t min_dst_y_next_start; - uint32_t refcyc_per_htotal; - uint32_t refcyc_x_after_scaler; - uint32_t dst_y_after_scaler; - uint32_t dst_y_prefetch; - uint32_t dst_y_per_vm_vblank; - uint32_t dst_y_per_row_vblank; - uint32_t dst_y_per_vm_flip; - uint32_t dst_y_per_row_flip; - uint32_t ref_freq_to_pix_freq; - uint32_t vratio_prefetch; - uint32_t vratio_prefetch_c; - uint32_t refcyc_per_tdlut_group; - uint32_t refcyc_per_pte_group_vblank_l; - uint32_t refcyc_per_pte_group_vblank_c; - uint32_t refcyc_per_pte_group_flip_l; - uint32_t refcyc_per_pte_group_flip_c; - uint32_t dst_y_per_pte_row_nom_l; - uint32_t dst_y_per_pte_row_nom_c; - uint32_t refcyc_per_pte_group_nom_l; - uint32_t refcyc_per_pte_group_nom_c; - uint32_t refcyc_per_line_delivery_pre_l; - uint32_t refcyc_per_line_delivery_pre_c; - uint32_t refcyc_per_line_delivery_l; - uint32_t refcyc_per_line_delivery_c; - uint32_t refcyc_per_vm_group_vblank; - uint32_t refcyc_per_vm_group_flip; - uint32_t refcyc_per_vm_req_vblank; - uint32_t refcyc_per_vm_req_flip; - uint32_t dst_y_offset_cur0; - uint32_t chunk_hdl_adjust_cur0; - uint32_t vready_after_vcount0; - uint32_t dst_y_delta_drq_limit; - uint32_t refcyc_per_vm_dmdata; - uint32_t dmdata_dl_delta; - - // MRQ - uint32_t refcyc_per_meta_chunk_vblank_l; - uint32_t refcyc_per_meta_chunk_vblank_c; - uint32_t refcyc_per_meta_chunk_flip_l; - uint32_t refcyc_per_meta_chunk_flip_c; - uint32_t dst_y_per_meta_row_nom_l; - uint32_t dst_y_per_meta_row_nom_c; - uint32_t refcyc_per_meta_chunk_nom_l; - uint32_t refcyc_per_meta_chunk_nom_c; -}; - -struct dml2_display_ttu_regs { - uint32_t qos_level_low_wm; - uint32_t qos_level_high_wm; - uint32_t min_ttu_vblank; - uint32_t qos_level_flip; - uint32_t refcyc_per_req_delivery_l; - uint32_t refcyc_per_req_delivery_c; - uint32_t refcyc_per_req_delivery_cur0; - uint32_t refcyc_per_req_delivery_pre_l; - uint32_t refcyc_per_req_delivery_pre_c; - uint32_t refcyc_per_req_delivery_pre_cur0; - uint32_t qos_level_fixed_l; - uint32_t qos_level_fixed_c; - uint32_t qos_level_fixed_cur0; - uint32_t qos_ramp_disable_l; - uint32_t qos_ramp_disable_c; - uint32_t qos_ramp_disable_cur0; -}; - -struct dml2_display_arb_regs { - uint32_t max_req_outstanding; - uint32_t min_req_outstanding; - uint32_t sat_level_us; - uint32_t hvm_max_qos_commit_threshold; - uint32_t hvm_min_req_outstand_commit_threshold; - uint32_t compbuf_reserved_space_kbytes; - uint32_t compbuf_size; - uint32_t sdpif_request_rate_limit; - uint32_t allow_sdpif_rate_limit_when_cstate_req; - uint32_t dcfclk_deep_sleep_hysteresis; - uint32_t pstate_stall_threshold; -}; - -struct dml2_cursor_dlg_regs{ - uint32_t dst_x_offset; // CURSOR0_DST_X_OFFSET - uint32_t dst_y_offset; // CURSOR0_DST_Y_OFFSET - 
uint32_t chunk_hdl_adjust; // CURSOR0_CHUNK_HDL_ADJUST - - uint32_t qos_level_fixed; - uint32_t qos_ramp_disable; -}; - -struct dml2_display_plane_rq_regs { - uint32_t chunk_size; - uint32_t min_chunk_size; - uint32_t dpte_group_size; - uint32_t mpte_group_size; - uint32_t swath_height; - uint32_t pte_row_height_linear; - - // MRQ - uint32_t meta_chunk_size; - uint32_t min_meta_chunk_size; -}; - -struct dml2_display_rq_regs { - struct dml2_display_plane_rq_regs rq_regs_l; - struct dml2_display_plane_rq_regs rq_regs_c; - uint32_t drq_expansion_mode; - uint32_t prq_expansion_mode; - uint32_t crq_expansion_mode; - uint32_t plane1_base_address; - uint32_t unbounded_request_enabled; - - // MRQ - uint32_t mrq_expansion_mode; -}; - -struct dml2_display_mcache_regs { - uint32_t mcache_id_first; - uint32_t mcache_id_second; - uint32_t split_location; -}; - -struct dml2_hubp_pipe_mcache_regs { - struct { - struct dml2_display_mcache_regs p0; - struct dml2_display_mcache_regs p1; - } main; - struct { - struct dml2_display_mcache_regs p0; - struct dml2_display_mcache_regs p1; - } mall; -}; - -struct dml2_dchub_per_pipe_register_set { - struct dml2_display_rq_regs rq_regs; - struct dml2_display_ttu_regs ttu_regs; - struct dml2_display_dlg_regs dlg_regs; - - uint32_t det_size; -}; - -struct dml2_dchub_watermark_regs { - /* watermarks */ - uint32_t urgent; - uint32_t sr_enter; - uint32_t sr_exit; - uint32_t sr_enter_z8; - uint32_t sr_exit_z8; - uint32_t sr_enter_low_power; - uint32_t sr_exit_low_power; - uint32_t uclk_pstate; - uint32_t fclk_pstate; - uint32_t temp_read_or_ppt; - uint32_t usr; - /* qos */ - uint32_t refcyc_per_trip_to_mem; - uint32_t refcyc_per_meta_trip_to_mem; - uint32_t frac_urg_bw_flip; - uint32_t frac_urg_bw_nom; - uint32_t frac_urg_bw_mall; -}; - -enum dml2_dchub_watermark_reg_set_index { - DML2_DCHUB_WATERMARK_SET_A = 0, - DML2_DCHUB_WATERMARK_SET_B = 1, - DML2_DCHUB_WATERMARK_SET_C = 2, - DML2_DCHUB_WATERMARK_SET_D = 3, - DML2_DCHUB_WATERMARK_SET_NUM = 4, -}; - -struct dml2_dchub_global_register_set { - struct dml2_display_arb_regs arb_regs; - struct dml2_dchub_watermark_regs wm_regs[DML2_DCHUB_WATERMARK_SET_NUM]; - unsigned int num_watermark_sets; -}; - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h deleted file mode 100644 index 13749c9fcf18..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ /dev/null @@ -1,520 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
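[Editor's note — an illustrative consumer of the watermark register sets defined above in dml_top_dchub_registers.h; program_one_set() is a hypothetical callback, not part of this patch:]

static void example_program_watermarks(const struct dml2_dchub_global_register_set *regs,
		void (*program_one_set)(unsigned int idx, const struct dml2_dchub_watermark_regs *wm))
{
	unsigned int i;

	/* only the first num_watermark_sets entries (sets A..D at most) are valid */
	for (i = 0; i < regs->num_watermark_sets && i < DML2_DCHUB_WATERMARK_SET_NUM; i++)
		program_one_set(i, &regs->wm_regs[i]);
}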
- -#ifndef __DML_TOP_DISPLAY_CFG_TYPES_H__ -#define __DML_TOP_DISPLAY_CFG_TYPES_H__ - -#include "dml2_external_lib_deps.h" - -#define DML2_MAX_PLANES 8 -#define DML2_MAX_DCN_PIPES 8 -#define DML2_MAX_MCACHES 8 // assume plane is going to be supported by a max of 8 mcaches -#define DML2_MAX_WRITEBACK 3 - -enum dml2_swizzle_mode { - dml2_sw_linear, // SW_LINEAR accepts 256 byte aligned pitch and also 128 byte aligned pitch if DCC is not enabled - dml2_sw_256b_2d, - dml2_sw_4kb_2d, - dml2_sw_64kb_2d, - dml2_sw_256kb_2d, - - dml2_gfx11_sw_linear, - dml2_gfx11_sw_64kb_d, - dml2_gfx11_sw_64kb_d_t, - dml2_gfx11_sw_64kb_d_x, - dml2_gfx11_sw_64kb_r_x, - dml2_gfx11_sw_256kb_d_x, - dml2_gfx11_sw_256kb_r_x, - -}; - -enum dml2_source_format_class { - dml2_444_8 = 0, - dml2_444_16 = 1, - dml2_444_32 = 2, - dml2_444_64 = 3, - dml2_420_8 = 4, - dml2_420_10 = 5, - dml2_420_12 = 6, - dml2_rgbe_alpha = 9, - dml2_rgbe = 10, - dml2_mono_8 = 11, - dml2_mono_16 = 12, - dml2_422_planar_8 = 13, - dml2_422_planar_10 = 14, - dml2_422_planar_12 = 15, - dml2_422_packed_8 = 16, - dml2_422_packed_10 = 17, - dml2_422_packed_12 = 18 -}; - -enum dml2_sample_positioning { - dml2_interstitial = 0, - dml2_cosited = 1 -}; - -enum dml2_rotation_angle { - dml2_rotation_0 = 0, - dml2_rotation_90 = 1, - dml2_rotation_180 = 2, - dml2_rotation_270 = 3 -}; - -enum dml2_output_format_class { - dml2_444 = 0, - dml2_s422 = 1, - dml2_n422 = 2, - dml2_420 = 3 -}; - -enum dml2_output_encoder_class { - dml2_dp = 0, - dml2_edp = 1, - dml2_dp2p0 = 2, - dml2_hdmi = 3, - dml2_hdmifrl = 4, - dml2_none = 5 -}; - -enum dml2_output_link_dp_rate { - dml2_dp_rate_na = 0, - dml2_dp_rate_hbr = 1, - dml2_dp_rate_hbr2 = 2, - dml2_dp_rate_hbr3 = 3, - dml2_dp_rate_uhbr10 = 4, - dml2_dp_rate_uhbr13p5 = 5, - dml2_dp_rate_uhbr20 = 6 -}; - -enum dml2_uclk_pstate_change_strategy { - dml2_uclk_pstate_change_strategy_auto = 0, - dml2_uclk_pstate_change_strategy_force_vactive = 1, - dml2_uclk_pstate_change_strategy_force_vblank = 2, - dml2_uclk_pstate_change_strategy_force_drr = 3, - dml2_uclk_pstate_change_strategy_force_mall_svp = 4, - dml2_uclk_pstate_change_strategy_force_mall_full_frame = 5, -}; - -enum dml2_svp_mode_override { - dml2_svp_mode_override_auto = 0, - dml2_svp_mode_override_main_pipe = 1, - dml2_svp_mode_override_phantom_pipe = 2, //does not need to be defined explicitly, main overrides result in implicit phantom additions - dml2_svp_mode_override_phantom_pipe_no_data_return = 3, - dml2_svp_mode_override_imall = 4 -}; - -enum dml2_refresh_from_mall_mode_override { - dml2_refresh_from_mall_mode_override_auto = 0, - dml2_refresh_from_mall_mode_override_force_disable = 1, - dml2_refresh_from_mall_mode_override_force_enable = 2 -}; - -enum dml2_odm_mode { - dml2_odm_mode_auto = 0, - dml2_odm_mode_bypass, - dml2_odm_mode_combine_2to1, - dml2_odm_mode_combine_3to1, - dml2_odm_mode_combine_4to1, - dml2_odm_mode_split_1to2, - dml2_odm_mode_mso_1to2, - dml2_odm_mode_mso_1to4 -}; - -enum dml2_scaling_transform { - dml2_scaling_transform_explicit = 0, - dml2_scaling_transform_fullscreen, - dml2_scaling_transform_aspect_ratio, - dml2_scaling_transform_centered -}; - -enum dml2_dsc_enable_option { - dml2_dsc_disable = 0, - dml2_dsc_enable = 1, - dml2_dsc_enable_if_necessary = 2 -}; - -enum dml2_tdlut_addressing_mode { - dml2_tdlut_sw_linear = 0, - dml2_tdlut_simple_linear = 1 -}; - -enum dml2_tdlut_width_mode { - dml2_tdlut_width_17_cube = 0, - dml2_tdlut_width_33_cube = 1 -}; - -enum dml2_twait_budgeting_setting { - dml2_twait_budgeting_setting_ignore = 0,// 
Ignore this budget in twait - - dml2_twait_budgeting_setting_if_needed, // Budget for it only if needed - //(i.e. UCLK/FCLK DPM cannot be supported in active) - - dml2_twait_budgeting_setting_try, // Budget for it as long as there is an SoC state that - // can support it -}; - -struct dml2_get_cursor_dlg_reg{ - unsigned int cursor_x_position; - unsigned int cursor_hotspot_x; - unsigned int cursor_primary_offset; - unsigned int cursor_secondary_offset; - bool cursor_stereo_en; - bool cursor_2x_magnify; - double hratio; - double pixel_rate_mhz; - double dlg_refclk_mhz; -}; - -/// @brief Surface Parameters -struct dml2_surface_cfg { - enum dml2_swizzle_mode tiling; - - struct { - unsigned long pitch; // In elements, two pixels per element in 422 packed format - unsigned long width; - unsigned long height; - } plane0; - - - struct { - unsigned long pitch; - unsigned long width; - unsigned long height; - } plane1; - - struct { - bool enable; - struct { - unsigned long pitch; - } plane0; - struct { - unsigned long pitch; - } plane1; - - struct { - double dcc_rate_plane0; - double dcc_rate_plane1; - double fraction_of_zero_size_request_plane0; - double fraction_of_zero_size_request_plane1; - } informative; - } dcc; -}; - - -struct dml2_composition_cfg { - enum dml2_rotation_angle rotation_angle; - bool mirrored; - enum dml2_scaling_transform scaling_transform; - bool rect_out_height_spans_vactive; - - struct { - bool stationary; - struct { - unsigned long width; - unsigned long height; - unsigned long x_start; - unsigned long y_start; - } plane0; - - struct { - unsigned long width; - unsigned long height; - unsigned long x_start; - unsigned long y_start; - } plane1; - } viewport; - - struct { - bool enabled; - bool easf_enabled; - bool isharp_enabled; - bool upsp_enabled; - enum dml2_sample_positioning upsp_sample_positioning; - unsigned int upsp_vtaps; - struct { - double h_ratio; - double v_ratio; - unsigned int h_taps; - unsigned int v_taps; - } plane0; - - struct { - double h_ratio; - double v_ratio; - unsigned int h_taps; - unsigned int v_taps; - } plane1; - - unsigned long rect_out_width; - } scaler_info; -}; - -struct dml2_timing_cfg { - unsigned long h_total; - unsigned long v_total; - unsigned long h_blank_end; - unsigned long v_blank_end; - unsigned long h_front_porch; - unsigned long v_front_porch; - unsigned long h_sync_width; - unsigned long pixel_clock_khz; - unsigned long h_active; - unsigned long v_active; - unsigned int bpc; //FIXME: review with Jun - struct { - enum dml2_dsc_enable_option enable; - unsigned int dsc_compressed_bpp_x16; - struct { - // for dv to specify num dsc slices to use - unsigned int num_slices; - } overrides; - } dsc; - bool interlaced; - struct { - /* static */ - bool enabled; - unsigned long min_refresh_uhz; - unsigned int max_instant_vtotal_delta; - /* dynamic */ - bool disallowed; - bool drr_active_variable; - bool drr_active_fixed; - } drr_config; - unsigned long vblank_nom; -}; - -struct dml2_link_output_cfg { - enum dml2_output_format_class output_format; - enum dml2_output_encoder_class output_encoder; - unsigned int output_dp_lane_count; - enum dml2_output_link_dp_rate output_dp_link_rate; - unsigned long audio_sample_rate; - unsigned long audio_sample_layout; - bool output_disabled; // The stream does not go to a backend for output to a physical - //connector (e.g. writeback only, phantom pipe) goes to writeback - bool validate_output; // Do not validate the link configuration for this display stream. 
-}; - -struct dml2_writeback_info { - enum dml2_source_format_class pixel_format; - unsigned long input_width; - unsigned long input_height; - unsigned long output_width; - unsigned long output_height; - unsigned long v_taps; - unsigned long h_taps; - unsigned long v_taps_chroma; - unsigned long h_taps_chroma; - double h_ratio; - double v_ratio; -}; - -struct dml2_writeback_cfg { - unsigned int active_writebacks_per_stream; - struct dml2_writeback_info writeback_stream[DML2_MAX_WRITEBACK]; -}; - -struct dml2_plane_parameters { - unsigned int stream_index; // Identifies which plane will be composed - - enum dml2_source_format_class pixel_format; - /* - * The surface and composition structures use - * the terms plane0 and plane1. These planes - * are expected to hold the following data based - * on the pixel format. - * - * RGB or YUV Non-Planar Types: - * dml2_444_8 - * dml2_444_16 - * dml2_444_32 - * dml2_444_64 - * dml2_rgbe - * - * plane0 = argb or rgbe - * plane1 = not used - * - * YUV Planar-Types: - * dml2_420_8 - * dml2_420_10 - * dml2_420_12 - * - * plane0 = luma - * plane1 = chroma - * - * RGB Planar Types: - * dml2_rgbe_alpha - * - * plane0 = rgbe - * plane1 = alpha - * - * Mono Non-Planar Types: - * dml2_mono_8 - * dml2_mono_16 - * - * plane0 = luma - * plane1 = not used - */ - - struct dml2_surface_cfg surface; - struct dml2_composition_cfg composition; - - struct { - bool enable; - unsigned long lines_before_active_required; - unsigned long transmitted_bytes; - } dynamic_meta_data; - - struct { - unsigned int num_cursors; - unsigned long cursor_width; - unsigned long cursor_bpp; - } cursor; - - // For TDLUT, SW would assume TDLUT is setup and enable all the time and - // budget for worst case addressing/width mode - struct { - bool setup_for_tdlut; - enum dml2_tdlut_addressing_mode tdlut_addressing_mode; - enum dml2_tdlut_width_mode tdlut_width_mode; - bool tdlut_mpc_width_flag; - } tdlut; - - bool immediate_flip; - - struct { - // Logical overrides to power management policies (usually) - enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy; - enum dml2_refresh_from_mall_mode_override refresh_from_mall; - unsigned int det_size_override_kb; - unsigned int mpcc_combine_factor; - - // reserved_vblank_time_ns is the minimum time to reserve in vblank for Twait - // The actual reserved vblank time used for the corresponding stream in mode_programming would be at least as much as this per-plane override. 
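-		// Editor's illustration (assumption, not patch code): a value of
-		// 400000 here would reserve at least 400 us of vblank for Twait.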
- long reserved_vblank_time_ns; - unsigned int max_vactive_det_fill_delay_us; // 0 = no reserved time, +ve = explicit max delay - unsigned int vactive_latency_to_hide_for_pstate_admissibility_us; - unsigned int gpuvm_min_page_size_kbytes; - unsigned int hostvm_min_page_size_kbytes; - - enum dml2_svp_mode_override legacy_svp_config; //TODO remove in favor of svp_config - - struct { - // HW specific overrides, there's almost no reason to mess with these - // generally used for debugging or simulation - bool force_one_row_for_frame; - struct { - bool enable; - bool value; - } force_pte_buffer_mode; - double dppclk_mhz; - } hw; - } overrides; -}; - -struct dml2_stream_parameters { - struct dml2_timing_cfg timing; - struct dml2_link_output_cfg output; - struct dml2_writeback_cfg writeback; - - struct { - enum dml2_odm_mode odm_mode; - bool disable_dynamic_odm; - bool disable_subvp; - int minimum_vblank_idle_requirement_us; - bool minimize_active_latency_hiding; - - struct { - struct { - enum dml2_twait_budgeting_setting uclk_pstate; - enum dml2_twait_budgeting_setting fclk_pstate; - enum dml2_twait_budgeting_setting stutter_enter_exit; - } twait_budgeting; - } hw; - } overrides; -}; - -struct dml2_display_cfg { - bool gpuvm_enable; - bool ffbm_enable; - bool hostvm_enable; - - // Allocate DET proportionally between streams based on pixel rate - // and then allocate proportionally between planes. - bool minimize_det_reallocation; - - unsigned int gpuvm_max_page_table_levels; - unsigned int hostvm_max_non_cached_page_table_levels; - - struct dml2_plane_parameters plane_descriptors[DML2_MAX_PLANES]; - struct dml2_stream_parameters stream_descriptors[DML2_MAX_PLANES]; - - unsigned int num_planes; - unsigned int num_streams; - - struct { - struct { - // HW specific overrides, there's almost no reason to mess with these - // generally used for debugging or simulation - struct { - bool enable; - bool value; - } force_unbounded_requesting; - - struct { - bool enable; - bool value; - } force_nom_det_size_kbytes; - - bool mode_support_check_disable; - bool mcache_admissibility_check_disable; - bool surface_viewport_size_check_disable; - double dlg_ref_clk_mhz; - double dispclk_mhz; - double dcfclk_mhz; - bool optimize_tdlut_scheduling; // TBD: for DV, will set this to 1, to ensure tdlut schedule is calculated based on address/width mode - } hw; - - struct { - bool uclk_pstate_change_disable; - bool fclk_pstate_change_disable; - bool g6_temp_read_pstate_disable; - bool g7_ppt_pstate_disable; - } power_management; - - bool enhanced_prefetch_schedule_acceleration; - bool dcc_programming_assumes_scan_direction_unknown; - bool synchronize_timings; - bool synchronize_ddr_displays_for_uclk_pstate_change; - bool max_outstanding_when_urgent_expected_disable; - bool enable_subvp_implicit_pmo; //enables PMO to switch pipe uclk strategy to subvp, and generate phantom programming - unsigned int best_effort_min_active_latency_hiding_us; - bool all_streams_blanked; - } overrides; -}; - -struct dml2_pipe_configuration_descriptor { - struct { - unsigned int viewport_x_start; - unsigned int viewport_width; - } plane0; - - struct { - unsigned int viewport_x_start; - unsigned int viewport_width; - } plane1; - - bool plane1_enabled; - bool imall_enabled; -}; - -struct dml2_plane_mcache_configuration_descriptor { - const struct dml2_plane_parameters *plane_descriptor; - const struct dml2_mcache_surface_allocation *mcache_allocation; - - struct dml2_pipe_configuration_descriptor pipe_configurations[DML2_MAX_DCN_PIPES]; - char 
num_pipes; -}; - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h deleted file mode 100644 index 8f624a912e78..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML_TOP_POLICY_TYPES_H__ -#define __DML_TOP_POLICY_TYPES_H__ - -struct dml2_policy_parameters { - unsigned long odm_combine_dispclk_threshold_khz; - unsigned int max_immediate_flip_latency; -}; - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h deleted file mode 100644 index 4a9a0d5a09b7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ /dev/null @@ -1,215 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML_TOP_SOC_PARAMETER_TYPES_H__ -#define __DML_TOP_SOC_PARAMETER_TYPES_H__ - -#include "dml2_external_lib_deps.h" - -#define DML_MAX_CLK_TABLE_SIZE 20 - -struct dml2_soc_derate_values { - unsigned int dram_derate_percent_pixel; - unsigned int dram_derate_percent_vm; - unsigned int dram_derate_percent_pixel_and_vm; - - unsigned int fclk_derate_percent; - unsigned int dcfclk_derate_percent; -}; - -struct dml2_soc_derates { - struct dml2_soc_derate_values system_active_urgent; - struct dml2_soc_derate_values system_active_average; - struct dml2_soc_derate_values dcn_mall_prefetch_urgent; - struct dml2_soc_derate_values dcn_mall_prefetch_average; - struct dml2_soc_derate_values system_idle_average; -}; - -struct dml2_dcn32x_soc_qos_params { - struct { - unsigned int base_latency_us; - unsigned int base_latency_pixel_vm_us; - unsigned int base_latency_vm_us; - unsigned int scaling_factor_fclk_us; - unsigned int scaling_factor_mhz; - } urgent_latency_us; - - unsigned int loaded_round_trip_latency_fclk_cycles; - unsigned int urgent_out_of_order_return_per_channel_pixel_only_bytes; - unsigned int urgent_out_of_order_return_per_channel_pixel_and_vm_bytes; - unsigned int urgent_out_of_order_return_per_channel_vm_only_bytes; -}; - -struct dml2_dcn4_uclk_dpm_dependent_qos_params { - unsigned long minimum_uclk_khz; - unsigned int urgent_ramp_uclk_cycles; - unsigned int trip_to_memory_uclk_cycles; - unsigned int meta_trip_to_memory_uclk_cycles; - unsigned int maximum_latency_when_urgent_uclk_cycles; - unsigned int average_latency_when_urgent_uclk_cycles; - unsigned int maximum_latency_when_non_urgent_uclk_cycles; - unsigned int average_latency_when_non_urgent_uclk_cycles; -}; - -struct dml2_dcn4x_soc_qos_params { - unsigned int df_qos_response_time_fclk_cycles; - unsigned int max_round_trip_to_furthest_cs_fclk_cycles; - unsigned int mall_overhead_fclk_cycles; - unsigned int meta_trip_adder_fclk_cycles; - unsigned int average_transport_distance_fclk_cycles; - double umc_urgent_ramp_latency_margin; - double umc_max_latency_margin; - double umc_average_latency_margin; - double fabric_max_transport_latency_margin; - double fabric_average_transport_latency_margin; - struct dml2_dcn4_uclk_dpm_dependent_qos_params per_uclk_dpm_params[DML_MAX_CLK_TABLE_SIZE]; -}; - -enum dml2_qos_param_type { - dml2_qos_param_type_dcn3, - dml2_qos_param_type_dcn4x -}; - -struct dml2_soc_qos_parameters { - struct dml2_soc_derates derate_table; - struct { - unsigned int 
base_latency_us; - unsigned int scaling_factor_us; - unsigned int scaling_factor_mhz; - } writeback; - - union { - struct dml2_dcn32x_soc_qos_params dcn32x; - struct dml2_dcn4x_soc_qos_params dcn4x; - } qos_params; - - enum dml2_qos_param_type qos_type; -}; - -struct dml2_soc_power_management_parameters { - double dram_clk_change_blackout_us; - double dram_clk_change_read_only_us; - double dram_clk_change_write_only_us; - double fclk_change_blackout_us; - double g7_ppt_blackout_us; - double g7_temperature_read_blackout_us; - double stutter_enter_plus_exit_latency_us; - double stutter_exit_latency_us; - double low_power_stutter_enter_plus_exit_latency_us; - double low_power_stutter_exit_latency_us; - double z8_stutter_enter_plus_exit_latency_us; - double z8_stutter_exit_latency_us; - double z8_min_idle_time; - double g6_temp_read_blackout_us[DML_MAX_CLK_TABLE_SIZE]; - double type_b_dram_clk_change_blackout_us; - double type_b_ppt_blackout_us; -}; - -struct dml2_clk_table { - unsigned long clk_values_khz[DML_MAX_CLK_TABLE_SIZE]; - unsigned char num_clk_values; -}; - -struct dml2_dram_params { - unsigned int channel_width_bytes; - unsigned int channel_count; - unsigned int transactions_per_clock; -}; - -struct dml2_soc_state_table { - struct dml2_clk_table uclk; - struct dml2_clk_table fclk; - struct dml2_clk_table dcfclk; - struct dml2_clk_table dispclk; - struct dml2_clk_table dppclk; - struct dml2_clk_table dtbclk; - struct dml2_clk_table phyclk; - struct dml2_clk_table socclk; - struct dml2_clk_table dscclk; - struct dml2_clk_table phyclk_d18; - struct dml2_clk_table phyclk_d32; - - struct dml2_dram_params dram_config; -}; - -struct dml2_soc_vmin_clock_limits { - unsigned long dispclk_khz; - unsigned long dcfclk_khz; -}; - -struct dml2_soc_bb { - struct dml2_soc_state_table clk_table; - struct dml2_soc_qos_parameters qos_parameters; - struct dml2_soc_power_management_parameters power_management_parameters; - struct dml2_soc_vmin_clock_limits vmin_limit; - - double lower_bound_bandwidth_dchub; - double fraction_of_urgent_bandwidth_nominal_target; - double fraction_of_urgent_bandwidth_flip_target; - unsigned int dprefclk_mhz; - unsigned int xtalclk_mhz; - unsigned int pcie_refclk_mhz; - unsigned int dchub_refclk_mhz; - unsigned int mall_allocated_for_dcn_mbytes; - unsigned int max_outstanding_reqs; - unsigned long fabric_datapath_to_dcn_data_return_bytes; - unsigned long return_bus_width_bytes; - unsigned long hostvm_min_page_size_kbytes; - unsigned long gpuvm_min_page_size_kbytes; - double phy_downspread_percent; - double dcn_downspread_percent; - double dispclk_dppclk_vco_speed_mhz; - bool no_dfs; - bool do_urgent_latency_adjustment; - unsigned int mem_word_bytes; - unsigned int num_dcc_mcaches; - unsigned int mcache_size_bytes; - unsigned int mcache_line_size_bytes; - unsigned long max_fclk_for_uclk_dpm_khz; -}; - -struct dml2_ip_capabilities { - unsigned int pipe_count; - unsigned int otg_count; - unsigned int TDLUT_33cube_count; - unsigned int num_dsc; - unsigned int max_num_dp2p0_streams; - unsigned int max_num_hdmi_frl_outputs; - unsigned int max_num_dp2p0_outputs; - unsigned int max_num_wb; - unsigned int rob_buffer_size_kbytes; - unsigned int config_return_buffer_size_in_kbytes; - unsigned int config_return_buffer_segment_size_in_kbytes; - unsigned int meta_fifo_size_in_kentries; - unsigned int compressed_buffer_segment_size_in_kbytes; - unsigned int cursor_buffer_size; - unsigned int max_flip_time_us; - unsigned int max_flip_time_lines; - unsigned int hostvm_mode; - unsigned int 
subvp_drr_scheduling_margin_us; - unsigned int subvp_prefetch_end_to_mall_start_us; - unsigned int subvp_fw_processing_delay; - unsigned int max_vactive_det_fill_delay_us; - unsigned int ppt_max_allow_delay_ns; - unsigned int temp_read_max_allow_delay_us; - unsigned int dummy_pstate_max_allow_delay_us; - /* FAMS2 delays */ - struct { - unsigned int max_allow_delay_us; - unsigned int scheduling_delay_us; - unsigned int vertical_interrupt_ack_delay_us; // delay to acknowledge vline int - unsigned int allow_programming_delay_us; // time required to program allow - unsigned int min_allow_width_us; - unsigned int subvp_df_throttle_delay_us; - unsigned int subvp_programming_delay_us; - unsigned int subvp_prefetch_to_mall_delay_us; - unsigned int drr_programming_delay_us; - - unsigned int lock_timeout_us; - unsigned int recovery_timeout_us; - unsigned int flip_programming_delay_us; - } fams2; -}; - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h deleted file mode 100644 index 8646ce5f1c01..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ /dev/null @@ -1,750 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML_TOP_TYPES_H__ -#define __DML_TOP_TYPES_H__ - -#include "dml_top_display_cfg_types.h" -#include "dml_top_soc_parameter_types.h" -#include "dml_top_policy_types.h" -#include "dml_top_dchub_registers.h" - -#include "dmub_cmd.h" - -struct dml2_instance; - -enum dml2_project_id { - dml2_project_invalid = 0, - dml2_project_dcn4x_stage1, - dml2_project_dcn4x_stage2, - dml2_project_dcn4x_stage2_auto_drr_svp, -}; - -enum dml2_pstate_change_support { - dml2_pstate_change_vactive = 0, - dml2_pstate_change_vblank = 1, - dml2_pstate_change_vblank_and_vactive = 2, - dml2_pstate_change_drr = 3, - dml2_pstate_change_mall_svp = 4, - dml2_pstate_change_mall_full_frame = 6, - dml2_pstate_change_unsupported = 7 -}; - -enum dml2_output_type_and_rate__type { - dml2_output_type_unknown = 0, - dml2_output_type_dp = 1, - dml2_output_type_edp = 2, - dml2_output_type_dp2p0 = 3, - dml2_output_type_hdmi = 4, - dml2_output_type_hdmifrl = 5 -}; - -enum dml2_output_type_and_rate__rate { - dml2_output_rate_unknown = 0, - dml2_output_rate_dp_rate_hbr = 1, - dml2_output_rate_dp_rate_hbr2 = 2, - dml2_output_rate_dp_rate_hbr3 = 3, - dml2_output_rate_dp_rate_uhbr10 = 4, - dml2_output_rate_dp_rate_uhbr13p5 = 5, - dml2_output_rate_dp_rate_uhbr20 = 6, - dml2_output_rate_hdmi_rate_3x3 = 7, - dml2_output_rate_hdmi_rate_6x3 = 8, - dml2_output_rate_hdmi_rate_6x4 = 9, - dml2_output_rate_hdmi_rate_8x4 = 10, - dml2_output_rate_hdmi_rate_10x4 = 11, - dml2_output_rate_hdmi_rate_12x4 = 12, - dml2_output_rate_hdmi_rate_16x4 = 13, - dml2_output_rate_hdmi_rate_20x4 = 14 -}; - -struct dml2_pmo_options { - bool disable_vblank; - bool disable_svp; - bool disable_drr_var; - bool disable_drr_clamped; - bool disable_drr_var_when_var_active; - bool disable_drr_clamped_when_var_active; - bool disable_fams2; - bool disable_vactive_det_fill_bw_pad; /* dml2_project_dcn4x_stage2_auto_drr_svp and above only */ - bool disable_dyn_odm; - bool disable_dyn_odm_for_multi_stream; - bool disable_dyn_odm_for_stream_with_svp; - struct dml2_pmo_pstate_strategy *override_strategy_lists[DML2_MAX_PLANES]; - unsigned int num_override_strategies_per_list[DML2_MAX_PLANES]; -}; - -struct dml2_options { - enum dml2_project_id project_id; - struct dml2_pmo_options pmo_options; -}; -
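To make the configuration types above concrete, here is a minimal sketch of how a caller could populate struct dml2_options before handing it to the initialization path. It is not code from the removed header; it relies only on the enum and struct definitions above, and the variable name example_dml2_options is illustrative:

	/* Illustrative only: select a DML2 project variant and opt out of
	 * one p-state optimization. All other dml2_pmo_options "disable_*"
	 * knobs stay zero (false), i.e. those optimizations remain enabled. */
	static const struct dml2_options example_dml2_options = {
		.project_id = dml2_project_dcn4x_stage2,
		.pmo_options = {
			.disable_fams2 = true, /* skip FAMS2-assisted p-state strategies */
		},
	};

Because the options are dominated by boolean opt-out flags, zero-initialization is the "everything enabled" baseline, which is why the sketch only names the fields it changes.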
-struct dml2_initialize_instance_in_out { - struct dml2_instance *dml2_instance; - struct dml2_options options; - struct dml2_soc_bb soc_bb; - struct dml2_ip_capabilities ip_caps; - - struct { - void *explicit_ip_bb; - unsigned int explicit_ip_bb_size; - } overrides; -}; - -struct dml2_reset_instance_in_out { - struct dml2_instance *dml2_instance; -}; - -struct dml2_check_mode_supported_in_out { - /* - * Inputs - */ - struct dml2_instance *dml2_instance; - const struct dml2_display_cfg *display_config; - - /* - * Outputs - */ - bool is_supported; -}; - -struct dml2_mcache_surface_allocation { - bool valid; - /* - * For iMALL, dedicated mall mcaches are required (sharing of last - * slice possible); for legacy phantom or phantom without return, - * only the mall mcaches need to be valid. - */ - bool requires_dedicated_mall_mcache; - - unsigned int num_mcaches_plane0; - unsigned int num_mcaches_plane1; - /* - * A plane is divided into vertical slices of mcaches, - * which wrap on the surface width. - * - * For example, if the surface width is 7680, and split into - * three slices of equal width, the boundary array would contain - * [2560, 5120, 7680] - * - * The assignments are - * 0 = [0 .. 2559] - * 1 = [2560 .. 5119] - * 2 = [5120 .. 7679] - * 0 = [7680 .. INF] - * The final element is implicitly the same as the first, and at - * first seems invalid since it is never referenced (it lies - * outside the surface). However, it is useful when shifting - * (see below). - * - * For any given valid mcache assignment, a shifted version, - * wrapped on the surface width boundary, is also assumed to be - * valid. - * - * For example, shifting [2560, 5120, 7680] by -50 results in - * [2510, 5070, 7630]. - * - * The assignments are now: - * 0 = [0 .. 2509] - * 1 = [2510 .. 5069] - * 2 = [5070 .. 7629] - * 0 = [7630 .. INF] - */ - int mcache_x_offsets_plane0[DML2_MAX_MCACHES + 1]; - int mcache_x_offsets_plane1[DML2_MAX_MCACHES + 1]; - - /* - * Shift granularity is not necessarily 1 - */ - struct { - int p0; - int p1; - } shift_granularity; - - /* - * MCacheIDs have global scope in the SoC, and they are stored here. - * These IDs are generally not valid until all planes in a display - * configuration have had their mcache requirements calculated. - */ - int global_mcache_ids_plane0[DML2_MAX_MCACHES + 1]; - int global_mcache_ids_plane1[DML2_MAX_MCACHES + 1]; - int global_mcache_ids_mall_plane0[DML2_MAX_MCACHES + 1]; - int global_mcache_ids_mall_plane1[DML2_MAX_MCACHES + 1]; - - /* - * Generally, plane0/1 slices must use a disjoint set of caches, - * but in some cases the final segment of the two planes can - * use the same cache. If plane0_plane1 is set, then this is - * allowed. - * - * Similarly, the caches allocated to the MALL prefetcher are - * generally disjoint, but if mall_prefetch is set, then the final - * segment between the main and the mall pixel requestor can use - * the same cache. - * - * Note that both bits may be set at the same time.
- */ - struct { - bool mall_comb_mcache_p0; - bool mall_comb_mcache_p1; - bool plane0_plane1; - } last_slice_sharing; - - struct { - int meta_row_bytes_plane0; - int meta_row_bytes_plane1; - } informative; -}; - -enum dml2_pstate_type { - dml2_pstate_type_uclk, - dml2_pstate_type_ppt, - dml2_pstate_type_temp_read, - dml2_pstate_type_dummy_pstate, - dml2_pstate_type_count -}; - -enum dml2_pstate_method { - dml2_pstate_method_na = 0, - /* hw exclusive modes */ - dml2_pstate_method_vactive = 1, - dml2_pstate_method_vblank = 2, - dml2_pstate_method_reserved_hw = 5, - /* fw assisted exclusive modes */ - dml2_pstate_method_fw_svp = 6, - dml2_pstate_method_reserved_fw = 10, - /* fw assisted modes requiring drr modulation */ - dml2_pstate_method_fw_vactive_drr = 11, - dml2_pstate_method_fw_vblank_drr = 12, - dml2_pstate_method_fw_svp_drr = 13, - dml2_pstate_method_reserved_fw_drr_clamped = 20, - dml2_pstate_method_fw_drr = 21, - dml2_pstate_method_reserved_fw_drr_var = 22, - dml2_pstate_method_count -}; - -struct dml2_per_plane_programming { - const struct dml2_plane_parameters *plane_descriptor; - - union { - struct { - unsigned long dppclk_khz; - } dcn4x; - } min_clocks; - - struct dml2_mcache_surface_allocation mcache_allocation; - - // If a stream is using automatic or forced odm combine - // and the stream for this plane has num_odms_required > 1 - // num_dpps_required is always equal to num_odms_required for - // ALL planes of the stream - - // If a stream is using odm split, then this value is always 1 - unsigned int num_dpps_required; - - enum dml2_pstate_method uclk_pstate_support_method; - - // MALL size requirements for MALL SS and SubVP - unsigned int surface_size_mall_bytes; - unsigned int svp_size_mall_bytes; - - struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES]; - - struct { - bool valid; - struct dml2_plane_parameters descriptor; - struct dml2_mcache_surface_allocation mcache_allocation; - struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES]; - } phantom_plane; -}; - -union dml2_global_sync_programming { - struct { - unsigned int vstartup_lines; - unsigned int vupdate_offset_pixels; - unsigned int vupdate_vupdate_width_pixels; - unsigned int vready_offset_pixels; - unsigned int pstate_keepout_start_lines; - } dcn4x; -}; - -struct dml2_per_stream_programming { - const struct dml2_stream_parameters *stream_descriptor; - - union { - struct { - unsigned long dscclk_khz; - unsigned long dtbclk_khz; - unsigned long phyclk_khz; - } dcn4x; - } min_clocks; - - union dml2_global_sync_programming global_sync; - - unsigned int num_odms_required; - - enum dml2_pstate_method uclk_pstate_method; - - struct { - bool enabled; - struct dml2_stream_parameters descriptor; - union dml2_global_sync_programming global_sync; - } phantom_stream; - - union dmub_cmd_fams2_config fams2_base_params; - union { - union dmub_cmd_fams2_config fams2_sub_params; - union dmub_fams2_stream_static_sub_state_v2 fams2_sub_params_v2; - }; -}; - -//----------------- -// Mode Support Information -//----------------- - -struct dml2_mode_support_info { - bool ModeIsSupported; //pipe_count = ip_params->max_num_dpp; - ip_caps->otg_count = ip_params->max_num_otg; - ip_caps->num_dsc = ip_params->num_dsc; - ip_caps->max_num_dp2p0_streams = ip_params->max_num_dp2p0_streams; - ip_caps->max_num_dp2p0_outputs = ip_params->max_num_dp2p0_outputs; - ip_caps->max_num_hdmi_frl_outputs = ip_params->max_num_hdmi_frl_outputs; - ip_caps->rob_buffer_size_kbytes = ip_params->rob_buffer_size_kbytes; - 
ip_caps->config_return_buffer_size_in_kbytes = ip_params->config_return_buffer_size_in_kbytes; - ip_caps->config_return_buffer_segment_size_in_kbytes = ip_params->config_return_buffer_segment_size_in_kbytes; - ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries; - ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes; - ip_caps->cursor_buffer_size = ip_params->cursor_buffer_size; - ip_caps->max_flip_time_us = ip_params->max_flip_time_us; - ip_caps->max_flip_time_lines = ip_params->max_flip_time_lines; - ip_caps->hostvm_mode = ip_params->hostvm_mode; - - // FIXME_STAGE2: clean up after adding all DV overrides to ip_caps - ip_caps->subvp_drr_scheduling_margin_us = 100; - ip_caps->subvp_prefetch_end_to_mall_start_us = 15; - ip_caps->subvp_fw_processing_delay = 16; - -} - -static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, const struct dml2_ip_capabilities *ip_caps) -{ - ip_params->max_num_dpp = ip_caps->pipe_count; - ip_params->max_num_otg = ip_caps->otg_count; - ip_params->num_dsc = ip_caps->num_dsc; - ip_params->max_num_dp2p0_streams = ip_caps->max_num_dp2p0_streams; - ip_params->max_num_dp2p0_outputs = ip_caps->max_num_dp2p0_outputs; - ip_params->max_num_hdmi_frl_outputs = ip_caps->max_num_hdmi_frl_outputs; - ip_params->rob_buffer_size_kbytes = ip_caps->rob_buffer_size_kbytes; - ip_params->config_return_buffer_size_in_kbytes = ip_caps->config_return_buffer_size_in_kbytes; - ip_params->config_return_buffer_segment_size_in_kbytes = ip_caps->config_return_buffer_segment_size_in_kbytes; - ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries; - ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes; - ip_params->cursor_buffer_size = ip_caps->cursor_buffer_size; - ip_params->max_flip_time_us = ip_caps->max_flip_time_us; - ip_params->max_flip_time_lines = ip_caps->max_flip_time_lines; - ip_params->hostvm_mode = ip_caps->hostvm_mode; -} - -bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out) -{ - struct dml2_core_instance *core = in_out->instance; - - if (!in_out->minimum_clock_table) - return false; - else - core->minimum_clock_table = in_out->minimum_clock_table; - - if (in_out->explicit_ip_bb && in_out->explicit_ip_bb_size > 0) { - memcpy(&core->clean_me_up.mode_lib.ip, in_out->explicit_ip_bb, in_out->explicit_ip_bb_size); - - // FIXME_STAGE2: - // DV still uses the stage1 ip_param_st for each variant, so the ip_caps must be patched with the ip_param info. - // DV should move to ip_caps, but more overrides need to move into ip_caps first. - patch_ip_caps_with_explicit_ip_params(in_out->ip_caps, in_out->explicit_ip_bb); - core->clean_me_up.mode_lib.ip.subvp_pstate_allow_width_us = core_dcn4_ip_caps_base.subvp_pstate_allow_width_us; - core->clean_me_up.mode_lib.ip.subvp_fw_processing_delay_us = core_dcn4_ip_caps_base.subvp_fw_processing_delay_us; - core->clean_me_up.mode_lib.ip.subvp_swath_height_margin_lines = core_dcn4_ip_caps_base.subvp_swath_height_margin_lines; - } else { - memcpy(&core->clean_me_up.mode_lib.ip, &core_dcn4_ip_caps_base, sizeof(struct dml2_core_ip_params)); - patch_ip_params_with_ip_caps(&core->clean_me_up.mode_lib.ip, in_out->ip_caps); - core->clean_me_up.mode_lib.ip.imall_supported = false; - } - - memcpy(&core->clean_me_up.mode_lib.soc, in_out->soc_bb, sizeof(struct dml2_soc_bb)); - memcpy(&core->clean_me_up.mode_lib.ip_caps, in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); - - return true; -} - -static
void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, - const struct dml2_implicit_svp_meta *meta) -{ - memcpy(phantom, main, sizeof(struct dml2_stream_parameters)); - - phantom->timing.v_total = meta->v_total; - phantom->timing.v_active = meta->v_active; - phantom->timing.v_front_porch = meta->v_front_porch; - phantom->timing.v_blank_end = phantom->timing.v_total - phantom->timing.v_front_porch - phantom->timing.v_active; - phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; - phantom->timing.drr_config.enabled = false; -} - -static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, - const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) -{ - memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); - - phantom->stream_index = phantom_stream_index; - phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; - phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; - phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( - (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), - (double)main->composition.viewport.plane0.height); - phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(math_ceil2( - (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), - (double)main->composition.viewport.plane1.height); - phantom->immediate_flip = false; - phantom->dynamic_meta_data.enable = false; - phantom->cursor.num_cursors = 0; - phantom->cursor.cursor_width = 0; - phantom->tdlut.setup_for_tdlut = false; -} - -static void expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, - struct dml2_core_scratch *scratch) -{ - unsigned int stream_index, plane_index; - const struct dml2_plane_parameters *main_plane; - const struct dml2_stream_parameters *main_stream; - const struct dml2_stream_parameters *phantom_stream; - - memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); - memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); - memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); - memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); - - if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) - return; - - /* disable unbounded requesting for all planes until stage 3 has been performed */ - if (!display_cfg->stage3.performed) { - svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true; - svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false; - } - // Create the phantom streams - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - main_stream = &display_cfg->display_config.stream_descriptors[stream_index]; - scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; - scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index; - - if (display_cfg->stage3.stream_svp_meta[stream_index].valid) { - // Create the phantom stream - 
create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], - main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]); - - // Associate this phantom stream to the main stream - scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index; - scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams; - - // Increment num streams - svp_expanded_display_cfg->num_streams++; - } - } - - // Create the phantom planes - for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { - main_plane = &display_cfg->display_config.plane_descriptors[plane_index]; - - if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { - main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index]; - phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]]; - create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], - main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream); - - // Associate this phantom plane to the main plane - scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index; - scratch->main_plane_index_to_phantom_plane_index[plane_index] = svp_expanded_display_cfg->num_planes; - - // Increment num planes - svp_expanded_display_cfg->num_planes++; - - // Adjust the main plane settings - svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe; - } - } -} - -static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_instance *core, const struct display_configuation_with_meta *display_cfg, - const struct dml2_display_cfg *svp_expanded_display_cfg, struct dml2_display_cfg_programming *programming, struct dml2_core_scratch *scratch) -{ - unsigned int stream_index, plane_index, pipe_offset, stream_already_populated_mask, main_plane_index, mcache_index; - unsigned int total_main_mcaches_required = 0; - int total_pipe_regs_copied = 0; - int dml_internal_pipe_index = 0; - const struct dml2_plane_parameters *main_plane; - const struct dml2_plane_parameters *phantom_plane; - const struct dml2_stream_parameters *main_stream; - const struct dml2_stream_parameters *phantom_stream; - - // Copy the unexpanded display config to output - memcpy(&programming->display_config, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); - - // Set the global register values - dml2_core_calcs_get_arb_params(&display_cfg->display_config, &core->clean_me_up.mode_lib, &programming->global_regs.arb_regs); - // Get watermarks uses display config for ref clock override, so it doesn't matter whether we pass the pre or post expansion - // display config - dml2_core_calcs_get_watermarks(&display_cfg->display_config, &core->clean_me_up.mode_lib, &programming->global_regs.wm_regs[0]); - - // Check if FAMS2 is required - if (display_cfg->stage3.performed && display_cfg->stage3.success) { - programming->fams2_required = display_cfg->stage3.fams2_required; - - dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config); - } - - // Only loop over all the main streams (the implicit svp streams will be packed as part of the 
main stream) - for (stream_index = 0; stream_index < programming->display_config.num_streams; stream_index++) { - main_stream = &svp_expanded_display_cfg->stream_descriptors[stream_index]; - phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[stream_index]]; - - // Set the descriptor - programming->stream_programming[stream_index].stream_descriptor = &programming->display_config.stream_descriptors[stream_index]; - - // Set the odm combine factor - programming->stream_programming[stream_index].num_odms_required = display_cfg->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used; - - // Check if the stream has implicit SVP enabled - if (main_stream != phantom_stream) { - // If so, copy the phantom stream descriptor - programming->stream_programming[stream_index].phantom_stream.enabled = true; - memcpy(&programming->stream_programming[stream_index].phantom_stream.descriptor, phantom_stream, sizeof(struct dml2_stream_parameters)); - } else { - programming->stream_programming[stream_index].phantom_stream.enabled = false; - } - - // Due to the way DML indexes data internally, it's easier to populate the rest of the display - // stream programming in the next stage - } - - dml_internal_pipe_index = 0; - total_pipe_regs_copied = 0; - stream_already_populated_mask = 0x0; - - // Loop over all main planes - for (plane_index = 0; plane_index < programming->display_config.num_planes; plane_index++) { - main_plane = &svp_expanded_display_cfg->plane_descriptors[plane_index]; - - // Set the descriptor - programming->plane_programming[plane_index].plane_descriptor = &programming->display_config.plane_descriptors[plane_index]; - - // Set the mpc combine factor - programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index]; - - // Setup the appropriate p-state strategy - if (display_cfg->stage3.performed && display_cfg->stage3.success) { - programming->plane_programming[plane_index].uclk_pstate_support_method = display_cfg->stage3.pstate_switch_modes[plane_index]; - } else { - programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na; - } - - dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); - - memcpy(&programming->plane_programming[plane_index].mcache_allocation, - &display_cfg->stage2.mcache_allocations[plane_index], - sizeof(struct dml2_mcache_surface_allocation)); - total_main_mcaches_required += programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane0 + - programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane1 - - (programming->plane_programming[plane_index].mcache_allocation.last_slice_sharing.plane0_plane1 ? 
1 : 0); - - for (pipe_offset = 0; pipe_offset < programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) { - // Assign storage for this pipe's register values - programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied]; - memset(programming->plane_programming[plane_index].pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set)); - total_pipe_regs_copied++; - - // Populate the main plane regs - dml2_core_calcs_get_pipe_regs(svp_expanded_display_cfg, &core->clean_me_up.mode_lib, programming->plane_programming[plane_index].pipe_regs[pipe_offset], dml_internal_pipe_index); - - // Multiple planes can refer to the same stream index, so it's only necessary to populate it once - if (!(stream_already_populated_mask & (0x1 << main_plane->stream_index))) { - dml2_core_calcs_get_stream_programming(&core->clean_me_up.mode_lib, &programming->stream_programming[main_plane->stream_index], dml_internal_pipe_index); - - programming->stream_programming[main_plane->stream_index].uclk_pstate_method = programming->plane_programming[plane_index].uclk_pstate_support_method; - - /* unconditionally populate fams2 params */ - dml2_core_calcs_get_stream_fams2_programming(&core->clean_me_up.mode_lib, - display_cfg, - &programming->stream_programming[main_plane->stream_index].fams2_base_params, - &programming->stream_programming[main_plane->stream_index].fams2_sub_params, - programming->stream_programming[main_plane->stream_index].uclk_pstate_method, - plane_index); - - stream_already_populated_mask |= (0x1 << main_plane->stream_index); - } - dml_internal_pipe_index++; - } - } - - for (plane_index = programming->display_config.num_planes; plane_index < svp_expanded_display_cfg->num_planes; plane_index++) { - phantom_plane = &svp_expanded_display_cfg->plane_descriptors[plane_index]; - main_plane_index = scratch->phantom_plane_index_to_main_plane_index[plane_index]; - main_plane = &svp_expanded_display_cfg->plane_descriptors[main_plane_index]; - - programming->plane_programming[main_plane_index].phantom_plane.valid = true; - memcpy(&programming->plane_programming[main_plane_index].phantom_plane.descriptor, phantom_plane, sizeof(struct dml2_plane_parameters)); - - dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[main_plane_index].svp_size_mall_bytes, dml_internal_pipe_index); - - /* generate mcache allocation, phantoms use identical mcache configuration, but in the MALL set and unique mcache ID's beginning after all main ID's */ - memcpy(&programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation, - &programming->plane_programming[main_plane_index].mcache_allocation, - sizeof(struct dml2_mcache_surface_allocation)); - for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane0; mcache_index++) { - programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index] += total_main_mcaches_required; - programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane0[mcache_index] = - programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index]; - } - for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane1; mcache_index++) { - 
programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index] += total_main_mcaches_required; - programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane1[mcache_index] = - programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index]; - } - - for (pipe_offset = 0; pipe_offset < programming->plane_programming[main_plane_index].num_dpps_required; pipe_offset++) { - // Assign storage for this pipe's register values - programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied]; - memset(programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set)); - total_pipe_regs_copied++; - - // Populate the phantom plane regs - dml2_core_calcs_get_pipe_regs(svp_expanded_display_cfg, &core->clean_me_up.mode_lib, programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset], dml_internal_pipe_index); - // Populate the phantom stream specific programming - if (!(stream_already_populated_mask & (0x1 << phantom_plane->stream_index))) { - dml2_core_calcs_get_global_sync_programming(&core->clean_me_up.mode_lib, &programming->stream_programming[main_plane->stream_index].phantom_stream.global_sync, dml_internal_pipe_index); - - stream_already_populated_mask |= (0x1 << phantom_plane->stream_index); - } - - dml_internal_pipe_index++; - } - } -} - -bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out) -{ - struct dml2_core_instance *core = (struct dml2_core_instance *)in_out->instance; - struct dml2_core_mode_support_locals *l = &core->scratch.mode_support_locals; - - bool result; - unsigned int i, stream_index, stream_bitmask; - int unsigned odm_count, num_odm_output_segments, dpp_count; - - expand_implict_subvp(in_out->display_cfg, &l->svp_expanded_display_cfg, &core->scratch); - - l->mode_support_ex_params.mode_lib = &core->clean_me_up.mode_lib; - l->mode_support_ex_params.in_display_cfg = &l->svp_expanded_display_cfg; - l->mode_support_ex_params.min_clk_table = in_out->min_clk_table; - l->mode_support_ex_params.min_clk_index = in_out->min_clk_index; - l->mode_support_ex_params.out_evaluation_info = &in_out->mode_support_result.cfg_support_info.clean_me_up.support_info; - - result = dml2_core_calcs_mode_support_ex(&l->mode_support_ex_params); - - in_out->mode_support_result.cfg_support_info.is_supported = result; - - if (result) { - in_out->mode_support_result.global.dispclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDISPCLK * 1000); - in_out->mode_support_result.global.dcfclk_deepsleep_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.dcfclk_deepsleep * 1000); - in_out->mode_support_result.global.socclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.SOCCLK * 1000); - - in_out->mode_support_result.global.fclk_pstate_supported = l->mode_support_ex_params.out_evaluation_info->global_fclk_change_supported; - in_out->mode_support_result.global.uclk_pstate_supported = l->mode_support_ex_params.out_evaluation_info->global_dram_clock_change_supported; - - in_out->mode_support_result.global.active.fclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.FabricClock * 1000); - in_out->mode_support_result.global.active.dcfclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.DCFCLK * 1000); - - - 
in_out->mode_support_result.global.svp_prefetch.fclk_khz = (unsigned long)core->clean_me_up.mode_lib.ms.FabricClock * 1000; - in_out->mode_support_result.global.svp_prefetch.dcfclk_khz = (unsigned long)core->clean_me_up.mode_lib.ms.DCFCLK * 1000; - - in_out->mode_support_result.global.active.average_bw_sdp_kbps = 0; - in_out->mode_support_result.global.active.urgent_bw_dram_kbps = 0; - in_out->mode_support_result.global.svp_prefetch.average_bw_sdp_kbps = 0; - in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = 0; - - in_out->mode_support_result.global.active.average_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] * 1000), 1.0); - in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] * 1000), 1.0); - in_out->mode_support_result.global.svp_prefetch.average_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] * 1000), 1.0); - in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] * 1000), 1.0); - - in_out->mode_support_result.global.active.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0); - in_out->mode_support_result.global.active.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0); - in_out->mode_support_result.global.svp_prefetch.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); - in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); - DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_sdp_kbps); - DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps); - DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_dram_kbps); - DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps); - - for (i = 0; i < l->svp_expanded_display_cfg.num_planes; i++) { - in_out->mode_support_result.per_plane[i].dppclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDPPCLK[i] * 1000); - } - - stream_bitmask = 0; - for (i = 0; i < 
l->svp_expanded_display_cfg.num_planes; i++) { - odm_count = 1; - dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i]; - num_odm_output_segments = 1; - - switch (l->mode_support_ex_params.out_evaluation_info->ODMMode[i]) { - case dml2_odm_mode_bypass: - odm_count = 1; - dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i]; - break; - case dml2_odm_mode_combine_2to1: - odm_count = 2; - dpp_count = 2; - break; - case dml2_odm_mode_combine_3to1: - odm_count = 3; - dpp_count = 3; - break; - case dml2_odm_mode_combine_4to1: - odm_count = 4; - dpp_count = 4; - break; - case dml2_odm_mode_split_1to2: - case dml2_odm_mode_mso_1to2: - num_odm_output_segments = 2; - break; - case dml2_odm_mode_mso_1to4: - num_odm_output_segments = 4; - break; - case dml2_odm_mode_auto: - default: - odm_count = 1; - dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i]; - break; - } - - in_out->mode_support_result.cfg_support_info.plane_support_info[i].dpps_used = dpp_count; - - dml2_core_calcs_get_plane_support_info(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->mode_support_result.cfg_support_info.plane_support_info[i], i); - - stream_index = l->svp_expanded_display_cfg.plane_descriptors[i].stream_index; - - in_out->mode_support_result.per_stream[stream_index].dscclk_khz = (unsigned int)core->clean_me_up.mode_lib.ms.required_dscclk_freq_mhz[i] * 1000; - DML_LOG_VERBOSE("CORE_DCN4::%s: i=%d stream_index=%d, in_out->mode_support_result.per_stream[stream_index].dscclk_khz = %u\n", __func__, i, stream_index, in_out->mode_support_result.per_stream[stream_index].dscclk_khz); - - if (!((stream_bitmask >> stream_index) & 0x1)) { - in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used = odm_count; - in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].num_odm_output_segments = num_odm_output_segments; - in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].dsc_enable = l->mode_support_ex_params.out_evaluation_info->DSCEnabled[i]; - in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].num_dsc_slices = l->mode_support_ex_params.out_evaluation_info->NumberOfDSCSlices[i]; - dml2_core_calcs_get_stream_support_info(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index], i); - in_out->mode_support_result.per_stream[stream_index].dtbclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDTBCLK[i] * 1000); - stream_bitmask |= 0x1 << stream_index; - } - } - } - - return result; -} - -static int lookup_uclk_dpm_index_by_freq(unsigned long uclk_freq_khz, struct dml2_soc_bb *soc_bb) -{ - int i; - - for (i = 0; i < soc_bb->clk_table.uclk.num_clk_values; i++) { - if (uclk_freq_khz == soc_bb->clk_table.uclk.clk_values_khz[i]) - return i; - } - return 0; -} - -bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out) -{ - struct dml2_core_instance *core = (struct dml2_core_instance *)in_out->instance; - struct dml2_core_mode_programming_locals *l = &core->scratch.mode_programming_locals; - - bool result = false; - unsigned int pipe_offset; - int dml_internal_pipe_index; - int total_pipe_regs_copied = 0; - int stream_already_populated_mask = 0; - - int main_stream_index; - unsigned int plane_index; - - expand_implict_subvp(in_out->display_cfg, &l->svp_expanded_display_cfg, &core->scratch); - - 
l->mode_programming_ex_params.mode_lib = &core->clean_me_up.mode_lib; - l->mode_programming_ex_params.in_display_cfg = &l->svp_expanded_display_cfg; - l->mode_programming_ex_params.min_clk_table = in_out->instance->minimum_clock_table; - l->mode_programming_ex_params.cfg_support_info = in_out->cfg_support_info; - l->mode_programming_ex_params.programming = in_out->programming; - l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4x.active.uclk_khz, - &core->clean_me_up.mode_lib.soc); - - result = dml2_core_calcs_mode_programming_ex(&l->mode_programming_ex_params); - - if (result) { - // If the input display configuration contains implicit SVP, we need to use a special packer - if (in_out->display_cfg->display_config.overrides.enable_subvp_implicit_pmo) { - pack_mode_programming_params_with_implicit_subvp(core, in_out->display_cfg, &l->svp_expanded_display_cfg, in_out->programming, &core->scratch); - } else { - memcpy(&in_out->programming->display_config, in_out->display_cfg, sizeof(struct dml2_display_cfg)); - - dml2_core_calcs_get_arb_params(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->programming->global_regs.arb_regs); - dml2_core_calcs_get_watermarks(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->programming->global_regs.wm_regs[0]); - - dml_internal_pipe_index = 0; - - for (plane_index = 0; plane_index < in_out->programming->display_config.num_planes; plane_index++) { - in_out->programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index]; - - if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp; - else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp; - else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp; - else { - if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vactive; - else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vblank; - else - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na; - } - - dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); - - memcpy(&in_out->programming->plane_programming[plane_index].mcache_allocation, - &in_out->display_cfg->stage2.mcache_allocations[plane_index], - sizeof(struct dml2_mcache_surface_allocation)); - - for (pipe_offset = 0; pipe_offset <
in_out->programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) { - in_out->programming->plane_programming[plane_index].plane_descriptor = &in_out->programming->display_config.plane_descriptors[plane_index]; - - // Assign storage for this pipe's register values - in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &in_out->programming->pipe_regs[total_pipe_regs_copied]; - memset(in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set)); - total_pipe_regs_copied++; - - // Populate - dml2_core_calcs_get_pipe_regs(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset], dml_internal_pipe_index); - - main_stream_index = in_out->programming->display_config.plane_descriptors[plane_index].stream_index; - - // Multiple planes can refer to the same stream index, so it's only necessary to populate it once - if (!(stream_already_populated_mask & (0x1 << main_stream_index))) { - in_out->programming->stream_programming[main_stream_index].stream_descriptor = &in_out->programming->display_config.stream_descriptors[main_stream_index]; - in_out->programming->stream_programming[main_stream_index].num_odms_required = in_out->cfg_support_info->stream_support_info[main_stream_index].odms_used; - dml2_core_calcs_get_stream_programming(&core->clean_me_up.mode_lib, &in_out->programming->stream_programming[main_stream_index], dml_internal_pipe_index); - - stream_already_populated_mask |= (0x1 << main_stream_index); - } - dml_internal_pipe_index++; - } - } - } - } - - return result; -} - -bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out) -{ - struct dml2_core_internal_display_mode_lib *mode_lib = &in_out->instance->clean_me_up.mode_lib; - - if (in_out->mode_is_supported) - in_out->programming->informative.voltage_level = in_out->instance->scratch.mode_programming_locals.mode_programming_ex_params.min_clk_index; - else - in_out->programming->informative.voltage_level = in_out->instance->scratch.mode_support_locals.mode_support_ex_params.min_clk_index; - - dml2_core_calcs_get_informative(mode_lib, in_out->programming); - return true; -} - -bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out) -{ - memset(in_out->mcache_allocation, 0, sizeof(struct dml2_mcache_surface_allocation)); - - dml2_core_calcs_get_mcache_allocation(&in_out->instance->clean_me_up.mode_lib, in_out->mcache_allocation, in_out->plane_index); - - if (in_out->mcache_allocation->num_mcaches_plane0 > 0) - in_out->mcache_allocation->mcache_x_offsets_plane0[in_out->mcache_allocation->num_mcaches_plane0 - 1] = in_out->plane_descriptor->surface.plane0.width; - - if (in_out->mcache_allocation->num_mcaches_plane1 > 0) - in_out->mcache_allocation->mcache_x_offsets_plane1[in_out->mcache_allocation->num_mcaches_plane1 - 1] = in_out->plane_descriptor->surface.plane1.width; - - in_out->mcache_allocation->requires_dedicated_mall_mcache = false; - - return true; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h deleted file mode 100644 index a68bb001a346..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h +++ /dev/null @@ -1,12 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
- -#ifndef __DML2_CORE_DCN4_H__ -#define __DML2_CORE_DCN4_H__ -bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out); -bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out); -bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out); -bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out); -bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out); -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c deleted file mode 100644 index f16792f79bef..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ /dev/null @@ -1,13339 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - - -#include "dml2_internal_shared_types.h" -#include "dml2_core_dcn4_calcs.h" -#include "dml2_debug.h" -#include "lib_float_math.h" -#include "dml_top_types.h" - -#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 -#define DML_MAX_NUM_OF_SLICES_PER_DSC 4 -#define DML_MAX_COMPRESSION_RATIO 4 -//#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW -//#define DML_GLOBAL_PREFETCH_CHECK -#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE -#define DML_MAX_VSTARTUP_START 1023 - -const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) -{ - switch (bw_type) { - case (dml2_core_internal_bw_sdp): - return("dml2_core_internal_bw_sdp"); - case (dml2_core_internal_bw_dram): - return("dml2_core_internal_bw_dram"); - case (dml2_core_internal_bw_max): - return("dml2_core_internal_bw_max"); - default: - return("dml2_core_internal_bw_unknown"); - } -} - -const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) -{ - switch (dml2_core_internal_soc_state_type) { - case (dml2_core_internal_soc_state_sys_idle): - return("dml2_core_internal_soc_state_sys_idle"); - case (dml2_core_internal_soc_state_sys_active): - return("dml2_core_internal_soc_state_sys_active"); - case (dml2_core_internal_soc_state_svp_prefetch): - return("dml2_core_internal_soc_state_svp_prefetch"); - case dml2_core_internal_soc_state_max: - default: - return("dml2_core_internal_soc_state_unknown"); - } -} - -static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder) -{ - *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); - return dividend / divisor; -} - -static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) -{ - DML_LOG_VERBOSE("DML: ===================================== \n"); - DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); - if (!fail_only || support->ScaleRatioAndTapsSupport == 0) - DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); - if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) - DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); - if (!fail_only || support->ViewportSizeSupport == 0) - DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); - if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) - DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); - if (!fail_only || 
support->LinkRateForMultistreamNotIndicated == 1) - DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); - if (!fail_only || support->BPPForMultistreamNotIndicated == 1) - DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); - if (!fail_only || support->MultistreamWithHDMIOreDP == 1) - DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); - if (!fail_only || support->ExceededMultistreamSlots == 1) - DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); - if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) - DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); - if (!fail_only || support->NotEnoughLanesForMSO == 1) - DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); - if (!fail_only || support->P2IWith420 == 1) - DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); - if (!fail_only || support->DSC422NativeNotSupported == 1) - DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); - if (!fail_only || support->DSCSlicesODMModeSupported == 0) - DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); - if (!fail_only || support->NotEnoughDSCUnits == 1) - DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); - if (!fail_only || support->NotEnoughDSCSlices == 1) - DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); - if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); - if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); - if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) - DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); - if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); - if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) - DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); - if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); - if (!fail_only || support->ROBSupport == 0) - DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); - if (!fail_only || support->OutstandingRequestsSupport == 0) - DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); - if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) - DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); - if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) - 
DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); - if (!fail_only || support->TotalAvailablePipesSupport == 0) - DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); - if (!fail_only || support->NumberOfOTGSupport == 0) - DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); - if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); - if (!fail_only || support->NumberOfDP2p0Support == 0) - DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); - if (!fail_only || support->EnoughWritebackUnits == 0) - DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); - if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); - if (!fail_only || support->WritebackLatencySupport == 0) - DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); - if (!fail_only || support->CursorSupport == 0) - DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); - if (!fail_only || support->PitchSupport == 0) - DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); - if (!fail_only || support->ViewportExceedsSurface == 1) - DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); - if (!fail_only || support->PrefetchSupported == 0) - DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); - if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); - if (!fail_only || support->AvgBandwidthSupport == 0) - DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); - if (!fail_only || support->DynamicMetadataSupported == 0) - DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); - if (!fail_only || support->VRatioInPrefetchSupported == 0) - DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); - if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); - if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) - DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); - if (!fail_only || support->ExceededMALLSize == 1) - DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); - if (!fail_only || support->g6_temp_read_support == 0) - DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); - if (!fail_only || support->ImmediateFlipSupport == 0) - DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); - if (!fail_only || support->LinkCapacitySupport == 0) - DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); - - if (!fail_only || support->ModeSupport == 0) - DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport); - DML_LOG_VERBOSE("DML: ===================================== 
\n"); -} - -static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) -{ - for (unsigned int k = 0; k < display_cfg->num_planes; k++) { - double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { - switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { - case dml2_444: - out_bpp[k] = bpc * 3; - break; - case dml2_s422: - out_bpp[k] = bpc * 2; - break; - case dml2_n422: - out_bpp[k] = bpc * 2; - break; - case dml2_420: - default: - out_bpp[k] = bpc * 1.5; - break; - } - } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { - out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; - } else { - out_bpp[k] = 0; - } - DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); - DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); - DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); - } -} - -static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up) -{ - unsigned int remainder; - - if (multiple == 0) - return num; - - remainder = num % multiple; - if (remainder == 0) - return num; - - if (up) - return (num + multiple - remainder); - else - return (num - remainder); -} - -static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) -{ - unsigned int num_active_pipes = 0; - - for (unsigned int k = 0; k < num_planes; k++) { - num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; - } - - DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); - return num_active_pipes; -} - -static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) -{ - unsigned int pipe_idx = 0; - - for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { - pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; - } - - for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { - for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { - pipe_plane[pipe_idx] = plane_idx; - pipe_idx++; - } - } -} - -static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) -{ - bool is_phantom = false; - - if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || - plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { - is_phantom = true; - } - - return is_phantom; -} - -static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) -{ - unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; - - bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]); - DML_LOG_VERBOSE("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom); - return is_phantom; -} - -#define 
-#define dml_get_per_pipe_var_func(variable, type, interval_var) static type dml_get_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) \
-{ \
-unsigned int plane_idx; \
-plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; \
-return (type) interval_var[plane_idx]; \
-}
-
-dml_get_per_pipe_var_func(dpte_group_size_in_bytes, unsigned int, mode_lib->mp.dpte_group_bytes);
-dml_get_per_pipe_var_func(vm_group_size_in_bytes, unsigned int, mode_lib->mp.vm_group_bytes);
-dml_get_per_pipe_var_func(swath_height_l, unsigned int, mode_lib->mp.SwathHeightY);
-dml_get_per_pipe_var_func(swath_height_c, unsigned int, mode_lib->mp.SwathHeightC);
-dml_get_per_pipe_var_func(dpte_row_height_linear_l, unsigned int, mode_lib->mp.dpte_row_height_linear);
-dml_get_per_pipe_var_func(dpte_row_height_linear_c, unsigned int, mode_lib->mp.dpte_row_height_linear_chroma);
-
-dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStartup);
-dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix);
-dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix);
-dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix);
-dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines);
-dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY);
-dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC);
-dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte);
-dml_get_per_pipe_var_func(surface_size_in_mall_bytes, unsigned int, mode_lib->mp.SurfaceSizeInTheMALL);
-
-#define dml_get_per_plane_var_func(variable, type, interval_var) static type dml_get_plane_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx) \
-{ \
-return (type) interval_var[plane_idx]; \
-}
-
-dml_get_per_plane_var_func(num_mcaches_plane0, unsigned int, mode_lib->ms.num_mcaches_l);
-dml_get_per_plane_var_func(mcache_row_bytes_plane0, unsigned int, mode_lib->ms.mcache_row_bytes_l);
-dml_get_per_plane_var_func(mcache_shift_granularity_plane0, unsigned int, mode_lib->ms.mcache_shift_granularity_l);
-dml_get_per_plane_var_func(num_mcaches_plane1, unsigned int, mode_lib->ms.num_mcaches_c);
-dml_get_per_plane_var_func(mcache_row_bytes_plane1, unsigned int, mode_lib->ms.mcache_row_bytes_c);
-dml_get_per_plane_var_func(mcache_shift_granularity_plane1, unsigned int, mode_lib->ms.mcache_shift_granularity_c);
-dml_get_per_plane_var_func(mall_comb_mcache_l, unsigned int, mode_lib->ms.mall_comb_mcache_l);
-dml_get_per_plane_var_func(mall_comb_mcache_c, unsigned int, mode_lib->ms.mall_comb_mcache_c);
-dml_get_per_plane_var_func(lc_comb_mcache, unsigned int, mode_lib->ms.lc_comb_mcache);
-dml_get_per_plane_var_func(subviewport_lines_needed_in_mall, unsigned int, mode_lib->ms.SubViewportLinesNeededInMALL);
-dml_get_per_plane_var_func(max_vstartup_lines, unsigned int, mode_lib->ms.MaxVStartupLines);
-
-#define dml_get_per_plane_array_var_func(variable, type, interval_var) static type dml_get_plane_array_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx, unsigned int array_idx) \
-{ \
-return (type) interval_var[plane_idx][array_idx]; \
-}
-
-dml_get_per_plane_array_var_func(mcache_offsets_plane0, unsigned int, mode_lib->ms.mcache_offsets_l);
-dml_get_per_plane_array_var_func(mcache_offsets_plane1, unsigned int, mode_lib->ms.mcache_offsets_c);
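For clarity, this is roughly what one instantiation of the per-pipe generator macro above expands to, written out by hand (a sketch, not part of the patch). The per-pipe getters translate a pipe index into its owning plane via mp.pipe_plane[] and then read the per-plane array computed by mode programming:

static unsigned int dml_get_dpte_group_size_in_bytes(const struct dml2_core_internal_display_mode_lib *mode_lib,
						     unsigned int pipe_idx)
{
	/* pipe -> plane indirection built by dml_calc_pipe_plane_mapping() */
	unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];

	return (unsigned int)mode_lib->mp.dpte_group_bytes[plane_idx];
}

The per-plane and global variants follow the same pattern minus the pipe-to-plane indirection.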
-
-#define dml_get_var_func(var, type, internal_var) static type dml_get_##var(const struct dml2_core_internal_display_mode_lib *mode_lib) \
-{ \
-return (type) internal_var; \
-}
-
-dml_get_var_func(wm_urgent, double, mode_lib->mp.Watermark.UrgentWatermark);
-dml_get_var_func(wm_stutter_exit, double, mode_lib->mp.Watermark.StutterExitWatermark);
-dml_get_var_func(wm_stutter_enter_exit, double, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
-dml_get_var_func(wm_z8_stutter_exit, double, mode_lib->mp.Watermark.Z8StutterExitWatermark);
-dml_get_var_func(wm_z8_stutter_enter_exit, double, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
-dml_get_var_func(wm_memory_trip, double, mode_lib->mp.UrgentLatency);
-dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory);
-
-dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark);
-dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark);
-dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us);
-dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
-dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth);
-dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
-dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL);
-dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
-dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
-dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency);
-dml_get_var_func(stutter_efficiency_no_vblank, double, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
-dml_get_var_func(stutter_num_bursts, double, mode_lib->mp.NumberOfStutterBurstsPerFrame);
-dml_get_var_func(stutter_efficiency_z8, double, mode_lib->mp.Z8StutterEfficiency);
-dml_get_var_func(stutter_num_bursts_z8, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
-dml_get_var_func(stutter_period, double, mode_lib->mp.StutterPeriod);
-dml_get_var_func(stutter_efficiency_z8_bestcase, double, mode_lib->mp.Z8StutterEfficiencyBestCase);
-dml_get_var_func(stutter_num_bursts_z8_bestcase, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
-dml_get_var_func(stutter_period_bestcase, double, mode_lib->mp.StutterPeriodBestCase);
-dml_get_var_func(fclk_change_latency, double, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
-dml_get_var_func(global_dppclk_khz, double, mode_lib->mp.GlobalDPPCLK * 1000.0);
-
-dml_get_var_func(sys_active_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
-dml_get_var_func(sys_active_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
-
-dml_get_var_func(svp_prefetch_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
-dml_get_var_func(svp_prefetch_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
-
-dml_get_var_func(sys_active_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
-dml_get_var_func(sys_active_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
-
-dml_get_var_func(svp_prefetch_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
-dml_get_var_func(svp_prefetch_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
-
-dml_get_var_func(sys_active_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
-dml_get_var_func(sys_active_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
-dml_get_var_func(sys_active_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
-
-dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
-dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
-dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]);
-
-dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency);
-dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us);
-dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us);
-dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us);
-dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us);
-
-dml_get_var_func(sys_active_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
-dml_get_var_func(sys_active_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
-dml_get_var_func(svp_prefetch_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
-dml_get_var_func(svp_prefetch_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
-
-dml_get_var_func(sys_active_non_urg_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
-dml_get_var_func(sys_active_non_urg_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
-dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
-dml_get_var_func(svp_prefetch_non_urg_bw_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
-
-dml_get_var_func(sys_active_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
-dml_get_var_func(sys_active_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
-dml_get_var_func(svp_prefetch_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
-dml_get_var_func(svp_prefetch_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
-
-dml_get_var_func(sys_active_non_urg_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
-dml_get_var_func(sys_active_non_urg_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
-dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
-dml_get_var_func(svp_prefetch_non_urg_bw_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
-
-dml_get_var_func(comp_buffer_size_kbytes, unsigned int, mode_lib->mp.CompressedBufferSizeInkByte);
-
-dml_get_var_func(unbounded_request_enabled, bool, mode_lib->mp.UnboundedRequestEnabled);
-dml_get_var_func(wm_writeback_urgent, double, mode_lib->mp.Watermark.WritebackUrgentWatermark);
-dml_get_var_func(cstate_max_cap_mode, bool, mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
-dml_get_var_func(compbuf_reserved_space_64b, unsigned int, mode_lib->mp.compbuf_reserved_space_64b);
-dml_get_var_func(hw_debug5, bool, mode_lib->mp.hw_debug5);
-dml_get_var_func(dcfclk_deep_sleep_hysteresis, unsigned int, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
-
-static void CalculateMaxDETAndMinCompressedBufferSize(
-	unsigned int ConfigReturnBufferSizeInKByte,
-	unsigned int ConfigReturnBufferSegmentSizeInKByte,
-	unsigned int ROBBufferSizeInKByte,
-	unsigned int MaxNumDPP,
-	unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
-	unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
-	bool is_mrq_present,
-
-	// Output
-	unsigned int *MaxTotalDETInKByte,
-	unsigned int *nomDETInKByte,
-	unsigned int *MinCompressedBufferSizeInKByte)
-{
-	if (is_mrq_present)
-		*MaxTotalDETInKByte = (unsigned int) math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte)*4/5, 64);
-	else
-		*MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte;
-
-	*nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte));
-	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;
-
-	DML_LOG_VERBOSE("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present);
-	DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
-	DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
-	DML_LOG_VERBOSE("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
-	DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
-	DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
-	DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);
-
-	if (nomDETInKByteOverrideEnable) {
-		*nomDETInKByte = nomDETInKByteOverrideValue;
-		DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u (overridden)\n", __func__, *nomDETInKByte);
-	}
-}
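A worked example of the DET/compressed-buffer split above, using hypothetical configuration values rather than any real SoC's: with MRQ present, 4/5 of the combined return buffer plus ROB is handed to the DET pool (rounded up to 64 KB) and the remainder becomes the minimum compressed buffer.

#include <math.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical config: 1280 KB return buffer, 128 KB ROB,
	 * 64 KB segments, 4 DPPs, MRQ present. */
	double crb_kb = 1280, rob_kb = 128, seg_kb = 64, max_dpp = 4;

	double max_total_det = ceil((crb_kb + rob_kb) * 4 / 5 / 64) * 64;  /* 1152 */
	double nom_det = floor(max_total_det / max_dpp / seg_kb) * seg_kb; /* 256  */
	double min_cbuf = crb_kb - max_total_det;                          /* 128  */

	printf("MaxTotalDET=%g nomDET=%g MinCompressedBuffer=%g (KB)\n",
	       max_total_det, nom_det, min_cbuf);
	return 0;
}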
DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); - DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); - - if (nomDETInKByteOverrideEnable) { - *nomDETInKByte = nomDETInKByteOverrideValue; - DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); - } -} - -static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd) -{ - //unsigned int num_active_planes = display_cfg->num_planes; - - //Progressive To Interlace Unit Effect - for (unsigned int k = 0; k < display_cfg->num_planes; ++k) { - PixelClockBackEnd[k] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && ptoi_supported == true) { - // FIXME_STAGE2... can sw pass the pixel rate for interlaced directly - //display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz = 2 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz; - } - } -} - -static bool dml_is_420(enum dml2_source_format_class source_format) -{ - bool val = false; - - switch (source_format) { - case dml2_444_8: - val = 0; - break; - case dml2_444_16: - val = 0; - break; - case dml2_444_32: - val = 0; - break; - case dml2_444_64: - val = 0; - break; - case dml2_420_8: - val = 1; - break; - case dml2_420_10: - val = 1; - break; - case dml2_420_12: - val = 1; - break; - case dml2_422_planar_8: - val = 0; - break; - case dml2_422_planar_10: - val = 0; - break; - case dml2_422_planar_12: - val = 0; - break; - case dml2_422_packed_8: - val = 0; - break; - case dml2_422_packed_10: - val = 0; - break; - case dml2_422_packed_12: - val = 0; - break; - case dml2_rgbe_alpha: - val = 0; - break; - case dml2_rgbe: - val = 0; - break; - case dml2_mono_8: - val = 0; - break; - case dml2_mono_16: - val = 0; - break; - default: - DML_ASSERT(0); - break; - } - return val; -} - -static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) -{ - if (sw_mode == dml2_sw_linear) - return 256; - else if (sw_mode == dml2_sw_256b_2d) - return 256; - else if (sw_mode == dml2_sw_4kb_2d) - return 4096; - else if (sw_mode == dml2_sw_64kb_2d) - return 65536; - else if (sw_mode == dml2_sw_256kb_2d) - return 262144; - else if (sw_mode == dml2_gfx11_sw_linear) - return 256; - else if (sw_mode == dml2_gfx11_sw_64kb_d) - return 65536; - else if (sw_mode == dml2_gfx11_sw_64kb_d_t) - return 65536; - else if (sw_mode == dml2_gfx11_sw_64kb_d_x) - return 65536; - else if (sw_mode == dml2_gfx11_sw_64kb_r_x) - return 65536; - else if (sw_mode == dml2_gfx11_sw_256kb_d_x) - return 262144; - else if (sw_mode == dml2_gfx11_sw_256kb_r_x) - return 262144; - else { - DML_ASSERT(0); - return 256; - } -} - -static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan) -{ - bool is_vert = false; - if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { - is_vert = true; - } else { - is_vert = false; - } - return is_vert; -} - -static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode) -{ - int unsigned version = 0; - - if (sw_mode == dml2_sw_linear || - sw_mode == dml2_sw_256b_2d || - sw_mode == dml2_sw_4kb_2d || - sw_mode == dml2_sw_64kb_2d || - sw_mode == dml2_sw_256kb_2d) { - version = 12; - } else if 
-
-static void CalculateBytePerPixelAndBlockSizes(
-	enum dml2_source_format_class SourcePixelFormat,
-	enum dml2_swizzle_mode SurfaceTiling,
-	unsigned int pitch_y,
-	unsigned int pitch_c,
-
-	// Output
-	unsigned int *BytePerPixelY,
-	unsigned int *BytePerPixelC,
-	double *BytePerPixelDETY,
-	double *BytePerPixelDETC,
-	unsigned int *BlockHeight256BytesY,
-	unsigned int *BlockHeight256BytesC,
-	unsigned int *BlockWidth256BytesY,
-	unsigned int *BlockWidth256BytesC,
-	unsigned int *MacroTileHeightY,
-	unsigned int *MacroTileHeightC,
-	unsigned int *MacroTileWidthY,
-	unsigned int *MacroTileWidthC,
-	bool *surf_linear128_l,
-	bool *surf_linear128_c)
-{
-	*BytePerPixelDETY = 0;
-	*BytePerPixelDETC = 0;
-	*BytePerPixelY = 1;
-	*BytePerPixelC = 1;
-
-	if (SourcePixelFormat == dml2_444_64) {
-		*BytePerPixelDETY = 8;
-		*BytePerPixelDETC = 0;
-		*BytePerPixelY = 8;
-		*BytePerPixelC = 0;
-	} else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) {
-		*BytePerPixelDETY = 4;
-		*BytePerPixelDETC = 0;
-		*BytePerPixelY = 4;
-		*BytePerPixelC = 0;
-	} else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) {
-		*BytePerPixelDETY = 2;
-		*BytePerPixelDETC = 0;
-		*BytePerPixelY = 2;
-		*BytePerPixelC = 0;
-	} else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) {
-		*BytePerPixelDETY = 1;
-		*BytePerPixelDETC = 0;
-		*BytePerPixelY = 1;
-		*BytePerPixelC = 0;
-	} else if (SourcePixelFormat == dml2_rgbe_alpha) {
-		*BytePerPixelDETY = 4;
-		*BytePerPixelDETC = 1;
-		*BytePerPixelY = 4;
-		*BytePerPixelC = 1;
-	} else if (SourcePixelFormat == dml2_420_8) {
-		*BytePerPixelDETY = 1;
-		*BytePerPixelDETC = 2;
-		*BytePerPixelY = 1;
-		*BytePerPixelC = 2;
-	} else if (SourcePixelFormat == dml2_420_12) {
-		*BytePerPixelDETY = 2;
-		*BytePerPixelDETC = 4;
-		*BytePerPixelY = 2;
-		*BytePerPixelC = 4;
-	} else if (SourcePixelFormat == dml2_420_10) {
-		*BytePerPixelDETY = (double)(4.0 / 3);
-		*BytePerPixelDETC = (double)(8.0 / 3);
-		*BytePerPixelY = 2;
-		*BytePerPixelC = 4;
-	} else {
-		DML_LOG_VERBOSE("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat);
-		DML_ASSERT(0);
-	}
-
-	DML_LOG_VERBOSE("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
-	DML_LOG_VERBOSE("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
-	DML_LOG_VERBOSE("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
-	DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
-	DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
-	DML_LOG_VERBOSE("DML::%s: pitch_y = %u\n", __func__, pitch_y);
-	DML_LOG_VERBOSE("DML::%s: pitch_c = %u\n", __func__, pitch_c);
-	DML_LOG_VERBOSE("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l);
-	DML_LOG_VERBOSE("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c);
-
-	if (dml_get_gfx_version(SurfaceTiling) == 11) {
-		*surf_linear128_l = 0;
-		*surf_linear128_c = 0;
-	} else {
-		if (SurfaceTiling == dml2_sw_linear) {
-			*surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0);
-
-			if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)
-				*surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0);
-		}
-	}
-
-	if (!(dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) {
-		if (SurfaceTiling == dml2_sw_linear) {
-			*BlockHeight256BytesY = 1;
-		} else if (SourcePixelFormat == dml2_444_64) {
-			*BlockHeight256BytesY = 4;
-		} else if (SourcePixelFormat == dml2_444_8) {
-			*BlockHeight256BytesY = 16;
-		} else {
-			*BlockHeight256BytesY = 8;
-		}
-		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
-		*BlockHeight256BytesC = 0;
-		*BlockWidth256BytesC = 0;
-	} else { // dual plane
-		if (SurfaceTiling == dml2_sw_linear) {
-			*BlockHeight256BytesY = 1;
-			*BlockHeight256BytesC = 1;
-		} else if (SourcePixelFormat == dml2_rgbe_alpha) {
-			*BlockHeight256BytesY = 8;
-			*BlockHeight256BytesC = 16;
-		} else if (SourcePixelFormat == dml2_420_8) {
-			*BlockHeight256BytesY = 16;
-			*BlockHeight256BytesC = 8;
-		} else {
-			*BlockHeight256BytesY = 8;
-			*BlockHeight256BytesC = 8;
-		}
-		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
-		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
-	}
-	DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
-	DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
-	DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
-	DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
-
-	if (dml_get_gfx_version(SurfaceTiling) == 11) {
-		if (SurfaceTiling == dml2_gfx11_sw_linear) {
-			*MacroTileHeightY = *BlockHeight256BytesY;
-			*MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
-			*MacroTileHeightC = *BlockHeight256BytesC;
-			if (*MacroTileHeightC == 0) {
-				*MacroTileWidthC = 0;
-			} else {
-				*MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
-			}
-		} else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) {
-			*MacroTileHeightY = 16 * *BlockHeight256BytesY;
-			*MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
-			*MacroTileHeightC = 16 * *BlockHeight256BytesC;
-			if (*MacroTileHeightC == 0) {
-				*MacroTileWidthC = 0;
-			} else {
-				*MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
-			}
-		} else {
-			*MacroTileHeightY = 32 * *BlockHeight256BytesY;
-			*MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
-			*MacroTileHeightC = 32 * *BlockHeight256BytesC;
-			if (*MacroTileHeightC == 0) {
-				*MacroTileWidthC = 0;
-			} else {
-				*MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
-			}
-		}
-	} else {
-		unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling);
-		unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling
-
-		if (SurfaceTiling == dml2_sw_linear) {
-			macro_tile_scale = 1;
-		} else if (SurfaceTiling == dml2_sw_4kb_2d) {
-			macro_tile_scale = 4;
-		} else if (SurfaceTiling == dml2_sw_64kb_2d) {
-			macro_tile_scale = 16;
-		} else if (SurfaceTiling == dml2_sw_256kb_2d) {
-			macro_tile_scale = 32;
-		} else {
-			DML_LOG_VERBOSE("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling);
-			DML_ASSERT(0);
-		}
-
-		*MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY;
-		*MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY;
-		*MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC;
-		if (*MacroTileHeightC == 0) {
-			*MacroTileWidthC = 0;
-		} else {
-			*MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC;
-		}
-	}
-
-	DML_LOG_VERBOSE("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
-	DML_LOG_VERBOSE("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
-	DML_LOG_VERBOSE("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
-	DML_LOG_VERBOSE("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
-}
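A worked example of the 256-byte request-block geometry above, for a tiled dml2_420_8 dual-plane surface (a sketch using the constants from the function, not part of the patch): the block width follows from 256 bytes divided by bytes-per-pixel and block height.

#include <stdio.h>

int main(void)
{
	/* dml2_420_8, tiled: per the tables above */
	unsigned int bpp_y = 1, block_h_y = 16; /* 8-bit luma            */
	unsigned int bpp_c = 2, block_h_c = 8;  /* interleaved CbCr pair */

	printf("block Y: %ux%u px, block C: %ux%u samples\n",
	       256 / bpp_y / block_h_y, block_h_y,   /* 16x16 */
	       256 / bpp_c / block_h_c, block_h_c);  /* 16x8  */
	return 0;
}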
val=%u\n", SurfaceTiling); - DML_ASSERT(0); - } - - *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY; - *MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY; - *MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC; - if (*MacroTileHeightC == 0) { - *MacroTileWidthC = 0; - } else { - *MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC; - } - } - - DML_LOG_VERBOSE("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); - DML_LOG_VERBOSE("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); - DML_LOG_VERBOSE("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); - DML_LOG_VERBOSE("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); -} - -static void CalculateSinglePipeDPPCLKAndSCLThroughput( - double HRatio, - double HRatioChroma, - double VRatio, - double VRatioChroma, - double MaxDCHUBToPSCLThroughput, - double MaxPSCLToLBThroughput, - double PixelClock, - enum dml2_source_format_class SourcePixelFormat, - unsigned int HTaps, - unsigned int HTapsChroma, - unsigned int VTaps, - unsigned int VTapsChroma, - - // Output - double *PSCL_THROUGHPUT, - double *PSCL_THROUGHPUT_CHROMA, - double *DPPCLKUsingSingleDPP) -{ - double DPPCLKUsingSingleDPPLuma; - double DPPCLKUsingSingleDPPChroma; - - if (HRatio > 1) { - *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0)); - } else { - *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); - } - - DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1); - - if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) - DPPCLKUsingSingleDPPLuma = 2 * PixelClock; - - if (!dml_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) { - *PSCL_THROUGHPUT_CHROMA = 0; - *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; - } else { - if (HRatioChroma > 1) { - *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0)); - } else { - *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); - } - DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma), - HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); - if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) - DPPCLKUsingSingleDPPChroma = 2 * PixelClock; - *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); - } -} - -static void CalculateSwathWidth( - const struct dml2_display_cfg *display_cfg, - bool ForceSingleDPP, - unsigned int NumberOfActiveSurfaces, - enum dml2_odm_mode ODMMode[], - unsigned int BytePerPixY[], - unsigned int BytePerPixC[], - unsigned int Read256BytesBlockHeightY[], - unsigned int Read256BytesBlockHeightC[], - unsigned int Read256BytesBlockWidthY[], - unsigned int Read256BytesBlockWidthC[], - bool surf_linear128_l[], - bool surf_linear128_c[], - unsigned int DPPPerSurface[], - - // Output - unsigned int req_per_swath_ub_l[], - unsigned int req_per_swath_ub_c[], - unsigned int SwathWidthSingleDPPY[], // post-rotated plane width - unsigned int SwathWidthSingleDPPC[], - unsigned int SwathWidthY[], // per-pipe - unsigned int SwathWidthC[], // per-pipe - unsigned int MaximumSwathHeightY[], - unsigned int MaximumSwathHeightC[], - unsigned int swath_width_luma_ub[], // 
-
-static void CalculateSwathWidth(
-	const struct dml2_display_cfg *display_cfg,
-	bool ForceSingleDPP,
-	unsigned int NumberOfActiveSurfaces,
-	enum dml2_odm_mode ODMMode[],
-	unsigned int BytePerPixY[],
-	unsigned int BytePerPixC[],
-	unsigned int Read256BytesBlockHeightY[],
-	unsigned int Read256BytesBlockHeightC[],
-	unsigned int Read256BytesBlockWidthY[],
-	unsigned int Read256BytesBlockWidthC[],
-	bool surf_linear128_l[],
-	bool surf_linear128_c[],
-	unsigned int DPPPerSurface[],
-
-	// Output
-	unsigned int req_per_swath_ub_l[],
-	unsigned int req_per_swath_ub_c[],
-	unsigned int SwathWidthSingleDPPY[], // post-rotated plane width
-	unsigned int SwathWidthSingleDPPC[],
-	unsigned int SwathWidthY[], // per-pipe
-	unsigned int SwathWidthC[], // per-pipe
-	unsigned int MaximumSwathHeightY[],
-	unsigned int MaximumSwathHeightC[],
-	unsigned int swath_width_luma_ub[], // per-pipe
-	unsigned int swath_width_chroma_ub[]) // per-pipe
-{
-	enum dml2_odm_mode MainSurfaceODMMode;
-	double odm_hactive_factor = 1.0;
-	unsigned int req_width_horz_y;
-	unsigned int req_width_horz_c;
-	unsigned int surface_width_ub_l;
-	unsigned int surface_height_ub_l;
-	unsigned int surface_width_ub_c;
-	unsigned int surface_height_ub_c;
-
-	DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
-	DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
-
-	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
-		if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
-			SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
-		} else {
-			SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
-		}
-
-		DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
-		DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
-		DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
-
-		MainSurfaceODMMode = ODMMode[k];
-
-		if (ForceSingleDPP) {
-			SwathWidthY[k] = SwathWidthSingleDPPY[k];
-		} else {
-			if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1)
-				odm_hactive_factor = 4.0;
-			else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1)
-				odm_hactive_factor = 3.0;
-			else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1)
-				odm_hactive_factor = 2.0;
-
-			if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) {
-				SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio)));
-			} else if (DPPPerSurface[k] == 2) {
-				SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
-			} else {
-				SwathWidthY[k] = SwathWidthSingleDPPY[k];
-			}
-		}
-
-		DML_LOG_VERBOSE("DML::%s: k=%u HActive=%lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active);
-		DML_LOG_VERBOSE("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
-		DML_LOG_VERBOSE("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
-		DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
-
-		if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
-			SwathWidthC[k] = SwathWidthY[k] / 2;
-			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
-		} else {
-			SwathWidthC[k] = SwathWidthY[k];
-			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
-		}
-
-		if (ForceSingleDPP == true) {
-			SwathWidthY[k] = SwathWidthSingleDPPY[k];
-			SwathWidthC[k] = SwathWidthSingleDPPC[k];
-		}
-
-		req_width_horz_y = Read256BytesBlockWidthY[k];
-		req_width_horz_c = Read256BytesBlockWidthC[k];
-
-		if (surf_linear128_l[k])
-			req_width_horz_y = req_width_horz_y / 2;
-
-		if (surf_linear128_c[k])
-			req_width_horz_c = req_width_horz_c / 2;
-
-		surface_width_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y);
-		surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]);
-		surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c);
-		surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]);
-
-		DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
-		DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
-		DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
-		DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
-		DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
-		DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
-		DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
-		DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
-		DML_LOG_VERBOSE("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary);
-		DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
-
-		req_per_swath_ub_l[k] = 0;
-		req_per_swath_ub_c[k] = 0;
-		if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
-			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
-			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
-			if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
-				swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y)));
-			} else {
-				swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y));
-			}
-			req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y;
-
-			if (BytePerPixC[k] > 0) {
-				if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
-					swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, req_width_horz_c)));
-				} else {
-					swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c));
-				}
-				req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c;
-			} else {
-				swath_width_chroma_ub[k] = 0;
-			}
-		} else {
-			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
-			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
-
-			if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
-				swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k])));
-			} else {
-				swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
-			}
-			req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k];
-			if (BytePerPixC[k] > 0) {
-				if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
-					swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k])));
-				} else {
-					swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
-				}
-				req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k];
-			} else {
-				swath_width_chroma_ub[k] = 0;
-			}
-		}
-
-		DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]);
-	}
-}
-
-static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear)
-{
-	bool unb_req_ok = false;
-	bool unb_req_en = false;
-
-	unb_req_ok = (TotalNumberOfActiveDPP == 1 && NoChromaOrLinear);
-	unb_req_en = unb_req_ok;
-
-	if (unb_req_force_en) {
-		unb_req_en = unb_req_force_val && unb_req_ok;
-	}
-	DML_LOG_VERBOSE("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en);
-	DML_LOG_VERBOSE("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val);
-	DML_LOG_VERBOSE("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok);
-	DML_LOG_VERBOSE("DML::%s: unb_req_en = %u\n", __func__, unb_req_en);
-	return unb_req_en;
-}
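A behaviour sketch of UnboundedRequest() above (a local re-statement for testing, not part of the patch): the force flag can veto unbounded requests but can never enable them for an ineligible configuration.

#include <assert.h>
#include <stdbool.h>

static bool unbounded_req(bool force_en, bool force_val, unsigned int dpps, bool no_chroma_or_linear)
{
	bool ok = (dpps == 1 && no_chroma_or_linear);

	return force_en ? (force_val && ok) : ok;
}

int main(void)
{
	assert(unbounded_req(false, false, 1, true));  /* eligible: single DPP   */
	assert(!unbounded_req(false, false, 2, true)); /* needs exactly one DPP  */
	assert(!unbounded_req(true, true, 2, true));   /* force cannot enable    */
	assert(!unbounded_req(true, false, 1, true));  /* force can veto         */
	return 0;
}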
-
-static void CalculateDETBufferSize(
-	struct dml2_core_shared_CalculateDETBufferSize_locals *l,
-	const struct dml2_display_cfg *display_cfg,
-	bool ForceSingleDPP,
-	unsigned int NumberOfActiveSurfaces,
-	bool UnboundedRequestEnabled,
-	unsigned int nomDETInKByte,
-	unsigned int MaxTotalDETInKByte,
-	unsigned int ConfigReturnBufferSizeInKByte,
-	unsigned int MinCompressedBufferSizeInKByte,
-	unsigned int ConfigReturnBufferSegmentSizeInkByte,
-	unsigned int CompressedBufferSegmentSizeInkByte,
-	double ReadBandwidthLuma[],
-	double ReadBandwidthChroma[],
-	unsigned int full_swath_bytes_l[],
-	unsigned int full_swath_bytes_c[],
-	unsigned int DPPPerSurface[],
-	// Output
-	unsigned int DETBufferSizeInKByte[],
-	unsigned int *CompressedBufferSizeInkByte)
-{
-	memset(l, 0, sizeof(struct dml2_core_shared_CalculateDETBufferSize_locals));
-
-	bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES];
-	bool NextPotentialSurfaceToAssignDETPieceFound;
-	bool MinimizeReallocationSuccess = false;
-
-	DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
-	DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
-	DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
-	DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
-	DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
-	DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
-	DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
-	DML_LOG_VERBOSE("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte);
-
-	// Note: Will use default det size if that fits 2 swaths
-	if (UnboundedRequestEnabled) {
-		if (display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) {
-			DETBufferSizeInKByte[0] = display_cfg->plane_descriptors[0].overrides.det_size_override_kb;
-		} else {
-			DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)full_swath_bytes_l[0] + (double)full_swath_bytes_c[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
-		}
-		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
-	} else {
-		l->DETBufferSizePoolInKByte = MaxTotalDETInKByte;
-		for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
-			DETBufferSizeInKByte[k] = 0;
-			if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
-				l->max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
-			} else {
-				l->max_minDET = nomDETInKByte;
-			}
-			l->minDET = 128;
-			l->minDET_pipe = 0;
-
-			// add DET resource until it can hold 2 full swaths
-			while (l->minDET <= l->max_minDET && l->minDET_pipe == 0) {
-				if (2.0 * ((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0 <= l->minDET)
-					l->minDET_pipe = l->minDET;
-				l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte;
-			}
-
-			DML_LOG_VERBOSE("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET);
-			DML_LOG_VERBOSE("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET);
-			DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe);
-			DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]);
-			DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]);
-
-			if (l->minDET_pipe == 0) {
-				l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
-				DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u (assume each plane takes half DET)\n", __func__, k, l->minDET_pipe);
-			}
-
-			if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
-				DETBufferSizeInKByte[k] = 0;
-			} else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) {
-				DETBufferSizeInKByte[k] = display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
-				l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
-			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe <= l->DETBufferSizePoolInKByte) {
-				DETBufferSizeInKByte[k] = l->minDET_pipe;
-				l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe;
-			}
-
-			DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
-			DML_LOG_VERBOSE("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb);
-			DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
-			DML_LOG_VERBOSE("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte);
-		}
-
-		if (display_cfg->minimize_det_reallocation) {
-			MinimizeReallocationSuccess = true;
-			// To minimize DET reallocation, we don't distribute based on each surface's bandwidth proportional to the global,
-			// but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on
-			// bandwidth between the planes on the same stream. This ensures that large-scale re-distribution only happens on a
-			// stream count and/or pixel rate change, which is much less likely than general bandwidth changes per plane.
-
-			// Calculate total pixel rate
-			for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
-				l->TotalPixelRate += display_cfg->stream_descriptors[k].timing.pixel_clock_khz;
-			}
-
-			// Calculate per stream DET budget
-			for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
-				l->DETBudgetPerStream[k] = (unsigned int)((double) display_cfg->stream_descriptors[k].timing.pixel_clock_khz * MaxTotalDETInKByte / l->TotalPixelRate);
-				l->RemainingDETBudgetPerStream[k] = l->DETBudgetPerStream[k];
-			}
-
-			// Calculate the per stream total bandwidth
-			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
-				if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
-					l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
-
-					// Check the minimum can be satisfied by budget
-					if (l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] >= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
-						l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
-					} else {
-						MinimizeReallocationSuccess = false;
-						break;
-					}
-				}
-			}
-
-			if (MinimizeReallocationSuccess) {
-				// Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each stream's
-				// budget proportionally across its planes
-				l->ResidualDETAfterRounding = MaxTotalDETInKByte;
-
-				for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
-					if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
-						l->IdealDETBudget = (unsigned int)(((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index])
-							* l->DETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]);
-
-						if (l->IdealDETBudget > DETBufferSizeInKByte[k]) {
-							l->DeltaDETBudget = l->IdealDETBudget - DETBufferSizeInKByte[k];
-							if (l->DeltaDETBudget > l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index])
-								l->DeltaDETBudget = l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index];
-
-							/* split the additional budgeted DET among the pipes per plane */
-							DETBufferSizeInKByte[k] += (unsigned int)((double)l->DeltaDETBudget / (ForceSingleDPP ? 1 : DPPPerSurface[k]));
-							l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= l->DeltaDETBudget;
-						}
-
-						// Round down to segment size
-						DETBufferSizeInKByte[k] = (DETBufferSizeInKByte[k] / ConfigReturnBufferSegmentSizeInkByte) * ConfigReturnBufferSegmentSizeInkByte;
-
-						l->ResidualDETAfterRounding -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
-					}
-				}
-			}
-		}
-
-		if (!MinimizeReallocationSuccess) {
-			l->TotalBandwidth = 0;
-			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
-				if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
-					l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
-				}
-			}
-			DML_LOG_VERBOSE("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
-			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
-				DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
-			}
-			DML_LOG_VERBOSE("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
-			DML_LOG_VERBOSE("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth);
-			l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth;
-			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
-
-				if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
-					DETPieceAssignedToThisSurfaceAlready[k] = true;
-				} else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(ForceSingleDPP ? 1 : DPPPerSurface[k]) * (double)DETBufferSizeInKByte[k] / (double)MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidth))) {
-					DETPieceAssignedToThisSurfaceAlready[k] = true;
-					l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
-				} else {
-					DETPieceAssignedToThisSurfaceAlready[k] = false;
-				}
-				DML_LOG_VERBOSE("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
-				DML_LOG_VERBOSE("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece);
-			}
-
-			for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) {
-				NextPotentialSurfaceToAssignDETPieceFound = false;
-				l->NextSurfaceToAssignDETPiece = 0;
-
-				for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
-					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
-					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
-					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
-					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
-					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece);
-					if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
-						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) {
-						l->NextSurfaceToAssignDETPiece = k;
-						NextPotentialSurfaceToAssignDETPieceFound = true;
-					}
-					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
-					DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
-				}
-
-				if (NextPotentialSurfaceToAssignDETPieceFound) {
-					l->NextDETBufferPieceInKByte = (unsigned int)(math_min2(
-						math_round((double)l->DETBufferSizePoolInKByte * (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]) / l->BandwidthOfSurfacesNotAssignedDETPiece /
-							((ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))
-							* (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
-						math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));
-
-					DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte);
-					DML_LOG_VERBOSE("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece);
-					DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
-					DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
-					DML_LOG_VERBOSE("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece);
-					DML_LOG_VERBOSE("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte);
-					DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
-
-					DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]);
-					DML_LOG_VERBOSE("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
-
-					l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte;
-					DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true;
-					l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
-				}
-			}
-		}
-		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
-	}
-	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte;
-
-	DML_LOG_VERBOSE("DML::%s: --- After bandwidth adjustment ---\n", __func__);
-	DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
-	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
-		DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
-	}
-}
-
-static double CalculateRequiredDispclk(
-	enum dml2_odm_mode ODMMode,
-	double PixelClock,
-	bool isTMDS420)
-{
-	double DispClk;
-
-	if (ODMMode == dml2_odm_mode_combine_4to1) {
-		DispClk = PixelClock / 4.0;
-	} else if (ODMMode == dml2_odm_mode_combine_3to1) {
-		DispClk = PixelClock / 3.0;
-	} else if (ODMMode == dml2_odm_mode_combine_2to1) {
-		DispClk = PixelClock / 2.0;
-	} else {
-		DispClk = PixelClock;
-	}
-
-	if (isTMDS420) {
-		double TMDS420MinPixClock = PixelClock / 2.0;
-		DispClk = math_max2(DispClk, TMDS420MinPixClock);
-	}
-
-	return DispClk;
-}
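A numeric sketch of CalculateRequiredDispclk() above: ODM combine divides the per-pipe DISPCLK requirement, while TMDS 4:2:0 keeps a floor of half the pixel clock. The 1188 MHz pixel clock is a hypothetical value for illustration only.

#include <math.h>
#include <stdio.h>

int main(void)
{
	double pixel_clock = 1188.0; /* hypothetical, MHz */

	double odm4 = pixel_clock / 4.0;                   /* dml2_odm_mode_combine_4to1 */
	double odm4_tmds420 = fmax(odm4, pixel_clock / 2.0); /* TMDS 4:2:0 floor */

	printf("DISPCLK: %.0f MHz (4:1 ODM), %.0f MHz with TMDS 4:2:0\n",
	       odm4, odm4_tmds420); /* 297 and 594 */
	return 0;
}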
int MinDSCBPP; - double MaxDSCBPP; - unsigned int NonDSCBPP0; - unsigned int NonDSCBPP1; - unsigned int NonDSCBPP2; - enum dml2_odm_mode ODMMode; - - if (Format == dml2_420) { - NonDSCBPP0 = 12; - NonDSCBPP1 = 15; - NonDSCBPP2 = 18; - MinDSCBPP = 6; - MaxDSCBPP = 16; - } else if (Format == dml2_444) { - NonDSCBPP0 = 24; - NonDSCBPP1 = 30; - NonDSCBPP2 = 36; - MinDSCBPP = 8; - MaxDSCBPP = 16; - } else { - if (Output == dml2_hdmi || Output == dml2_hdmifrl) { - NonDSCBPP0 = 24; - NonDSCBPP1 = 24; - NonDSCBPP2 = 24; - } else { - NonDSCBPP0 = 16; - NonDSCBPP1 = 20; - NonDSCBPP2 = 24; - } - if (Format == dml2_n422 || Output == dml2_hdmifrl) { - MinDSCBPP = 7; - MaxDSCBPP = 16; - } else { - MinDSCBPP = 8; - MaxDSCBPP = 16; - } - } - if (Output == dml2_dp2p0) { - MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0; - } else if (DSCEnable && Output == dml2_dp) { - MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100); - } else { - MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock; - } - - ODMMode = DSCEnable ? ODMModeDSC : ODMModeNoDSC; - - if (ODMMode == dml2_odm_mode_split_1to2) { - MaxLinkBPP = 2 * MaxLinkBPP; - } - - if (DesiredBPP == 0) { - if (DSCEnable) { - if (MaxLinkBPP < MinDSCBPP) { - return __DML2_CALCS_DPP_INVALID__; - } else if (MaxLinkBPP >= MaxDSCBPP) { - return MaxDSCBPP; - } else { - return math_floor2(16.0 * MaxLinkBPP, 1.0) / 16.0; - } - } else { - if (MaxLinkBPP >= NonDSCBPP2) { - return NonDSCBPP2; - } else if (MaxLinkBPP >= NonDSCBPP1) { - return NonDSCBPP1; - } else if (MaxLinkBPP >= NonDSCBPP0) { - return NonDSCBPP0; - } else { - return __DML2_CALCS_DPP_INVALID__; - } - } - } else { - if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) || - (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { - return __DML2_CALCS_DPP_INVALID__; - } else { - return DesiredBPP; - } - } -} - -// updated for dcn4 -static unsigned int dscceComputeDelay( - unsigned int bpc, - double BPP, - unsigned int sliceWidth, - unsigned int numSlices, - enum dml2_output_format_class pixelFormat, - enum dml2_output_encoder_class Output) -{ - // valid bpc = source bits per component in the set of {8, 10, 12} - // valid bpp = increments of 1/16 of a bit - // min = 6/7/8 in N420/N422/444, respectively - // max = such that compression is 1:1 - //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) - //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4} - //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} - - // fixed value - unsigned int rcModelSize = 8192; - - // N422/N420 operate at 2 pixels per clock - unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified; - - if (pixelFormat == dml2_420) - pixelsPerClock = 2; - // #all other modes operate at 1 pixel per clock - else if (pixelFormat == dml2_444) - pixelsPerClock = 1; - else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl) - pixelsPerClock = 2; - else - pixelsPerClock = 1; - - //initial transmit delay as per 
PPS - initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock)); - - //slice width as seen by dscc_bcl in pixels or pixel pairs (depending on number of pixels per pixel container based on pixel format) - slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth; - - padding_pixels = ((slice_width_modified % 3) != 0) ? (3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0; - - if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) { - if ((initial_xmit_delay + padding_pixels) % 3 == 1) { - initial_xmit_delay++; - } - } - - //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard - if (bpc == 8) - ssm_group_priming_delay = 83; - else if (bpc == 10) - ssm_group_priming_delay = 91; - else if (bpc == 12) - ssm_group_priming_delay = 115; - else if (bpc == 14) - ssm_group_priming_delay = 123; - else - ssm_group_priming_delay = 128; - - //slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice - slice_width_groups = (slice_width_modified + 2) / 3; - - //determine number of padded pixels in the last group of a slice line, computed as - slice_padded_pixels = 3 * slice_width_groups - slice_width_modified; - - //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered - number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified; - - //increase initial transmit delay by the number of padded pixels added to a slice line multiplied by the integer number of complete lines to reach initial transmit delay - //this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay - ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd; - - //convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels - ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3; - - //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay - groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay; - - //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice - //needs to be rounded up as complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice - lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next - - //determine if there is a non-zero number of pixels reached in the group where initial transmit delay is reached - //an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay - additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 
1 : 0; - - //number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block) - ssm_pipeline_delay = 2; - - //number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block) - obsm_pipeline_delay = 1; - - //a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes - if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl) - cycles_per_group = 6; - else - cycles_per_group = 3; - //delay of the bit stream construction layer in pixels is the sum of: - //1. number of pixel containers in a slice line multiplied by the number of lines required to reach initial transmit delay multiplied by number of slices to the left of the last horizontal slice - //2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice) - //3. additional group of delay if initial transmit delay is reached exactly in a group - //4. ssm and obsm pipeline delay (i.e., clock cycles of delay) - group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay; - pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay; - - //pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format) - pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay; - - DML_LOG_VERBOSE("DML::%s: bpc: %u\n", __func__, bpc); - DML_LOG_VERBOSE("DML::%s: BPP: %f\n", __func__, BPP); - DML_LOG_VERBOSE("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); - DML_LOG_VERBOSE("DML::%s: numSlices: %u\n", __func__, numSlices); - DML_LOG_VERBOSE("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); - DML_LOG_VERBOSE("DML::%s: Output: %u\n", __func__, Output); - DML_LOG_VERBOSE("DML::%s: pixels: %u\n", __func__, pixels); - return pixels; -} - -//updated in dcn4 -static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output) -{ - unsigned int Delay = 0; - unsigned int dispclk_per_dscclk = 3; - - // sfr - Delay = Delay + 2; - - if (pixelFormat == dml2_420 || pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) { - dispclk_per_dscclk = 3 * 2; - } - - if (pixelFormat == dml2_420) { - //dscc top delay for pixel compression layer - Delay = Delay + 16 * dispclk_per_dscclk; - - // dscc - input deserializer - Delay = Delay + 5; - - // dscc - input cdc fifo - Delay = Delay + 1 + 4 * dispclk_per_dscclk; - - // dscc - output cdc fifo - Delay = Delay + 3 + 1 * dispclk_per_dscclk; - - // dscc - cdc uncertainty - Delay = Delay + 3 + 3 * dispclk_per_dscclk; - } else if (pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) { - //dscc top delay for pixel compression layer - Delay = Delay + 16 * dispclk_per_dscclk; - // dsccif - Delay = Delay + 1; - // dscc - input deserializer - Delay = Delay + 5; - // dscc - input cdc fifo - Delay = Delay + 1 + 4 * dispclk_per_dscclk; - - - // dscc - output cdc fifo - Delay = Delay + 3 + 1 * dispclk_per_dscclk; - // dscc - cdc uncertainty - Delay = Delay + 3 + 3 * dispclk_per_dscclk; - } else if (pixelFormat == dml2_s422) { - //dscc top delay for pixel compression layer - Delay = Delay + 17 * dispclk_per_dscclk; - - // dscc - input deserializer - Delay = Delay + 3; - // dscc - input cdc fifo - Delay = 
Delay + 1 + 4 * dispclk_per_dscclk; - // dscc - output cdc fifo - Delay = Delay + 3 + 1 * dispclk_per_dscclk; - // dscc - cdc uncertainty - Delay = Delay + 3 + 3 * dispclk_per_dscclk; - } else { - //dscc top delay for pixel compression layer - Delay = Delay + 16 * dispclk_per_dscclk; - // dscc - input deserializer - Delay = Delay + 3; - // dscc - input cdc fifo - Delay = Delay + 1 + 4 * dispclk_per_dscclk; - // dscc - output cdc fifo - Delay = Delay + 3 + 1 * dispclk_per_dscclk; - - // dscc - cdc uncertainty - Delay = Delay + 3 + 3 * dispclk_per_dscclk; - } - - // sft - Delay = Delay + 1; - DML_LOG_VERBOSE("DML::%s: pixelFormat = %u\n", __func__, pixelFormat); - DML_LOG_VERBOSE("DML::%s: Delay = %u\n", __func__, Delay); - - return Delay; -} - -static unsigned int CalculateHostVMDynamicLevels( - bool GPUVMEnable, - bool HostVMEnable, - unsigned int HostVMMinPageSize, - unsigned int HostVMMaxNonCachedPageTableLevels) -{ - unsigned int HostVMDynamicLevels = 0; - - if (GPUVMEnable && HostVMEnable) { - if (HostVMMinPageSize < 2048) - HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; - else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) - HostVMDynamicLevels = (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - 1); - else - HostVMDynamicLevels = (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - 2); - } else { - HostVMDynamicLevels = 0; - } - return HostVMDynamicLevels; -} - -static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p) -{ - unsigned int extra_dpde_bytes; - unsigned int extra_mpde_bytes; - unsigned int MacroTileSizeBytes; - unsigned int vp_height_dpte_ub; - - unsigned int meta_surface_bytes; - unsigned int vm_bytes; - unsigned int vp_height_meta_ub; - unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this - - *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes; - *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes; - if (p->SurfaceTiling == dml2_sw_linear) { - *p->meta_row_height = 32; - *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth)); - *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways? 
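/*
 * Editor's aside (not part of the original patch): the meta_row_width
 * expression above uses a recurring idiom in this file -- the number of
 * granule-aligned units touched by a window [x0, x0 + w) is
 * math_floor2(x0 + w + g - 1, g) - math_floor2(x0, g). A minimal
 * standalone sketch, assuming math_floor2(x, g) rounds x down to a
 * multiple of g; the helper names below are hypothetical.
 */
static unsigned int floor2u(unsigned int x, unsigned int g)
{
	return x - (x % g); /* round down to a multiple of g */
}

static unsigned int aligned_window_width(unsigned int x0, unsigned int w, unsigned int g)
{
	/* granules fully or partially covered by [x0, x0 + w), in units of g */
	return floor2u(x0 + w + g - 1, g) - floor2u(x0, g);
}

/*
 * e.g. x0 = 30, w = 100, g = 32 (one 32-pixel meta request):
 * pixels 30..129 touch granules 0..4, so the aligned width is 160.
 */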
- } else if (!dml_is_vertical_rotation(p->RotationAngle)) { - *p->meta_row_height = *p->MetaRequestHeight; - if (p->ViewportStationary && p->NumberOfDPPs == 1) { - *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth)); - } else { - *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth); - } - *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); - } else { - *p->meta_row_height = *p->MetaRequestWidth; - if (p->ViewportStationary && p->NumberOfDPPs == 1) { - *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight)); - } else { - *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight); - } - *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0); - } - - if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) { - vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes)); - } else if (!dml_is_vertical_rotation(p->RotationAngle)) { - vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes); - } else { - vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes); - } - - meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0); - DML_LOG_VERBOSE("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch); - DML_LOG_VERBOSE("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes); - if (p->GPUVMEnable == true) { - double meta_vmpg_bytes = 4.0 * 1024.0; - *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64); - extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1); - } else { - *p->meta_pte_bytes_per_frame_ub = 0; - extra_mpde_bytes = 0; - } - - if (!p->DCCEnable || !p->mrq_present) { - *p->meta_pte_bytes_per_frame_ub = 0; - extra_mpde_bytes = 0; - *p->meta_row_bytes = 0; - } - - if (!p->GPUVMEnable) { - *p->PixelPTEBytesPerRow = 0; - *p->PixelPTEBytesPerRowStorage = 0; - *p->dpte_row_width_ub = 0; - *p->dpte_row_height = 0; - *p->dpte_row_height_linear = 0; - *p->PixelPTEBytesPerRow_one_row_per_frame = 0; - *p->dpte_row_width_ub_one_row_per_frame = 0; - *p->dpte_row_height_one_row_per_frame = 0; - *p->vmpg_width = 0; - *p->vmpg_height = 0; - *p->PixelPTEReqWidth = 0; - *p->PixelPTEReqHeight = 0; - *p->PTERequestSize = 0; - *p->dpde0_bytes_per_frame_ub = 0; - return 0; - } - - MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight; - - if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) { - vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight)); - } else if (!dml_is_vertical_rotation(p->RotationAngle)) { - 
vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight); - } else { - vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight); - } - - if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) { - *p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1)); - extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2); - } else { - *p->dpde0_bytes_per_frame_ub = 0; - extra_dpde_bytes = 0; - } - - vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes; - - DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable); - DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); - DML_LOG_VERBOSE("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear); - DML_LOG_VERBOSE("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel); - DML_LOG_VERBOSE("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels); - DML_LOG_VERBOSE("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes); - DML_LOG_VERBOSE("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes); - DML_LOG_VERBOSE("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight); - DML_LOG_VERBOSE("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth); - DML_LOG_VERBOSE("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub); - DML_LOG_VERBOSE("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub); - DML_LOG_VERBOSE("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes); - DML_LOG_VERBOSE("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes); - DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); - DML_LOG_VERBOSE("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight); - DML_LOG_VERBOSE("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth); - DML_LOG_VERBOSE("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub); - - if (p->SurfaceTiling == dml2_sw_linear) { - *p->PixelPTEReqHeight = 1; - *p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel; - PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel; - *p->PTERequestSize = 64; - - *p->vmpg_height = 1; - *p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel; - } else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE - *p->PixelPTEReqHeight = p->MacroTileHeight; - *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); - *p->PTERequestSize = 64; - - *p->vmpg_height = p->MacroTileHeight; - *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); - - } else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile - // one 64KB tile is 16x16x256B req - *p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes; - *p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes; - *p->PTERequestSize = 128; - - *p->vmpg_height = *p->PixelPTEReqHeight; - *p->vmpg_width = *p->PixelPTEReqWidth; - } else { - // default for rest of calculation to go through, when vm is disabled, the calculated pte related values 
shouldn't be used anyway - *p->PixelPTEReqHeight = p->MacroTileHeight; - *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); - *p->PTERequestSize = 64; - - *p->vmpg_height = p->MacroTileHeight; - *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); - - if (p->GPUVMEnable == true) { - DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n", - __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling)); - DML_ASSERT(0); - } - } - - DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); - DML_LOG_VERBOSE("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight); - DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth); - DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); - DML_LOG_VERBOSE("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize); - DML_LOG_VERBOSE("DML::%s: Pitch = %u\n", __func__, p->Pitch); - DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width); - DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height); - - *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub; - *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth); - *p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize); - *p->dpte_row_height_linear = 0; - - if (p->SurfaceTiling == dml2_sw_linear) { - *p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1)))); - *p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth); - *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize); - - // VBA_DELTA, VBA doesn't have programming value for pte row height linear. 
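/*
 * Editor's aside (not from the patch): the sw_linear dpte_row_height above
 * is min(128, 1 << floor(log2(PTEBufferSizeInRequests * PixelPTEReqWidth /
 * Pitch))), i.e. the largest power-of-two number of scan lines whose PTE
 * fetches still fit the PTE request buffer, capped at 128 as in the code
 * above. A sketch under that reading; names are hypothetical and a line
 * count of at least one is assumed.
 */
#include <stdint.h>

static unsigned int linear_dpte_row_height(unsigned int pte_buf_reqs,
					   unsigned int pte_req_width,
					   unsigned int pitch)
{
	/* scan lines covered by one buffer's worth of PTE requests */
	uint64_t lines = (uint64_t)pte_buf_reqs * pte_req_width / pitch;
	unsigned int h = 1;

	/* 1 << floor(log2(lines)), assuming lines >= 1 */
	while ((uint64_t)h * 2 <= lines)
		h *= 2;

	return h > 128 ? 128 : h;
}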
- *p->dpte_row_height_linear = (unsigned int)1 << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1); - if (*p->dpte_row_height_linear > 128) - *p->dpte_row_height_linear = 128; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub); -#endif - - } else if (!dml_is_vertical_rotation(p->RotationAngle)) { - *p->dpte_row_height = *p->PixelPTEReqHeight; - - if (p->GPUVMMinPageSizeKBytes > 64) { - *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth); - } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) { - *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth)); - } else { - *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth); - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub); -#endif - - *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize; - } else { - *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth)); - - if (p->ViewportStationary && (p->NumberOfDPPs == 1)) { - *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight)); - } else { - *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight); - } - - *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub); -#endif - } - - if (p->GPUVMEnable != true) { - *p->PixelPTEBytesPerRow = 0; - *p->PixelPTEBytesPerRow_one_row_per_frame = 0; - } - - *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); - DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); - DML_LOG_VERBOSE("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height); - DML_LOG_VERBOSE("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height); - DML_LOG_VERBOSE("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear); - DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub); - DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow); - DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage); - DML_LOG_VERBOSE("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests); - DML_LOG_VERBOSE("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame); - DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame); - DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, 
*p->PixelPTEBytesPerRow_one_row_per_frame); -#endif - - return vm_bytes; -} // CalculateVMAndRowBytes - -static unsigned int CalculatePrefetchSourceLines( - double VRatio, - unsigned int VTaps, - bool Interlace, - bool ProgressiveToInterlaceUnitInOPP, - unsigned int SwathHeight, - enum dml2_rotation_angle RotationAngle, - bool mirrored, - bool ViewportStationary, - unsigned int SwathWidth, - unsigned int ViewportHeight, - unsigned int ViewportXStart, - unsigned int ViewportYStart, - - // Output - unsigned int *VInitPreFill, - unsigned int *MaxNumSwath) -{ - - unsigned int vp_start_rot = 0; - unsigned int sw0_tmp = 0; - unsigned int MaxPartialSwath = 0; - double numLines = 0; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); - DML_LOG_VERBOSE("DML::%s: VTaps = %u\n", __func__, VTaps); - DML_LOG_VERBOSE("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); - DML_LOG_VERBOSE("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); - DML_LOG_VERBOSE("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); - DML_LOG_VERBOSE("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); -#endif - if (ProgressiveToInterlaceUnitInOPP) - *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1)); - else - *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 1 : 0) * 0.5 * VRatio) / 2.0, 1)); - - if (ViewportStationary) { - if (RotationAngle == dml2_rotation_180) { - vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); - } else if ((RotationAngle == dml2_rotation_270 && !mirrored) || (RotationAngle == dml2_rotation_90 && mirrored)) { - vp_start_rot = ViewportXStart; - } else if ((RotationAngle == dml2_rotation_90 && !mirrored) || (RotationAngle == dml2_rotation_270 && mirrored)) { - vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); - } else { - vp_start_rot = ViewportYStart; - } - sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); - if (sw0_tmp < *VInitPreFill) { - *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - sw0_tmp) / (double)SwathHeight, 1) + 1); - } else { - *MaxNumSwath = 1; - } - MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(vp_start_rot + *VInitPreFill - 1) % SwathHeight)); - } else { - *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1); - if (*VInitPreFill > 1) { - MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight)); - } else { - MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight)); - } - } - numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); - DML_LOG_VERBOSE("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); - DML_LOG_VERBOSE("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); - DML_LOG_VERBOSE("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); - DML_LOG_VERBOSE("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); -#endif - return (unsigned int)(numLines); - -} - -static void CalculateRowBandwidth( - bool GPUVMEnable, - bool use_one_row_for_frame, - enum dml2_source_format_class SourcePixelFormat, - double VRatio, - double VRatioChroma, - bool DCCEnable, - double LineTime, - unsigned int PixelPTEBytesPerRowLuma, - unsigned int PixelPTEBytesPerRowChroma, - unsigned 
int dpte_row_height_luma, - unsigned int dpte_row_height_chroma, - - bool mrq_present, - unsigned int meta_row_bytes_per_row_ub_l, - unsigned int meta_row_bytes_per_row_ub_c, - unsigned int meta_row_height_luma, - unsigned int meta_row_height_chroma, - - // Output - double *dpte_row_bw, - double *meta_row_bw) -{ - if (!DCCEnable || !mrq_present) { - *meta_row_bw = 0; - } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { - *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime) - + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime); - } else { - *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime); - } - - if (GPUVMEnable != true) { - *dpte_row_bw = 0; - } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { - *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) - + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); - } else { - *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); - } -} - -static void CalculateMALLUseForStaticScreen( - const struct dml2_display_cfg *display_cfg, - unsigned int NumberOfActiveSurfaces, - unsigned int MALLAllocatedForDCN, - unsigned int SurfaceSizeInMALL[], - bool one_row_per_frame_fits_in_buffer[], - - // Output - bool is_using_mall_for_ss[]) -{ - - unsigned int SurfaceToAddToMALL; - bool CanAddAnotherSurfaceToMALL; - unsigned int TotalSurfaceSizeInMALL; - - TotalSurfaceSizeInMALL = 0; - for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable); - if (is_using_mall_for_ss[k]) - TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); -#endif - } - - SurfaceToAddToMALL = 0; - CanAddAnotherSurfaceToMALL = true; - while (CanAddAnotherSurfaceToMALL) { - CanAddAnotherSurfaceToMALL = false; - for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCN * 1024 * 1024 && - !is_using_mall_for_ss[k] && display_cfg->plane_descriptors[k].overrides.refresh_from_mall != dml2_refresh_from_mall_mode_override_force_disable && one_row_per_frame_fits_in_buffer[k] && - (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { - CanAddAnotherSurfaceToMALL = true; - SurfaceToAddToMALL = k; - DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall); - } - } - if (CanAddAnotherSurfaceToMALL) { - is_using_mall_for_ss[SurfaceToAddToMALL] = true; - TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); - DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); -#endif - } - } -} - -static void CalculateDCCConfiguration( - bool DCCEnabled, - bool DCCProgrammingAssumesScanDirectionUnknown, - enum dml2_source_format_class SourcePixelFormat, - unsigned int 
SurfaceWidthLuma, - unsigned int SurfaceWidthChroma, - unsigned int SurfaceHeightLuma, - unsigned int SurfaceHeightChroma, - unsigned int nomDETInKByte, - unsigned int RequestHeight256ByteLuma, - unsigned int RequestHeight256ByteChroma, - enum dml2_swizzle_mode TilingFormat, - unsigned int BytePerPixelY, - unsigned int BytePerPixelC, - double BytePerPixelDETY, - double BytePerPixelDETC, - enum dml2_rotation_angle RotationAngle, - - // Output - enum dml2_core_internal_request_type *RequestLuma, - enum dml2_core_internal_request_type *RequestChroma, - unsigned int *MaxUncompressedBlockLuma, - unsigned int *MaxUncompressedBlockChroma, - unsigned int *MaxCompressedBlockLuma, - unsigned int *MaxCompressedBlockChroma, - unsigned int *IndependentBlockLuma, - unsigned int *IndependentBlockChroma) -{ - unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; - - unsigned int segment_order_horz_contiguous_luma; - unsigned int segment_order_horz_contiguous_chroma; - unsigned int segment_order_vert_contiguous_luma; - unsigned int segment_order_vert_contiguous_chroma; - - unsigned int req128_horz_wc_l; - unsigned int req128_horz_wc_c; - unsigned int req128_vert_wc_l; - unsigned int req128_vert_wc_c; - - unsigned int yuv420; - unsigned int horz_div_l; - unsigned int horz_div_c; - unsigned int vert_div_l; - unsigned int vert_div_c; - - unsigned int swath_buf_size; - double detile_buf_vp_horz_limit; - double detile_buf_vp_vert_limit; - - unsigned int MAS_vp_horz_limit; - unsigned int MAS_vp_vert_limit; - unsigned int max_vp_horz_width; - unsigned int max_vp_vert_height; - unsigned int eff_surf_width_l; - unsigned int eff_surf_width_c; - unsigned int eff_surf_height_l; - unsigned int eff_surf_height_c; - - unsigned int full_swath_bytes_horz_wc_l; - unsigned int full_swath_bytes_horz_wc_c; - unsigned int full_swath_bytes_vert_wc_l; - unsigned int full_swath_bytes_vert_wc_c; - - if (dml_is_420(SourcePixelFormat)) - yuv420 = 1; - else - yuv420 = 0; - horz_div_l = 1; - horz_div_c = 1; - vert_div_l = 1; - vert_div_c = 1; - - if (BytePerPixelY == 1) - vert_div_l = 0; - if (BytePerPixelC == 1) - vert_div_c = 0; - - if (BytePerPixelC == 0) { - swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; - detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); - detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); - } else { - swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; - detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); - detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); - } - - if (SourcePixelFormat == dml2_420_10) { - detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; - detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; - } - - detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16); - detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16); - - MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144; - MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 
3072 : 6144); - max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit)); - max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit)); - eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); - eff_surf_width_c = eff_surf_width_l / (1 + yuv420); - eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); - eff_surf_height_c = eff_surf_height_l / (1 + yuv420); - - full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; - full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; - if (BytePerPixelC > 0) { - full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; - full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; - } else { - full_swath_bytes_horz_wc_c = 0; - full_swath_bytes_vert_wc_c = 0; - } - - if (SourcePixelFormat == dml2_420_10) { - full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0)); - full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0)); - full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0)); - full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0)); - } - - if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { - req128_horz_wc_l = 0; - req128_horz_wc_c = 0; - } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { - req128_horz_wc_l = 0; - req128_horz_wc_c = 1; - } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { - req128_horz_wc_l = 1; - req128_horz_wc_c = 0; - } else { - req128_horz_wc_l = 1; - req128_horz_wc_c = 1; - } - - if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { - req128_vert_wc_l = 0; - req128_vert_wc_c = 0; - } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { - req128_vert_wc_l = 0; - req128_vert_wc_c = 1; - } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { - req128_vert_wc_l = 1; - req128_vert_wc_c = 0; - } else { - req128_vert_wc_l = 1; - req128_vert_wc_c = 1; - } - - if (BytePerPixelY == 2) { - segment_order_horz_contiguous_luma = 0; - segment_order_vert_contiguous_luma = 1; - } else { - segment_order_horz_contiguous_luma = 1; - segment_order_vert_contiguous_luma = 0; - } - - if (BytePerPixelC == 2) { - segment_order_horz_contiguous_chroma = 0; - segment_order_vert_contiguous_chroma = 1; - } else { - segment_order_horz_contiguous_chroma = 1; - segment_order_vert_contiguous_chroma = 0; - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); - DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); - DML_LOG_VERBOSE("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); - DML_LOG_VERBOSE("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); - 
DML_LOG_VERBOSE("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); - DML_LOG_VERBOSE("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); - DML_LOG_VERBOSE("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); - DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); - DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma); -#endif - if (DCCProgrammingAssumesScanDirectionUnknown == true) { - if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { - *RequestLuma = dml2_core_internal_request_type_256_bytes; - } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { - *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; - } else { - *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; - } - if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { - *RequestChroma = dml2_core_internal_request_type_256_bytes; - } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { - *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; - } else { - *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; - } - } else if (!dml_is_vertical_rotation(RotationAngle)) { - if (req128_horz_wc_l == 0) { - *RequestLuma = dml2_core_internal_request_type_256_bytes; - } else if (segment_order_horz_contiguous_luma == 0) { - *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; - } else { - *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; - } - if (req128_horz_wc_c == 0) { - *RequestChroma = dml2_core_internal_request_type_256_bytes; - } else if (segment_order_horz_contiguous_chroma == 0) { - *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; - } else { - *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; - } - } else { - if (req128_vert_wc_l == 0) { - *RequestLuma = dml2_core_internal_request_type_256_bytes; - } else if (segment_order_vert_contiguous_luma == 0) { - *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; - } else { - *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; - } - if (req128_vert_wc_c == 0) { - *RequestChroma = dml2_core_internal_request_type_256_bytes; - } else if (segment_order_vert_contiguous_chroma == 0) { - *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; - } else { - *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; - } - } - - if (*RequestLuma == dml2_core_internal_request_type_256_bytes) { - *MaxUncompressedBlockLuma = 256; - *MaxCompressedBlockLuma = 256; - *IndependentBlockLuma = 0; - } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) { - *MaxUncompressedBlockLuma = 256; - *MaxCompressedBlockLuma = 128; - *IndependentBlockLuma = 128; - } else { - *MaxUncompressedBlockLuma = 256; - *MaxCompressedBlockLuma = 64; - *IndependentBlockLuma = 64; - } - - if (*RequestChroma == dml2_core_internal_request_type_256_bytes) { - *MaxUncompressedBlockChroma = 256; - *MaxCompressedBlockChroma = 256; - *IndependentBlockChroma = 0; - } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) { - *MaxUncompressedBlockChroma = 256; - 
*MaxCompressedBlockChroma = 128; - *IndependentBlockChroma = 128; - } else { - *MaxUncompressedBlockChroma = 256; - *MaxCompressedBlockChroma = 64; - *IndependentBlockChroma = 64; - } - - if (DCCEnabled != true || BytePerPixelC == 0) { - *MaxUncompressedBlockChroma = 0; - *MaxCompressedBlockChroma = 0; - *IndependentBlockChroma = 0; - } - - if (DCCEnabled != true) { - *MaxUncompressedBlockLuma = 0; - *MaxCompressedBlockLuma = 0; - *IndependentBlockLuma = 0; - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); - DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); - DML_LOG_VERBOSE("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); - DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma); - DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); - DML_LOG_VERBOSE("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); -#endif - -} - -static void calculate_mcache_row_bytes( - struct dml2_core_internal_scratch *scratch, - struct dml2_core_calcs_calculate_mcache_row_bytes_params *p) -{ - unsigned int vmpg_bytes = 0; - unsigned int blk_bytes = 0; - float meta_per_mvmpg_per_channel = 0; - unsigned int est_blk_per_vmpg = 2; - unsigned int mvmpg_per_row_ub = 0; - unsigned int full_vp_width_mvmpg_aligned = 0; - unsigned int full_vp_height_mvmpg_aligned = 0; - unsigned int meta_per_mvmpg_per_channel_ub = 0; - unsigned int mvmpg_per_mcache; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: num_chans = %u\n", __func__, p->num_chans); - DML_LOG_VERBOSE("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes); - DML_LOG_VERBOSE("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes); - DML_LOG_VERBOSE("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes); - DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); - DML_LOG_VERBOSE("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes); - DML_LOG_VERBOSE("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary); - DML_LOG_VERBOSE("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode); - DML_LOG_VERBOSE("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x); - DML_LOG_VERBOSE("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y); - DML_LOG_VERBOSE("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width); - DML_LOG_VERBOSE("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height); - DML_LOG_VERBOSE("DML::%s: blk_width = %u\n", __func__, p->blk_width); - DML_LOG_VERBOSE("DML::%s: blk_height = %u\n", __func__, p->blk_height); - DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width); - DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height); - DML_LOG_VERBOSE("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes); -#endif - DML_ASSERT(p->mcache_line_size_bytes != 0); - DML_ASSERT(p->mcache_size_bytes != 0); - - *p->mvmpg_width = 0; - *p->mvmpg_height = 0; - - if (p->full_vp_height == 0 && p->full_vp_width == 0) { - *p->num_mcaches = 0; - *p->mcache_row_bytes = 0; - *p->mcache_row_bytes_per_channel = 0; - } else { - blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode); - - // if gpuvm is not enabled, the alignment boundary should be in terms of tiling block size - vmpg_bytes = p->gpuvm_page_size_kbytes * 1024; - - //With vmpg_bytes >= tile blk_bytes, the 
meta_row_width alignment equations are relative to the vmpg_width/height. - // But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block. - // Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end. - *p->mvmpg_width = p->blk_width; - *p->mvmpg_height = p->blk_height; - if (p->gpuvm_enable) { - if (vmpg_bytes >= blk_bytes) { - *p->mvmpg_width = p->vmpg_width; - *p->mvmpg_height = p->vmpg_height; - } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) { - DML_LOG_VERBOSE("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__); - DML_ASSERT(0); - } - } - - //For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c - full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width)); - full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height)); - - *p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned; - - //Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes. - if (!p->surf_vert) { //horizontal access - if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes) - *p->meta_row_width_ub = full_vp_width_mvmpg_aligned; - else - *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width; - mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width; - } else { //vertical access - if (p->vp_stationary == 1) - *p->meta_row_width_ub = full_vp_height_mvmpg_aligned; - else - *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height; - mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height; - } - - if (p->gpuvm_enable) { - meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans; - - //but using the est_blk_per_vmpg between 2 and 4, to be less pessimistic - if (p->surf_vert && vmpg_bytes > blk_bytes) { - meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans; - } - - *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom - } else { - meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans; - - if (!p->surf_vert) - *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; - else - *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); - } - - meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes); - - //but for 4KB vmpg with 64KB tile blk - if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096)) - meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub; - - // If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes, - // then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes. 
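/*
 * Editor's aside (not from the patch): per the comment above, the mcache
 * count is a ceiling division of the per-channel row footprint by the
 * mcache size -- one mcache if the row fits, ceil(row_bytes / mcache_size)
 * otherwise. Minimal sketch; names are hypothetical and mcache_size_bytes
 * is assumed non-zero (the function asserts this earlier).
 */
static unsigned int num_mcaches_needed(unsigned int row_bytes_per_channel,
				       unsigned int mcache_size_bytes)
{
	/* ceiling division without floating point */
	return (row_bytes_per_channel + mcache_size_bytes - 1) / mcache_size_bytes;
}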
- if (p->gpuvm_enable || p->surf_vert) { - *p->mcache_row_bytes_per_channel = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub; - *p->mcache_row_bytes = *p->mcache_row_bytes_per_channel * p->num_chans; - } else { // horizontal and gpuvm disable - *p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256; - if (p->mcache_line_size_bytes != 0) - *p->mcache_row_bytes_per_channel = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes); - } - - *p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref - if (p->mcache_size_bytes != 0) - *p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes_per_channel / p->mcache_size_bytes, 1); - - mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub; - *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); - DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes); - DML_LOG_VERBOSE("DML::%s: blk_bytes = %u\n", __func__, blk_bytes); - DML_LOG_VERBOSE("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel); - DML_LOG_VERBOSE("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub); - DML_LOG_VERBOSE("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub); - DML_LOG_VERBOSE("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width); - DML_LOG_VERBOSE("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height); - DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor); - DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor); -#endif - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes); - DML_LOG_VERBOSE("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel); - DML_LOG_VERBOSE("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches); -#endif - DML_ASSERT(*p->num_mcaches > 0); -} - -static void calculate_mcache_setting( - struct dml2_core_internal_scratch *scratch, - struct dml2_core_calcs_calculate_mcache_setting_params *p) -{ - unsigned int n; - - struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals; - memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals)); - - *p->num_mcaches_l = 0; - *p->mcache_row_bytes_l = 0; - *p->mcache_row_bytes_per_channel_l = 0; - *p->dcc_dram_bw_nom_overhead_factor_l = 1.0; - *p->dcc_dram_bw_pref_overhead_factor_l = 1.0; - - *p->num_mcaches_c = 0; - *p->mcache_row_bytes_c = 0; - *p->mcache_row_bytes_per_channel_c = 0; - *p->dcc_dram_bw_nom_overhead_factor_c = 1.0; - *p->dcc_dram_bw_pref_overhead_factor_c = 1.0; - - *p->mall_comb_mcache_l = 0; - *p->mall_comb_mcache_c = 0; - *p->lc_comb_mcache = 0; - - if (!p->dcc_enable) - return; - - l->is_dual_plane = dml_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha; - - l->l_p.num_chans = p->num_chans; - l->l_p.mem_word_bytes = p->mem_word_bytes; - l->l_p.mcache_size_bytes = p->mcache_size_bytes; - l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes; - l->l_p.gpuvm_enable = p->gpuvm_enable; - l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes; - l->l_p.surf_vert = p->surf_vert; - l->l_p.vp_stationary = p->vp_stationary; 
- l->l_p.tiling_mode = p->tiling_mode; - l->l_p.vp_start_x = p->vp_start_x_l; - l->l_p.vp_start_y = p->vp_start_y_l; - l->l_p.full_vp_width = p->full_vp_width_l; - l->l_p.full_vp_height = p->full_vp_height_l; - l->l_p.blk_width = p->blk_width_l; - l->l_p.blk_height = p->blk_height_l; - l->l_p.vmpg_width = p->vmpg_width_l; - l->l_p.vmpg_height = p->vmpg_height_l; - l->l_p.full_swath_bytes = p->full_swath_bytes_l; - l->l_p.bytes_per_pixel = p->bytes_per_pixel_l; - - // output - l->l_p.num_mcaches = p->num_mcaches_l; - l->l_p.mcache_row_bytes = p->mcache_row_bytes_l; - l->l_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_l; - l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l; - l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l; - l->l_p.mvmpg_width = &l->mvmpg_width_l; - l->l_p.mvmpg_height = &l->mvmpg_height_l; - l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l; - l->l_p.meta_row_width_ub = &l->meta_row_width_l; - l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l; - - calculate_mcache_row_bytes(scratch, &l->l_p); - DML_ASSERT(*p->num_mcaches_l > 0); - - if (l->is_dual_plane) { - l->c_p.num_chans = p->num_chans; - l->c_p.mem_word_bytes = p->mem_word_bytes; - l->c_p.mcache_size_bytes = p->mcache_size_bytes; - l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes; - l->c_p.gpuvm_enable = p->gpuvm_enable; - l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes; - l->c_p.surf_vert = p->surf_vert; - l->c_p.vp_stationary = p->vp_stationary; - l->c_p.tiling_mode = p->tiling_mode; - l->c_p.vp_start_x = p->vp_start_x_c; - l->c_p.vp_start_y = p->vp_start_y_c; - l->c_p.full_vp_width = p->full_vp_width_c; - l->c_p.full_vp_height = p->full_vp_height_c; - l->c_p.blk_width = p->blk_width_c; - l->c_p.blk_height = p->blk_height_c; - l->c_p.vmpg_width = p->vmpg_width_c; - l->c_p.vmpg_height = p->vmpg_height_c; - l->c_p.full_swath_bytes = p->full_swath_bytes_c; - l->c_p.bytes_per_pixel = p->bytes_per_pixel_c; - - // output - l->c_p.num_mcaches = p->num_mcaches_c; - l->c_p.mcache_row_bytes = p->mcache_row_bytes_c; - l->c_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_c; - l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c; - l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c; - l->c_p.mvmpg_width = &l->mvmpg_width_c; - l->c_p.mvmpg_height = &l->mvmpg_height_c; - l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c; - l->c_p.meta_row_width_ub = &l->meta_row_width_c; - l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c; - - calculate_mcache_row_bytes(scratch, &l->c_p); - DML_ASSERT(*p->num_mcaches_c > 0); - } - - // Sharing for iMALL access - l->mcache_remainder_l = *p->mcache_row_bytes_per_channel_l % p->mcache_size_bytes; - l->mcache_remainder_c = *p->mcache_row_bytes_per_channel_c % p->mcache_size_bytes; - l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l; - l->mvmpg_access_width_c = p->surf_vert ? 
l->mvmpg_height_c : l->mvmpg_width_c; - - if (p->imall_enable) { - *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes); - - if (l->is_dual_plane) - *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes); - } - - if (!p->surf_vert) // horizontal access - l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2; - else // vertical access - l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2; - - // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c: - if (*p->num_mcaches_l) { - l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l; - } - if (l->is_dual_plane) { - l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c; - - /* if either remainder is 0, then mcache sharing is not needed or not possible due to full utilization */ - if (l->mcache_remainder_l && l->mcache_remainder_c) { - if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) { - l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) + - (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1))); - } - *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c); - } - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: imall_enable = %u\n", __func__, p->imall_enable); - DML_LOG_VERBOSE("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane); - DML_LOG_VERBOSE("DML::%s: surf_vert = %u\n", __func__, p->surf_vert); - DML_LOG_VERBOSE("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l); - DML_LOG_VERBOSE("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l); - DML_LOG_VERBOSE("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l); - DML_LOG_VERBOSE("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l); - DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l); - DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l); - DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l); - - if (l->is_dual_plane) { - DML_LOG_VERBOSE("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c); - DML_LOG_VERBOSE("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c); - DML_LOG_VERBOSE("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c); - DML_LOG_VERBOSE("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor); - DML_LOG_VERBOSE("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c); - DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c); - DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c); - DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c); - DML_LOG_VERBOSE("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size); - DML_LOG_VERBOSE("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache); - } -#endif - // calculate split_coordinate - l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l; - l->full_vp_access_width_c = p->surf_vert ? 
p->full_vp_height_c : p->full_vp_width_c; - - for (n = 0; n < *p->num_mcaches_l - 1; n++) { - p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l; - } - p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l; - - if (l->is_dual_plane) { - for (n = 0; n < *p->num_mcaches_c - 1; n++) { - p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c; - } - p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c; - } -#ifdef __DML_VBA_DEBUG__ - for (n = 0; n < *p->num_mcaches_l; n++) - DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); - - if (l->is_dual_plane) { - for (n = 0; n < *p->num_mcaches_c; n++) - DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); - } -#endif - - // Luma/Chroma combine in the last mcache - // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, while staying aligned to the mvmpg boundary - if (*p->lc_comb_mcache && l->is_dual_plane) { - for (n = 0; n < *p->num_mcaches_l - 1; n++) - p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l; - p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l; - - for (n = 0; n < *p->num_mcaches_c - 1; n++) - p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c; - p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c; - -#ifdef __DML_VBA_DEBUG__ - for (n = 0; n < *p->num_mcaches_l; n++) - DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); - - for (n = 0; n < *p->num_mcaches_c; n++) - DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); -#endif - } - - *p->mcache_shift_granularity_l = l->mvmpg_access_width_l; - *p->mcache_shift_granularity_c = l->mvmpg_access_width_c; -} - -static void calculate_mall_bw_overhead_factor( - double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref - double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref - - // input - const struct dml2_display_cfg *display_cfg, - unsigned int num_active_planes) -{ - for (unsigned int k = 0; k < num_active_planes; ++k) { - mall_prefetch_sdp_overhead_factor[k] = 1.0; - mall_prefetch_dram_overhead_factor[k] = 1.0; - - // SDP - on the return side - if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) // always no data return - mall_prefetch_sdp_overhead_factor[k] = 1.25; - else if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) - mall_prefetch_sdp_overhead_factor[k] = 0.25; - - // DRAM - if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) - mall_prefetch_dram_overhead_factor[k] = 2.0; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]); -#endif - } -} - -static double dml_get_return_bandwidth_available( - const struct dml2_soc_bb *soc, - enum dml2_core_internal_soc_state_type state_type, - enum 
dml2_core_internal_bw_type bw_type, - bool is_avg_bw, - bool is_hvm_en, - bool is_hvm_only, - double dcfclk_mhz, - double fclk_mhz, - double dram_bw_mbps) -{ - double return_bw_mbps = 0.; - double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz; - double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes; - double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes; - - double derate_sdp_factor; - double derate_fabric_factor; - double derate_dram_factor; - - double derate_sdp_bandwidth; - double derate_fabric_bandwidth; - double derate_dram_bandwidth; - - if (is_avg_bw) { - if (state_type == dml2_core_internal_soc_state_svp_prefetch) { - derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0; - derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0; - derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0; - } else { // just assume sys_active - derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0; - derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0; - derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0; - } - } else { // urgent bw - if (state_type == dml2_core_internal_soc_state_svp_prefetch) { - derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0; - derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0; - derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0; - - if (is_hvm_en) { - if (is_hvm_only) - derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0; - else - derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0; - } else { - derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0; - } - } else { // just assume sys_active - derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0; - derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0; - - if (is_hvm_en) { - if (is_hvm_only) - derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0; - else - derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0; - } else { - derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0; - } - } - } - - derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor; - derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor; - derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor; - - if (bw_type == dml2_core_internal_bw_sdp) - return_bw_mbps = math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth); - else // dml2_core_internal_bw_dram - return_bw_mbps = derate_dram_bandwidth; - - DML_LOG_VERBOSE("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw); - DML_LOG_VERBOSE("DML::%s: is_hvm_en = %u\n", 
__func__, is_hvm_en); - DML_LOG_VERBOSE("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only); - DML_LOG_VERBOSE("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type)); - DML_LOG_VERBOSE("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type)); - DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); - DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); - DML_LOG_VERBOSE("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth); - DML_LOG_VERBOSE("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth); - DML_LOG_VERBOSE("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth); - DML_LOG_VERBOSE("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor); - DML_LOG_VERBOSE("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor); - DML_LOG_VERBOSE("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor); - DML_LOG_VERBOSE("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps); - return return_bw_mbps; -} - -static noinline_for_stack void calculate_bandwidth_available( - double avg_bandwidth_available_min[dml2_core_internal_soc_state_max], - double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], - double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM - double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], - double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max], - double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max], - - const struct dml2_soc_bb *soc, - bool HostVMEnable, - double dcfclk_mhz, - double fclk_mhz, - double dram_bw_mbps) -{ - unsigned int n, m; - - DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); - DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); - DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps); - - // Calculate all the bandwidth available - for (m = 0; m < dml2_core_internal_soc_state_max; m++) { - for (n = 0; n < dml2_core_internal_bw_max; n++) { - avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, - m, // soc_state - n, // bw_type - 1, // avg_bw - HostVMEnable, - 0, // hvm_only - dcfclk_mhz, - fclk_mhz, - dram_bw_mbps); - - urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps); - - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]); - DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]); -#endif - - // urg_bandwidth_available_vm_only is indexed by soc_state - if (n == dml2_core_internal_bw_dram) { - urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps); - urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps); - } - } - - avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], 
avg_bandwidth_available[m][dml2_core_internal_bw_sdp]); - urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]); - DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]); - DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[m]); -#endif - } -} - -static void calculate_avg_bandwidth_required( - double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], - - // input - const struct dml2_display_cfg *display_cfg, - unsigned int num_active_planes, - double ReadBandwidthLuma[], - double ReadBandwidthChroma[], - double cursor_bw[], - double dcc_dram_bw_nom_overhead_factor_p0[], - double dcc_dram_bw_nom_overhead_factor_p1[], - double mall_prefetch_dram_overhead_factor[], - double mall_prefetch_sdp_overhead_factor[]) -{ - unsigned int n, m, k; - double sdp_overhead_factor; - double dram_overhead_factor_p0; - double dram_overhead_factor_p1; - - // Average BW support check - for (m = 0; m < dml2_core_internal_soc_state_max; m++) { - for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram - avg_bandwidth_required[m][n] = 0; - } - } - - // SysActive and SVP Prefetch AVG bandwidth Check - for (k = 0; k < num_active_planes; ++k) { -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: plane %0d\n", __func__, k); - DML_LOG_VERBOSE("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]); - DML_LOG_VERBOSE("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]); - DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]); - DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]); - DML_LOG_VERBOSE("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]); - DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]); -#endif - - sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k]; - dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k]; - dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k]; - - // FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation? 
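Read as arithmetic, the accumulation that follows adds each non-phantom plane's derated read bandwidth into the sys_active bucket, and every plane's into svp_prefetch. A self-contained sketch of one plane's contribution with assumed numbers (all names and values here are illustrative, not from the patch):

#include <stdio.h>

/* Sketch of one plane's contribution to the average-bandwidth totals;
 * the real code accumulates into avg_bandwidth_required[soc_state][bw_type]
 * across all active planes. */
int main(void)
{
	double bw_luma = 1000.0, bw_chroma = 500.0, cursor_bw = 10.0;	/* MB/s, assumed */
	double sdp_oh = 1.25;		/* mall_prefetch_sdp_overhead_factor */
	double dram_oh_p0 = 1.1;	/* dcc nom overhead * mall dram overhead, plane 0 */
	double dram_oh_p1 = 1.1;	/* same for plane 1 */

	double sdp_required = sdp_oh * (bw_luma + bw_chroma) + cursor_bw;
	double dram_required = dram_oh_p0 * bw_luma + dram_oh_p1 * bw_chroma + cursor_bw;

	printf("sdp=%.1f MB/s dram=%.1f MB/s\n", sdp_required, dram_required);	/* sdp=1885.0, dram=1660.0 */
	return 0;
}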
- // active avg bw does not include phantom, but svp_prefetch avg bw should include phantom pipes - if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { - avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k]; - avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k]; - } - avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k]; - avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k]; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); - DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); - DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); - DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); -#endif - } -} - -static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, - struct dml2_core_calcs_CalculateVMRowAndSwath_params *p) -{ - struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals; - - s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels); - - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->display_cfg->gpuvm_enable == true) { - p->vm_group_bytes[k] = 512; - p->dpte_group_bytes[k] = 512; - } else { - p->vm_group_bytes[k] = 0; - p->dpte_group_bytes[k] = 0; - } - - if (dml_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) { - if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) { - s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2; - s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k]; - } else { - s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma; - s->PTEBufferSizeInRequestsForChroma[k] = 
p->PTEBufferSizeInRequestsChroma; - } - - scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary; - scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable; - scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface; - scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC; - scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC; - scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat; - scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling; - scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC; - scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle; - scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k]; - scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC; - scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC; - scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC; - scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable; - scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels; - scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; - scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k]; - scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC; - scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC; - scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC; - scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]); - scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC; - scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present; - - scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k]; - scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k]; - scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k]; - scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k]; - scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k]; - scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k]; - scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k]; - scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k]; - scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k]; - scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_c[k]; - scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k]; - scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k]; - scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k]; - 
scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k]; - - scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k]; - scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k]; - scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k]; - scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k]; - scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k]; - scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k]; - - s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params); - - p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( - p->myPipe[k].VRatioChroma, - p->myPipe[k].VTapsChroma, - p->myPipe[k].InterlaceEnable, - p->myPipe[k].ProgressiveToInterlaceUnitInOPP, - p->myPipe[k].SwathHeightC, - p->myPipe[k].RotationAngle, - p->myPipe[k].mirrored, - p->myPipe[k].ViewportStationary, - p->SwathWidthC[k], - p->myPipe[k].ViewportHeightC, - p->myPipe[k].ViewportXStartC, - p->myPipe[k].ViewportYStartC, - - // Output - &p->VInitPreFillC[k], - &p->MaxNumSwathC[k]); - } else { - s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma; - s->PTEBufferSizeInRequestsForChroma[k] = 0; - s->PixelPTEBytesPerRowC[k] = 0; - s->PixelPTEBytesPerRowStorageC[k] = 0; - s->vm_bytes_c = 0; - p->MaxNumSwathC[k] = 0; - p->PrefetchSourceLinesC[k] = 0; - s->dpte_row_height_chroma_one_row_per_frame[k] = 0; - s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; - s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; - } - - scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary; - scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable; - scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface; - scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY; - scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY; - scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat; - scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling; - scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY; - scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle; - scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k]; - scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight; - scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart; - scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart; - scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable; - scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels; - scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; - scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k]; - scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY; - scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = 
p->myPipe[k].BlockWidthY; - scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY; - scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]); - scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY; - scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present; - - scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k]; - scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k]; - scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k]; - scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k]; - scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k]; - scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k]; - scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k]; - scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k]; - scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k]; - scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k]; - scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k]; - scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k]; - scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k]; - scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k]; - - scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k]; - scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k]; - scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k]; - scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k]; - scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k]; - scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k]; - - s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params); - - p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( - p->myPipe[k].VRatio, - p->myPipe[k].VTaps, - p->myPipe[k].InterlaceEnable, - p->myPipe[k].ProgressiveToInterlaceUnitInOPP, - p->myPipe[k].SwathHeightY, - p->myPipe[k].RotationAngle, - p->myPipe[k].mirrored, - p->myPipe[k].ViewportStationary, - p->SwathWidthY[k], - p->myPipe[k].ViewportHeight, - p->myPipe[k].ViewportXStart, - p->myPipe[k].ViewportYStart, - - // Output - &p->VInitPreFillY[k], - &p->MaxNumSwathY[k]); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l); - DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c); - DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]); -#endif - p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels); - 
p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k]; - p->meta_row_bytes_per_row_ub_l[k] = s->meta_row_bytes_per_row_ub_l[k]; - p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k]; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]); -#endif - if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) { - p->PTEBufferSizeNotExceeded[k] = true; - } else { - p->PTEBufferSizeNotExceeded[k] = false; - } - - s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] && - s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]); -#ifdef __DML_VBA_DEBUG__ - if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) { - DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); - - DML_LOG_VERBOSE("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); - DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); - } -#endif - } - - CalculateMALLUseForStaticScreen( - p->display_cfg, - p->NumberOfActiveSurfaces, - p->MALLAllocatedForDCN, - p->SurfaceSizeInMALL, - s->one_row_per_frame_fits_in_buffer, - // Output - p->is_using_mall_for_ss); - - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->display_cfg->gpuvm_enable) { - if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) { - p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value; - } - p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || - dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64); - p->BIGK_FRAGMENT_SIZE[k] = (unsigned int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12); - } else { - 
p->PTE_BUFFER_MODE[k] = 0; - p->BIGK_FRAGMENT_SIZE[k] = 0; - } - } - - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - p->DCCMetaBufferSizeNotExceeded[k] = true; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]); -#endif - p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || - (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)); - - p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame); - - if (p->use_one_row_for_frame[k]) { - p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k]; - p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k]; - s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k]; - p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k]; - p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k]; - s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k]; - p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k]; - } - - if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) { - p->DCCMetaBufferSizeNotExceeded[k] = true; - } else { - p->DCCMetaBufferSizeNotExceeded[k] = false; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes); - DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); -#endif - } - - s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels); - s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels); - p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k]; - p->dpte_row_bytes_per_row_l[k] = s->PixelPTEBytesPerRowY[k]; - p->dpte_row_bytes_per_row_c[k] = s->PixelPTEBytesPerRowC[k]; - - // if one row of dPTEs is meant to span the entire frame, then for these calculations, we will pretend that one big row is fetched in two halves - if (p->use_one_row_for_frame[k]) - p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2; - - CalculateRowBandwidth( - p->display_cfg->gpuvm_enable, - p->use_one_row_for_frame[k], - p->myPipe[k].SourcePixelFormat, - p->myPipe[k].VRatio, - p->myPipe[k].VRatioChroma, - p->myPipe[k].DCCEnable, - p->myPipe[k].HTotal / p->myPipe[k].PixelClock, - s->PixelPTEBytesPerRowY[k], - s->PixelPTEBytesPerRowC[k], - p->dpte_row_height_luma[k], - p->dpte_row_height_chroma[k], - - p->mrq_present, - p->meta_row_bytes_per_row_ub_l[k], - p->meta_row_bytes_per_row_ub_c[k], - p->meta_row_height_luma[k], - p->meta_row_height_chroma[k], - - // Output - &p->dpte_row_bw[k], - &p->meta_row_bw[k]); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); - 
DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config); - DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable); - DML_LOG_VERBOSE("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); -#endif - } -} - -static double CalculateUrgentLatency( - double UrgentLatencyPixelDataOnly, - double UrgentLatencyPixelMixedWithVMData, - double UrgentLatencyVMDataOnly, - bool DoUrgentLatencyAdjustment, - double UrgentLatencyAdjustmentFabricClockComponent, - double UrgentLatencyAdjustmentFabricClockReference, - double FabricClock, - double uclk_freq_mhz, - enum dml2_qos_param_type qos_type, - unsigned int urgent_ramp_uclk_cycles, - unsigned int df_qos_response_time_fclk_cycles, - unsigned int max_round_trip_to_furthest_cs_fclk_cycles, - unsigned int mall_overhead_fclk_cycles, - double umc_urgent_ramp_latency_margin, - double fabric_max_transport_latency_margin) -{ - double urgent_latency = 0; - if (qos_type == dml2_qos_param_type_dcn4x) { - urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock - + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) - + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); - } else { - urgent_latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); - if (DoUrgentLatencyAdjustment == true) { - urgent_latency = urgent_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); - } - } -#ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4x) { - DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); - DML_LOG_VERBOSE("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); - DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); - DML_LOG_VERBOSE("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin); - } else { - DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly); - DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData); - DML_LOG_VERBOSE("DML::%s: UrgentLatencyVMDataOnly = 
%f\n", __func__, UrgentLatencyVMDataOnly); - DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent); - DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference); - } - DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); - DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency); -#endif - return urgent_latency; -} - -static double CalculateTripToMemory( - double UrgLatency, - double FabricClock, - double uclk_freq_mhz, - enum dml2_qos_param_type qos_type, - unsigned int trip_to_memory_uclk_cycles, - unsigned int max_round_trip_to_furthest_cs_fclk_cycles, - unsigned int mall_overhead_fclk_cycles, - double umc_max_latency_margin, - double fabric_max_transport_latency_margin) -{ - double trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4x) { - trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock - + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) - + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); - } else { - trip_to_memory_us = UrgLatency; - } - -#ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4x) { - DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); - DML_LOG_VERBOSE("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); - DML_LOG_VERBOSE("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); - DML_LOG_VERBOSE("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles); - DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); - DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); - DML_LOG_VERBOSE("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin); - DML_LOG_VERBOSE("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin); - } else { - DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); - } - DML_LOG_VERBOSE("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us); -#endif - - - return trip_to_memory_us; -} - -static double CalculateMetaTripToMemory( - double UrgLatency, - double FabricClock, - double uclk_freq_mhz, - enum dml2_qos_param_type qos_type, - unsigned int meta_trip_to_memory_uclk_cycles, - unsigned int meta_trip_to_memory_fclk_cycles, - double umc_max_latency_margin, - double fabric_max_transport_latency_margin) -{ - double meta_trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4x) { - meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) - + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); - } else { - meta_trip_to_memory_us = UrgLatency; - } - -#ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4x) { - DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); - DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); - DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); - DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); - } else { - DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); - } - 
DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us); -#endif - - - return meta_trip_to_memory_us; -} - -static void calculate_cursor_req_attributes( - unsigned int cursor_width, - unsigned int cursor_bpp, - - // output - unsigned int *cursor_lines_per_chunk, - unsigned int *cursor_bytes_per_line, - unsigned int *cursor_bytes_per_chunk, - unsigned int *cursor_bytes) -{ - unsigned int cursor_bytes_per_req = 0; - unsigned int cursor_width_bytes = 0; - unsigned int cursor_height = 0; - - //SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply. - //- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B - //- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width - - //The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows. - - cursor_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1); - if (cursor_width_bytes <= 64) - cursor_bytes_per_req = 64; - else if (cursor_width_bytes <= 128) - cursor_bytes_per_req = 128; - else - cursor_bytes_per_req = 256; - - //If cursor_width_bytes is greater than 256B, then multiple 256B requests are issued to fetch the entire cursor line. - *cursor_bytes_per_line = (unsigned int)math_ceil2((double)cursor_width_bytes, cursor_bytes_per_req); - - //Nominally, the cursor chunk is 1KB or 2KB but it is restricted to a power of 2 number of lines with a maximum of 16 lines. - if (cursor_bpp == 2) { - *cursor_lines_per_chunk = 16; - } else if (cursor_bpp == 32) { - if (cursor_width <= 32) - *cursor_lines_per_chunk = 16; - else if (cursor_width <= 64) - *cursor_lines_per_chunk = 8; - else if (cursor_width <= 128) - *cursor_lines_per_chunk = 4; - else - *cursor_lines_per_chunk = 2; - } else if (cursor_bpp == 64) { - if (cursor_width <= 16) - *cursor_lines_per_chunk = 16; - else if (cursor_width <= 32) - *cursor_lines_per_chunk = 8; - else if (cursor_width <= 64) - *cursor_lines_per_chunk = 4; - else if (cursor_width <= 128) - *cursor_lines_per_chunk = 2; - else - *cursor_lines_per_chunk = 1; - } else { - if (cursor_width > 0) { - DML_LOG_VERBOSE("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp); - DML_ASSERT(0); - } - } - - *cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk; - - // For the cursor implementation, all requested data is stored in the return buffer. Given this fact, the cursor_bytes can be directly compared with the CursorBufferSize. 
- // Only cursor_width is provided for worst case sizing so assume that the cursor is square - cursor_height = cursor_width; - *cursor_bytes = *cursor_bytes_per_line * cursor_height; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp); - DML_LOG_VERBOSE("DML::%s: cursor_width = %d\n", __func__, cursor_width); - DML_LOG_VERBOSE("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes); - DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req); - DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk); - DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line); - DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk); - DML_LOG_VERBOSE("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes); - DML_LOG_VERBOSE("DML::%s: cursor_pitch = %d\n", __func__, cursor_bpp == 2 ? 256 : (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1)); -#endif -} - -static void calculate_cursor_urgent_burst_factor( - unsigned int CursorBufferSize, - unsigned int CursorWidth, - unsigned int cursor_bytes_per_chunk, - unsigned int cursor_lines_per_chunk, - double LineTime, - double UrgentLatency, - - double *UrgentBurstFactorCursor, - bool *NotEnoughUrgentLatencyHiding) -{ - unsigned int LinesInCursorBuffer = 0; - double CursorBufferSizeInTime = 0; - - if (CursorWidth > 0) { - LinesInCursorBuffer = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1) * cursor_lines_per_chunk; - - CursorBufferSizeInTime = LinesInCursorBuffer * LineTime; - if (CursorBufferSizeInTime - UrgentLatency <= 0) { - *NotEnoughUrgentLatencyHiding = 1; - *UrgentBurstFactorCursor = 1; - } else { - *NotEnoughUrgentLatencyHiding = 0; - *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: LinesInCursorBuffer = %u\n", __func__, LinesInCursorBuffer); - DML_LOG_VERBOSE("DML::%s: CursorBufferSizeInTime = %f\n", __func__, CursorBufferSizeInTime); - DML_LOG_VERBOSE("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize); - DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk); - DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk); - DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor); - DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); -#endif - - } -} - -static void CalculateUrgentBurstFactor( - const struct dml2_plane_parameters *plane_cfg, - unsigned int swath_width_luma_ub, - unsigned int swath_width_chroma_ub, - unsigned int SwathHeightY, - unsigned int SwathHeightC, - double LineTime, - double UrgentLatency, - double VRatio, - double VRatioC, - double BytePerPixelInDETY, - double BytePerPixelInDETC, - unsigned int DETBufferSizeY, - unsigned int DETBufferSizeC, - // Output - double *UrgentBurstFactorLuma, - double *UrgentBurstFactorChroma, - bool *NotEnoughUrgentLatencyHiding) -{ - double LinesInDETLuma; - double LinesInDETChroma; - double DETBufferSizeInTimeLuma; - double DETBufferSizeInTimeChroma; - - *NotEnoughUrgentLatencyHiding = 0; - *UrgentBurstFactorLuma = 0; - *UrgentBurstFactorChroma = 0; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); - 
DML_LOG_VERBOSE("DML::%s: VRatioC = %f\n", __func__, VRatioC); - DML_LOG_VERBOSE("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY); - DML_LOG_VERBOSE("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC); - DML_LOG_VERBOSE("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY); - DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); - DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); -#endif - DML_ASSERT(VRatio > 0); - - LinesInDETLuma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; - - DETBufferSizeInTimeLuma = math_floor2(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; - if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { - *NotEnoughUrgentLatencyHiding = 1; - *UrgentBurstFactorLuma = 1; - } else { - *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); - } - - if (BytePerPixelInDETC > 0) { - LinesInDETChroma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub; - - DETBufferSizeInTimeChroma = math_floor2(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC; - if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { - *NotEnoughUrgentLatencyHiding = 1; - *UrgentBurstFactorChroma = 1; - } else { - *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); - } - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: LinesInDETLuma = %f\n", __func__, LinesInDETLuma); - DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); - DML_LOG_VERBOSE("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, DETBufferSizeInTimeLuma); - DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma); - DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); - DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); -#endif -} - -static void CalculateDCFCLKDeepSleepTdlut( - const struct dml2_display_cfg *display_cfg, - unsigned int NumberOfActiveSurfaces, - unsigned int BytePerPixelY[], - unsigned int BytePerPixelC[], - unsigned int SwathWidthY[], - unsigned int SwathWidthC[], - unsigned int DPPPerSurface[], - double PSCL_THROUGHPUT[], - double PSCL_THROUGHPUT_CHROMA[], - double Dppclk[], - double ReadBandwidthLuma[], - double ReadBandwidthChroma[], - unsigned int ReturnBusWidth, - - double dispclk, - unsigned int tdlut_bytes_to_deliver[], - double prefetch_swath_time_us[], - - // Output - double *DCFClkDeepSleep) -{ - double DisplayPipeLineDeliveryTimeLuma; - double DisplayPipeLineDeliveryTimeChroma; - double DCFClkDeepSleepPerSurface[DML2_MAX_PLANES]; - double ReadBandwidth = 0.0; - - for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - - if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { - DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz; - } else { - DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; - } - if (BytePerPixelC[k] == 0) { - DisplayPipeLineDeliveryTimeChroma = 0; - } else { - if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) 
{ - DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz; - } else { - DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; - } - } - - if (BytePerPixelC[k] > 0) { - DCFClkDeepSleepPerSurface[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, - __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); - } else { - DCFClkDeepSleepPerSurface[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; - } - DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16); - - // adjust for 3dlut delivery time - if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) { - double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k]; - - DML_LOG_VERBOSE("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk); - - // increase the deepsleep dcfclk to match the original dispclk throughput rate - if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) { - DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], tdlut_required_deepsleep_dcfclk); - DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], dispclk / 4.0); - } - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); - DML_LOG_VERBOSE("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); -#endif - } - - for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; - } - - *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__); - DML_LOG_VERBOSE("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); - DML_LOG_VERBOSE("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); - DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); -#endif - - for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); - } - - DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); -} - -static noinline_for_stack void CalculateDCFCLKDeepSleep( - const struct dml2_display_cfg *display_cfg, - unsigned int NumberOfActiveSurfaces, - unsigned int BytePerPixelY[], - unsigned int BytePerPixelC[], - unsigned int SwathWidthY[], - unsigned int SwathWidthC[], - unsigned int DPPPerSurface[], - double PSCL_THROUGHPUT[], - double PSCL_THROUGHPUT_CHROMA[], - double Dppclk[], - double ReadBandwidthLuma[], - double ReadBandwidthChroma[], - unsigned int ReturnBusWidth, - - // Output - double *DCFClkDeepSleep) -{ - double 
zero_double[DML2_MAX_PLANES]; - unsigned int zero_integer[DML2_MAX_PLANES]; - - memset(zero_double, 0, DML2_MAX_PLANES * sizeof(double)); - memset(zero_integer, 0, DML2_MAX_PLANES * sizeof(unsigned int)); - - CalculateDCFCLKDeepSleepTdlut( - display_cfg, - NumberOfActiveSurfaces, - BytePerPixelY, - BytePerPixelC, - SwathWidthY, - SwathWidthC, - DPPPerSurface, - PSCL_THROUGHPUT, - PSCL_THROUGHPUT_CHROMA, - Dppclk, - ReadBandwidthLuma, - ReadBandwidthChroma, - ReturnBusWidth, - 0, - zero_integer, //tdlut_bytes_to_deliver, - zero_double, //prefetch_swath_time_us, - - // Output - DCFClkDeepSleep); -} - -static double CalculateWriteBackDelay( - enum dml2_source_format_class WritebackPixelFormat, - double WritebackHRatio, - double WritebackVRatio, - unsigned int WritebackVTaps, - unsigned int WritebackDestinationWidth, - unsigned int WritebackDestinationHeight, - unsigned int WritebackSourceHeight, - unsigned int HTotal) -{ - double CalculateWriteBackDelay; - double Line_length; - double Output_lines_last_notclamped; - double WritebackVInit; - - WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; - Line_length = math_max2((double)WritebackDestinationWidth, math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); - Output_lines_last_notclamped = WritebackDestinationHeight - 1 - math_ceil2(((double)WritebackSourceHeight - (double)WritebackVInit) / (double)WritebackVRatio, 1.0); - if (Output_lines_last_notclamped < 0) { - CalculateWriteBackDelay = 0; - } else { - CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; - } - return CalculateWriteBackDelay; -} - -static unsigned int CalculateMaxVStartup( - bool ptoi_supported, - unsigned int vblank_nom_default_us, - const struct dml2_timing_cfg *timing, - double write_back_delay_us) -{ - unsigned int vblank_size = 0; - unsigned int max_vstartup_lines = 0; - - double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000); - unsigned int vblank_actual = timing->v_total - timing->v_active; - unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0); - unsigned int vblank_avail = (timing->vblank_nom == 0) ? 
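/* Editor's note (worked example with hypothetical numbers): h_total = 2200
 * at pixel_clock_khz = 148500 gives line_time_us = 2200 / 148.5 ~= 14.81 us,
 * so vblank_nom_default_us = 600 falls back to floor(600 / 14.81) = 40
 * lines whenever the timing reports vblank_nom == 0.
 */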
vblank_nom_default_in_line : (unsigned int)timing->vblank_nom; - - vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail); - - if (timing->interlaced && !ptoi_supported) - max_vstartup_lines = (unsigned int)(math_floor2((vblank_size - 1) / 2.0, 1.0)); - else - max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0)); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: VBlankNom = %lu\n", __func__, timing->vblank_nom); - DML_LOG_VERBOSE("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); - DML_LOG_VERBOSE("DML::%s: line_time_us = %f\n", __func__, line_time_us); - DML_LOG_VERBOSE("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); - DML_LOG_VERBOSE("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); - DML_LOG_VERBOSE("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); -#endif - max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START); - return max_vstartup_lines; -} - -static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch, - struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p) -{ - unsigned int MaximumSwathHeightY[DML2_MAX_PLANES] = { 0 }; - unsigned int MaximumSwathHeightC[DML2_MAX_PLANES] = { 0 }; - unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES] = { 0 }; - unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES] = { 0 }; - unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES] = { 0 }; - unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES] = { 0 }; - - unsigned int TotalActiveDPP = 0; - bool NoChromaOrLinear = true; - unsigned int SurfaceDoingUnboundedRequest = 0; - unsigned int DETBufferSizeInKByteForSwathCalculation; - - const long TTUFIFODEPTH = 8; - const long MAXIMUMCOMPRESSION = 4; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - DML_LOG_VERBOSE("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); - } -#endif - CalculateSwathWidth( - p->display_cfg, - p->ForceSingleDPP, - p->NumberOfActiveSurfaces, - p->ODMMode, - p->BytePerPixY, - p->BytePerPixC, - p->Read256BytesBlockHeightY, - p->Read256BytesBlockHeightC, - p->Read256BytesBlockWidthY, - p->Read256BytesBlockWidthC, - p->surf_linear128_l, - p->surf_linear128_c, - p->DPPPerSurface, - - // Output - p->req_per_swath_ub_l, - p->req_per_swath_ub_c, - SwathWidthSingleDPP, - SwathWidthSingleDPPChroma, - p->SwathWidth, - p->SwathWidthChroma, - MaximumSwathHeightY, - MaximumSwathHeightC, - p->swath_width_luma_ub, - p->swath_width_chroma_ub); - - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]); - p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); - DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); - DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); - DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); - DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); - DML_LOG_VERBOSE("DML::%s: k=%u 
swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); - DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); - DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); - DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); -#endif - if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) { - p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256)); - p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256)); - } - } - - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]); - if (p->DPPPerSurface[k] > 0) - SurfaceDoingUnboundedRequest = k; - if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha - || p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { - NoChromaOrLinear = false; - } - } - - *p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear); - - CalculateDETBufferSize( - &scratch->CalculateDETBufferSize_locals, - p->display_cfg, - p->ForceSingleDPP, - p->NumberOfActiveSurfaces, - *p->UnboundedRequestEnabled, - p->nomDETInKByte, - p->MaxTotalDETInKByte, - p->ConfigReturnBufferSizeInKByte, - p->MinCompressedBufferSizeInKByte, - p->ConfigReturnBufferSegmentSizeInkByte, - p->CompressedBufferSegmentSizeInkByte, - p->ReadBandwidthLuma, - p->ReadBandwidthChroma, - p->full_swath_bytes_l, - p->full_swath_bytes_c, - p->DPPPerSurface, - - // Output - p->DETBufferSizeInKByte, // per hubp pipe - p->CompressedBufferSizeInkByte); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); - DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); - DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); - DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); - DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); -#endif - - *p->ViewportSizeSupport = true; - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - - DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 
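/* Editor's note: phantom (SubVP) pipes size swaths against a fixed
 * 1024 KByte DET rather than the per-pipe allocation. The ladder below
 * keeps full swath heights only while the luma+chroma pair fits in half
 * the DET; otherwise it halves the dominant plane first (luma when
 * full_swath_bytes_l >= 1.5x chroma, chroma in the mirrored case), and
 * halves both as a last resort, shrinking the halved plane's request
 * size to 128 or 64 bytes depending on rotation and bytes per pixel.
 */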
1024 : p->DETBufferSizeInKByte[k]); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); -#endif - if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { - p->SwathHeightY[k] = MaximumSwathHeightY[k]; - p->SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; - RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; - - if (p->surf_linear128_l[k]) - p->request_size_bytes_luma[k] = 128; - else - p->request_size_bytes_luma[k] = 256; - - if (p->surf_linear128_c[k]) - p->request_size_bytes_chroma[k] = 128; - else - p->request_size_bytes_chroma[k] = 256; - - } else if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - p->SwathHeightY[k] = MaximumSwathHeightY[k]; - p->SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; - RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; - p->request_size_bytes_luma[k] = 256; - p->request_size_bytes_chroma[k] = 256; - - } else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; - p->SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; - RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; - p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; - p->request_size_bytes_chroma[k] = 256; - - } else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - p->SwathHeightY[k] = MaximumSwathHeightY[k]; - p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; - RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; - RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; - p->request_size_bytes_luma[k] = 256; - p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; - - } else { - p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; - p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; - RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; - RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; - p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; - p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 
128 : 64; - } - - if (p->SwathHeightC[k] == 0) - p->request_size_bytes_chroma[k] = 0; - - if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || - p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) { - *p->ViewportSizeSupport = false; - DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]); - DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]); - DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); - DML_LOG_VERBOSE("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]); - DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]); - DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]); - DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]); - p->ViewportSizeSupportPerSurface[k] = false; - } else { - p->ViewportSizeSupportPerSurface[k] = true; - } - - if (p->SwathHeightC[k] == 0) { -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k); -#endif - p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024; - p->DETBufferSizeC[k] = 0; - } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) { -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k); -#endif - p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; - p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; - } else { -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k); -#endif - p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024)); - p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k]; - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); - DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); - DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); - DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); - DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); - DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); - DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); - DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); - DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); - DML_LOG_VERBOSE("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); -#endif - - } - - *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64; - if (*p->UnboundedRequestEnabled) { - *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, - (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH 
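/* editor's note: the reserved space is at least two pixel chunks; with
 * unbounded requesting it must also cover the ROB minus TTUFIFODEPTH (8)
 * swaths of the unbounded surface, de-rated by the 4:1 maximum
 * compression when MRQ is present */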
/ (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); - DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); -#endif - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b); -#endif - - *p->hw_debug5 = false; -#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE - if (p->NumberOfActiveSurfaces > 1) - *p->hw_debug5 = true; -#else - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1) - && p->display_cfg->plane_descriptors[k].surface.dcc.enable - && ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1) - + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) - *p->hw_debug5 = true; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); - DML_LOG_VERBOSE("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); - DML_LOG_VERBOSE("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); - DML_LOG_VERBOSE("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); - DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); - DML_LOG_VERBOSE("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); -#endif - } -#endif -} - -static enum dml2_odm_mode DecideODMMode(unsigned int HActive, - double MaxDispclk, - unsigned int MaximumPixelsPerLinePerDSCUnit, - enum dml2_output_format_class OutFormat, - bool UseDSC, - unsigned int NumberOfDSCSlices, - double SurfaceRequiredDISPCLKWithoutODMCombine, - double SurfaceRequiredDISPCLKWithODMCombineTwoToOne, - double SurfaceRequiredDISPCLKWithODMCombineThreeToOne, - double SurfaceRequiredDISPCLKWithODMCombineFourToOne) -{ - enum dml2_odm_mode MinimumRequiredODMModeForMaxDispClock; - enum dml2_odm_mode MinimumRequiredODMModeForMaxDSCHActive; - enum dml2_odm_mode MinimumRequiredODMModeForMax420HActive; - enum dml2_odm_mode ODMMode = dml2_odm_mode_bypass; - - MinimumRequiredODMModeForMaxDispClock = - (SurfaceRequiredDISPCLKWithoutODMCombine <= MaxDispclk) ? dml2_odm_mode_bypass : - (SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk) ? dml2_odm_mode_combine_2to1 : - (SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1; - if (ODMMode < MinimumRequiredODMModeForMaxDispClock) - ODMMode = MinimumRequiredODMModeForMaxDispClock; - - if (UseDSC) { - MinimumRequiredODMModeForMaxDSCHActive = - (HActive <= 1 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_bypass : - (HActive <= 2 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_2to1 : - (HActive <= 3 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1; - if (ODMMode < MinimumRequiredODMModeForMaxDSCHActive) - ODMMode = MinimumRequiredODMModeForMaxDSCHActive; - } - - if (OutFormat == dml2_420) { - MinimumRequiredODMModeForMax420HActive = - (HActive <= 1 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_bypass : - (HActive <= 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? 
dml2_odm_mode_combine_2to1 : - (HActive <= 3 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1; - if (ODMMode < MinimumRequiredODMModeForMax420HActive) - ODMMode = MinimumRequiredODMModeForMax420HActive; - } - - if (UseDSC) { - if (ODMMode == dml2_odm_mode_bypass && NumberOfDSCSlices > 4) - ODMMode = dml2_odm_mode_combine_2to1; - if (ODMMode == dml2_odm_mode_combine_2to1 && NumberOfDSCSlices > 8) - ODMMode = dml2_odm_mode_combine_3to1; - if (ODMMode == dml2_odm_mode_combine_3to1 && NumberOfDSCSlices != 12) - ODMMode = dml2_odm_mode_combine_4to1; - } - - return ODMMode; -} - -static void CalculateODMConstraints( - enum dml2_odm_mode ODMUse, - double SurfaceRequiredDISPCLKWithoutODMCombine, - double SurfaceRequiredDISPCLKWithODMCombineTwoToOne, - double SurfaceRequiredDISPCLKWithODMCombineThreeToOne, - double SurfaceRequiredDISPCLKWithODMCombineFourToOne, - unsigned int MaximumPixelsPerLinePerDSCUnit, - /* Output */ - double *DISPCLKRequired, - unsigned int *NumberOfDPPRequired, - unsigned int *MaxHActiveForDSC, - unsigned int *MaxDSCSlices, - unsigned int *MaxHActiveFor420) -{ - switch (ODMUse) { - case dml2_odm_mode_combine_2to1: - *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; - *NumberOfDPPRequired = 2; - break; - case dml2_odm_mode_combine_3to1: - *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineThreeToOne; - *NumberOfDPPRequired = 3; - break; - case dml2_odm_mode_combine_4to1: - *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineFourToOne; - *NumberOfDPPRequired = 4; - break; - case dml2_odm_mode_auto: - case dml2_odm_mode_split_1to2: - case dml2_odm_mode_mso_1to2: - case dml2_odm_mode_mso_1to4: - case dml2_odm_mode_bypass: - default: - *DISPCLKRequired = SurfaceRequiredDISPCLKWithoutODMCombine; - *NumberOfDPPRequired = 1; - break; - } - *MaxHActiveForDSC = *NumberOfDPPRequired * MaximumPixelsPerLinePerDSCUnit; - *MaxDSCSlices = *NumberOfDPPRequired * DML_MAX_NUM_OF_SLICES_PER_DSC; - *MaxHActiveFor420 = *NumberOfDPPRequired * DML2_MAX_FMT_420_BUFFER_WIDTH; -} - -static bool ValidateODMMode(enum dml2_odm_mode ODMMode, - double MaxDispclk, - unsigned int HActive, - enum dml2_output_format_class OutFormat, - bool UseDSC, - unsigned int NumberOfDSCSlices, - unsigned int TotalNumberOfActiveDPP, - unsigned int TotalNumberOfActiveOPP, - unsigned int MaxNumDPP, - unsigned int MaxNumOPP, - double DISPCLKRequired, - unsigned int NumberOfDPPRequired, - unsigned int MaxHActiveForDSC, - unsigned int MaxDSCSlices, - unsigned int MaxHActiveFor420) -{ - bool are_odm_segments_symmetrical = (ODMMode == dml2_odm_mode_combine_3to1) ? UseDSC : true; - bool is_max_dsc_slice_required = (ODMMode == dml2_odm_mode_combine_3to1); - unsigned int pixels_per_clock_cycle = (OutFormat == dml2_420 || OutFormat == dml2_n422) ? 2 : 1; - unsigned int h_timing_div_mode = - (ODMMode == dml2_odm_mode_combine_4to1 || ODMMode == dml2_odm_mode_combine_3to1) ? 4 : - (ODMMode == dml2_odm_mode_combine_2to1) ? 2 : pixels_per_clock_cycle; - - if (DISPCLKRequired > MaxDispclk) - return false; - if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP || (TotalNumberOfActiveOPP + NumberOfDPPRequired) > MaxNumOPP) - return false; - if (are_odm_segments_symmetrical) { - if (HActive % (NumberOfDPPRequired * pixels_per_clock_cycle)) - return false; - } - if (HActive % h_timing_div_mode) - /* - * TODO - OTG_H_TOTAL, OTG_H_BLANK_START/END and - * OTG_H_SYNC_A_START/END all need to be visible by h timing div - * mode. This logic only checks H active. 
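* Editor's illustration: combine_2to1 forces h_timing_div_mode = 2, so an
* odd HActive is rejected here even though the OTG blank/sync positions
* are not (yet) validated against the divider.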
- */ - return false; - - if (UseDSC) { - if (HActive > MaxHActiveForDSC) - return false; - if (NumberOfDSCSlices > MaxDSCSlices) - return false; - if (HActive % NumberOfDSCSlices) - return false; - if (NumberOfDSCSlices % NumberOfDPPRequired) - return false; - if (is_max_dsc_slice_required) { - if (NumberOfDSCSlices != MaxDSCSlices) - return false; - } - } - - if (OutFormat == dml2_420) { - if (HActive > MaxHActiveFor420) - return false; - } - - return true; -} - -static noinline_for_stack void CalculateODMMode( - unsigned int MaximumPixelsPerLinePerDSCUnit, - unsigned int HActive, - enum dml2_output_format_class OutFormat, - enum dml2_output_encoder_class Output, - enum dml2_odm_mode ODMUse, - double MaxDispclk, - bool DSCEnable, - unsigned int TotalNumberOfActiveDPP, - unsigned int TotalNumberOfActiveOPP, - unsigned int MaxNumDPP, - unsigned int MaxNumOPP, - double PixelClock, - unsigned int NumberOfDSCSlices, - - // Output - bool *TotalAvailablePipesSupport, - unsigned int *NumberOfDPP, - enum dml2_odm_mode *ODMMode, - double *RequiredDISPCLKPerSurface) -{ - double SurfaceRequiredDISPCLKWithoutODMCombine; - double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; - double SurfaceRequiredDISPCLKWithODMCombineThreeToOne; - double SurfaceRequiredDISPCLKWithODMCombineFourToOne; - double DISPCLKRequired; - unsigned int NumberOfDPPRequired; - unsigned int MaxHActiveForDSC; - unsigned int MaxDSCSlices; - unsigned int MaxHActiveFor420; - bool success; - bool UseDSC = DSCEnable && (NumberOfDSCSlices > 0); - enum dml2_odm_mode DecidedODMMode; - bool isTMDS420 = (OutFormat == dml2_420 && Output == dml2_hdmi); - - SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock, isTMDS420); - SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock, isTMDS420); - SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock, isTMDS420); - SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock, isTMDS420); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse); - DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output); - DML_LOG_VERBOSE("DML::%s: DSCEnable = %d\n", __func__, DSCEnable); - DML_LOG_VERBOSE("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk); - DML_LOG_VERBOSE("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit); - DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine); - DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne); - DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne); - DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne); -#endif - if (ODMUse == dml2_odm_mode_auto) - DecidedODMMode = DecideODMMode(HActive, - MaxDispclk, - MaximumPixelsPerLinePerDSCUnit, - OutFormat, - UseDSC, - NumberOfDSCSlices, - SurfaceRequiredDISPCLKWithoutODMCombine, - SurfaceRequiredDISPCLKWithODMCombineTwoToOne, - SurfaceRequiredDISPCLKWithODMCombineThreeToOne, - SurfaceRequiredDISPCLKWithODMCombineFourToOne); - else - DecidedODMMode = ODMUse; - CalculateODMConstraints(DecidedODMMode, - 
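/* editor's note: for combine_Nto1 (N in {2, 3, 4}) this reports N DPPs
 * and the matching DISPCLK, and scales the limits linearly:
 * MaxHActiveForDSC = N * MaximumPixelsPerLinePerDSCUnit,
 * MaxDSCSlices = N * DML_MAX_NUM_OF_SLICES_PER_DSC,
 * MaxHActiveFor420 = N * DML2_MAX_FMT_420_BUFFER_WIDTH;
 * every other mode behaves as bypass (N = 1) */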
SurfaceRequiredDISPCLKWithoutODMCombine, - SurfaceRequiredDISPCLKWithODMCombineTwoToOne, - SurfaceRequiredDISPCLKWithODMCombineThreeToOne, - SurfaceRequiredDISPCLKWithODMCombineFourToOne, - MaximumPixelsPerLinePerDSCUnit, - &DISPCLKRequired, - &NumberOfDPPRequired, - &MaxHActiveForDSC, - &MaxDSCSlices, - &MaxHActiveFor420); - success = ValidateODMMode(DecidedODMMode, - MaxDispclk, - HActive, - OutFormat, - UseDSC, - NumberOfDSCSlices, - TotalNumberOfActiveDPP, - TotalNumberOfActiveOPP, - MaxNumDPP, - MaxNumOPP, - DISPCLKRequired, - NumberOfDPPRequired, - MaxHActiveForDSC, - MaxDSCSlices, - MaxHActiveFor420); - - *ODMMode = DecidedODMMode; - *TotalAvailablePipesSupport = success; - *NumberOfDPP = NumberOfDPPRequired; - *RequiredDISPCLKPerSurface = success ? DISPCLKRequired : 0; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: ODMMode = %d\n", __func__, *ODMMode); - DML_LOG_VERBOSE("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP); - DML_LOG_VERBOSE("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport); - DML_LOG_VERBOSE("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface); -#endif -} - -static noinline_for_stack void CalculateOutputLink( - struct dml2_core_internal_scratch *s, - double PHYCLK, - double PHYCLKD18, - double PHYCLKD32, - double Downspreading, - enum dml2_output_encoder_class Output, - enum dml2_output_format_class OutputFormat, - unsigned int HTotal, - unsigned int HActive, - double PixelClockBackEnd, - double ForcedOutputLinkBPP, - unsigned int DSCInputBitPerComponent, - unsigned int NumberOfDSCSlices, - double AudioSampleRate, - unsigned int AudioSampleLayout, - enum dml2_odm_mode ODMModeNoDSC, - enum dml2_odm_mode ODMModeDSC, - enum dml2_dsc_enable_option DSCEnable, - unsigned int OutputLinkDPLanes, - enum dml2_output_link_dp_rate OutputLinkDPRate, - - // Output - bool *RequiresDSC, - bool *RequiresFEC, - double *OutBpp, - enum dml2_core_internal_output_type *OutputType, - enum dml2_core_internal_output_type_rate *OutputRate, - unsigned int *RequiredSlots) -{ - bool LinkDSCEnable; - unsigned int dummy; - *RequiresDSC = false; - *RequiresFEC = false; - *OutBpp = 0; - - *OutputType = dml2_core_internal_output_type_unknown; - *OutputRate = dml2_core_internal_output_rate_unknown; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable); - DML_LOG_VERBOSE("DML::%s: PHYCLK = %f\n", __func__, PHYCLK); - DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); - DML_LOG_VERBOSE("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate); - DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); - DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); - DML_LOG_VERBOSE("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC); - DML_LOG_VERBOSE("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC); - DML_LOG_VERBOSE("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP); - DML_LOG_VERBOSE("DML::%s: Output (encoder) = %u\n", __func__, Output); - DML_LOG_VERBOSE("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate); -#endif - { - if (Output == dml2_hdmi) { - *RequiresDSC = false; - *RequiresFEC = false; - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, 
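/* editor's note: plain TMDS HDMI never uses DSC or FEC; the link budget
 * here is min(600, PHYCLK) MHz x 10 bits over 3 channels */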
ODMModeDSC, &dummy); - //OutputTypeAndRate = "HDMI"; - *OutputType = dml2_core_internal_output_type_hdmi; - } else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) { - if (DSCEnable == dml2_dsc_enable) { - *RequiresDSC = true; - LinkDSCEnable = true; - if (Output == dml2_dp || Output == dml2_dp2p0) { - *RequiresFEC = true; - } else { - *RequiresFEC = false; - } - } else { - *RequiresDSC = false; - LinkDSCEnable = false; - if (Output == dml2_dp2p0) { - *RequiresFEC = true; - } else { - *RequiresFEC = false; - } - } - if (Output == dml2_dp2p0) { - *OutBpp = 0; - if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000.0 / 32) { - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - if (*OutBpp == 0 && PHYCLKD32 < 13500.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - } - //OutputTypeAndRate = Output & " UHBR10"; - *OutputType = dml2_core_internal_output_type_dp2p0; - *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10; - } - if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500.0 / 32) { - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - - if (*OutBpp == 0 && PHYCLKD32 < 20000.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - } - //OutputTypeAndRate = Output & " UHBR13p5"; - *OutputType = dml2_core_internal_output_type_dp2p0; - *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5; - } - if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000.0 / 32) { - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - *OutBpp = 
TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - } - //OutputTypeAndRate = Output & " UHBR20"; - *OutputType = dml2_core_internal_output_type_dp2p0; - *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20; - } - } else { // output is dp or edp - *OutBpp = 0; - if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) { - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - if (Output == dml2_dp) { - *RequiresFEC = true; - } - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - } - //OutputTypeAndRate = Output & " HBR"; - *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; - *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr; - } - if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) { - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - - if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - if (Output == dml2_dp) { - *RequiresFEC = true; - } - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - } - //OutputTypeAndRate = Output & " HBR2"; - *OutputType = (Output == dml2_dp) ? 
dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; - *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2; - } - if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - - if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - if (Output == dml2_dp) { - *RequiresFEC = true; - } - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, - OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - } - //OutputTypeAndRate = Output & " HBR3"; - *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; - *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3; - } - } - } else if (Output == dml2_hdmifrl) { - if (DSCEnable == dml2_dsc_enable) { - *RequiresDSC = true; - LinkDSCEnable = true; - *RequiresFEC = true; - } else { - *RequiresDSC = false; - LinkDSCEnable = false; - *RequiresFEC = false; - } - *OutBpp = 0; - if (PHYCLKD18 >= 3000.0 / 18) { - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); - //OutputTypeAndRate = Output & "3x3"; - *OutputType = dml2_core_internal_output_type_hdmifrl; - *OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3; - } - if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) { - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); - //OutputTypeAndRate = Output & "6x3"; - *OutputType = dml2_core_internal_output_type_hdmifrl; - *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3; - } - if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) { - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); - //OutputTypeAndRate = Output & "6x4"; - *OutputType = dml2_core_internal_output_type_hdmifrl; - *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4; - } - if (*OutBpp == 0 && PHYCLKD18 >= 8000.0 / 18) { - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); - //OutputTypeAndRate = Output & "8x4"; - *OutputType = 
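/* editor's note: the FRL rungs are probed as Gbps x lanes (3x3, 6x3,
 * 6x4, 8x4, 10x4, 12x4), each gated by PHYCLKD18 >= rate_mbps / 18;
 * only the top two rungs retry with DSC when it is merely optional */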
dml2_core_internal_output_type_hdmifrl; - *OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4; - } - if (*OutBpp == 0 && PHYCLKD18 >= 10000.0 / 18) { - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); - if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000.0 / 18) { - *RequiresDSC = true; - LinkDSCEnable = true; - *RequiresFEC = true; - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); - } - //OutputTypeAndRate = Output & "10x4"; - *OutputType = dml2_core_internal_output_type_hdmifrl; - *OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4; - } - if (*OutBpp == 0 && PHYCLKD18 >= 12000.0 / 18) { - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); - if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { - *RequiresDSC = true; - LinkDSCEnable = true; - *RequiresFEC = true; - *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); - } - //OutputTypeAndRate = Output & "12x4"; - *OutputType = dml2_core_internal_output_type_hdmifrl; - *OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4; - } - } - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC); - DML_LOG_VERBOSE("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC); - DML_LOG_VERBOSE("DML::%s: OutBpp = %f\n", __func__, *OutBpp); -#endif -} - -static double CalculateWriteBackDISPCLK( - enum dml2_source_format_class WritebackPixelFormat, - double PixelClock, - double WritebackHRatio, - double WritebackVRatio, - unsigned int WritebackHTaps, - unsigned int WritebackVTaps, - unsigned int WritebackSourceWidth, - unsigned int WritebackDestinationWidth, - unsigned int HTotal, - unsigned int WritebackLineBufferSize) -{ - double DISPCLK_H, DISPCLK_V, DISPCLK_HB; - - DISPCLK_H = PixelClock * math_ceil2((double)WritebackHTaps / 8.0, 1) / WritebackHRatio; - DISPCLK_V = PixelClock * (WritebackVTaps * math_ceil2((double)WritebackDestinationWidth / 6.0, 1) + 8.0) / (double)HTotal; - DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (double)WritebackSourceWidth; - return math_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); -} - -static double RequiredDTBCLK( - bool DSCEnable, - double PixelClock, - enum dml2_output_format_class OutputFormat, - double OutputBpp, - unsigned int DSCSlices, - unsigned int HTotal, - unsigned int HActive, - unsigned int AudioRate, - unsigned int AudioLayout) -{ - if (DSCEnable != true) { - return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); - } else { - double 
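/* editor's note (hypothetical numbers): with 4 slices, OutputBpp = 12 and
 * HActive = 3840, HCActive = ceil(4 * ceil(12 * ceil(3840 / 4) / 8) / 3)
 * = ceil(5760 / 3) = 1920 tribytes of compressed active per line; the
 * result is max(word rate / 4, average tribyte rate / 4, active tribyte
 * rate / 4, 25 MHz) * 1.002.
 */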
PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2); - double HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * math_ceil2(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); - double HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); - double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; - double HActiveTribyteRate = PixelWordRate * HCActive / HActive; - return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; - } -} - -static unsigned int DSCDelayRequirement( - bool DSCEnabled, - enum dml2_odm_mode ODMMode, - unsigned int DSCInputBitPerComponent, - double OutputBpp, - unsigned int HActive, - unsigned int HTotal, - unsigned int NumberOfDSCSlices, - enum dml2_output_format_class OutputFormat, - enum dml2_output_encoder_class Output, - double PixelClock, - double PixelClockBackEnd) -{ - unsigned int DSCDelayRequirement_val = 0; - unsigned int NumberOfDSCSlicesFactor = 1; - - if (DSCEnabled == true && OutputBpp != 0) { - - if (ODMMode == dml2_odm_mode_combine_4to1) - NumberOfDSCSlicesFactor = 4; - else if (ODMMode == dml2_odm_mode_combine_3to1) - NumberOfDSCSlicesFactor = 3; - else if (ODMMode == dml2_odm_mode_combine_2to1) - NumberOfDSCSlicesFactor = 2; - - DSCDelayRequirement_val = NumberOfDSCSlicesFactor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0)), - (NumberOfDSCSlices / NumberOfDSCSlicesFactor), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output)); - - DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val + (HTotal - HActive) * math_ceil2((double)DSCDelayRequirement_val / (double)HActive, 1.0)); - DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd); - - } else { - DSCDelayRequirement_val = 0; - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled); - DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, ODMMode); - DML_LOG_VERBOSE("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); - DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); - DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); - DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); - DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); - DML_LOG_VERBOSE("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); - DML_LOG_VERBOSE("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); - DML_LOG_VERBOSE("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); - DML_LOG_VERBOSE("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); -#endif - - return DSCDelayRequirement_val; -} - -static void CalculateSurfaceSizeInMall( - const struct dml2_display_cfg *display_cfg, - unsigned int NumberOfActiveSurfaces, - unsigned int MALLAllocatedForDCN, - unsigned int BytesPerPixelY[], - unsigned int BytesPerPixelC[], - unsigned int Read256BytesBlockWidthY[], - unsigned int Read256BytesBlockWidthC[], - unsigned int Read256BytesBlockHeightY[], - unsigned int Read256BytesBlockHeightC[], - unsigned int ReadBlockWidthY[], - unsigned int ReadBlockWidthC[], - unsigned int ReadBlockHeightY[], - unsigned int ReadBlockHeightC[], - - // Output - unsigned int SurfaceSizeInMALL[], - bool *ExceededMALLSize) -{ - unsigned int TotalSurfaceSizeInMALLForSS = 0; - unsigned int TotalSurfaceSizeInMALLForSubVP = 0; 
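/* editor's note: the loops that follow compute each plane's MALL
 * footprint (the viewport clamped to read-block granularity when
 * stationary, otherwise the fetch window implied by the viewport size),
 * then tally static-screen and SubVP phantom usage separately; since the
 * two never run at the same time, each sum is checked against the full
 * MALLAllocatedForDCN budget on its own */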
- unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024; - - for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition; - const struct dml2_surface_cfg *surface = &display_cfg->plane_descriptors[k].surface; - - if (composition->viewport.stationary) { - SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]), - math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - - math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) * - math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]), - math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - - math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]); - - if (ReadBlockWidthC[k] > 0) { - SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + - math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]), - math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - - math_floor2((double)composition->viewport.plane1.y_start, ReadBlockWidthC[k])) * - math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]), - math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - - math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]); - } - } else { - SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * - math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]); - if (ReadBlockWidthC[k] > 0) { - SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + - math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * - math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]); - } - } - } - - for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - /* SS and Subvp counted separate as they are never used at the same time */ - if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) - TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k]; - else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable) - TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k]; - } - - *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) || - (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024); - DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP); - DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS); - DML_LOG_VERBOSE("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize); -#endif -} - -static void 
calculate_tdlut_setting( - struct dml2_core_internal_scratch *scratch, - struct dml2_core_calcs_calculate_tdlut_setting_params *p) -{ - // locals - unsigned int tdlut_bpe = 8; - unsigned int tdlut_width; - unsigned int tdlut_pitch_bytes; - unsigned int tdlut_footprint_bytes; - unsigned int vmpg_bytes; - unsigned int tdlut_vmpg_per_frame; - unsigned int tdlut_pte_req_per_frame; - unsigned int tdlut_bytes_per_line; - double tdlut_drain_rate; - unsigned int tdlut_mpc_width; - unsigned int tdlut_bytes_per_group_simple; - - if (!p->setup_for_tdlut) { - *p->tdlut_groups_per_2row_ub = 0; - *p->tdlut_opt_time = 0; - *p->tdlut_drain_time = 0; - *p->tdlut_bytes_to_deliver = 0; - *p->tdlut_bytes_per_group = 0; - *p->tdlut_pte_bytes_per_frame = 0; - *p->tdlut_bytes_per_frame = 0; - return; - } - - if (p->tdlut_mpc_width_flag) { - tdlut_mpc_width = 33; - tdlut_bytes_per_group_simple = 39*256; - } else { - tdlut_mpc_width = 17; - tdlut_bytes_per_group_simple = 10*256; - } - - vmpg_bytes = p->gpuvm_page_size_kbytes * 1024; - - if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear) { - if (p->tdlut_width_mode == dml2_tdlut_width_17_cube) - tdlut_width = 4916; - else - tdlut_width = 35940; - } else { - if (p->tdlut_width_mode == dml2_tdlut_width_17_cube) - tdlut_width = 17; - else // dml2_tdlut_width_33_cube - tdlut_width = 33; - } - - if (p->is_gfx11) - tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); //256B alignment - else - tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 128); //128B alignment - - if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) - tdlut_footprint_bytes = tdlut_pitch_bytes * tdlut_width * tdlut_width; - else - tdlut_footprint_bytes = tdlut_pitch_bytes; - - if (!p->gpuvm_enable) { - tdlut_vmpg_per_frame = 0; - tdlut_pte_req_per_frame = 0; - } else { - tdlut_vmpg_per_frame = (unsigned int)math_ceil2(tdlut_footprint_bytes - 1, vmpg_bytes) / vmpg_bytes + 1; - tdlut_pte_req_per_frame = (unsigned int)math_ceil2(tdlut_vmpg_per_frame - 1, 8) / 8 + 1; - } - tdlut_bytes_per_line = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 64); //64b request - *p->tdlut_pte_bytes_per_frame = tdlut_pte_req_per_frame * 64; - - if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) { - //the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice - *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width; - *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; - //the delivery cycles is DispClk cycles per line * number of lines * number of slices - //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; - tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); - } else { - //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements - *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); - *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple; - //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1); - tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz; - } - - //the tdlut is fetched during the 2 row times of prefetch. 
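/* Editor's note (worked example, values hypothetical): sw_linear
 * addressing with a 17-cube LUT and 17-wide MPC gives
 * tdlut_bytes_per_line = ceil(17 * 8, 64) = 192, so
 * tdlut_bytes_per_frame = 192 * 17 * 17 = 55488,
 * tdlut_bytes_per_group = 192 * 17 = 3264, and
 * tdlut_groups_per_2row_ub = ceil(55488 / 3264) = 17 below.
 */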
- if (p->setup_for_tdlut) { - *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); - if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024) - *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; - else - *p->tdlut_opt_time = 0; - *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; - *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0); - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable); - DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes); - DML_LOG_VERBOSE("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame); - DML_LOG_VERBOSE("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame); - - DML_LOG_VERBOSE("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); - DML_LOG_VERBOSE("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); - DML_LOG_VERBOSE("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); - DML_LOG_VERBOSE("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); - DML_LOG_VERBOSE("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); - DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); - DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line); - DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group); - DML_LOG_VERBOSE("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate); - DML_LOG_VERBOSE("DML::%s: tdlut_delivery_cycles = %u\n", __func__, p->tdlut_addressing_mode == dml2_tdlut_sw_linear ? 
(unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width : (unsigned int)math_ceil2(tdlut_width/2.0, 1)); - DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); - DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); - DML_LOG_VERBOSE("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver); - DML_LOG_VERBOSE("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); -#endif -} - -static void CalculateTarb( - const struct dml2_display_cfg *display_cfg, - unsigned int PixelChunkSizeInKByte, - unsigned int NumberOfActiveSurfaces, - unsigned int NumberOfDPP[], - unsigned int dpte_group_bytes[], - unsigned int tdlut_bytes_per_group[], - double HostVMInefficiencyFactor, - double HostVMInefficiencyFactorPrefetch, - unsigned int HostVMMinPageSize, - double ReturnBW, - unsigned int MetaChunkSize, - - // output - double *Tarb, - double *Tarb_prefetch) -{ - double extra_bytes = 0; - double extra_bytes_prefetch = 0; - double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_non_cached_page_table_levels); - - for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - extra_bytes = extra_bytes + (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024); - - if (display_cfg->plane_descriptors[k].surface.dcc.enable) - extra_bytes = extra_bytes + (MetaChunkSize * 1024); - - if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) - extra_bytes = extra_bytes + tdlut_bytes_per_group[k]; - } - - extra_bytes_prefetch = extra_bytes; - - for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - if (display_cfg->gpuvm_enable == true) { - extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; - extra_bytes_prefetch = extra_bytes_prefetch + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactorPrefetch; - } - } - *Tarb = extra_bytes / ReturnBW; - *Tarb_prefetch = extra_bytes_prefetch / ReturnBW; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte); - DML_LOG_VERBOSE("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize); - DML_LOG_VERBOSE("DML::%s: extra_bytes = %f\n", __func__, extra_bytes); - DML_LOG_VERBOSE("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch); -#endif -} - -static double CalculateTWait( - long reserved_vblank_time_ns, - double UrgentLatency, - double Ttrip, - double g6_temp_read_blackout_us) -{ - double TWait; - double t_urg_trip = math_max2(UrgentLatency, Ttrip); - TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns); - DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); - DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, Ttrip); - DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, TWait); -#endif - return TWait; -} - - -static void CalculateVUpdateAndDynamicMetadataParameters( - unsigned int MaxInterDCNTileRepeaters, - double Dppclk, - double Dispclk, - double DCFClkDeepSleep, - double PixelClock, - unsigned int HTotal, - unsigned int VBlank, - unsigned int DynamicMetadataTransmittedBytes, - unsigned int DynamicMetadataLinesBeforeActiveRequired, - unsigned int InterlaceEnable, - bool 
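/* editor's note: Tdmsks below defaults to half the vblank time when no
 * explicit metadata lead-in line count is required, and is halved again
 * for interlaced timings unless the progressive-to-interlace unit lives
 * in OPP */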
ProgressiveToInterlaceUnitInOPP, - - // Output - double *TSetup, - double *Tdmbf, - double *Tdmec, - double *Tdmsks, - unsigned int *VUpdateOffsetPix, - unsigned int *VUpdateWidthPix, - unsigned int *VReadyOffsetPix) -{ - double TotalRepeaterDelayTime; - TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); - *VUpdateWidthPix = (unsigned int)(math_ceil2((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0)); - *VReadyOffsetPix = (unsigned int)(math_ceil2(math_max2(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0)); - *VUpdateOffsetPix = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0)); - *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; - *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; - *Tdmec = HTotal / PixelClock; - - if (DynamicMetadataLinesBeforeActiveRequired == 0) { - *Tdmsks = VBlank * HTotal / PixelClock / 2.0; - } else { - *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; - } - if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { - *Tdmsks = *Tdmsks / 2; - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); - DML_LOG_VERBOSE("DML::%s: VBlank = %u\n", __func__, VBlank); - DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); - DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); - DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, Dppclk); - DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); - DML_LOG_VERBOSE("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); - DML_LOG_VERBOSE("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); - - DML_LOG_VERBOSE("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); - DML_LOG_VERBOSE("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); - DML_LOG_VERBOSE("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); - - DML_LOG_VERBOSE("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); -#endif -} - -static double get_urgent_bandwidth_required( - struct dml2_core_shared_get_urgent_bandwidth_required_locals *l, - const struct dml2_display_cfg *display_cfg, - enum dml2_core_internal_soc_state_type state_type, - enum dml2_core_internal_bw_type bw_type, - bool inc_flip_bw, // including flip bw - bool use_qual_row_bw, - unsigned int NumberOfActiveSurfaces, - unsigned int NumberOfDPP[], - double dcc_dram_bw_nom_overhead_factor_p0[], - double dcc_dram_bw_nom_overhead_factor_p1[], - double dcc_dram_bw_pref_overhead_factor_p0[], - double dcc_dram_bw_pref_overhead_factor_p1[], - double mall_prefetch_sdp_overhead_factor[], - double mall_prefetch_dram_overhead_factor[], - double ReadBandwidthLuma[], - double ReadBandwidthChroma[], - double PrefetchBandwidthLuma[], - double PrefetchBandwidthChroma[], - double PrefetchBandwidthMax[], - double excess_vactive_fill_bw_l[], - double excess_vactive_fill_bw_c[], - double cursor_bw[], - double dpte_row_bw[], - double meta_row_bw[], - double prefetch_cursor_bw[], - double prefetch_vmrow_bw[], - double flip_bw[], - double UrgentBurstFactorLuma[], - double UrgentBurstFactorChroma[], - double UrgentBurstFactorCursor[], - double UrgentBurstFactorLumaPre[], - double UrgentBurstFactorChromaPre[], - double UrgentBurstFactorCursorPre[], - /* outputs */ - double surface_required_bw[], - double 
surface_peak_required_bw[]) -{ - // set inc_flip_bw = 0 for total_dchub_urgent_read_bw_noflip calculation, 1 for total_dchub_urgent_read_bw as described in the MAS - // set use_qual_row_bw = 1 to calculate using qualified row bandwidth, used for total_flip_bw calculation - - memset(l, 0, sizeof(struct dml2_core_shared_get_urgent_bandwidth_required_locals)); - - for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0; - l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor; - l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor; - l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor; - l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor; - - l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0; - l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1; - l->adj_factor_cur = UrgentBurstFactorCursor[k]; - l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0; - l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1; - l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k]; - - bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]); - bool exclude_this_plane = false; - - // Exclude phantom pipe in bw calculation for non svp prefetch state - if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom) - exclude_this_plane = true; - - // The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip. - // The qual_row_bw is zero if HostVM is possible and only non-zero and equal to row_bw(i) if immediate flip is not allowed for that pipe. 
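A minimal standalone sketch of the per-plane flip-bandwidth selection the two comments above describe, assuming scalar inputs; plane_flip_bw and its parameter names are illustrative stand-ins (the driver works on per-plane arrays and stores the result in l->per_plane_flip_bw[k]), while use_qual_row_bw and inc_flip_bw keep their meaning from the hunk below:

	#include <stdbool.h>

	/* Illustrative sketch only, not part of this change: mirrors the
	 * qual_row_bw vs. final_flip_bw branch logic for one plane. */
	static double plane_flip_bw(bool use_qual_row_bw, bool inc_flip_bw,
				    bool hostvm_enable, bool immediate_flip,
				    unsigned int num_dpp, double dpte_row_bw,
				    double meta_row_bw, double flip_bw)
	{
		if (use_qual_row_bw) {
			/* qual_row_bw: zero when HostVM invalidation flips are
			 * possible; the regular row bandwidth only when an
			 * immediate flip is not allowed for this pipe. */
			if (!hostvm_enable && !immediate_flip)
				return num_dpp * (dpte_row_bw + meta_row_bw);
			return 0.0;
		}
		/* final_flip_bw: the regular row bandwidth when immediate flip
		 * is disallowed (and there is no HostVM) or when flip bandwidth
		 * is excluded; otherwise the per-pipe flip bandwidth. */
		if ((!immediate_flip && !hostvm_enable) || !inc_flip_bw)
			return num_dpp * (dpte_row_bw + meta_row_bw);
		return num_dpp * flip_bw;
	}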
- if (use_qual_row_bw) { - if (display_cfg->hostvm_enable) - l->per_plane_flip_bw[k] = 0; // qual_row_bw - else if (!display_cfg->plane_descriptors[k].immediate_flip) - l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]); - } else { - // the final_flip_bw includes the regular row_bw when immediate flip is disallowed (and no HostVM) - if ((!display_cfg->plane_descriptors[k].immediate_flip && !display_cfg->hostvm_enable) || !inc_flip_bw) - l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]); - else - l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k]; - } - - if (!exclude_this_plane) { - l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k]; - l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur; - l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; - l->flip_and_prefetch_bw_max = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthMax[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; - l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]; - surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_max); - - /* export peak required bandwidth for the surface */ - surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); - DML_LOG_VERBOSE("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); - DML_LOG_VERBOSE("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); - DML_LOG_VERBOSE("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); - DML_LOG_VERBOSE("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); -#endif - } else { - surface_required_bw[k] = 0.0; - } - - l->required_bandwidth_mbps += surface_required_bw[k]; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); - DML_LOG_VERBOSE("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); - DML_LOG_VERBOSE("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); - DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); - DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); - DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur); - - DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre); - DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre); - DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur_pre=%f\n", 
__func__, k, l->adj_factor_cur_pre); - - DML_LOG_VERBOSE("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); - - DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); - DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane); -#endif - } - - return l->required_bandwidth_mbps; -} - -static void CalculateExtraLatency( - const struct dml2_display_cfg *display_cfg, - unsigned int ROBBufferSizeInKByte, - unsigned int RoundTripPingLatencyCycles, - unsigned int ReorderingBytes, - double DCFCLK, - double FabricClock, - unsigned int PixelChunkSizeInKByte, - double ReturnBW, - unsigned int NumberOfActiveSurfaces, - unsigned int NumberOfDPP[], - unsigned int dpte_group_bytes[], - unsigned int tdlut_bytes_per_group[], - double HostVMInefficiencyFactor, - double HostVMInefficiencyFactorPrefetch, - unsigned int HostVMMinPageSize, - enum dml2_qos_param_type qos_type, - bool max_outstanding_when_urgent_expected, - unsigned int max_outstanding_requests, - unsigned int request_size_bytes_luma[], - unsigned int request_size_bytes_chroma[], - unsigned int MetaChunkSize, - unsigned int dchub_arb_to_ret_delay, - double Ttrip, - unsigned int hostvm_mode, - - // output - double *ExtraLatency, // Tex - double *ExtraLatency_sr, // Tex_sr - double *ExtraLatencyPrefetch) - -{ - double Tarb; - double Tarb_prefetch; - double Tex_trips; - unsigned int max_request_size_bytes = 0; - - CalculateTarb( - display_cfg, - PixelChunkSizeInKByte, - NumberOfActiveSurfaces, - NumberOfDPP, - dpte_group_bytes, - tdlut_bytes_per_group, - HostVMInefficiencyFactor, - HostVMInefficiencyFactorPrefetch, - HostVMMinPageSize, - ReturnBW, - MetaChunkSize, - // output - &Tarb, - &Tarb_prefetch); - - Tex_trips = (display_cfg->hostvm_enable && hostvm_mode == 1) ? 
(2.0 * Ttrip) : 0.0; - - for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - if (request_size_bytes_luma[k] > max_request_size_bytes) - max_request_size_bytes = request_size_bytes_luma[k]; - if (request_size_bytes_chroma[k] > max_request_size_bytes) - max_request_size_bytes = request_size_bytes_chroma[k]; - } - - if (qos_type == dml2_qos_param_type_dcn4x) { - *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; - *ExtraLatency = *ExtraLatency_sr; - if (max_outstanding_when_urgent_expected) - *ExtraLatency = *ExtraLatency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * max_request_size_bytes) / ReturnBW; - } else { - *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW; - *ExtraLatency = *ExtraLatency_sr; - } - *ExtraLatency = *ExtraLatency + Tex_trips; - *ExtraLatencyPrefetch = *ExtraLatency + Tarb_prefetch; - *ExtraLatency = *ExtraLatency + Tarb; - *ExtraLatency_sr = *ExtraLatency_sr + Tarb; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: qos_type=%u\n", __func__, qos_type); - DML_LOG_VERBOSE("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); - DML_LOG_VERBOSE("DML::%s: Tex_trips=%f\n", __func__, Tex_trips); - DML_LOG_VERBOSE("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected); - DML_LOG_VERBOSE("DML::%s: FabricClock=%f\n", __func__, FabricClock); - DML_LOG_VERBOSE("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); - DML_LOG_VERBOSE("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); - DML_LOG_VERBOSE("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); - DML_LOG_VERBOSE("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); - DML_LOG_VERBOSE("DML::%s: Tarb=%f\n", __func__, Tarb); - DML_LOG_VERBOSE("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); - DML_LOG_VERBOSE("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); - DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch); -#endif -} - -static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p) -{ - struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals; - bool dcc_mrq_enable; - - unsigned int vm_bytes; - unsigned int extra_tdpe_bytes; - unsigned int tdlut_row_bytes; - unsigned int Lo; - - s->NoTimeToPrefetch = false; - s->DPPCycles = 0; - s->DISPCLKCycles = 0; - s->DSTTotalPixelsAfterScaler = 0.0; - s->LineTime = 0.0; - s->dst_y_prefetch_equ = 0.0; - s->prefetch_bw_oto = 0.0; - s->Tvm_oto = 0.0; - s->Tr0_oto = 0.0; - s->Tvm_oto_lines = 0.0; - s->Tr0_oto_lines = 0.0; - s->dst_y_prefetch_oto = 0.0; - s->TimeForFetchingVM = 0.0; - s->TimeForFetchingRowInVBlank = 0.0; - s->LinesToRequestPrefetchPixelData = 0.0; - s->HostVMDynamicLevelsTrips = 0; - s->trip_to_mem = 0.0; - *p->Tvm_trips = 0.0; - *p->Tr0_trips = 0.0; - s->Tvm_trips_rounded = 0.0; - s->Tr0_trips_rounded = 0.0; - s->max_Tsw = 0.0; - s->Lsw_oto = 0.0; - *p->Tpre_rounded = 0.0; - s->prefetch_bw_equ = 0.0; - s->Tvm_equ = 0.0; - s->Tr0_equ = 0.0; - s->Tdmbf = 0.0; - s->Tdmec = 0.0; - s->Tdmsks = 0.0; - *p->prefetch_sw_bytes = 0.0; - s->prefetch_bw_pr = 0.0; - s->bytes_pp = 0.0; - s->dep_bytes = 0.0; - s->min_Lsw_oto = 0.0; - s->min_Lsw_equ = 0.0; - s->Tsw_est1 = 0.0; - s->Tsw_est2 = 0.0; - s->Tsw_est3 = 0.0; - s->cursor_prefetch_bytes = 0; - *p->prefetch_cursor_bw = 0; - *p->RequiredPrefetchBWMax = 0.0; - - dcc_mrq_enable = 
(p->dcc_enable && p->mrq_present); - - s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) and Ttrip here is already max(Turg, Ttrip) - - if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) { - s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_non_cached_page_table_levels; - } else { - s->HostVMDynamicLevelsTrips = 0; - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable); - DML_LOG_VERBOSE("DML::%s: mrq_present = %u\n", __func__, p->mrq_present); - DML_LOG_VERBOSE("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable); - DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable); - DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); - DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); - DML_LOG_VERBOSE("DML::%s: VStartup = %u\n", __func__, p->VStartup); - DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); - DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); - DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); - DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); - DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); - DML_LOG_VERBOSE("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); - DML_LOG_VERBOSE("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk); -#endif - CalculateVUpdateAndDynamicMetadataParameters( - p->MaxInterDCNTileRepeaters, - p->myPipe->Dppclk, - p->myPipe->Dispclk, - p->myPipe->DCFClkDeepSleep, - p->myPipe->PixelClock, - p->myPipe->HTotal, - p->myPipe->VBlank, - p->DynamicMetadataTransmittedBytes, - p->DynamicMetadataLinesBeforeActiveRequired, - p->myPipe->InterlaceEnable, - p->myPipe->ProgressiveToInterlaceUnitInOPP, - p->TSetup, - - // Output - &s->Tdmbf, - &s->Tdmec, - &s->Tdmsks, - p->VUpdateOffsetPix, - p->VUpdateWidthPix, - p->VReadyOffsetPix); - - s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; - s->trip_to_mem = p->Ttrip; - *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg); - if (dcc_mrq_enable) - *p->Tvm_trips_flip = *p->Tvm_trips; - else - *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; - - *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); - *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2); - - if (p->DynamicMetadataVMEnabled == true) { - *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; - *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; - } else { - *p->Tdmdl_vm = 0; - *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex - } - - if (p->DynamicMetadataEnable == true) { - if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { - *p->NotEnoughTimeForDynamicMetadata = true; - DML_LOG_VERBOSE("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); - DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); - DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); - DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); - DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); - } 
else { - *p->NotEnoughTimeForDynamicMetadata = false; - } - } else { - *p->NotEnoughTimeForDynamicMetadata = false; - } - - if (p->myPipe->ScalerEnabled) - s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL); - else - s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly); - - s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor); - - s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal; - - if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0) - return true; - - *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay); - *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH + - ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) + - ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0)); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled); - DML_LOG_VERBOSE("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); - DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); - DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); - DML_LOG_VERBOSE("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); - DML_LOG_VERBOSE("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); - DML_LOG_VERBOSE("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); - DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); - DML_LOG_VERBOSE("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); - DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); - - DML_LOG_VERBOSE("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); - DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); - DML_LOG_VERBOSE("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); - DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time); -#endif - - if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) - *p->DSTYAfterScaler = 1; - else - *p->DSTYAfterScaler = 0; - - s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler; - *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1)); - *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal))); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); - DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); -#endif - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); - DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); - DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); - DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); - DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, 
p->display_cfg->gpuvm_max_page_table_levels); - DML_LOG_VERBOSE("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips); -#endif - if (p->display_cfg->gpuvm_enable) { - s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; - *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; - } else { - if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut) - s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0); - else - s->Tvm_trips_rounded = s->LineTime / 4.0; - *p->Tvm_trips_flip_rounded = s->LineTime / 4.0; - } - - s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0); - *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0); - - if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) { - s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; - *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; - } else { - s->Tr0_trips_rounded = s->LineTime / 4.0; - *p->Tr0_trips_flip_rounded = s->LineTime / 4.0; - } - s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0); - *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0); - - if (p->display_cfg->gpuvm_enable == true) { - if (p->display_cfg->gpuvm_max_page_table_levels >= 3) { - *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1)); - } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) { - *p->Tno_bw = p->ExtraLatencyPrefetch; - } else { - *p->Tno_bw = 0; - } - } else { - *p->Tno_bw = 0; - } - - if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3) - *p->Tno_bw_flip = *p->Tno_bw; - else - *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip - - if (dml_is_420(p->myPipe->SourcePixelFormat)) { - s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0; - } else { - s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC; - } - - *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; - *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; - - vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes; - extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128); - - if (p->setup_for_tdlut) - vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0); - - tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0); - - s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; - s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); - s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); - - // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto - // Note: in prefetch calculation, accounting is done mostly per-pipe. 
- // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time - s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface; - - // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1) - s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime; - - if (p->myPipe->BytePerPixelC > 0) { - s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface; - s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime; - } - - /* oto prefetch bw should always be less than total vactive bw */ - //DML_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface); - - s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor; - - s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime)); - - s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0; - - s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto, - p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); - - /* oto bw needs to be output even if the oto schedule isn't being used, to avoid ms/mp mismatch. - * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule - * and the required bandwidth increases when going from ms to mp - */ - *p->RequiredPrefetchBWMax = s->prefetch_bw_oto; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); - DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); - DML_LOG_VERBOSE("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw); -#endif - - if (p->display_cfg->gpuvm_enable == true) { - s->Tvm_oto = math_max3( - *p->Tvm_trips, - *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto, - s->LineTime / 4.0); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); - DML_LOG_VERBOSE("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); - DML_LOG_VERBOSE("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); -#endif - } else { - s->Tvm_oto = s->Tvm_trips_rounded; - } - - if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) { - s->Tr0_oto = math_max3( - *p->Tr0_trips, - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto, - s->LineTime / 4.0); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); - DML_LOG_VERBOSE("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); - DML_LOG_VERBOSE("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); -#endif - } else - s->Tr0_oto = s->LineTime / 4.0; - - s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0; - s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0; - s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; - -#ifdef DML_GLOBAL_PREFETCH_CHECK - DML_LOG_VERBOSE("DML::%s: impacted_Tpre = %f\n", __func__, 
p->impacted_dst_y_pre); - if (p->impacted_dst_y_pre > 0) { - DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); - s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre); - DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto); - } -#endif - *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime; - - //To (time for delay after scaler) in line time - Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal); - - s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; - s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); - s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); - //Tpre_equ in line time - if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable) - s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo; - else - s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo; - -#ifdef DML_GLOBAL_PREFETCH_CHECK - s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ); - - s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH - - if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ) - s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ; -#endif - - s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); - DML_LOG_VERBOSE("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); - DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); - DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); - DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); - DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); - DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor); - DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); - DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); - DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); - DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); - DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); - DML_LOG_VERBOSE("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); - DML_LOG_VERBOSE("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes); - DML_LOG_VERBOSE("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); - DML_LOG_VERBOSE("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); - DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); - DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); - DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); - DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); - DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", 
__func__, *p->Tr0_trips); - DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip); - DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip); - DML_LOG_VERBOSE("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr); - DML_LOG_VERBOSE("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); - DML_LOG_VERBOSE("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); - DML_LOG_VERBOSE("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); - DML_LOG_VERBOSE("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); - DML_LOG_VERBOSE("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); - DML_LOG_VERBOSE("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); - DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); - DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); - DML_LOG_VERBOSE("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes); - DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes); -#endif - s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; - *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); - DML_LOG_VERBOSE("DML::%s: LineTime: %f\n", __func__, s->LineTime); - DML_LOG_VERBOSE("DML::%s: VStartup: %u\n", __func__, p->VStartup); - DML_LOG_VERBOSE("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); - DML_LOG_VERBOSE("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); - DML_LOG_VERBOSE("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); - DML_LOG_VERBOSE("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); - DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); - DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); - DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); - DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); - DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); - DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); - DML_LOG_VERBOSE("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch); - DML_LOG_VERBOSE("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm); - DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); - DML_LOG_VERBOSE("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p); - DML_LOG_VERBOSE("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); - DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); - DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); - DML_LOG_VERBOSE("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); - DML_LOG_VERBOSE("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, 
p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); - DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); - DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, (s->dst_y_prefetch_equ * s->LineTime), *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime))); - DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); -#endif - - *p->dst_y_per_vm_vblank = 0; - *p->dst_y_per_row_vblank = 0; - *p->VRatioPrefetchY = 0; - *p->VRatioPrefetchC = 0; - *p->RequiredPrefetchPixelDataBWLuma = 0; - - // Derive bandwidth by finding how much data to move within the time constraint - // Tpre_rounded is Tpre rounding to 2-bit fraction - // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time - // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time - // So that means prefetch bw calculated can be higher since the total time available for prefetch is less - bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; - bool tpre_gt_req_latency = true; -#if 0 - // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained. - // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages. - // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary. - tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch); -#endif - - if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) { - s->prefetch_bw1 = 0.; - s->prefetch_bw2 = 0.; - s->prefetch_bw3 = 0.; - s->prefetch_bw4 = 0.; - - // prefetch_bw1: VM + 2*R0 + SW - if (*p->Tpre_rounded - *p->Tno_bw > 0) { - s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor - + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) - + *p->prefetch_sw_bytes) - / (*p->Tpre_rounded - *p->Tno_bw); - s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1; - } else - s->prefetch_bw1 = 0; - - DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); - if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { - s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / - (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); - DML_LOG_VERBOSE("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded); - DML_LOG_VERBOSE("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); - DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); - DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); - DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - DML_LOG_VERBOSE("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime 
- *p->Tno_bw)); - DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); -#endif - } - - // prefetch_bw2: VM + SW - if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) { - s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) / - (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded); - s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2; - } else - s->prefetch_bw2 = 0; - - DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); - if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { - s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); - DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); - } - - // prefetch_bw3: 2*R0 + SW - if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) { - s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) / - (*p->Tpre_rounded - s->Tvm_trips_rounded); - s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3; - } else - s->prefetch_bw3 = 0; - - DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); - if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { - s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); - DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); - } - - // prefetch_bw4: SW - if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0) - s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded); - else - s->prefetch_bw4 = 0; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); - DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, s->dst_y_prefetch_equ * s->LineTime, *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime))); - DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); - DML_LOG_VERBOSE("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); - DML_LOG_VERBOSE("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); - DML_LOG_VERBOSE("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2); - DML_LOG_VERBOSE("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); - DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); - DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); - DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3); - DML_LOG_VERBOSE("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4); -#endif - { - bool Case1OK = false; - bool Case2OK = false; - bool Case3OK = false; - - // get "equalized" bw among all stages (vm, r0, sw), so that all 3 stages sit just above the latency-based requirement - // and no single stage is disproportionately favored; the next candidates make r0, then vm, more
aggressive, and the worst case is all stages being aggressive - // vs the latency-based numbers - - // prefetch_bw1: VM + 2*R0 + SW - // so prefetch_bw1 will have enough bw to transfer the necessary data within Tpre_rounded - Tno_bw (Tpre is the worst-case latency-based time to fetch the data) - // this is to make sure the equ bw won't be more aggressive than the latency-based requirement. - // check vm time >= vm_trips - // check r0 time >= r0_trips - - double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); - - DML_LOG_VERBOSE("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); - DML_LOG_VERBOSE("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); - - if (s->prefetch_bw1 > 0) { - double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1; - double row_transfer_time = total_row_bytes / s->prefetch_bw1; - DML_LOG_VERBOSE("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); - DML_LOG_VERBOSE("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); - if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { - Case1OK = true; - } - } - - // prefetch_bw2: VM + SW - // prefetch_bw2 will be enough bw to transfer VM and SW data within (Tpre_rounded - Tr0_trips_rounded - Tno_bw) - // check vm time >= vm_trips - // check r0 time < r0_trips - if (s->prefetch_bw2 > 0) { - double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2; - double row_transfer_time = total_row_bytes / s->prefetch_bw2; - DML_LOG_VERBOSE("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); - DML_LOG_VERBOSE("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time); - if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) { - Case2OK = true; - } - } - - // prefetch_bw3: VM + 2*R0 - // check vm time < vm_trips - // check r0 time >= r0_trips - if (s->prefetch_bw3 > 0) { - double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3; - double row_transfer_time = total_row_bytes / s->prefetch_bw3; - DML_LOG_VERBOSE("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); - DML_LOG_VERBOSE("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); - if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { - Case3OK = true; - } - } - - if (Case1OK) { - s->prefetch_bw_equ = s->prefetch_bw1; - } else if (Case2OK) { - s->prefetch_bw_equ = s->prefetch_bw2; - } else if (Case3OK) { - s->prefetch_bw_equ = s->prefetch_bw3; - } else { - s->prefetch_bw_equ = s->prefetch_bw4; - } - - s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ, - p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Case1OK: %u\n", __func__, Case1OK); - DML_LOG_VERBOSE("DML::%s: Case2OK: %u\n", __func__, Case2OK); - DML_LOG_VERBOSE("DML::%s: Case3OK: %u\n", __func__, Case3OK); - DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ); -#endif - - if (s->prefetch_bw_equ > 0) { - if (p->display_cfg->gpuvm_enable == true) { - s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / 
s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4); - } else { - s->Tvm_equ = s->LineTime / 4; - } - - if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) { - s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes - *p->Tr0_trips, - s->LineTime / 4); - } else { - s->Tr0_equ = s->LineTime / 4; - } - } else { - s->Tvm_equ = 0; - s->Tr0_equ = 0; - DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ equals 0!\n", __func__); - } - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); - DML_LOG_VERBOSE("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); -#endif - // Use the more stressful prefetch schedule - if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { - *p->dst_y_prefetch = s->dst_y_prefetch_oto; - s->TimeForFetchingVM = s->Tvm_oto; - s->TimeForFetchingRowInVBlank = s->Tr0_oto; - - *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Using oto scheduling for prefetch\n", __func__); -#endif - - } else { - *p->dst_y_prefetch = s->dst_y_prefetch_equ; - - if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted) - *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted; - - s->TimeForFetchingVM = s->Tvm_equ; - s->TimeForFetchingRowInVBlank = s->Tr0_equ; - - *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - - /* equ bw should be propagated so a ceiling of the equ bw is accounted for prior to mode programming. - * Overall bandwidth may be lower when going from mode support to mode programming but final pixel data - * bandwidth may end up higher than what was calculated in mode support. 
- */ - *p->RequiredPrefetchBWMax = math_max2(s->prefetch_bw_equ, *p->RequiredPrefetchBWMax); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Using equ bw scheduling for prefetch\n", __func__); -#endif - } - - // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) - s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw - - s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line); - *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime); - *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); - DML_LOG_VERBOSE("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); - DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); - DML_LOG_VERBOSE("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch); - DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); - DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); - DML_LOG_VERBOSE("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); - DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); - DML_LOG_VERBOSE("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us); - - DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); - DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); - DML_LOG_VERBOSE("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes); - DML_LOG_VERBOSE("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw); -#endif - DML_ASSERT(*p->dst_y_prefetch < 64); - - unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime); - if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) { - *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData; - *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); - DML_LOG_VERBOSE("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); - DML_LOG_VERBOSE("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); -#endif - if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) { - if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) { - *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, - (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); - } else { - s->NoTimeToPrefetch = true; - DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); - *p->VRatioPrefetchY = 0; - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); - DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); - DML_LOG_VERBOSE("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); -#endif - } - - *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData; - *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); - DML_LOG_VERBOSE("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); - DML_LOG_VERBOSE("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); -#endif - if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) { - if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) { - *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); - } else { - s->NoTimeToPrefetch = true; - DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); - *p->VRatioPrefetchC = 0; - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); - DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); - DML_LOG_VERBOSE("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); -#endif - } - - *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime; - *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); - DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); - DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); - DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); - DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); -#endif - } else { - s->NoTimeToPrefetch = true; - DML_LOG_VERBOSE("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); - DML_LOG_VERBOSE("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); - *p->VRatioPrefetchY = 0; - *p->VRatioPrefetchC = 0; - *p->RequiredPrefetchPixelDataBWLuma = 0; - *p->RequiredPrefetchPixelDataBWChroma = 0; - } - DML_LOG_VERBOSE("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); - DML_LOG_VERBOSE("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); - DML_LOG_VERBOSE("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); - DML_LOG_VERBOSE("DML: Tsw: %fus = time to fetch enough pixel data 
and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime); - DML_LOG_VERBOSE("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime); - DML_LOG_VERBOSE("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n"); - DML_LOG_VERBOSE("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); - DML_LOG_VERBOSE("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); - - } else { - DML_LOG_VERBOSE("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); - DML_LOG_VERBOSE("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", - __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); - s->NoTimeToPrefetch = true; - s->TimeForFetchingVM = 0; - s->TimeForFetchingRowInVBlank = 0; - *p->dst_y_per_vm_vblank = 0; - *p->dst_y_per_row_vblank = 0; - s->LinesToRequestPrefetchPixelData = 0; - *p->VRatioPrefetchY = 0; - *p->VRatioPrefetchC = 0; - *p->RequiredPrefetchPixelDataBWLuma = 0; - *p->RequiredPrefetchPixelDataBWChroma = 0; - } - - { - double prefetch_vm_bw; - double prefetch_row_bw; - - if (vm_bytes == 0) { - prefetch_vm_bw = 0; - } else if (*p->dst_y_per_vm_vblank > 0) { -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); - DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); - DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); -#endif - prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); -#endif - } else { - prefetch_vm_bw = 0; - s->NoTimeToPrefetch = true; - DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); - } - - if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { - prefetch_row_bw = 0; - } else if (*p->dst_y_per_row_vblank > 0) { - prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); - DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); - DML_LOG_VERBOSE("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); -#endif - } else { - prefetch_row_bw = 0; - s->NoTimeToPrefetch = true; - DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); - } - - *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); - } - - if (s->NoTimeToPrefetch) { - s->TimeForFetchingVM = 0; - s->TimeForFetchingRowInVBlank = 0; - *p->dst_y_per_vm_vblank = 0; - *p->dst_y_per_row_vblank = 0; - *p->dst_y_prefetch = 0; - s->LinesToRequestPrefetchPixelData = 0; - *p->VRatioPrefetchY = 0; - *p->VRatioPrefetchC = 0; - *p->RequiredPrefetchPixelDataBWLuma = 0; - *p->RequiredPrefetchPixelDataBWChroma = 0; - *p->prefetch_vmrow_bw = 0; - } - - DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); - DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); - DML_LOG_VERBOSE("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); - DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); - DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); - DML_LOG_VERBOSE("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); - - return s->NoTimeToPrefetch; -} - -static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines, - unsigned int line_buffer_size_bits, - unsigned int num_pipes, - unsigned int vp_width, - unsigned int vp_height, - double h_ratio, - enum dml2_rotation_angle rotation_angle) -{ - unsigned int num_lb_source_lines = 0; - double lb_bit_per_pixel = 57.0; - unsigned recin_width = vp_width/num_pipes; - - if (dml_is_vertical_rotation(rotation_angle)) - recin_width = vp_height/num_pipes; - - num_lb_source_lines = (unsigned int) math_min2((double) max_line_buffer_lines, - math_floor2(line_buffer_size_bits / lb_bit_per_pixel / (recin_width / math_max2(h_ratio, 1.0)), 1.0)); - - return num_lb_source_lines; -} - -static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[]) -{ - int max_value = -1; - int max_idx = -1; - for (unsigned int i = 0; i < num_planes; i++) { - if (i != this_plane_idx && (int) Trpd_dcfclk_cycles[i] > max_value) { - max_value = Trpd_dcfclk_cycles[i]; - max_idx = i; - } - } - if (max_idx <= 0) { - DML_ASSERT(max_idx >= 0); - max_idx = this_plane_idx; - } - - return max_idx; -} - -static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps) -{ - double sum = 0.; - for (unsigned int i = 0; i < num_planes; i++) { - if (i != exclude_plane_idx) { - sum += prefetch_swath_bytes[i]; - } - } - return sum / bw_mbps; -} - -// a global check against the aggregate effect of the per plane prefetch schedule -static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch, - struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p) -{ - struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals; - unsigned int i, k; - - memset(s, 0, sizeof(struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals)); - - *p->recalc_prefetch_schedule = 0; - s->prefetch_global_check_passed = 1; - // worst case if the rob and cdb is fully hogged - s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0); -#ifdef __DML_VBA_DEBUG__ - 
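/*
 * A minimal sketch of the worst-case return-path bound computed just above,
 * assuming math_ceil2(x, g) rounds x up to a multiple of g and that the
 * return path drains 64 bytes per DCFCLK cycle: if the ROB and the
 * compressed buffer (scaled by DML_MAX_COMPRESSION_RATIO to its
 * uncompressed footprint) are fully occupied, this is the longest one
 * plane's data can sit behind other traffic.
 *
 *	static unsigned int worst_case_trpd_cycles(unsigned int rob_kbytes,
 *						   unsigned int cdb_kbytes)
 *	{
 *		double bytes = rob_kbytes * 1024.0 +
 *			       cdb_kbytes * DML_MAX_COMPRESSION_RATIO * 1024.0;
 *
 *		return (unsigned int)math_ceil2(bytes / 64.0, 1.0);
 *	}
 */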
DML_LOG_VERBOSE("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes); - DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes); - DML_LOG_VERBOSE("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes); - DML_LOG_VERBOSE("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps); - DML_LOG_VERBOSE("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz); - DML_LOG_VERBOSE("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles); -#endif - - // calculate the return impact from each plane, request is 256B per dcfclk - for (i = 0; i < p->num_active_planes; i++) { - s->src_detile_buf_size_bytes_l[i] = p->detile_buffer_size_bytes_l[i]; - s->src_detile_buf_size_bytes_c[i] = p->detile_buffer_size_bytes_c[i]; - s->src_swath_bytes_l[i] = p->full_swath_bytes_l[i]; - s->src_swath_bytes_c[i] = p->full_swath_bytes_c[i]; - - if (p->pixel_format[i] == dml2_420_10) { - s->src_detile_buf_size_bytes_l[i] = (unsigned int) (s->src_detile_buf_size_bytes_l[i] * 1.5); - s->src_detile_buf_size_bytes_c[i] = (unsigned int) (s->src_detile_buf_size_bytes_c[i] * 1.5); - s->src_swath_bytes_l[i] = (unsigned int) (s->src_swath_bytes_l[i] * 1.5); - s->src_swath_bytes_c[i] = (unsigned int) (s->src_swath_bytes_c[i] * 1.5); - } - - s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[i] / p->chunk_bytes_l, 1) * p->chunk_bytes_l); - s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]); - DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l); - DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]); - DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]); - DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]); - DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det); -#endif - - if (s->src_swath_bytes_c[i] > 0) { // dual_plane - s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[i] / p->chunk_bytes_c, 1) * p->chunk_bytes_c); - - if (p->pixel_format[i] == dml2_422_planar_8 || p->pixel_format[i] == dml2_422_planar_10 || p->pixel_format[i] == dml2_422_planar_12) { - s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[i] / p->swath_height_c[i], 1) * s->src_swath_bytes_c[i]); - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c); - DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]); - DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]); - DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]); -#endif - } - - s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz); // fill time assume full burst at request rate - s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 
64B per DCFClk - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det); - DML_LOG_VERBOSE("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us); - DML_LOG_VERBOSE("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]); -#endif - // clamping to worst case delay which is one which occupy the full rob+cdb - if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles) - s->accumulated_return_path_dcfclk_cycles[i] = s->max_Trpd_dcfclk_cycles; - } - - // Figure out the impacted prefetch time for each plane - // if impacted_Tre is > equ bw Tpre, we need to fail the prefetch schedule as we need a higher state to support the bw - for (i = 0; i < p->num_active_planes; i++) { - k = find_max_impact_plane(i, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles); // plane k causes most impact to plane i - // the rest of planes (except for k) complete for bw - p->impacted_dst_y_pre[i] = s->accumulated_return_path_dcfclk_cycles[k]/p->estimated_dcfclk_mhz; - p->impacted_dst_y_pre[i] += calculate_impacted_Tsw(k, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps); - p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k); -#endif - } - - if (p->Tpre_rounded != NULL && p->Tpre_oto != NULL) { - for (i = 0; i < p->num_active_planes; i++) { - if (p->impacted_dst_y_pre[i] > p->dst_y_prefetch[i]) { - s->prefetch_global_check_passed = 0; - *p->recalc_prefetch_schedule = 1; - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]); - DML_LOG_VERBOSE("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]); -#endif - } - } else { - // likely a mode programming calls, assume support, and no recalc - not used anyways - s->prefetch_global_check_passed = 1; - *p->recalc_prefetch_schedule = 0; - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed); - DML_LOG_VERBOSE("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule); -#endif - - return s->prefetch_global_check_passed; -} - -static void calculate_peak_bandwidth_required( - struct dml2_core_internal_scratch *s, - struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p) -{ - unsigned int n; - unsigned int m; - - struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals; - - memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals)); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw); - DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes); -#endif - - for (unsigned int k = 0; k < p->num_active_planes; ++k) { - l->unity_array[k] = 1.0; - l->zero_array[k] = 0.0; - } - - for (m = 0; m < dml2_core_internal_soc_state_max; m++) { - for (n = 0; n < dml2_core_internal_bw_max; n++) { - get_urgent_bandwidth_required( - &s->get_urgent_bandwidth_required_locals, - p->display_cfg, - m, - n, - 0, //inc_flip_bw, - 0, //use_qual_row_bw - p->num_active_planes, - p->num_of_dpp, - p->dcc_dram_bw_nom_overhead_factor_p0, - 
p->dcc_dram_bw_nom_overhead_factor_p1, - p->dcc_dram_bw_pref_overhead_factor_p0, - p->dcc_dram_bw_pref_overhead_factor_p1, - p->mall_prefetch_sdp_overhead_factor, - p->mall_prefetch_dram_overhead_factor, - p->surface_read_bandwidth_l, - p->surface_read_bandwidth_c, - l->zero_array, //PrefetchBandwidthLuma, - l->zero_array, //PrefetchBandwidthChroma, - l->zero_array, //PrefetchBWMax - l->zero_array, - l->zero_array, - l->zero_array, - p->dpte_row_bw, - p->meta_row_bw, - l->zero_array, //prefetch_cursor_bw, - l->zero_array, //prefetch_vmrow_bw, - l->zero_array, //flip_bw, - l->zero_array, - l->zero_array, - l->zero_array, - l->zero_array, - l->zero_array, - l->zero_array, - p->surface_avg_vactive_required_bw[m][n], - p->surface_peak_required_bw[m][n]); - - p->urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required( - &s->get_urgent_bandwidth_required_locals, - p->display_cfg, - m, - n, - 0, //inc_flip_bw, - 0, //use_qual_row_bw - p->num_active_planes, - p->num_of_dpp, - p->dcc_dram_bw_nom_overhead_factor_p0, - p->dcc_dram_bw_nom_overhead_factor_p1, - p->dcc_dram_bw_pref_overhead_factor_p0, - p->dcc_dram_bw_pref_overhead_factor_p1, - p->mall_prefetch_sdp_overhead_factor, - p->mall_prefetch_dram_overhead_factor, - p->surface_read_bandwidth_l, - p->surface_read_bandwidth_c, - l->zero_array, //PrefetchBandwidthLuma, - l->zero_array, //PrefetchBandwidthChroma, - l->zero_array, //PrefetchBWMax - p->excess_vactive_fill_bw_l, - p->excess_vactive_fill_bw_c, - p->cursor_bw, - p->dpte_row_bw, - p->meta_row_bw, - l->zero_array, //prefetch_cursor_bw, - l->zero_array, //prefetch_vmrow_bw, - l->zero_array, //flip_bw, - p->urgent_burst_factor_l, - p->urgent_burst_factor_c, - p->urgent_burst_factor_cursor, - p->urgent_burst_factor_prefetch_l, - p->urgent_burst_factor_prefetch_c, - p->urgent_burst_factor_prefetch_cursor, - l->surface_dummy_bw, - p->surface_peak_required_bw[m][n]); - - p->urg_bandwidth_required[m][n] = get_urgent_bandwidth_required( - &s->get_urgent_bandwidth_required_locals, - p->display_cfg, - m, - n, - p->inc_flip_bw, - 0, //use_qual_row_bw - p->num_active_planes, - p->num_of_dpp, - p->dcc_dram_bw_nom_overhead_factor_p0, - p->dcc_dram_bw_nom_overhead_factor_p1, - p->dcc_dram_bw_pref_overhead_factor_p0, - p->dcc_dram_bw_pref_overhead_factor_p1, - p->mall_prefetch_sdp_overhead_factor, - p->mall_prefetch_dram_overhead_factor, - p->surface_read_bandwidth_l, - p->surface_read_bandwidth_c, - p->prefetch_bandwidth_l, - p->prefetch_bandwidth_c, - p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw - p->excess_vactive_fill_bw_l, - p->excess_vactive_fill_bw_c, - p->cursor_bw, - p->dpte_row_bw, - p->meta_row_bw, - p->prefetch_cursor_bw, - p->prefetch_vmrow_bw, - p->flip_bw, - p->urgent_burst_factor_l, - p->urgent_burst_factor_c, - p->urgent_burst_factor_cursor, - p->urgent_burst_factor_prefetch_l, - p->urgent_burst_factor_prefetch_c, - p->urgent_burst_factor_prefetch_cursor, - l->surface_dummy_bw, - p->surface_peak_required_bw[m][n]); - - p->urg_bandwidth_required_qual[m][n] = get_urgent_bandwidth_required( - &s->get_urgent_bandwidth_required_locals, - p->display_cfg, - m, - n, - 0, //inc_flip_bw - 1, //use_qual_row_bw - p->num_active_planes, - p->num_of_dpp, - p->dcc_dram_bw_nom_overhead_factor_p0, - p->dcc_dram_bw_nom_overhead_factor_p1, - p->dcc_dram_bw_pref_overhead_factor_p0, - p->dcc_dram_bw_pref_overhead_factor_p1, - p->mall_prefetch_sdp_overhead_factor, - p->mall_prefetch_dram_overhead_factor, - p->surface_read_bandwidth_l, - 
p->surface_read_bandwidth_c, - p->prefetch_bandwidth_l, - p->prefetch_bandwidth_c, - p->prefetch_bandwidth_max, // to prevent ms/mp mismatch where mp prefetch bw > ms prefetch bw - p->excess_vactive_fill_bw_l, - p->excess_vactive_fill_bw_c, - p->cursor_bw, - p->dpte_row_bw, - p->meta_row_bw, - p->prefetch_cursor_bw, - p->prefetch_vmrow_bw, - p->flip_bw, - p->urgent_burst_factor_l, - p->urgent_burst_factor_c, - p->urgent_burst_factor_cursor, - p->urgent_burst_factor_prefetch_l, - p->urgent_burst_factor_prefetch_c, - p->urgent_burst_factor_prefetch_cursor, - l->surface_dummy_bw, - p->surface_peak_required_bw[m][n]); - - p->non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required( - &s->get_urgent_bandwidth_required_locals, - p->display_cfg, - m, - n, - p->inc_flip_bw, - 0, //use_qual_row_bw - p->num_active_planes, - p->num_of_dpp, - p->dcc_dram_bw_nom_overhead_factor_p0, - p->dcc_dram_bw_nom_overhead_factor_p1, - p->dcc_dram_bw_pref_overhead_factor_p0, - p->dcc_dram_bw_pref_overhead_factor_p1, - p->mall_prefetch_sdp_overhead_factor, - p->mall_prefetch_dram_overhead_factor, - p->surface_read_bandwidth_l, - p->surface_read_bandwidth_c, - p->prefetch_bandwidth_l, - p->prefetch_bandwidth_c, - p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw - p->excess_vactive_fill_bw_l, - p->excess_vactive_fill_bw_c, - p->cursor_bw, - p->dpte_row_bw, - p->meta_row_bw, - p->prefetch_cursor_bw, - p->prefetch_vmrow_bw, - p->flip_bw, - l->unity_array, - l->unity_array, - l->unity_array, - l->unity_array, - l->unity_array, - l->unity_array, - l->surface_dummy_bw, - p->surface_peak_required_bw[m][n]); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]); - DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); - DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); - DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? 
"_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]); -#endif - DML_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]); - } - } -} - -static void check_urgent_bandwidth_support( - double *frac_urg_bandwidth_nom, - double *frac_urg_bandwidth_mall, - bool *vactive_bandwidth_support_ok, // vactive ok - bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok - - unsigned int mall_allocated_for_dcn_mbytes, - double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], - double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], - double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], - double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) -{ - double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; - double frac_urg_bandwidth_nom_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; - double frac_urg_bandwidth_mall_sdp; - double frac_urg_bandwidth_mall_dram; - if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] > 0) - frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; - else - frac_urg_bandwidth_mall_sdp = 0.0; - if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] > 0) - frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; - else - frac_urg_bandwidth_mall_dram = 0.0; - - *bandwidth_support_ok = 1; - *vactive_bandwidth_support_ok = 1; - - // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth - // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram - // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL - // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch - - *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; - *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; - - if (mall_allocated_for_dcn_mbytes > 0) { - *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; - *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= 
urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; - } - - *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram); - *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram); - - *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0); - - if (mall_allocated_for_dcn_mbytes > 0) - *bandwidth_support_ok &= (*frac_urg_bandwidth_mall <= 1.0); - - *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; - *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; - if (mall_allocated_for_dcn_mbytes > 0) { - *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; - *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp); - DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram); - DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom); - - DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp); - DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram); - DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall); - DML_LOG_VERBOSE("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok); - - for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { - for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { - DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", - __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), - urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]); - } - } -#endif -} - -static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state, - double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip - double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) -{ - double flip_bw_available_mbps; - double flip_bw_available_sdp_mbps; - double flip_bw_available_dram_mbps; - - flip_bw_available_sdp_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]; - flip_bw_available_dram_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]; - flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? 
flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); - DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]); - DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]); - DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]); - DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]); - DML_LOG_VERBOSE("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps); - DML_LOG_VERBOSE("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps); - DML_LOG_VERBOSE("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps); -#endif - - return flip_bw_available_mbps; -} - -static void calculate_immediate_flip_bandwidth_support( - // Output - double *frac_urg_bandwidth_flip, - bool *flip_bandwidth_support_ok, - - // Input - enum dml2_core_internal_soc_state_type eval_state, - double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], - double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], - double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) -{ - double frac_urg_bw_flip_sdp = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_sdp] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]; - double frac_urg_bw_flip_dram = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_dram] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]; - - *flip_bandwidth_support_ok = true; - for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram - *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n]; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n)); - DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]); - DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]); - DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]); - DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); -#endif - DML_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]); - } - - *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? 
frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram; - *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); - DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp); - DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram); - DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip); - DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); - - for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { - for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { - DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", - __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), - urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]); - } - } -#endif -} - -static void CalculateFlipSchedule( - struct dml2_core_internal_scratch *s, - bool iflip_enable, - bool use_lb_flip_bw, - double HostVMInefficiencyFactor, - double Tvm_trips_flip, - double Tr0_trips_flip, - double Tvm_trips_flip_rounded, - double Tr0_trips_flip_rounded, - bool GPUVMEnable, - double vm_bytes, // vm_bytes - double DPTEBytesPerRow, // dpte_row_bytes - double BandwidthAvailableForImmediateFlip, - unsigned int TotImmediateFlipBytes, - enum dml2_source_format_class SourcePixelFormat, - double LineTime, - double VRatio, - double VRatioChroma, - double Tno_bw_flip, - unsigned int dpte_row_height, - unsigned int dpte_row_height_chroma, - bool use_one_row_for_frame_flip, - unsigned int max_flip_time_us, - unsigned int max_flip_time_lines, - unsigned int per_pipe_flip_bytes, - unsigned int meta_row_bytes, - unsigned int meta_row_height, - unsigned int meta_row_height_chroma, - bool dcc_mrq_enable, - - // Output - double *dst_y_per_vm_flip, - double *dst_y_per_row_flip, - double *final_flip_bw, - bool *ImmediateFlipSupportedForPipe) -{ - struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals; - - l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; - l->dpte_row_bytes = DPTEBytesPerRow; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); - DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); - DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); - DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); - DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); - DML_LOG_VERBOSE("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); - DML_LOG_VERBOSE("DML::%s: iflip_enable = %u\n", __func__, iflip_enable); - DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); - DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); - DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip); - DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip); - DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip); - DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, 
Tvm_trips_flip_rounded); - DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded); - DML_LOG_VERBOSE("DML::%s: vm_bytes = %f\n", __func__, vm_bytes); - DML_LOG_VERBOSE("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); - DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes); - DML_LOG_VERBOSE("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes); - DML_LOG_VERBOSE("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height); - DML_LOG_VERBOSE("DML::%s: meta_row_height = %d\n", __func__, meta_row_height); - DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); -#endif - - if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) { - if (l->dual_plane) { - if (dcc_mrq_enable & GPUVMEnable) { - l->min_row_height = math_min2(dpte_row_height, meta_row_height); - l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma); - } else if (GPUVMEnable) { - l->min_row_height = dpte_row_height; - l->min_row_height_chroma = dpte_row_height_chroma; - } else { - l->min_row_height = meta_row_height; - l->min_row_height_chroma = meta_row_height_chroma; - } - l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma); - } else { - if (dcc_mrq_enable & GPUVMEnable) - l->min_row_height = math_min2(dpte_row_height, meta_row_height); - else if (GPUVMEnable) - l->min_row_height = dpte_row_height; - else - l->min_row_height = meta_row_height; - - l->min_row_time = l->min_row_height * LineTime / VRatio; - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: min_row_time = %f\n", __func__, l->min_row_time); -#endif - DML_ASSERT(l->min_row_time > 0); - - if (use_lb_flip_bw) { - // For mode check, calculation the flip bw requirement with worst case flip time - l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio), - math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); - - //The lower bound on flip bandwidth - // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required - l->lb_flip_bw = 0; - - if (iflip_enable) { - l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor; - l->num_rows = 2; - l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes); - l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes; - l->lb_flip_bw = math_max3( - l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip), - l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded), - l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); - DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); - DML_LOG_VERBOSE("DML::%s: total row bytes (%f row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); - DML_LOG_VERBOSE("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); - DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); - DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 
* Tr0_trips_flip_rounded)); - DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); - - if (l->lb_flip_bw > 0) { - DML_LOG_VERBOSE("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw); - DML_LOG_VERBOSE("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); - DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); - DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); - DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); - } -#endif - l->lb_flip_bw = math_max3(l->lb_flip_bw, - l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip, - (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip); - DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); -#endif - } - - *final_flip_bw = l->lb_flip_bw; - - *dst_y_per_vm_flip = 1; // not used - *dst_y_per_row_flip = 1; // not used - *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded); - } else { - if (iflip_enable) { - l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes); - DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); - DML_LOG_VERBOSE("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW); - DML_LOG_VERBOSE("DML::%s: portion of flip bw = %f\n", __func__, (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes); -#endif - if (l->ImmediateFlipBW == 0) { - l->Tvm_flip = 0; - l->Tr0_flip = 0; - } else { - l->Tvm_flip = math_max3(Tvm_trips_flip, - Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, - LineTime / 4.0); - - l->Tr0_flip = math_max3(Tr0_trips_flip, - (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, - LineTime / 4.0); - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor); - DML_LOG_VERBOSE("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes)); - - DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip); - DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip); -#endif - *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0; - *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 
1.0) / 4.0; - - *final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime), - (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime)); - - if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) { - *ImmediateFlipSupportedForPipe = false; - } else { - *ImmediateFlipSupportedForPipe = iflip_enable; - } - } else { - l->Tvm_flip = 0; - l->Tr0_flip = 0; - *dst_y_per_vm_flip = 0; - *dst_y_per_row_flip = 0; - *final_flip_bw = 0; - *ImmediateFlipSupportedForPipe = iflip_enable; - } - } - } else { - l->Tvm_flip = 0; - l->Tr0_flip = 0; - *dst_y_per_vm_flip = 0; - *dst_y_per_row_flip = 0; - *final_flip_bw = 0; - *ImmediateFlipSupportedForPipe = iflip_enable; - } - -#ifdef __DML_VBA_DEBUG__ - if (!use_lb_flip_bw) { - DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip); - DML_LOG_VERBOSE("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); - DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); - DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); - DML_LOG_VERBOSE("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); - } - DML_LOG_VERBOSE("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); - DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); -#endif -} - -static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - struct dml2_core_internal_scratch *scratch, - struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p) -{ - struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; - - enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy; - double reserved_vblank_time_us; - bool FoundCriticalSurface = false; - - s->TotalActiveWriteback = 0; - p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); -#endif - - p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency; - p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark; - p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark; - p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; - p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; - p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; - p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; - if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) { - p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; - 
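/*
 * A condensed sketch of how the stutter watermarks around this point are
 * assembled, assuming the mmSOCParameters fields used above: each watermark
 * is a sum of latencies plus a deep-sleep exit term, and on dcn4x QoS parts
 * the worst-case urgent latency and DF response time are stacked on top, as
 * in the surrounding statements.
 *
 *	double stutter_exit_wm = sr_exit_time_us + extra_latency_sr_us +
 *				 10.0 / dcfclk_deepsleep_mhz;
 *
 *	if (qos_type == dml2_qos_param_type_dcn4x)
 *		stutter_exit_wm += max_urgent_latency_us + df_response_time_us;
 */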
p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; - p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; - p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; - } - p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); - DML_LOG_VERBOSE("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); - DML_LOG_VERBOSE("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); - DML_LOG_VERBOSE("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time); - DML_LOG_VERBOSE("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime); - DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); - DML_LOG_VERBOSE("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark); - DML_LOG_VERBOSE("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); - DML_LOG_VERBOSE("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); - DML_LOG_VERBOSE("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); - DML_LOG_VERBOSE("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); - DML_LOG_VERBOSE("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); - DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); - DML_LOG_VERBOSE("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us); -#endif - - s->TotalActiveWriteback = 0; - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { - s->TotalActiveWriteback = s->TotalActiveWriteback + 1; - } - } - - if (s->TotalActiveWriteback <= 1) { - p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency; - } else { - p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; - } - if (p->USRRetrainingRequired) - p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency; - - if (s->TotalActiveWriteback <= 1) { - p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency; - p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency; - } else { - p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; - p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK; - } - - if (p->USRRetrainingRequired) - 
p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; - - if (p->USRRetrainingRequired) - p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); - DML_LOG_VERBOSE("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); - DML_LOG_VERBOSE("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); - DML_LOG_VERBOSE("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired); - DML_LOG_VERBOSE("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); -#endif - - s->TotalPixelBW = 0.0; - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; - double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0; - double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k] - * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz); - } - - *p->global_fclk_change_supported = true; - *p->global_dram_clock_change_supported = true; - - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; - double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0; - double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; - double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; - double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio; - double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio; - double LBBitPerPixel = 57; - - s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1)); - s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1)); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); - DML_LOG_VERBOSE("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); - DML_LOG_VERBOSE("DML::%s: k=%u, LBBitPerPixel = %f\n", __func__, k, LBBitPerPixel); - DML_LOG_VERBOSE("DML::%s: k=%u, HRatio = %f\n", __func__, k, 
h_ratio); - DML_LOG_VERBOSE("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps); -#endif - - s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz); - s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz); - - s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k]; - if (p->UnboundedRequestEnabled) { - s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW; - } - - s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; - s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k])); - s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio; - - s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz; - - if (p->NumberOfActiveSurfaces > 1) { - s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio; - } - - if (p->BytePerPixelDETC[k] > 0) { - s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k]; - s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k])); - s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c; - s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz; - if (p->NumberOfActiveSurfaces > 1) { - s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c; - } - s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC); - } else { - s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY; - } - - s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark; - s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark; - s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark; - s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us; - - if (p->VActiveLatencyHidingMargin) - p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k]; - - if (p->VActiveLatencyHidingUs) - p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding; - - if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { - s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 - / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height - * 
(double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width - / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * (double)h_total / pixel_clock_mhz) * 4.0); - if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) { - s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2; - } - s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark; - - s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark; - - s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin); - s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin); - } - p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency); - - uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy; - reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000; - - p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported; - if (s->ActiveFCLKChangeLatencyMargin[k] > 0) - p->FCLKChangeSupport[k] = dml2_pstate_change_vactive; - else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency) - p->FCLKChangeSupport[k] = dml2_pstate_change_vblank; - - if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported) - *p->global_fclk_change_supported = false; - - p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported; - if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) { - if (p->display_cfg->overrides.all_streams_blanked || - (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)) - p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive; - else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0) - p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive; - else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) - p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank; - } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0) - p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive; - else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) - p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank; - else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr) - p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr; - else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp) - p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp; - else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame) - p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame; - - if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported) - *p->global_dram_clock_change_supported = false; - - 
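/*
 * A minimal sketch of the uclk p-state decision just made for plane k,
 * assuming margin_us stands for the vactive latency-hiding margin and
 * reserved_us for the reserved vblank time ('support' stands in for
 * p->DRAMClockChangeSupport[k]): the "auto" strategy grants the strongest
 * mechanism the margins allow, while each force_* override only grants its
 * specific mechanism or leaves the plane unsupported.
 *
 *	// support = dml2_pstate_change_unsupported initially
 *	if (all_streams_blanked ||
 *	    (margin_us > 0 && reserved_us >= dram_change_latency_us))
 *		support = dml2_pstate_change_vblank_and_vactive;
 *	else if (margin_us > 0)
 *		support = dml2_pstate_change_vactive;
 *	else if (reserved_us >= dram_change_latency_us)
 *		support = dml2_pstate_change_vblank;
 */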
-		s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1));
-		s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k]));
-		s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
-		s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k];
-
-#ifdef __DML_VBA_DEBUG__
-		DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
-		DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
-		DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
-		DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
-#endif
-		p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
-
-		if (p->BytePerPixelDETC[k] > 0) {
-			s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k]));
-			s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
-			s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k];
-
-			if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format))
-				p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c));
-			else
-				p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c));
-
-#ifdef __DML_VBA_DEBUG__
-			DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]);
-			DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
-			DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
-			DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
-#endif
-		}
-	}
-
-	*p->g6_temp_read_support = true;
-	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
-		if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) &&
-			(s->g6_temp_read_latency_margin[k] < 0)) {
-			*p->g6_temp_read_support = false;
-		}
-	}
-
-	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
-		if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface)
-			|| ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
-			FoundCriticalSurface = true;
-			*p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
-		}
-	}
-
-#ifdef __DML_VBA_DEBUG__
-	DML_LOG_VERBOSE("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported);
-	DML_LOG_VERBOSE("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported);
-	DML_LOG_VERBOSE("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
-	DML_LOG_VERBOSE("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
-#endif
-}
-
-static void calculate_bytes_to_fetch_required_to_hide_latency(
-		struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *p)
-{
-	unsigned int dst_lines_to_hide;
-	unsigned int src_lines_to_hide_l;
-	unsigned int src_lines_to_hide_c;
-	unsigned int plane_index;
-	unsigned int stream_index;
-
-	for (plane_index = 0; plane_index < p->num_active_planes; plane_index++) {
-		if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[plane_index]))
-			continue;
-
-		stream_index = p->display_cfg->plane_descriptors[plane_index].stream_index;
-
-		dst_lines_to_hide = (unsigned int)math_ceil(p->latency_to_hide_us /
-			((double)p->display_cfg->stream_descriptors[stream_index].timing.h_total /
-			(double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz * 1000.0));
-
-		src_lines_to_hide_l = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio * dst_lines_to_hide,
-			p->swath_height_l[plane_index]);
-		p->bytes_required_l[plane_index] = src_lines_to_hide_l * p->num_of_dpp[plane_index] * p->swath_width_l[plane_index] * p->byte_per_pix_l[plane_index];
-
-		src_lines_to_hide_c = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane1.v_ratio * dst_lines_to_hide,
-			p->swath_height_c[plane_index]);
-		p->bytes_required_c[plane_index] = src_lines_to_hide_c * p->num_of_dpp[plane_index] * p->swath_width_c[plane_index] * p->byte_per_pix_c[plane_index];
-
-		if (p->display_cfg->plane_descriptors[plane_index].surface.dcc.enable && p->mrq_present) {
-			p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->meta_row_height_l[plane_index]) * p->meta_row_bytes_per_row_ub_l[plane_index];
-			if (p->meta_row_height_c[plane_index]) {
-				p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->meta_row_height_c[plane_index]) * p->meta_row_bytes_per_row_ub_c[plane_index];
-			}
-		}
-
-		if (p->display_cfg->gpuvm_enable == true) {
-			p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->dpte_row_height_l[plane_index]) * p->dpte_bytes_per_row_l[plane_index];
-			if (p->dpte_row_height_c[plane_index]) {
-				p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->dpte_row_height_c[plane_index]) * p->dpte_bytes_per_row_c[plane_index];
-			}
-		}
-	}
-}
-
-static noinline_for_stack void calculate_vactive_det_fill_latency(
-		const struct dml2_display_cfg *display_cfg,
-		unsigned int num_active_planes,
-		unsigned int bytes_required_l[],
-		unsigned int bytes_required_c[],
-		double dcc_dram_bw_nom_overhead_factor_p0[],
-		double dcc_dram_bw_nom_overhead_factor_p1[],
-		double surface_read_bw_l[],
-		double surface_read_bw_c[],
-		double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
-		double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
-		/* output */
-		double vactive_det_fill_delay_us[])
-{
-	double effective_excess_bandwidth;
-	double effective_excess_bandwidth_l;
-	double effective_excess_bandwidth_c;
-	double adj_factor;
-	unsigned int plane_index;
-	unsigned int soc_state;
-	unsigned int bw_type;
-
-	for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
-		if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
-			continue;
-
-		vactive_det_fill_delay_us[plane_index] = 0.0;
-		for (soc_state = 0; soc_state < dml2_core_internal_soc_state_max; soc_state++) {
-			for (bw_type = 0; bw_type < dml2_core_internal_bw_max; bw_type++) {
-				effective_excess_bandwidth = (surface_peak_required_bw[soc_state][bw_type][plane_index] - surface_avg_vactive_required_bw[soc_state][bw_type][plane_index]);
-
-				/* luma */
-				adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[plane_index] : 1.0;
-
-				effective_excess_bandwidth_l = effective_excess_bandwidth * surface_read_bw_l[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
-				if (effective_excess_bandwidth_l > 0.0) {
-					vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_l[plane_index] / effective_excess_bandwidth_l);
-				}
-
-				/* chroma */
-				adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[plane_index] : 1.0;
-
-				effective_excess_bandwidth_c = effective_excess_bandwidth * surface_read_bw_c[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
-				if (effective_excess_bandwidth_c > 0.0) {
-					vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_c[plane_index] / effective_excess_bandwidth_c);
-				}
-			}
-		}
-	}
-}
-
-static void calculate_excess_vactive_bandwidth_required(
-		const struct dml2_display_cfg *display_cfg,
-		unsigned int num_active_planes,
-		unsigned int bytes_required_l[],
-		unsigned int bytes_required_c[],
-		/* outputs */
-		double excess_vactive_fill_bw_l[],
-		double excess_vactive_fill_bw_c[])
-{
-	unsigned int plane_index;
-
-	for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
-		if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
-			continue;
-
-		excess_vactive_fill_bw_l[plane_index] = 0.0;
-		excess_vactive_fill_bw_c[plane_index] = 0.0;
-
-		if (display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us > 0) {
-			excess_vactive_fill_bw_l[plane_index] = (double)bytes_required_l[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us;
-			excess_vactive_fill_bw_c[plane_index] = (double)bytes_required_c[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us;
-		}
-	}
-}
-
-static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config)
-{
-	double bw_mbps = 0;
-	bw_mbps = ((double)uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
-
-	return bw_mbps;
-}
-
-static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config)
-{
-	double uclk_mhz = 0;
-
-	uclk_mhz = (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
-
-	return uclk_mhz;
-}
-
-static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params)
-{
-	unsigned int i;
-	unsigned int index = 0;
-
-	for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
-		DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz);
-
-		if (i == 0)
-			index = 0;
-		else
-			index = i - 1;
-
-		if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz ||
-			per_uclk_dpm_params[i].minimum_uclk_khz == 0) {
-			break;
-		}
-	}
-	DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
-	DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index);
-	return index;
-}
-
-static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table)
-{
-	unsigned int i;
-	bool clk_entry_found = false;
-
-	for (i = 0; i < clk_table->uclk.num_clk_values; i++) {
-		DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]);
-
-		if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) {
-			clk_entry_found = true;
-			break;
-		}
-	}
-
-	if (!clk_entry_found)
-		DML_ASSERT(clk_entry_found);
-#if defined(__DML_VBA_DEBUG__)
-	DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
-	DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i);
-#endif
-	return i;
-}
-
-static unsigned int get_pipe_flip_bytes(
-		double hostvm_inefficiency_factor,
-		unsigned int vm_bytes,
-		unsigned int dpte_row_bytes,
-		unsigned int meta_row_bytes)
-{
-	unsigned int flip_bytes = 0;
-
-	flip_bytes += (unsigned int) ((vm_bytes * hostvm_inefficiency_factor) + 2*meta_row_bytes);
-	flip_bytes += (unsigned int) (2*dpte_row_bytes * hostvm_inefficiency_factor);
-
-	return flip_bytes;
-}
-
-static void calculate_hostvm_inefficiency_factor(
-		double *HostVMInefficiencyFactor,
-		double *HostVMInefficiencyFactorPrefetch,
-
-		bool gpuvm_enable,
-		bool hostvm_enable,
-		unsigned int remote_iommu_outstanding_translations,
-		unsigned int max_outstanding_reqs,
-		double urg_bandwidth_avail_active_pixel_and_vm,
-		double urg_bandwidth_avail_active_vm_only)
-{
-	*HostVMInefficiencyFactor = 1;
-	*HostVMInefficiencyFactorPrefetch = 1;
-
-	if (gpuvm_enable && hostvm_enable) {
-		*HostVMInefficiencyFactor = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only;
-		*HostVMInefficiencyFactorPrefetch = *HostVMInefficiencyFactor;
-
-		if ((*HostVMInefficiencyFactorPrefetch < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs))
-			*HostVMInefficiencyFactorPrefetch = 4;
-#ifdef __DML_VBA_DEBUG__
-		DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm);
-		DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only);
-		DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor);
-		DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch);
-#endif
-	}
-}
-
-struct dml2_core_internal_g6_temp_read_blackouts_table {
-	struct {
-		unsigned int uclk_khz;
-		unsigned int blackout_us;
-	} entries[DML_MAX_CLK_TABLE_SIZE];
-};
-
-struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = {
-	.entries = {
-		{
-			.uclk_khz = 96000,
-			.blackout_us = 23,
-		},
-		{
-			.uclk_khz = 435000,
-			.blackout_us = 10,
-		},
-		{
-			.uclk_khz = 521000,
-			.blackout_us = 10,
-		},
-		{
-			.uclk_khz = 731000,
-			.blackout_us = 8,
-		},
-		{
-			.uclk_khz = 822000,
-			.blackout_us = 8,
-		},
-		{
-			.uclk_khz = 962000,
-			.blackout_us = 5,
-		},
-		{
-			.uclk_khz = 1069000,
-			.blackout_us = 5,
-		},
-		{
-			.uclk_khz = 1187000,
-			.blackout_us = 5,
-		},
-	},
-};
-
-static double get_g6_temp_read_blackout_us(
-		struct dml2_soc_bb *soc,
-		unsigned int uclk_freq_khz,
-		unsigned int min_clk_index)
-{
-	unsigned int i;
-	unsigned int blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
-
-	if (soc->power_management_parameters.g6_temp_read_blackout_us[0] > 0.0) {
-		/* overrides are present in the SoC BB */
-		return soc->power_management_parameters.g6_temp_read_blackout_us[min_clk_index];
-	}
-
-	/* use internal table */
-	blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
-
-	for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
-		if (uclk_freq_khz < core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz ||
-			core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz == 0) {
-			break;
-		}
-
-		blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[i].blackout_us;
-	}
-
-	return (double)blackout_us;
-}
-
-static double get_max_urgent_latency_us(
-		struct dml2_dcn4x_soc_qos_params *dcn4x,
-		double uclk_freq_mhz,
-		double FabricClock,
-		unsigned int min_clk_index)
-{
-	double latency;
-	latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz
-		* (1 + dcn4x->umc_max_latency_margin / 100.0)
-		+ dcn4x->mall_overhead_fclk_cycles / FabricClock
-		+ dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock
-		* (1 + dcn4x->fabric_max_transport_latency_margin / 100.0);
-	return latency;
-}
-
-static void calculate_pstate_keepout_dst_lines(
-		const struct dml2_display_cfg *display_cfg,
-		const struct dml2_core_internal_watermarks *watermarks,
-		unsigned int pstate_keepout_dst_lines[])
-{
-	const struct dml2_stream_parameters *stream_descriptor;
-	unsigned int i;
-
-	for (i = 0; i < display_cfg->num_planes; i++) {
-		if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) {
-			stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index];
-
-			pstate_keepout_dst_lines[i] =
-				(unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz));
-
-			if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) {
-				pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1;
-			}
-		}
-	}
-}
-
-static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib *mode_lib,
-		const struct dml2_display_cfg *display_cfg)
-{
-	struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
-	struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
-	struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
-	struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
-#ifdef DML_GLOBAL_PREFETCH_CHECK
-	struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
-#endif
-	struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
-
-	double min_return_bw_for_latency;
-	unsigned int k;
-
-	mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep;
-
-	calculate_hostvm_inefficiency_factor(
-		&s->HostVMInefficiencyFactor,
-		&s->HostVMInefficiencyFactorPrefetch,
-
-		display_cfg->gpuvm_enable,
-		display_cfg->hostvm_enable,
-		mode_lib->ip.remote_iommu_outstanding_translations,
-		mode_lib->soc.max_outstanding_reqs,
-		mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
-		mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
-
-	mode_lib->ms.Total3dlutActive = 0;
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
-			mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1;
-
-		// Calculate tdlut schedule related terms
-		calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK;
-		calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
-		calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
-		calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
-		calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
-		calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
-		calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
-		calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag;
-		calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling);
-
-		// output
-		calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
-		calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
-		calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
-		calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
-		calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
-		calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
-		calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
-
-		calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
-	}
-
-	min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active];
-
-	if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
-		s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
-			mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
-			mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
-
-	CalculateExtraLatency(
-		display_cfg,
-		mode_lib->ip.rob_buffer_size_kbytes,
-		mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
-		s->ReorderingBytes,
-		mode_lib->ms.DCFCLK,
-		mode_lib->ms.FabricClock,
-		mode_lib->ip.pixel_chunk_size_kbytes,
-		min_return_bw_for_latency,
-		mode_lib->ms.num_active_planes,
-		mode_lib->ms.NoOfDPP,
-		mode_lib->ms.dpte_group_bytes,
-		s->tdlut_bytes_per_group,
-		s->HostVMInefficiencyFactor,
-		s->HostVMInefficiencyFactorPrefetch,
-		mode_lib->soc.hostvm_min_page_size_kbytes,
-		mode_lib->soc.qos_parameters.qos_type,
-		!(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
-		mode_lib->soc.max_outstanding_reqs,
-		mode_lib->ms.support.request_size_bytes_luma,
-		mode_lib->ms.support.request_size_bytes_chroma,
-		mode_lib->ip.meta_chunk_size_kbytes,
-		mode_lib->ip.dchub_arb_to_ret_delay,
-		mode_lib->ms.TripToMemory,
-		mode_lib->ip.hostvm_mode,
-
-		// output
-		&mode_lib->ms.ExtraLatency,
-		&mode_lib->ms.ExtraLatency_sr,
-		&mode_lib->ms.ExtraLatencyPrefetch);
-
-	for (k = 0; k < mode_lib->ms.num_active_planes; k++)
-		s->impacted_dst_y_pre[k] = 0;
-
-	s->recalc_prefetch_schedule = 0;
-	s->recalc_prefetch_done = 0;
-	do {
-		mode_lib->ms.support.PrefetchSupported = true;
-
-		for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
-			s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
-			s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
-
-			s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
-				mode_lib->ms.NoOfDPP[k],
-				display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
-				display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
-				display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
-				display_cfg->plane_descriptors[k].composition.rotation_angle);
-
-			s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
-				mode_lib->ms.NoOfDPP[k],
-				display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
-				display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
-				display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
-				display_cfg->plane_descriptors[k].composition.rotation_angle);
-
-			struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
-
-			mode_lib->ms.TWait[k] = CalculateTWait(
-				display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
-				mode_lib->ms.UrgLatency,
-				mode_lib->ms.TripToMemory,
-				!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
-				get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx) : 0.0);
-
-			myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k];
-			myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK;
-			myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
-			myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
-			myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k];
-			myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
-			myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
-			myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
-			myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
-			myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
-			myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
-			myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
-			myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
-			myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
-			myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
-			myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
-			myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
-			myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
-			myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
-			myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
-			myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
-			myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
-			myPipe->ODMMode = mode_lib->ms.ODMMode[k];
-			myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
-			myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
-			myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
-			myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
-
-#ifdef __DML_VBA_DEBUG__
-			DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
-			DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]);
-#endif
-			CalculatePrefetchSchedule_params->display_cfg = display_cfg;
-			CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
-			CalculatePrefetchSchedule_params->myPipe = myPipe;
-			CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k];
-			CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
-			CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
-			CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
-			CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
-			CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
-			CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
-			CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
-			CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
-			CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k];
-			CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
-			CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
-			CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
-			CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
-			CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
-			CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
-			CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch;
-			CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
-			CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k];
-			CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k];
-			CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k];
-			CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
-			CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k];
-			CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k];
-			CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
-			CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k];
-			CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k];
-			CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k];
-			CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k];
-			CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k];
-			CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k];
-			CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory;
-			CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency;
-			CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
-			CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
-			CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
-			CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
-			CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
-			CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
-			CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
-			CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
-			CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
-			CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
-			CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k];
-			CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k];
-			CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
-			CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k];
-			CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k];
-
-			// output
-			CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
-			CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
-			CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k];
-			CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k];
-			CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k];
-			CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k];
-			CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k];
-			CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l
-			CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c
-			CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &mode_lib->ms.RequiredPrefetchBWMax[k];
-			CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k];
-			CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
-			CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k];
-			CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
-			CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
-			CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
-			CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
-			CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
-			CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
-			CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
-			CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
-			CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
-			CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
-			CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
-			CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
-			CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
-			CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k];
-			CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
-			CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
-			CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
-			CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k];
-
-			mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
-
-			mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k];
-			DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank);
-			DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank);
-		} // for k num_planes
-
-		CalculateDCFCLKDeepSleepTdlut(
-			display_cfg,
-			mode_lib->ms.num_active_planes,
-			mode_lib->ms.BytePerPixelY,
-			mode_lib->ms.BytePerPixelC,
-			mode_lib->ms.SwathWidthY,
-			mode_lib->ms.SwathWidthC,
-			mode_lib->ms.NoOfDPP,
-			mode_lib->ms.PSCL_FACTOR,
-			mode_lib->ms.PSCL_FACTOR_CHROMA,
-			mode_lib->ms.RequiredDPPCLK,
-			mode_lib->ms.vactive_sw_bw_l,
-			mode_lib->ms.vactive_sw_bw_c,
-			mode_lib->soc.return_bus_width_bytes,
-			mode_lib->ms.RequiredDISPCLK,
-			s->tdlut_bytes_to_deliver,
-			s->prefetch_swath_time_us,
-
-			/* Output */
-			&mode_lib->ms.dcfclk_deepsleep);
-
-		for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
-			if (mode_lib->ms.dst_y_prefetch[k] < 2.0
-				|| mode_lib->ms.LinesForVM[k] >= 32.0
-				|| mode_lib->ms.LinesForDPTERow[k] >= 16.0
-				|| mode_lib->ms.NoTimeForPrefetch[k] == true
-				|| s->DSTYAfterScaler[k] > 8) {
-				mode_lib->ms.support.PrefetchSupported = false;
-				DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]);
-				DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]);
-				DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]);
-				DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]);
-				DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]);
-			}
-		}
-
-		mode_lib->ms.support.DynamicMetadataSupported = true;
-		for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-			if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) {
-				mode_lib->ms.support.DynamicMetadataSupported = false;
-			}
-		}
-
-		mode_lib->ms.support.VRatioInPrefetchSupported = true;
-		for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
-			if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
-				mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
-				mode_lib->ms.support.VRatioInPrefetchSupported = false;
-				DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
-				DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
-				DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported);
-			}
-		}
-
-		mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported;
-
-		// By default, do not recalc prefetch schedule
-		s->recalc_prefetch_schedule = 0;
-
-		// Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok
-		if (mode_lib->ms.support.PrefetchSupported) {
-			for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
-				// Calculate Urgent burst factor for prefetch
-#ifdef __DML_VBA_DEBUG__
-				DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k);
-				DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]);
-				DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]);
-#endif
-				CalculateUrgentBurstFactor(
-					&display_cfg->plane_descriptors[k],
-					mode_lib->ms.swath_width_luma_ub[k],
-					mode_lib->ms.swath_width_chroma_ub[k],
-					mode_lib->ms.SwathHeightY[k],
-					mode_lib->ms.SwathHeightC[k],
-					s->line_times[k],
-					mode_lib->ms.UrgLatency,
-					mode_lib->ms.VRatioPreY[k],
-					mode_lib->ms.VRatioPreC[k],
-					mode_lib->ms.BytePerPixelInDETY[k],
-					mode_lib->ms.BytePerPixelInDETC[k],
-					mode_lib->ms.DETBufferSizeY[k],
-					mode_lib->ms.DETBufferSizeC[k],
-					/* Output */
-					&mode_lib->ms.UrgentBurstFactorLumaPre[k],
-					&mode_lib->ms.UrgentBurstFactorChromaPre[k],
-					&mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
-			}
-
-			// Calculate urgent bandwidth required, both urg and non urg peak bandwidth
-			// assume flip bw is 0 at this point
-			for (k = 0; k < mode_lib->ms.num_active_planes; k++)
-				mode_lib->ms.final_flip_bw[k] = 0;
-
-			calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required;
-			calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required;
-			calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual;
-			calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required;
-			calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw;
-			calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
-
-			calculate_peak_bandwidth_params->display_cfg = display_cfg;
-			calculate_peak_bandwidth_params->inc_flip_bw = 0;
-			calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
-			calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
-			calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
-			calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
-			calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
-			calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
-			calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
-			calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
-
-			calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
-			calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
-			calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
-			calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
-			calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax;
-			calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
-			calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
-			calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
-			calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
-			calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
-			calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
-			calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
-			calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
-			calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
-			calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
-			calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
-			calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
-			calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
-			calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
-
-			calculate_peak_bandwidth_required(
-				&mode_lib->scratch,
-				calculate_peak_bandwidth_params);
-
-			// Check urg peak bandwidth against available urg bw
-			// check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active)
-			check_urgent_bandwidth_support(
-				&s->dummy_single[0], // double* frac_urg_bandwidth
-				&s->dummy_single[1], // double* frac_urg_bandwidth_mall
-				&mode_lib->ms.support.UrgVactiveBandwidthSupport,
-				&mode_lib->ms.support.PrefetchBandwidthSupported,
-
-				mode_lib->soc.mall_allocated_for_dcn_mbytes,
-				mode_lib->ms.support.non_urg_bandwidth_required,
-				mode_lib->ms.support.urg_vactive_bandwidth_required,
-				mode_lib->ms.support.urg_bandwidth_required,
-				mode_lib->ms.support.urg_bandwidth_available);
-
-			mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported;
-			DML_LOG_VERBOSE("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported);
-
-			for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
-				if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) {
-					mode_lib->ms.support.PrefetchSupported = false;
-					DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
-				}
-			}
-
-#ifdef DML_GLOBAL_PREFETCH_CHECK
-			if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) {
-				CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes;
-				CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
-				CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
-				CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
-				CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
-				CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
-				CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY;
-				CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC;
-				CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
-				CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte;
-				CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY;
-				CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC;
-				CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
-				CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
-				CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
-				CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded;
-				CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto;
-				CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
-				CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
-				CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch;
-				if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024)
-					CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024;
-
-				CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) /
-					((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0);
-
-				// if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
-				CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule;
-				CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
-				mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params);
-				s->recalc_prefetch_done = 1;
-				s->recalc_prefetch_schedule = 1;
-			}
-#endif
-		} // prefetch schedule ok, do urg bw and flip schedule
-	} while (s->recalc_prefetch_schedule);
-
-	// Flip Schedule
-	// Both prefetch schedule and BW okay
-	if (mode_lib->ms.support.PrefetchSupported == true) {
-		mode_lib->ms.BandwidthAvailableForImmediateFlip =
-			get_bandwidth_available_for_immediate_flip(
-				dml2_core_internal_soc_state_sys_active,
-				mode_lib->ms.support.urg_bandwidth_required_qual, // no flip
-				mode_lib->ms.support.urg_bandwidth_available);
-
-		mode_lib->ms.TotImmediateFlipBytes = 0;
-		for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
-			if (display_cfg->plane_descriptors[k].immediate_flip) {
-				s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
-					s->HostVMInefficiencyFactor,
-					mode_lib->ms.vm_bytes[k],
-					mode_lib->ms.DPTEBytesPerRow[k],
-					mode_lib->ms.meta_row_bytes[k]);
-			} else {
-				s->per_pipe_flip_bytes[k] = 0;
-			}
-			mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k];
-
-		}
-
-		for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
-			CalculateFlipSchedule(
-				&mode_lib->scratch,
-				display_cfg->plane_descriptors[k].immediate_flip,
-				1, // use_lb_flip_bw
-				s->HostVMInefficiencyFactor,
-				s->Tvm_trips_flip[k],
-				s->Tr0_trips_flip[k],
-				s->Tvm_trips_flip_rounded[k],
-				s->Tr0_trips_flip_rounded[k],
-				display_cfg->gpuvm_enable,
-				mode_lib->ms.vm_bytes[k],
-				mode_lib->ms.DPTEBytesPerRow[k],
-				mode_lib->ms.BandwidthAvailableForImmediateFlip,
-				mode_lib->ms.TotImmediateFlipBytes,
-				display_cfg->plane_descriptors[k].pixel_format,
-				(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)),
-				display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
-				display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
-				mode_lib->ms.Tno_bw_flip[k],
-				mode_lib->ms.dpte_row_height[k],
-				mode_lib->ms.dpte_row_height_chroma[k],
-				mode_lib->ms.use_one_row_for_frame_flip[k],
-				mode_lib->ip.max_flip_time_us,
-				mode_lib->ip.max_flip_time_lines,
-				s->per_pipe_flip_bytes[k],
-				mode_lib->ms.meta_row_bytes[k],
-				s->meta_row_height_luma[k],
-				s->meta_row_height_chroma[k],
-				mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
-
-				/* Output */
-				&mode_lib->ms.dst_y_per_vm_flip[k],
-				&mode_lib->ms.dst_y_per_row_flip[k],
-				&mode_lib->ms.final_flip_bw[k],
-				&mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
-		}
-
-		calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
-		calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip;
-		calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
-		calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip;
-		calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
-		calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
-
-		calculate_peak_bandwidth_params->display_cfg = display_cfg;
-		calculate_peak_bandwidth_params->inc_flip_bw = 1;
-		calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
-		calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
-		calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
-		calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
-		calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
-		calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
-		calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
-		calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
-
-		calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
-		calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
-		calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
-		calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
-		calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax;
-		calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
-		calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
-		calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
-		calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
-		calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
-		calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
-		calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
-		calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
-		calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
-		calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
-		calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
-		calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
-		calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
-		calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
-
-		calculate_peak_bandwidth_required(
-			&mode_lib->scratch,
-			calculate_peak_bandwidth_params);
-
-		calculate_immediate_flip_bandwidth_support(
-			&s->dummy_single[0], // double* frac_urg_bandwidth_flip
-			&mode_lib->ms.support.ImmediateFlipSupport,
-
-			dml2_core_internal_soc_state_sys_active,
-			mode_lib->ms.support.urg_bandwidth_required_flip,
-			mode_lib->ms.support.non_urg_bandwidth_required_flip,
-			mode_lib->ms.support.urg_bandwidth_available);
-
-		for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-			if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)
-				mode_lib->ms.support.ImmediateFlipSupport = false;
-		}
-
-	} else { // if prefetch not support, assume iflip is not supported too
-		mode_lib->ms.support.ImmediateFlipSupport = false;
-	}
-
-	s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
-	s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
-	s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr;
-	s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
-	s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
-	s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
-	s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
-	s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
-	s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
-	s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
-	s->mSOCParameters.USRRetrainingLatency = 0;
-	s->mSOCParameters.SMNLatency = 0;
-	s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx);
-	s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, mode_lib->ms.state_idx);
-	s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock;
-	s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
-
-	CalculateWatermarks_params->display_cfg = display_cfg;
-	CalculateWatermarks_params->USRRetrainingRequired = false;
-	CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
-	CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
-	CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
-	CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
-	CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK;
-	CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
-	CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
-	CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
-	CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
-	CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
-	CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
-	CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
-	CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
-	CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
-	CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY;
-	CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC;
-	CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY;
-	CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC;
-	CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
-	CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
-	CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
-	CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
-	CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
-	CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled;
-	CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte;
-	CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma;
-	CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma;
-
-	// Output
-	CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark
-	CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport;
-	CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported;
-	CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[]
-	CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[]
-	CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport;
-	CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported;
-	CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported
-	CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport;
-	CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support;
-	CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin;
-	CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs;
-
-	CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
-
-	calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]);
-	DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", __func__);
-
-}
-
-
-static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params)
-{
-	struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
-	const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
-	const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
-
-	double outstanding_latency_us = 0;
-
-	struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
-	struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
-	struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
-	struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
-	struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
-	unsigned int k, m, n;
-
-	memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
-	memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support));
-
-	mode_lib->ms.num_active_planes = display_cfg->num_planes;
-	get_stream_output_bpp(s->OutputBpp, display_cfg);
-
-	mode_lib->ms.state_idx = in_out_params->min_clk_index;
-	mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000);
-	mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000);
-	mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000);
-	mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000;
-	mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000;
-	mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dispclk / 1000;
-	mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000;
-	mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dppclk / 1000;
-	mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config);
-	mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000);
-	mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000);
-	mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
-	mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table);
-
-#if defined(__DML_VBA_DEBUG__)
-	DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__);
-	DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
-	DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
-	DML_LOG_VERBOSE("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index);
-	DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
-	DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps);
-	DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
-	DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
-	DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
-	DML_LOG_VERBOSE("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK);
-	DML_LOG_VERBOSE("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz);
DML_LOG_VERBOSE("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); - DML_LOG_VERBOSE("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); - DML_LOG_VERBOSE("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); - DML_LOG_VERBOSE("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); - DML_LOG_VERBOSE("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) - DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); -#endif - - CalculateMaxDETAndMinCompressedBufferSize( - mode_lib->ip.config_return_buffer_size_in_kbytes, - mode_lib->ip.config_return_buffer_segment_size_in_kbytes, - mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->ip.max_num_dpp, - display_cfg->overrides.hw.force_nom_det_size_kbytes.enable, - display_cfg->overrides.hw.force_nom_det_size_kbytes.value, - mode_lib->ip.dcn_mrq_present, - - /* Output */ - &mode_lib->ms.MaxTotalDETInKByte, - &mode_lib->ms.NomDETInKByte, - &mode_lib->ms.MinCompressedBufferSizeInKByte); - - PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd); - - /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ - - /*Scale Ratio, taps Support Check*/ - mode_lib->ms.support.ScaleRatioAndTapsSupport = true; - // Many core tests are still setting scaling parameters "incorrectly" - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false - && (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) - || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0 - || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0 - || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0 - || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) { - mode_lib->ms.support.ScaleRatioAndTapsSupport = false; - } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0 - || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0 - || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1) - || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio - || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio - || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps - || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps - || (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) - && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 || - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps 
< 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 || - (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) || - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio || - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio || - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps || - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) { - mode_lib->ms.support.ScaleRatioAndTapsSupport = false; - } - } - - /*Source Format, Pixel Format and Scan Support Check*/ - mode_lib->ms.support.SourceFormatPixelAndScanSupport = true; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { - mode_lib->ms.support.SourceFormatPixelAndScanSupport = false; - } - } - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - CalculateBytePerPixelAndBlockSizes( - display_cfg->plane_descriptors[k].pixel_format, - display_cfg->plane_descriptors[k].surface.tiling, - display_cfg->plane_descriptors[k].surface.plane0.pitch, - display_cfg->plane_descriptors[k].surface.plane1.pitch, - - /* Output */ - &mode_lib->ms.BytePerPixelY[k], - &mode_lib->ms.BytePerPixelC[k], - &mode_lib->ms.BytePerPixelInDETY[k], - &mode_lib->ms.BytePerPixelInDETC[k], - &mode_lib->ms.Read256BlockHeightY[k], - &mode_lib->ms.Read256BlockHeightC[k], - &mode_lib->ms.Read256BlockWidthY[k], - &mode_lib->ms.Read256BlockWidthC[k], - &mode_lib->ms.MacroTileHeightY[k], - &mode_lib->ms.MacroTileHeightC[k], - &mode_lib->ms.MacroTileWidthY[k], - &mode_lib->ms.MacroTileWidthC[k], - &mode_lib->ms.surf_linear128_l[k], - &mode_lib->ms.surf_linear128_c[k]); - } - - /*Bandwidth Support Check*/ - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { - mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; - mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; - } else { - mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; - mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; - } - } - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / 
((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - - mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * - display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0); - DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]); -#endif - } - - // Writeback bandwidth - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) { - mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height - * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width - / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height - * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total - / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0; - } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { - mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height - * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width - / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height - * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total - / 
((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0; - } else { - mode_lib->ms.WriteBandwidth[k][0] = 0.0; - } - } - - /*Writeback Latency support check*/ - mode_lib->ms.support.WritebackLatencySupport = true; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && - (mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) { - mode_lib->ms.support.WritebackLatencySupport = false; - } - } - - - /* Writeback Scale Ratio and Taps Support Check */ - mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < mode_lib->ip.writeback_min_hscl_ratio - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) { - mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; - } - if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) { - mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; - } - } - } - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - CalculateSinglePipeDPPCLKAndSCLThroughput( - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, - 
display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, - mode_lib->ip.max_dchub_pscl_bw_pix_per_clk, - mode_lib->ip.max_pscl_lb_bw_pix_per_clk, - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), - display_cfg->plane_descriptors[k].pixel_format, - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps, - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps, - /* Output */ - &mode_lib->ms.PSCL_FACTOR[k], - &mode_lib->ms.PSCL_FACTOR_CHROMA[k], - &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]); - } - - // Max Viewport Size support - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { - s->MaximumSwathWidthSupportLuma = 15360; - } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video - s->MaximumSwathWidthSupportLuma = 7680 + 16; - } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video - s->MaximumSwathWidthSupportLuma = 4320 + 16; - } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha - s->MaximumSwathWidthSupportLuma = 5120 + 16; - } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp - s->MaximumSwathWidthSupportLuma = 3072 + 16; - } else { - s->MaximumSwathWidthSupportLuma = 6144 + 16; - } - - if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { - s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0); - } else { - s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma; - } - - unsigned lb_buffer_size_bits_luma = mode_lib->ip.line_buffer_size_bits; - unsigned lb_buffer_size_bits_chroma = mode_lib->ip.line_buffer_size_bits; - -/* -#if defined(DV_BUILD) - // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming. 
- if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { - lb_buffer_size_bits_luma = 34620 * 57; - lb_buffer_size_bits_chroma = 13560 * 57; - } -#endif -*/ - mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 / - (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0)); - if (mode_lib->ms.BytePerPixelC[k] == 0.0) { - mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0; - } else { - mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 / - (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0)); - } - - mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); - mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); - - DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]); - DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma); - DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); - - DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]); - DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma); - DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); - } - - /* Cursor Support Check */ - mode_lib->ms.support.CursorSupport = true; - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { - if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) - mode_lib->ms.support.CursorSupport = false; - } - } - - /* Valid Pitch Check */ - mode_lib->ms.support.PitchSupport = true; - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - - // data pitch - unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k]; - - if (mode_lib->ms.surf_linear128_l[k]) - alignment_l = alignment_l / 2; - - mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l); - if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { - unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k]; - - if (mode_lib->ms.surf_linear128_c[k]) - alignment_c = alignment_c / 2; - mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c); - } else { - mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch; - } - - if 
(mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch || - mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) { - mode_lib->ms.support.PitchSupport = false; -#if defined(__DML_VBA_DEBUG__) - DML_LOG_VERBOSE("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]); - DML_LOG_VERBOSE("DML::%s: k=%u PitchY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch); - DML_LOG_VERBOSE("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]); - DML_LOG_VERBOSE("DML::%s: k=%u PitchC = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch); - DML_LOG_VERBOSE("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport); -#endif - } - - // meta pitch - if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) { - mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch, - display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]); - - if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch) - mode_lib->ms.support.PitchSupport = false; - - if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { - mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch, - display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]); - - if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch) - mode_lib->ms.support.PitchSupport = false; - } - } else { - mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0; - mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0; - } - } - - mode_lib->ms.support.ViewportExceedsSurface = false; - if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) { - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || - display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { - mode_lib->ms.support.ViewportExceedsSurface = true; -#if defined(__DML_VBA_DEBUG__) - DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); - DML_LOG_VERBOSE("DML::%s: k=%u SurfaceWidthY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width); - DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); - DML_LOG_VERBOSE("DML::%s: k=%u SurfaceHeightY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); - DML_LOG_VERBOSE("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); -#endif - } - if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { - if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || - 
display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { - mode_lib->ms.support.ViewportExceedsSurface = true; - } - } - } - } - - CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; - CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte; - CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte; - CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; - CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; - CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; - CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; - CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1; - CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; - CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.vactive_sw_bw_l; - CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c; - CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma; - CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma; - CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY; - CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC; - CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY; - CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC; - CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l; - CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c; - CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode; - CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY; - CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC; - CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY; - CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC; - CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2]; - CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present; - - // output - CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0]; - CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1]; - CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3]; - CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = 
s->dummy_integer_array[4]; - CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5]; - CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6]; - CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7]; - CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8]; - CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26]; - CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27]; - CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9]; - CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10]; - CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11]; - CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l; - CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c; - CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0]; - CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1]; - CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2]; - CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0]; - CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface; - CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1]; - - // This calls is just to find out if there is enough DET space to support full vp in 1 pipe. - CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); - - mode_lib->ms.TotalNumberOfActiveDPP = 0; - mode_lib->ms.TotalNumberOfActiveOPP = 0; - mode_lib->ms.support.TotalAvailablePipesSupport = true; - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - /*Number Of DSC Slices*/ - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) { - - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) - mode_lib->ms.support.NumberOfDSCSlices[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices; - else { - if (s->PixelClockBackEnd[k] > 4800) { - mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4)); - } else if (s->PixelClockBackEnd[k] > 2400) { - mode_lib->ms.support.NumberOfDSCSlices[k] = 8; - } else if (s->PixelClockBackEnd[k] > 1200) { - mode_lib->ms.support.NumberOfDSCSlices[k] = 4; - } else if (s->PixelClockBackEnd[k] > 340) { - mode_lib->ms.support.NumberOfDSCSlices[k] = 2; - } else { - mode_lib->ms.support.NumberOfDSCSlices[k] = 1; - } - } - } else { - mode_lib->ms.support.NumberOfDSCSlices[k] = 0; - } - - CalculateODMMode( - mode_lib->ip.maximum_pixels_per_line_per_dsc_unit, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, - 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode, - mode_lib->ms.max_dispclk_freq_mhz, - false, // DSCEnable - mode_lib->ms.TotalNumberOfActiveDPP, - mode_lib->ms.TotalNumberOfActiveOPP, - mode_lib->ip.max_num_dpp, - mode_lib->ip.max_num_opp, - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), - mode_lib->ms.support.NumberOfDSCSlices[k], - - /* Output */ - &s->TotalAvailablePipesSupportNoDSC, - &s->NumberOfDPPNoDSC, - &s->ODMModeNoDSC, - &s->RequiredDISPCLKPerSurfaceNoDSC); - - CalculateODMMode( - mode_lib->ip.maximum_pixels_per_line_per_dsc_unit, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode, - mode_lib->ms.max_dispclk_freq_mhz, - true, // DSCEnable - mode_lib->ms.TotalNumberOfActiveDPP, - mode_lib->ms.TotalNumberOfActiveOPP, - mode_lib->ip.max_num_dpp, - mode_lib->ip.max_num_opp, - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), - mode_lib->ms.support.NumberOfDSCSlices[k], - - /* Output */ - &s->TotalAvailablePipesSupportDSC, - &s->NumberOfDPPDSC, - &s->ODMModeDSC, - &s->RequiredDISPCLKPerSurfaceDSC); - - CalculateOutputLink( - &mode_lib->scratch, - ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000), - ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000), - ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000), - mode_lib->soc.phy_downspread_percent, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, - s->PixelClockBackEnd[k], - s->OutputBpp[k], - mode_lib->ip.maximum_dsc_bits_per_component, - mode_lib->ms.support.NumberOfDSCSlices[k], - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout, - s->ODMModeNoDSC, - s->ODMModeDSC, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate, - - /* Output */ - &mode_lib->ms.RequiresDSC[k], - &mode_lib->ms.RequiresFEC[k], - &mode_lib->ms.OutputBpp[k], - &mode_lib->ms.OutputType[k], - &mode_lib->ms.OutputRate[k], - &mode_lib->ms.RequiredSlots[k]); - - if (s->OutputBpp[k] == 0.0) { - s->OutputBpp[k] = mode_lib->ms.OutputBpp[k]; - } - - if (mode_lib->ms.RequiresDSC[k] == false) { - mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC; - mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC; - if (!s->TotalAvailablePipesSupportNoDSC) - 
mode_lib->ms.support.TotalAvailablePipesSupport = false; - mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC; - } else { - mode_lib->ms.ODMMode[k] = s->ODMModeDSC; - mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC; - if (!s->TotalAvailablePipesSupportDSC) - mode_lib->ms.support.TotalAvailablePipesSupport = false; - mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC; - } -#if defined(__DML_VBA_DEBUG__) - DML_LOG_VERBOSE("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]); - DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); -#endif - - // ensure the number dsc slices is integer multiple based on ODM mode - mode_lib->ms.support.DSCSlicesODMModeSupported = true; - if (mode_lib->ms.RequiresDSC[k]) { - // fail a ms check if the override num_slices doesn't align with odm mode setting - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) { - if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) - mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 2) == 0); - else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) - mode_lib->ms.support.DSCSlicesODMModeSupported = (mode_lib->ms.support.NumberOfDSCSlices[k] == 12); - else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) - mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0); -#if defined(__DML_VBA_DEBUG__) - if (!mode_lib->ms.support.DSCSlicesODMModeSupported) { - DML_LOG_VERBOSE("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k); - DML_LOG_VERBOSE("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices); - DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); - } -#endif - } else { - // safe guard to ensure the dml derived dsc slices and odm setting are compatible - if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) - mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0); - else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) - mode_lib->ms.support.NumberOfDSCSlices[k] = 12; - else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) - mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0); - } - - } else { - mode_lib->ms.support.NumberOfDSCSlices[k] = 0; - } - } - - mode_lib->ms.support.incorrect_imall_usage = 0; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) - mode_lib->ms.support.incorrect_imall_usage = 1; - } - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - mode_lib->ms.MPCCombine[k] = false; - mode_lib->ms.NoOfDPP[k] = 1; - mode_lib->ms.NoOfOPP[k] = 1; - - if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) { - mode_lib->ms.MPCCombine[k] = false; - mode_lib->ms.NoOfDPP[k] = 4; - mode_lib->ms.NoOfOPP[k] = 4; - } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) { - mode_lib->ms.MPCCombine[k] = false; - mode_lib->ms.NoOfDPP[k] = 3; - mode_lib->ms.NoOfOPP[k] = 3; - } else if 
(mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) { - mode_lib->ms.MPCCombine[k] = false; - mode_lib->ms.NoOfDPP[k] = 2; - mode_lib->ms.NoOfOPP[k] = 2; - } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) { - mode_lib->ms.MPCCombine[k] = true; - mode_lib->ms.NoOfDPP[k] = 2; - } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) { - mode_lib->ms.MPCCombine[k] = false; - mode_lib->ms.NoOfDPP[k] = 1; - if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { - DML_LOG_VERBOSE("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__); - } - } else { - if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { - mode_lib->ms.MPCCombine[k] = true; - mode_lib->ms.NoOfDPP[k] = 2; - } - } -#if defined(__DML_VBA_DEBUG__) - DML_LOG_VERBOSE("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]); -#endif - } - - mode_lib->ms.TotalNumberOfActiveDPP = 0; - mode_lib->ms.TotalNumberOfActiveOPP = 0; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - mode_lib->ms.TotalNumberOfActiveDPP += mode_lib->ms.NoOfDPP[k]; - mode_lib->ms.TotalNumberOfActiveOPP += mode_lib->ms.NoOfOPP[k]; - } - if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp) - mode_lib->ms.support.TotalAvailablePipesSupport = false; - if (mode_lib->ms.TotalNumberOfActiveOPP > (unsigned int)mode_lib->ip.max_num_opp) - mode_lib->ms.support.TotalAvailablePipesSupport = false; - - - mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0; - for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.NoOfDPP[k] == 1) - mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1; - } - - //DISPCLK/DPPCLK - mode_lib->ms.WritebackRequiredDISPCLK = 0; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { - mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK, - CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, - mode_lib->ip.writeback_line_buffer_buffer_size)); - } - } - - mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - mode_lib->ms.RequiredDISPCLK = 
math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]); - } - - mode_lib->ms.GlobalDPPCLK = 0; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k]; - mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]); - } - - mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz)); - - /* Total Available OTG, Writeback, HDMIFRL, DP Support Check */ - s->TotalNumberOfActiveOTG = 0; - s->TotalNumberOfActiveHDMIFRL = 0; - s->TotalNumberOfActiveDP2p0 = 0; - s->TotalNumberOfActiveDP2p0Outputs = 0; - s->TotalNumberOfActiveWriteback = 0; - memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { - if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) { - s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1; - - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) - s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1; - - s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1; - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) - s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1; - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) { - s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1; - // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup - //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) { - s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1; - //} - } - } - } - } - - /* Writeback Mode Support Check */ - mode_lib->ms.support.EnoughWritebackUnits = 1; - if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) { - mode_lib->ms.support.EnoughWritebackUnits = false; - } - mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg); - mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs); - mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs); - - - mode_lib->ms.support.ExceededMultistreamSlots = false; - mode_lib->ms.support.LinkCapacitySupport = true; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false && - (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || - 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) { - mode_lib->ms.support.LinkCapacitySupport = false; - } - } - - mode_lib->ms.support.P2IWith420 = false; - mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false; - mode_lib->ms.support.DSC422NativeNotSupported = false; - mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false; - mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false; - mode_lib->ms.support.BPPForMultistreamNotIndicated = false; - mode_lib->ms.support.MultistreamWithHDMIOreDP = false; - mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false; - mode_lib->ms.support.NotEnoughLanesForMSO = false; - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true) - mode_lib->ms.support.P2IWith420 = true; - - if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support) - mode_lib->ms.support.DSC422NativeNotSupported = true; - - if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 || - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) && - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) || - ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 || - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) && - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0)) - mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true; - - // FIXME_STAGE2 - //if 
(display_cfg->output.OutputMultistreamEn[k] == 1) { - // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na) - // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true; - // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0) - // mode_lib->ms.support.BPPForMultistreamNotIndicated = true; - // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) { - // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0) - // mode_lib->ms.support.BPPForMultistreamNotIndicated = true; - // } - //} - - if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) { - // FIXME_STAGE2 - //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k) - // mode_lib->ms.support.MultistreamWithHDMIOreDP = true; - //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) { - // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n) - // mode_lib->ms.support.MultistreamWithHDMIOreDP = true; - //} - } - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 || - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4)) - mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true; - - if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) || - (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4)) - mode_lib->ms.support.NotEnoughLanesForMSO = true; - } - } - - mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl && - !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { - mode_lib->ms.RequiredDTBCLK[k] = RequiredDTBCLK( - mode_lib->ms.RequiresDSC[k], - s->PixelClockBackEnd[k], - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, - mode_lib->ms.OutputBpp[k], - mode_lib->ms.support.NumberOfDSCSlices[k], - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate, - 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout); - - if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_ss_clocks_khz.dtbclk / 1000)) { - mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true; - } - } else { - /* Phantom DTBCLK can be calculated different from main because phantom has no DSC and thus - * will have a different output BPP. Ignore phantom DTBCLK requirement and only consider - * non-phantom DTBCLK requirements. In map_mode_to_soc_dpm we choose the highest DTBCLK - * required - by setting phantom dtbclk to 0 we ignore it. - */ - mode_lib->ms.RequiredDTBCLK[k] = 0; - } - } - - mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) { - s->DSCFormatFactor = 2; - } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) { - s->DSCFormatFactor = 1; - } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { - s->DSCFormatFactor = 2; - } else { - s->DSCFormatFactor = 1; - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); -#endif - if (mode_lib->ms.RequiresDSC[k] == true) { - s->PixelClockBackEndFactor = 3.0; - - if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) - s->PixelClockBackEndFactor = 12.0; - else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) - s->PixelClockBackEndFactor = 9.0; - else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) - s->PixelClockBackEndFactor = 6.0; - - mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor; - if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) { - mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true; - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); - DML_LOG_VERBOSE("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); -#endif - } - } - } - - /* Check DSC Unit and Slices Support */ - mode_lib->ms.support.NotEnoughDSCSlices = false; - s->TotalDSCUnitsRequired = 0; - mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true; - memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); - - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if 
(mode_lib->ms.RequiresDSC[k] == true && !s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) { - s->NumDSCUnitRequired = 1; - - if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) - s->NumDSCUnitRequired = 4; - else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) - s->NumDSCUnitRequired = 3; - else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) - s->NumDSCUnitRequired = 2; - - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit) - mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false; - s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired; - - if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired) - mode_lib->ms.support.NotEnoughDSCSlices = true; - } - s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1; - } - - mode_lib->ms.support.NotEnoughDSCUnits = false; - if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) { - mode_lib->ms.support.NotEnoughDSCUnits = true; - } - - /*DSC Delay per state*/ - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k], - mode_lib->ms.ODMMode[k], - mode_lib->ip.maximum_dsc_bits_per_component, - s->OutputBpp[k], - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, - mode_lib->ms.support.NumberOfDSCSlices[k], - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), - s->PixelClockBackEnd[k]); - } - - // Figure out the swath and DET configuration after the num dpp per plane is figured out - CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; - CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode; - CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP; - - // output - CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0]; - CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1]; - CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub; - CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub; - CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY; - CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC; - CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY; - CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC; - CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma; - CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma; - CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that - CalculateSwathAndDETConfiguration_params->DETBufferSizeY = 
-
-	if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
-		for (k = 0; k < mode_lib->ms.num_active_planes; k++)
-			mode_lib->ms.SurfaceSizeInMALL[k] = 0;
-		mode_lib->ms.support.ExceededMALLSize = 0;
-	} else {
-		CalculateSurfaceSizeInMall(
-			display_cfg,
-			mode_lib->ms.num_active_planes,
-			mode_lib->soc.mall_allocated_for_dcn_mbytes,
-
-			mode_lib->ms.BytePerPixelY,
-			mode_lib->ms.BytePerPixelC,
-			mode_lib->ms.Read256BlockWidthY,
-			mode_lib->ms.Read256BlockWidthC,
-			mode_lib->ms.Read256BlockHeightY,
-			mode_lib->ms.Read256BlockHeightC,
-			mode_lib->ms.MacroTileWidthY,
-			mode_lib->ms.MacroTileWidthC,
-			mode_lib->ms.MacroTileHeightY,
-			mode_lib->ms.MacroTileHeightC,
-
-			/* Output */
-			mode_lib->ms.SurfaceSizeInMALL,
-			&mode_lib->ms.support.ExceededMALLSize);
-	}
-
-	mode_lib->ms.TotalNumberOfDCCActiveDPP = 0;
-	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-		if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
-			mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k];
-		}
-	}
-
-	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-		s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
-		s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k];
-		s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
-		s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
-		s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
-		s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
-		s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
-		s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
-		s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
-		s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
-		s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
-		s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
-		s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
-		s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
-		s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
-		s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
-		s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
-		s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
-		s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
-		s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
-		s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
-		s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
-		s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
-		s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
-		s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
-		s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
-		s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
-		s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
-		s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
-		s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
-		s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
-		s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
-		s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
-		s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k];
-		s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k];
-
-		s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
-		s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
-	}
-
-	CalculateVMRowAndSwath_params->display_cfg = display_cfg;
-	CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
-	CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
-	CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
-	CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
-	CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
-	CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
-	CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY;
-	CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC;
-	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
-	CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
-	CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
-
-	// output
-	CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded;
-	CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12];
-	CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13];
-	CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
-	CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
-	CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA
-	CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA
-	CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16];
-	CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
-	CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17];
-	CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18];
-	CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19];
-	CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20];
-	CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21];
-	CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22];
-	CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
-	CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
-	CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
-	CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
-	CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23];
-	CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24];
-	CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY;
-	CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC;
-	CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
-	CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
-	CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY;
-	CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC;
-	CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
-	CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
-	CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
-	CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
-	CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes;
-	CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame;
-	CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip;
-	CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0];
-	CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
-	CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25];
-	CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded;
-	CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw;
-	CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes;
-	CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
-	CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
-	CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26];
-	CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27];
-	CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28];
-	CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma;
-	CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29];
-	CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30];
-	CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31];
-	CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32];
-	CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma;
-	CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33];
-
-	CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
-
-	mode_lib->ms.support.PTEBufferSizeNotExceeded = true;
-	mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true;
-
-	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-		if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false)
-			mode_lib->ms.support.PTEBufferSizeNotExceeded = false;
-
-		if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false)
-			mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false;
-
-#ifdef __DML_VBA_DEBUG__
-		DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]);
-#endif
-	}
-#ifdef __DML_VBA_DEBUG__
-	DML_LOG_VERBOSE("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded);
-	DML_LOG_VERBOSE("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded);
-#endif
-
-	/* VActive bytes to fetch for UCLK P-State */
-	calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
-
-	calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = mode_lib->ms.num_active_planes;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->ms.NoOfDPP;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = s->meta_row_height_luma;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = s->meta_row_height_chroma;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->ms.dpte_row_height;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->ms.dpte_row_height_chroma;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->ms.BytePerPixelY;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->ms.BytePerPixelC;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->ms.SwathWidthY;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->ms.SwathWidthC;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->ms.SwathHeightY;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->ms.SwathHeightC;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
-
-	/* outputs */
-	calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l;
-	calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c;
-
-	calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
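The call above sizes, per plane, how many bytes must already be buffered to ride through a UCLK p-state blackout: at its core, the plane's active fetch rate multiplied by the blackout time, with PTE/meta row overheads on top. A hedged standalone sketch of that core relation, using illustrative names rather than the dml2 interface:

	/* Sketch only: bytes needed to hide a latency window, assuming the
	 * fetch rate follows from swath geometry over line time. */
	static double vactive_fetch_bw_bytes_per_us(unsigned int swath_width_px,
						    unsigned int byte_per_pixel,
						    double v_ratio,
						    double line_time_us)
	{
		return (double)swath_width_px * byte_per_pixel * v_ratio / line_time_us;
	}

	static double bytes_to_hide_latency(double fetch_bw_bytes_per_us, double latency_us)
	{
		/* e.g. 1000 bytes/us over a 35 us DRAM-clock-change blackout -> 35000 bytes */
		return fetch_bw_bytes_per_us * latency_us;
	}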
-
-	/* Excess VActive bandwidth required to fill DET */
-	calculate_excess_vactive_bandwidth_required(
-		display_cfg,
-		mode_lib->ms.num_active_planes,
-		s->pstate_bytes_required_l,
-		s->pstate_bytes_required_c,
-		/* outputs */
-		mode_lib->ms.excess_vactive_fill_bw_l,
-		mode_lib->ms.excess_vactive_fill_bw_c);
-
-	mode_lib->ms.UrgLatency = CalculateUrgentLatency(
-		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
-		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
-		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
-		mode_lib->soc.do_urgent_latency_adjustment,
-		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
-		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
-		mode_lib->ms.FabricClock,
-		mode_lib->ms.uclk_freq_mhz,
-		mode_lib->soc.qos_parameters.qos_type,
-		mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
-		mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
-
-	mode_lib->ms.TripToMemory = CalculateTripToMemory(
-		mode_lib->ms.UrgLatency,
-		mode_lib->ms.FabricClock,
-		mode_lib->ms.uclk_freq_mhz,
-		mode_lib->soc.qos_parameters.qos_type,
-		mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
-		mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
-		mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
-
-	mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory);
-
-	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-		double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
-		bool cursor_not_enough_urgent_latency_hiding = false;
-
-		if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
-			calculate_cursor_req_attributes(
-				display_cfg->plane_descriptors[k].cursor.cursor_width,
-				display_cfg->plane_descriptors[k].cursor.cursor_bpp,
-
-				// output
-				&s->cursor_lines_per_chunk[k],
-				&s->cursor_bytes_per_line[k],
-				&s->cursor_bytes_per_chunk[k],
-				&s->cursor_bytes[k]);
-
-			calculate_cursor_urgent_burst_factor(
-				mode_lib->ip.cursor_buffer_size,
-				display_cfg->plane_descriptors[k].cursor.cursor_width,
-				s->cursor_bytes_per_chunk[k],
-				s->cursor_lines_per_chunk[k],
-				line_time_us,
-				mode_lib->ms.UrgLatency,
-
-				// output
-				&mode_lib->ms.UrgentBurstFactorCursor[k],
-				&cursor_not_enough_urgent_latency_hiding);
-		}
-
-		mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k];
-
-#ifdef __DML_VBA_DEBUG__
-		DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k);
-		DML_LOG_VERBOSE("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
-		DML_LOG_VERBOSE("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
-#endif
-
-		CalculateUrgentBurstFactor(
-			&display_cfg->plane_descriptors[k],
-			mode_lib->ms.swath_width_luma_ub[k],
-			mode_lib->ms.swath_width_chroma_ub[k],
-			mode_lib->ms.SwathHeightY[k],
-			mode_lib->ms.SwathHeightC[k],
-			line_time_us,
-			mode_lib->ms.UrgLatency,
-			display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
-			display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
-			mode_lib->ms.BytePerPixelInDETY[k],
-			mode_lib->ms.BytePerPixelInDETC[k],
-			mode_lib->ms.DETBufferSizeY[k],
-			mode_lib->ms.DETBufferSizeC[k],
-
-			// Output
-			&mode_lib->ms.UrgentBurstFactorLuma[k],
-			&mode_lib->ms.UrgentBurstFactorChroma[k],
-			&mode_lib->ms.NotEnoughUrgentLatencyHiding[k]);
-
-		mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
-	}
-
-	CalculateDCFCLKDeepSleep(
-		display_cfg,
-		mode_lib->ms.num_active_planes,
-		mode_lib->ms.BytePerPixelY,
-		mode_lib->ms.BytePerPixelC,
-		mode_lib->ms.SwathWidthY,
-		mode_lib->ms.SwathWidthC,
-		mode_lib->ms.NoOfDPP,
-		mode_lib->ms.PSCL_FACTOR,
-		mode_lib->ms.PSCL_FACTOR_CHROMA,
-		mode_lib->ms.RequiredDPPCLK,
-		mode_lib->ms.vactive_sw_bw_l,
-		mode_lib->ms.vactive_sw_bw_c,
-		mode_lib->soc.return_bus_width_bytes,
-
-		/* Output */
-		&mode_lib->ms.dcfclk_deepsleep);
-
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
-			mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay(
-				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
-				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
-				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
-				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
-				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
-				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
-				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
-				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK;
-		} else {
-			mode_lib->ms.WritebackDelayTime[k] = 0.0;
-		}
-	}
-
-	// MaximumVStartup is actually Tvstartup_min in DCN4 programming guide
-	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
-		bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
-		s->MaximumVStartup[k] = CalculateMaxVStartup(
-			mode_lib->ip.ptoi_supported,
-			mode_lib->ip.vblank_nom_default_us,
-			&display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
-			mode_lib->ms.WritebackDelayTime[k]);
-		mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]);
-	}
-
-#ifdef __DML_VBA_DEBUG__
-	DML_LOG_VERBOSE("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]);
-#endif
-
-	/* Immediate Flip and MALL parameters */
-	s->ImmediateFlipRequired = false;
-	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-		s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip;
-	}
-
-	mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
-	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-		mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
-			mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
-			((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) &&
-			(display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])));
-	}
-
-	mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
-	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-		mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
-			((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) ||
-			((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame));
-	}
-
-	s->FullFrameMALLPStateMethod = false;
-	s->SubViewportMALLPStateMethod = false;
-	s->PhantomPipeMALLPStateMethod = false;
-	s->SubViewportMALLRefreshGreaterThan120Hz = false;
-	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-		if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
-			s->FullFrameMALLPStateMethod = true;
-		if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) {
-			s->SubViewportMALLPStateMethod = true;
-			if (!display_cfg->overrides.enable_subvp_implicit_pmo) {
-				// For dv, small frame tests will have very high refresh rate
-				unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 /
-					(double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
-					(double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
-				if (refresh_rate > 120)
-					s->SubViewportMALLRefreshGreaterThan120Hz = true;
-			}
-		}
-		if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
-			s->PhantomPipeMALLPStateMethod = true;
-	}
-	mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) ||
-		(s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
-
-#ifdef __DML_VBA_DEBUG__
-	DML_LOG_VERBOSE("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod);
-	DML_LOG_VERBOSE("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod);
-	DML_LOG_VERBOSE("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod);
-	DML_LOG_VERBOSE("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz);
-	DML_LOG_VERBOSE("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState);
-	DML_LOG_VERBOSE("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index);
-	DML_LOG_VERBOSE("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
-	DML_LOG_VERBOSE("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
-	DML_LOG_VERBOSE("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
-#endif
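The sub-viewport restriction above derives refresh rate purely from the stream timing: refresh_hz = pixel_clock_hz / (h_total * v_total). A standalone worked sketch (the 4k-style timing numbers are illustrative, not taken from the source):

	/* Hedged sketch of the refresh-rate derivation used above. */
	static unsigned long long timing_refresh_hz(unsigned long long pixel_clock_khz,
						    unsigned int h_total, unsigned int v_total)
	{
		return (unsigned long long)((double)pixel_clock_khz * 1000.0 /
					    ((double)h_total * (double)v_total));
	}

	/* timing_refresh_hz(594000, 4400, 2250) == 60, well under the 120 Hz
	 * cap that disqualifies sub-viewport MALL p-state switching above. */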
-
-	mode_lib->ms.support.OutstandingRequestsSupport = true;
-	mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true;
-
-	mode_lib->ms.support.avg_urgent_latency_us
-		= (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
-			* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
-			+ mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
-			* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
-
-	mode_lib->ms.support.avg_non_urgent_latency_us
-		= (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
-			* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
-			+ mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
-			* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
-
-	mode_lib->ms.support.max_non_urgent_latency_us
-		= mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles
-		/ mode_lib->ms.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
-		+ mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock
-		+ mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock
-		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
-
-	for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
-
-		if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
-			outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k]
-				/ (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
-
-			if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
-				mode_lib->ms.support.OutstandingRequestsSupport = false;
-			}
-
-			if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
-				mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
-			}
-
-#ifdef __DML_VBA_DEBUG__
-			DML_LOG_VERBOSE("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us);
-			DML_LOG_VERBOSE("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us);
-			DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]);
-			DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us);
-#endif
-		}
-
-		if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) {
-			outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k]
-				/ (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
-
-			if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
-				mode_lib->ms.support.OutstandingRequestsSupport = false;
-			}
-
-			if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
-				mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
-			}
-#ifdef __DML_VBA_DEBUG__
-			DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]);
-			DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us);
-#endif
-		}
-	}
-
-	memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
-	if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) {
-		for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
-			mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0;
-			mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0;
-			mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
-			mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
-			mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
-			mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
-		}
-	} else {
-		for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
-			calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
-			calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
-			calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
-			calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
-			calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
-			calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
-			calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
-
-			calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
-			calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
-			calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
-			calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
-			calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
-
-			calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
-			calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
-			calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
-			calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
-			calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k];
-			calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k];
-			calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
-			calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
-			calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
-			calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k];
-
-			calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
-			calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
-			calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
-			calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
-			calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k];
-			calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k];
-			calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
-			calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
-			calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
-			calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k];
-
-			// output
-			calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k];
-			calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k];
-			calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k];
-			calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k];
-
-			calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k];
-			calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k];
-			calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->ms.mcache_row_bytes_per_channel_l[k];
-			calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k];
-			calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k];
-
-			calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k];
-			calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k];
-			calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->ms.mcache_row_bytes_per_channel_c[k];
-			calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k];
-			calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k];
-
-			calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k];
-			calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k];
-			calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k];
-
-			calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
-		}
-
-		calculate_mall_bw_overhead_factor(
-			mode_lib->ms.mall_prefetch_sdp_overhead_factor,
-			mode_lib->ms.mall_prefetch_dram_overhead_factor,
-
-			// input
-			display_cfg,
-			mode_lib->ms.num_active_planes);
-	}
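When mcache is absent (or MRQ is present) every overhead factor above collapses to 1.0; otherwise the per-plane mcache settings produce multipliers that inflate raw surface bandwidth. A hedged sketch of how such factors compose downstream, with illustrative names rather than the dml2 interface:

	/* Sketch only: effective DRAM bandwidth demand after DCC and MALL
	 * prefetch overheads; a factor of 1.0 means "no overhead". */
	static double effective_dram_bw_mbps(double raw_bw_mbps,
					     double dcc_overhead_factor,
					     double mall_prefetch_overhead_factor)
	{
		return raw_bw_mbps * dcc_overhead_factor * mall_prefetch_overhead_factor;
	}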
-
-	// Calculate all the bandwidth available
-	// Need another BW for latency evaluation
-	calculate_bandwidth_available(
-		mode_lib->ms.support.avg_bandwidth_available_min, // not used
-		mode_lib->ms.support.avg_bandwidth_available, // not used
-		mode_lib->ms.support.urg_bandwidth_available_min_latency,
-		mode_lib->ms.support.urg_bandwidth_available, // not used
-		mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used
-		mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used
-
-		&mode_lib->soc,
-		display_cfg->hostvm_enable,
-		mode_lib->ms.DCFCLK,
-		mode_lib->ms.FabricClock,
-		mode_lib->ms.dram_bw_mbps);
-
-	calculate_bandwidth_available(
-		mode_lib->ms.support.avg_bandwidth_available_min,
-		mode_lib->ms.support.avg_bandwidth_available,
-		mode_lib->ms.support.urg_bandwidth_available_min,
-		mode_lib->ms.support.urg_bandwidth_available,
-		mode_lib->ms.support.urg_bandwidth_available_vm_only,
-		mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm,
-
-		&mode_lib->soc,
-		display_cfg->hostvm_enable,
-		mode_lib->ms.MaxDCFCLK,
-		mode_lib->ms.MaxFabricClock,
-#ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW
-		mode_lib->ms.dram_bw_mbps);
-#else
-		mode_lib->ms.max_dram_bw_mbps);
-#endif
-
-	// Average BW support check
-	calculate_avg_bandwidth_required(
-		mode_lib->ms.support.avg_bandwidth_required,
-		// input
-		display_cfg,
-		mode_lib->ms.num_active_planes,
-		mode_lib->ms.vactive_sw_bw_l,
-		mode_lib->ms.vactive_sw_bw_c,
-		mode_lib->ms.cursor_bw,
-		mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
-		mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
-		mode_lib->ms.mall_prefetch_dram_overhead_factor,
-		mode_lib->ms.mall_prefetch_sdp_overhead_factor);
-
-	for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram
-		mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1;
-		mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]);
-		mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]);
-	}
-
-	mode_lib->ms.support.AvgBandwidthSupport = true;
-	mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true;
-	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
-		if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) {
-			mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false;
-			DML_LOG_VERBOSE("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k);
-
-		}
-	}
-	for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
-		for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
-			if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) {
-				mode_lib->ms.support.AvgBandwidthSupport = false;
-#ifdef __DML_VBA_DEBUG__
-				DML_LOG_VERBOSE("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n));
-#endif
-			}
-		}
-	}
-
-	dml_core_ms_prefetch_check(mode_lib, display_cfg);
-
-	mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us;
-
-	//Re-ordering Buffer Support Check
-	if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
-		if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
-			/ mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) {
-			mode_lib->ms.support.ROBSupport = true;
-		} else {
-			mode_lib->ms.support.ROBSupport = false;
-		}
-	} else {
-		if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
-			mode_lib->ms.support.ROBSupport = true;
-		} else {
-			mode_lib->ms.support.ROBSupport = false;
-		}
-	}
-
-	/* VActive fill time calculations (informative) */
-	calculate_vactive_det_fill_latency(
-		display_cfg,
-		mode_lib->ms.num_active_planes,
-		s->pstate_bytes_required_l,
-		s->pstate_bytes_required_c,
-		mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
-		mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
-		mode_lib->ms.vactive_sw_bw_l,
-		mode_lib->ms.vactive_sw_bw_c,
-		mode_lib->ms.surface_avg_vactive_required_bw,
-		mode_lib->ms.surface_peak_required_bw,
-		/* outputs */
-		mode_lib->ms.dram_change_vactive_det_fill_delay_us);
-
-#ifdef __DML_VBA_DEBUG__
-	DML_LOG_VERBOSE("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us);
-	DML_LOG_VERBOSE("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport);
-#endif
-
-	/*Mode Support, Voltage State and SOC Configuration*/
-	{
-		if (mode_lib->ms.support.ScaleRatioAndTapsSupport
-			&& mode_lib->ms.support.SourceFormatPixelAndScanSupport
-			&& mode_lib->ms.support.ViewportSizeSupport
-			&& !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
-			&& !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
-			&& !mode_lib->ms.support.BPPForMultistreamNotIndicated
-			&& !mode_lib->ms.support.MultistreamWithHDMIOreDP
-			&& !mode_lib->ms.support.ExceededMultistreamSlots
-			&& !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
-			&& !mode_lib->ms.support.NotEnoughLanesForMSO
-			&& !mode_lib->ms.support.P2IWith420
-			&& !mode_lib->ms.support.DSC422NativeNotSupported
-			&& mode_lib->ms.support.DSCSlicesODMModeSupported
-			&& !mode_lib->ms.support.NotEnoughDSCUnits
-			&& !mode_lib->ms.support.NotEnoughDSCSlices
-			&& !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
-			&& !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
-			&& !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported
-			&& mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
-			&& !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported
-			&& !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
-			&& mode_lib->ms.support.ROBSupport
-			&& mode_lib->ms.support.OutstandingRequestsSupport
-			&& mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance
-			&& mode_lib->ms.support.DISPCLK_DPPCLK_Support
-			&& mode_lib->ms.support.TotalAvailablePipesSupport
-			&& mode_lib->ms.support.NumberOfOTGSupport
-			&& mode_lib->ms.support.NumberOfHDMIFRLSupport
-			&& mode_lib->ms.support.NumberOfDP2p0Support
-			&& mode_lib->ms.support.EnoughWritebackUnits
-			&& mode_lib->ms.support.WritebackLatencySupport
-			&& mode_lib->ms.support.WritebackScaleRatioAndTapsSupport
-			&& mode_lib->ms.support.CursorSupport
-			&& mode_lib->ms.support.PitchSupport
-			&& !mode_lib->ms.support.ViewportExceedsSurface
-			&& mode_lib->ms.support.PrefetchSupported
-			&& mode_lib->ms.support.EnoughUrgentLatencyHidingSupport
-			&& mode_lib->ms.support.AvgBandwidthSupport
-			&& mode_lib->ms.support.DynamicMetadataSupported
-			&& mode_lib->ms.support.VRatioInPrefetchSupported
-			&& mode_lib->ms.support.PTEBufferSizeNotExceeded
-			&& mode_lib->ms.support.DCCMetaBufferSizeNotExceeded
-			&& !mode_lib->ms.support.ExceededMALLSize
-			&& mode_lib->ms.support.g6_temp_read_support
-			&& ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) {
-			DML_LOG_VERBOSE("DML::%s: mode is supported\n", __func__);
-			mode_lib->ms.support.ModeSupport = true;
-		} else {
-			DML_LOG_VERBOSE("DML::%s: mode is NOT supported\n", __func__);
-			mode_lib->ms.support.ModeSupport = false;
-		}
-	}
-
-	// Mode support now works on one particular power state, so there is only one state index (index 0).
-	DML_LOG_VERBOSE("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport);
-	DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
-
-	for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
-		mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k];
-		mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k];
-	}
-
-	for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
-		mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k];
-		mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
-		mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
-		mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k];
-		mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k];
-		mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k];
-
-#if defined(__DML_VBA_DEBUG__)
-		DML_LOG_VERBOSE("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]);
-#endif
-	}
-
-#if defined(__DML_VBA_DEBUG__)
-	if (!mode_lib->ms.support.ModeSupport)
-		dml2_print_mode_support_info(&mode_lib->ms.support, true);
-
-	DML_LOG_VERBOSE("DML::%s: --- DONE --- \n", __func__);
-#endif
-
-	return mode_lib->ms.support.ModeSupport;
-}
-
-unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params)
-{
-	unsigned int result;
-
-	DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__);
-	result = dml_core_mode_support(in_out_params);
-
-	if (result)
-		*in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
-
-	DML_LOG_VERBOSE("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index);
-
-	for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++)
-		DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k,
-			in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
-
-	DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__);
-
-	return result;
-}
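The final support decision above is a pure AND-reduction over the individual support flags, so a single failing check rejects the mode. An equivalent table-driven sketch (the flag table and names are illustrative, not the dml2 structures):

	#include <stdbool.h>

	struct support_flag { bool ok; const char *name; };

	/* Hedged sketch: returns false on the first failing check, mirroring
	 * the &&-chain in the mode-support verdict above. */
	static bool all_checks_pass(const struct support_flag *f, unsigned int n)
	{
		for (unsigned int i = 0; i < n; i++)
			if (!f[i].ok)
				return false;
		return true;
	}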
-
-static void CalculatePixelDeliveryTimes(
-	const struct dml2_display_cfg *display_cfg,
-	const struct core_display_cfg_support_info *cfg_support_info,
-	unsigned int NumberOfActiveSurfaces,
-	double VRatioPrefetchY[],
-	double VRatioPrefetchC[],
-	unsigned int swath_width_luma_ub[],
-	unsigned int swath_width_chroma_ub[],
-	double PSCL_THROUGHPUT[],
-	double PSCL_THROUGHPUT_CHROMA[],
-	double Dppclk[],
-	unsigned int BytePerPixelC[],
-	unsigned int req_per_swath_ub_l[],
-	unsigned int req_per_swath_ub_c[],
-
-	// Output
-	double DisplayPipeLineDeliveryTimeLuma[],
-	double DisplayPipeLineDeliveryTimeChroma[],
-	double DisplayPipeLineDeliveryTimeLumaPrefetch[],
-	double DisplayPipeLineDeliveryTimeChromaPrefetch[],
-	double DisplayPipeRequestDeliveryTimeLuma[],
-	double DisplayPipeRequestDeliveryTimeChroma[],
-	double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
-	double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
-{
-	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
-		double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
-
-#ifdef __DML_VBA_DEBUG__
-		DML_LOG_VERBOSE("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
-		DML_LOG_VERBOSE("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
-		DML_LOG_VERBOSE("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio);
-		DML_LOG_VERBOSE("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
-		DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used);
-		DML_LOG_VERBOSE("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz);
-		DML_LOG_VERBOSE("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
-#endif
-		if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
-			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
-		} else {
-			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
-		}
-
-		if (BytePerPixelC[k] == 0) {
-			DisplayPipeLineDeliveryTimeChroma[k] = 0;
-		} else {
-			if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
-				DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
-			} else {
-				DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
-			}
-		}
-
-		if (VRatioPrefetchY[k] <= 1) {
-			DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
-		} else {
-			DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
-		}
-
-		if (BytePerPixelC[k] == 0) {
-			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
-		} else {
-			if (VRatioPrefetchC[k] <= 1) {
-				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
-			} else {
-				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
-			}
-		}
-#ifdef __DML_VBA_DEBUG__
-		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
-#endif
-	}
-
-	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
-
-		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k];
-		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k];
-		if (BytePerPixelC[k] == 0) {
-			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
-			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
-		} else {
-			DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k];
-			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k];
-		}
-#ifdef __DML_VBA_DEBUG__
-		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]);
-#endif
-	}
-}
-
-static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p)
-{
-	unsigned int meta_chunk_width;
-	unsigned int min_meta_chunk_width;
-	unsigned int meta_chunk_per_row_int;
-	unsigned int meta_row_remainder;
-	unsigned int meta_chunk_threshold;
-	unsigned int meta_chunks_per_row_ub;
-	unsigned int meta_chunk_width_chroma;
-	unsigned int min_meta_chunk_width_chroma;
-	unsigned int meta_chunk_per_row_int_chroma;
-	unsigned int meta_row_remainder_chroma;
-	unsigned int meta_chunk_threshold_chroma;
-	unsigned int meta_chunks_per_row_ub_chroma;
-	unsigned int dpte_group_width_luma;
-	unsigned int dpte_groups_per_row_luma_ub;
-	unsigned int dpte_group_width_chroma;
-	unsigned int dpte_groups_per_row_chroma_ub;
-	double pixel_clock_mhz;
-
-	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
-		p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
-		if (p->BytePerPixelC[k] == 0) {
-			p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
-		} else {
-			p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
-		}
-		p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
-		if (p->BytePerPixelC[k] == 0) {
-			p->DST_Y_PER_META_ROW_NOM_C[k] = 0;
-		} else {
-			p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
-		}
-	}
-
-	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
-		if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) {
-			meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
-			min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
-			meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width;
-			meta_row_remainder = p->meta_row_width[k] % meta_chunk_width;
-			if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
-				meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k];
-			} else {
-				meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k];
-			}
-			if (meta_row_remainder <= meta_chunk_threshold) {
-				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
-			} else {
-				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
-			}
-			p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio *
-				p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
-				(p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
-			p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
-				(p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
-			p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
-				(p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
-			if (p->BytePerPixelC[k] == 0) {
-				p->TimePerChromaMetaChunkNominal[k] = 0;
-				p->TimePerChromaMetaChunkVBlank[k] = 0;
-				p->TimePerChromaMetaChunkFlip[k] = 0;
-			} else {
-				meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
-				min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
-				meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma);
-				meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma;
-				if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
-					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k];
-				} else {
-					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k];
-				}
-				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
-					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
-				} else {
-					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
-				}
-				p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
-				p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
-				p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
-			}
-		} else {
-			p->TimePerMetaChunkNominal[k] = 0;
-			p->TimePerMetaChunkVBlank[k] = 0;
-			p->TimePerMetaChunkFlip[k] = 0;
-			p->TimePerChromaMetaChunkNominal[k] = 0;
-			p->TimePerChromaMetaChunkVBlank[k] = 0;
-			p->TimePerChromaMetaChunkFlip[k] = 0;
-		}
-
-#ifdef __DML_VBA_DEBUG__
-		DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]);
-#endif
-	}
-
-	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
-		p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
-		if (p->BytePerPixelC[k] == 0) {
-			p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
-		} else {
-			p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
-		}
-	}
-
-	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
-		pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
-
-		if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
-			p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k];
-		else
-			p->time_per_tdlut_group[k] = 0;
-
-		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]);
-
-		if (p->display_cfg->gpuvm_enable == true) {
-			if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
-				dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]);
-			} else {
-				dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]);
-			}
-			if (p->use_one_row_for_frame[k]) {
-				dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0));
-			} else {
-				dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0));
-			}
-			if (dpte_groups_per_row_luma_ub <= 2) {
-				dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1;
-			}
-			DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
-			DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]);
-			DML_LOG_VERBOSE("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]);
-			DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]);
-			DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]);
-			DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
-			DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
-			DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
-
-			p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
-			p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
-			p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
-			if (p->BytePerPixelC[k] == 0) {
-				p->time_per_pte_group_nom_chroma[k] = 0;
-				p->time_per_pte_group_vblank_chroma[k] = 0;
-				p->time_per_pte_group_flip_chroma[k] = 0;
-			} else {
-				if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
-					dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]);
-				} else {
-					dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]);
-				}
-
-				if (p->use_one_row_for_frame[k]) {
-					dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0));
-				} else {
-					dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0));
-				}
-				if (dpte_groups_per_row_chroma_ub <= 2) {
-					dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1;
-				}
-				DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
-				DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
-				DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
-
-				p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
-				p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
-				p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
-			}
-		} else {
-			p->time_per_pte_group_nom_luma[k] = 0;
-			p->time_per_pte_group_vblank_luma[k] = 0;
-			p->time_per_pte_group_flip_luma[k] = 0;
-			p->time_per_pte_group_nom_chroma[k] = 0;
-			p->time_per_pte_group_vblank_chroma[k] = 0;
-			p->time_per_pte_group_flip_chroma[k] = 0;
-		}
-#ifdef __DML_VBA_DEBUG__
-		DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]);
-
-		DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]);
-		DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]);
-#endif
-	}
-} // CalculateMetaAndPTETimes
-
-static void CalculateVMGroupAndRequestTimes(
-	const struct dml2_display_cfg *display_cfg,
-	unsigned int NumberOfActiveSurfaces,
-	unsigned int BytePerPixelC[],
-	double dst_y_per_vm_vblank[],
-	double dst_y_per_vm_flip[],
-	unsigned int dpte_row_width_luma_ub[],
-	unsigned int dpte_row_width_chroma_ub[],
-	unsigned int vm_group_bytes[],
-	unsigned int dpde0_bytes_per_frame_ub_l[],
-	unsigned int dpde0_bytes_per_frame_ub_c[],
-	unsigned int tdlut_pte_bytes_per_frame[],
-	unsigned int meta_pte_bytes_per_frame_ub_l[],
-	unsigned int meta_pte_bytes_per_frame_ub_c[],
-	bool mrq_present,
-
-	// Output
-	double
TimePerVMGroupVBlank[], - double TimePerVMGroupFlip[], - double TimePerVMRequestVBlank[], - double TimePerVMRequestFlip[]) -{ - unsigned int num_group_per_lower_vm_stage = 0; - unsigned int num_req_per_lower_vm_stage = 0; - unsigned int num_group_per_lower_vm_stage_flip; - unsigned int num_group_per_lower_vm_stage_pref; - unsigned int num_req_per_lower_vm_stage_flip; - unsigned int num_req_per_lower_vm_stage_pref; - double line_time; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); -#endif - for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable); - DML_LOG_VERBOSE("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); - DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); -#endif - - if (display_cfg->gpuvm_enable) { - if (display_cfg->gpuvm_max_page_table_levels >= 2) { - num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); - - if (BytePerPixelC[k] > 0) - num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); - } - - if (dcc_mrq_enable) { - if (BytePerPixelC[k] > 0) { - num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + - math_ceil2((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)); - } else { - num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)); - } - } - - num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage; - num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage; - - if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) { - num_group_per_lower_vm_stage_pref += (unsigned int) math_ceil2(tdlut_pte_bytes_per_frame[k] / vm_group_bytes[k], 1); - if (display_cfg->gpuvm_max_page_table_levels >= 2) - num_group_per_lower_vm_stage_pref += 1; // tdpe0 group - } - - if (display_cfg->gpuvm_max_page_table_levels >= 2) { - num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64; - if (BytePerPixelC[k] > 0) - num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k]; - } - - if (dcc_mrq_enable) { - num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64; - if (BytePerPixelC[k] > 0) - num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64; - } - - num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage; - num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage; - - if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && 
display_cfg->gpuvm_enable) { - num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64; - } - - line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; - - if (num_group_per_lower_vm_stage_pref > 0) - TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; - else - TimePerVMGroupVBlank[k] = 0; - - if (num_group_per_lower_vm_stage_flip > 0) - TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; - else - TimePerVMGroupFlip[k] = 0; - - if (num_req_per_lower_vm_stage_pref > 0) - TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; - else - TimePerVMRequestVBlank[k] = 0.0; - if (num_req_per_lower_vm_stage_flip > 0) - TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip; - else - TimePerVMRequestFlip[k] = 0.0; - - DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time); - DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %d\n", __func__, k, num_group_per_lower_vm_stage_pref); - DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %d\n", __func__, k, num_group_per_lower_vm_stage_flip); - DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %d\n", __func__, k, num_req_per_lower_vm_stage_pref); - DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %d\n", __func__, k, num_req_per_lower_vm_stage_flip); - - if (display_cfg->gpuvm_max_page_table_levels > 2) { - TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; - TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; - TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; - TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; - } - - } else { - TimePerVMGroupVBlank[k] = 0; - TimePerVMGroupFlip[k] = 0; - TimePerVMRequestVBlank[k] = 0; - TimePerVMRequestFlip[k] = 0; - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); -#endif - } -} - -static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch, - struct dml2_core_calcs_CalculateStutterEfficiency_params *p) -{ - struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals; - - unsigned int TotalNumberOfActiveOTG = 0; - double SinglePixelClock = 0; - unsigned int SingleHTotal = 0; - unsigned int SingleVTotal = 0; - bool SameTiming = true; - bool FoundCriticalSurface = false; - - memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); - - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { - if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) { - if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || 
(!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) { - l->MaximumEffectiveCompressionLuma = 2; - } else { - l->MaximumEffectiveCompressionLuma = 4; - } - l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0); - DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma); -#endif - l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0; - l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma; - - if (p->ReadBandwidthSurfaceChroma[k] > 0) { - if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) { - l->MaximumEffectiveCompressionChroma = 2; - } else { - l->MaximumEffectiveCompressionChroma = 4; - } - l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1); - DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma); -#endif - l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1; - l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma; - } - } else { - l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k]; - } - l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]); - } - } - - l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth; - l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth; - -#ifdef __DML_VBA_DEBUG__ - 
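/*
 * A minimal standalone sketch of the compression accounting above,
 * assuming the same policy: each plane's effective DCC rate is capped
 * at 2:1 when a 256B request block does not fit the swath (or the max
 * uncompressed block is below 256B) and at 4:1 otherwise, and the
 * average rate is total read bandwidth over compressed read bandwidth.
 * The helper name and flattened signature are illustrative only.
 */
static double sketch_average_dcc_rate(const double *read_bw,
				      const double *dcc_rate,
				      const int *cap_at_2to1,
				      unsigned int num_planes)
{
	double total_bw = 0.0, compressed_bw = 0.0;

	for (unsigned int k = 0; k < num_planes; k++) {
		double max_eff = cap_at_2to1[k] ? 2.0 : 4.0;
		double eff = dcc_rate[k] < max_eff ? dcc_rate[k] : max_eff;

		total_bw += read_bw[k];
		compressed_bw += read_bw[k] / eff;
	}

	/*
	 * Example: two planes at 100 MB/s with rates 3.0 and 8.0 (capped
	 * to 4.0) give 200 / (33.3 + 25.0) ~= 3.4:1 on average.
	 */
	return total_bw / compressed_bw;
}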
DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); - DML_LOG_VERBOSE("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth); - DML_LOG_VERBOSE("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth); - DML_LOG_VERBOSE("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth); - DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma); - DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma); - DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); - DML_LOG_VERBOSE("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction); - - DML_LOG_VERBOSE("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0); - DML_LOG_VERBOSE("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); - DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte); - DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte); -#endif - if (l->AverageDCCZeroSizeFraction == 1) { - l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; - l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate; - - - } else if (l->AverageDCCZeroSizeFraction > 0) { - l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; - l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, - (double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) + - (p->rob_alloc_compressed ? 
math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate, - ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)) - : ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); - - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); - DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)); - DML_LOG_VERBOSE("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); - DML_LOG_VERBOSE("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)); -#endif - } else { - l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, - (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) + - ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); - DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate); -#endif - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); - DML_LOG_VERBOSE("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries); - DML_LOG_VERBOSE("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate); - DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); -#endif - - *p->StutterPeriod = 0; - - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { - l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? 
l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; - l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]); - l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024); - DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); - DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY); - DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath); - DML_LOG_VERBOSE("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY); -#endif - - if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) { - bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP; - - FoundCriticalSurface = true; - *p->StutterPeriod = l->DETBufferingTimeY; - l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - l->VActiveTimeCriticalSurface = (isInterlaceTiming ? 
math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
-				l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
-				l->SwathWidthYCriticalSurface = p->SwathWidthY[k];
-				l->SwathHeightYCriticalSurface = p->SwathHeightY[k];
-				l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
-				l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
-				l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
-				l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
-				l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
-
-#ifdef __DML_VBA_DEBUG__
-				DML_LOG_VERBOSE("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
-				DML_LOG_VERBOSE("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
-				DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface);
-				DML_LOG_VERBOSE("DML::%s: k=%u, FrameTimeCriticalSurface = %f\n", __func__, k, l->FrameTimeCriticalSurface);
-				DML_LOG_VERBOSE("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface);
-				DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface);
-				DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface);
-				DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface);
-				DML_LOG_VERBOSE("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface);
-				DML_LOG_VERBOSE("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface);
-				DML_LOG_VERBOSE("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface);
-#endif
-			}
-		}
-	}
-
-	// For bounded requests, the stutter period is calculated from the DET size alone, but during a burst some data can also be returned from inside the ROB/compressed buffer:
-	// if (cdb + rob >= det), the stutter burst is absorbed entirely by the cdb + rob, which sit before the decompressor;
-	// else
-	//   the cdb + rob part is returned at the compressed rate with urgent bw (ideal bw),
-	//   and the det part is returned at the uncompressed rate of 64B/dcfclk.
-	//
-	// For unbounded requests, the stutter period should be calculated over the total of CDB+ROB+DET, so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer"
-	// should be == EffectiveCompressedBufferSize, which is returned at the compressed rate; the rest of the stutter period comes from the DET and is returned at the uncompressed rate of 64B/dcfclk.
-
-	l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize);
-#ifdef __DML_VBA_DEBUG__
-	DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
-	DML_LOG_VERBOSE("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod *
p->TotalDataReadBandwidth) / 1024.0); - DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); - DML_LOG_VERBOSE("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024); - DML_LOG_VERBOSE("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); - DML_LOG_VERBOSE("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); - DML_LOG_VERBOSE("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth); - DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); -#endif - - l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer - / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + - (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) - / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + - *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); - DML_LOG_VERBOSE("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); - DML_LOG_VERBOSE("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW); - DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); -#endif - l->TotalActiveWriteback = 0; - memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); - - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { - if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) { - - if (p->display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) - l->TotalActiveWriteback = l->TotalActiveWriteback + 1; - - if (TotalNumberOfActiveOTG == 0) { // first otg - SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; - SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total; - } else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) || - SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total || - SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) { - SameTiming = false; - } - TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; - l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index] = 1; - } - } - } - - if (l->TotalActiveWriteback == 0) { -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); - DML_LOG_VERBOSE("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); - 
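/*
 * The three terms of StutterBurstTime above, restated as a minimal
 * sketch (the hw_debug5 override is dropped here): the portion of the
 * burst that fits in the ROB/compressed buffer returns at the
 * compressed rate, the DET remainder is limited by the slower of
 * 64B per DCFCLK cycle and the compressed return rate, and row
 * (PTE/meta) traffic rides on the plain return bandwidth. Units are
 * assumed MHz and bytes/us so no conversion factors are needed; the
 * helper name and flattened signature are illustrative only.
 */
static double sketch_stutter_burst_time(double period_us, double total_bw,
					double part_in_compbuf_bytes,
					double return_bw, double dcc_rate,
					double dcfclk_mhz, double row_bw)
{
	double compressed_bw = return_bw * dcc_rate;
	double det_bw = dcfclk_mhz * 64.0;	/* 64 bytes per DCFCLK cycle */
	double slow_bw = det_bw < compressed_bw ? det_bw : compressed_bw;

	return part_in_compbuf_bytes / compressed_bw +
	       (period_us * total_bw - part_in_compbuf_bytes) / slow_bw +
	       period_us * row_bw / return_bw;
}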
DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); -#endif - *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100; - *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100; - *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); - *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); - } else { - *p->StutterEfficiencyNotIncludingVBlank = 0.; - *p->Z8StutterEfficiencyNotIncludingVBlank = 0.; - *p->NumberOfStutterBurstsPerFrame = 0; - *p->Z8NumberOfStutterBurstsPerFrame = 0; - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface); - DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); - DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); - DML_LOG_VERBOSE("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); - DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); -#endif - - if (*p->StutterEfficiencyNotIncludingVBlank > 0) { - if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { - *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank; - } else { - *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100; - } - } else { - *p->StutterEfficiency = 0; - *p->NumberOfStutterBurstsPerFrame = 0; - } - - if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) { - //LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; - if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { - *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank; - } else { - *p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100; - } - } else { - *p->Z8StutterEfficiency = 0.; - *p->Z8NumberOfStutterBurstsPerFrame = 0; - } - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG); - DML_LOG_VERBOSE("DML::%s: SameTiming = %u\n", __func__, SameTiming); - DML_LOG_VERBOSE("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings); - DML_LOG_VERBOSE("DML::%s: LastZ8StutterPeriod = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod : 0); - DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); - DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); - DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); - DML_LOG_VERBOSE("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); - DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); - DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); - DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); -#endif - - *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); - DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); - DML_LOG_VERBOSE("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); -#endif -} - -static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params) -{ - const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg; - const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; - const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info; - struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; - struct dml2_display_cfg_programming *programming = in_out_params->programming; - - struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals; - struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; - struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; - struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; - struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params; - struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; - struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; - struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; - struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; - struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params; - struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; - struct 
dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params; - - unsigned int k; - bool must_support_iflip; - const long min_return_uclk_cycles = 83; - const long min_return_fclk_cycles = 75; - const double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0; - double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0; - double max_uclk_mhz = 0; - double min_return_latency_in_DCFCLK_cycles = 0; - - DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__); - - memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch)); - memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program)); - - s->num_active_planes = display_cfg->num_planes; - get_stream_output_bpp(s->OutputBpp, display_cfg); - - mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); - dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); - - mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0; - mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0; - mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); - mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0; - mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0; - s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000; - mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); - mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table); - - for (k = 0; k < s->num_active_planes; ++k) { - unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - DML_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4); - DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 || - cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 || - cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); - - if (cfg_support_info->stream_support_info[stream_index].odms_used > 1) - DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); - - switch (cfg_support_info->stream_support_info[stream_index].odms_used) { - case (4): - mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1; - break; - case (3): - mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1; - break; - case (2): - mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1; - break; - default: - if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4) - mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4; - else if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2) - mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2; - else - mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass; - break; - } - } - - for (k = 0; k < s->num_active_planes; ++k) { - mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; - mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; - DML_ASSERT(mode_lib->mp.Dppclk[k] 
> 0); - } - - for (k = 0; k < s->num_active_planes; ++k) { - unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; - DML_LOG_VERBOSE("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); - } - - mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; - mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; - - DML_ASSERT(mode_lib->mp.Dcfclk > 0); - DML_ASSERT(mode_lib->mp.FabricClock > 0); - DML_ASSERT(mode_lib->mp.dram_bw_mbps > 0); - DML_ASSERT(mode_lib->mp.uclk_freq_mhz > 0); - DML_ASSERT(mode_lib->mp.GlobalDPPCLK > 0); - DML_ASSERT(mode_lib->mp.Dispclk > 0); - DML_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0); - DML_ASSERT(s->SOCCLK > 0); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); - DML_LOG_VERBOSE("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); - DML_LOG_VERBOSE("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); - DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock); - DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps); - DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz); - DML_LOG_VERBOSE("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk); - for (k = 0; k < s->num_active_planes; ++k) { - DML_LOG_VERBOSE("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]); - } - DML_LOG_VERBOSE("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK); - DML_LOG_VERBOSE("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep); - DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK); - DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); - DML_LOG_VERBOSE("DML::%s: min_clk_table min_fclk_khz = %ld\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz); - DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config)); - for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) { - DML_LOG_VERBOSE("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]); - DML_LOG_VERBOSE("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]); - } - - for (k = 0; k < s->num_active_planes; k++) - DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); -#endif - - CalculateMaxDETAndMinCompressedBufferSize( - mode_lib->ip.config_return_buffer_size_in_kbytes, - mode_lib->ip.config_return_buffer_segment_size_in_kbytes, - mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->ip.max_num_dpp, - display_cfg->overrides.hw.force_nom_det_size_kbytes.enable, - display_cfg->overrides.hw.force_nom_det_size_kbytes.value, - mode_lib->ip.dcn_mrq_present, - - /* Output */ - &s->MaxTotalDETInKByte, - &s->NomDETInKByte, - &s->MinCompressedBufferSizeInKByte); - - - PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd); - - for (k = 0; k < s->num_active_planes; ++k) { - 
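/*
 * The ODM-mode selection a few steps above collapses two support-info
 * fields into one mode: 4/3/2 ODM slices pick the matching combine
 * mode; otherwise 4 or 2 output segments pick an MSO split mode, and
 * anything else is bypass. A self-contained sketch with illustrative
 * enum and helper names (the real code uses dml2_odm_mode_*):
 */
enum sketch_odm_mode {
	SKETCH_ODM_BYPASS,
	SKETCH_ODM_COMBINE_2TO1,
	SKETCH_ODM_COMBINE_3TO1,
	SKETCH_ODM_COMBINE_4TO1,
	SKETCH_ODM_MSO_1TO2,
	SKETCH_ODM_MSO_1TO4,
};

static enum sketch_odm_mode sketch_map_odm_mode(unsigned int odms_used,
						unsigned int output_segments)
{
	switch (odms_used) {
	case 4:
		return SKETCH_ODM_COMBINE_4TO1;
	case 3:
		return SKETCH_ODM_COMBINE_3TO1;
	case 2:
		return SKETCH_ODM_COMBINE_2TO1;
	default:
		if (output_segments == 4)
			return SKETCH_ODM_MSO_1TO4;
		if (output_segments == 2)
			return SKETCH_ODM_MSO_1TO2;
		return SKETCH_ODM_BYPASS;
	}
}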
CalculateSinglePipeDPPCLKAndSCLThroughput( - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, - mode_lib->ip.max_dchub_pscl_bw_pix_per_clk, - mode_lib->ip.max_pscl_lb_bw_pix_per_clk, - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), - display_cfg->plane_descriptors[k].pixel_format, - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps, - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps, - - /* Output */ - &mode_lib->mp.PSCL_THROUGHPUT[k], - &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k], - &mode_lib->mp.DPPCLKUsingSingleDPP[k]); - } - - for (k = 0; k < s->num_active_planes; ++k) { - CalculateBytePerPixelAndBlockSizes( - display_cfg->plane_descriptors[k].pixel_format, - display_cfg->plane_descriptors[k].surface.tiling, - display_cfg->plane_descriptors[k].surface.plane0.pitch, - display_cfg->plane_descriptors[k].surface.plane1.pitch, - - // Output - &mode_lib->mp.BytePerPixelY[k], - &mode_lib->mp.BytePerPixelC[k], - &mode_lib->mp.BytePerPixelInDETY[k], - &mode_lib->mp.BytePerPixelInDETC[k], - &mode_lib->mp.Read256BlockHeightY[k], - &mode_lib->mp.Read256BlockHeightC[k], - &mode_lib->mp.Read256BlockWidthY[k], - &mode_lib->mp.Read256BlockWidthC[k], - &mode_lib->mp.MacroTileHeightY[k], - &mode_lib->mp.MacroTileHeightC[k], - &mode_lib->mp.MacroTileWidthY[k], - &mode_lib->mp.MacroTileWidthC[k], - &mode_lib->mp.surf_linear128_l[k], - &mode_lib->mp.surf_linear128_c[k]); - } - - CalculateSwathWidth( - display_cfg, - false, // ForceSingleDPP - s->num_active_planes, - mode_lib->mp.ODMMode, - mode_lib->mp.BytePerPixelY, - mode_lib->mp.BytePerPixelC, - mode_lib->mp.Read256BlockHeightY, - mode_lib->mp.Read256BlockHeightC, - mode_lib->mp.Read256BlockWidthY, - mode_lib->mp.Read256BlockWidthC, - mode_lib->mp.surf_linear128_l, - mode_lib->mp.surf_linear128_c, - mode_lib->mp.NoOfDPP, - - /* Output */ - mode_lib->mp.req_per_swath_ub_l, - mode_lib->mp.req_per_swath_ub_c, - mode_lib->mp.SwathWidthSingleDPPY, - mode_lib->mp.SwathWidthSingleDPPC, - mode_lib->mp.SwathWidthY, - mode_lib->mp.SwathWidthC, - s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[] - s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[] - mode_lib->mp.swath_width_luma_ub, - mode_lib->mp.swath_width_chroma_ub); - - for (k = 0; k < s->num_active_planes; ++k) { - mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); - mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 
display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); - DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); - } - - CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; - CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte; - CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte; - CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; - CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; - CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; - CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; - CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; - CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes; - CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte; - CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l; - CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c; - CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0]; - CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1]; - CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY; - CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC; - CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY; - CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC; - CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l; - CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c; - CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode; - CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP; - CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY; - CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC; - CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY; - CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC; - 
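/*
 * The vactive_sw_bw_l/c values wired into ReadBandwidthLuma/Chroma
 * above reduce to a simple rate: swath width in pixels times bytes per
 * pixel per line time, scaled by v_ratio (source lines consumed per
 * displayed line). A minimal sketch with an illustrative signature;
 * with the pixel clock in MHz the result is bytes/us, i.e. MB/s.
 */
static double sketch_vactive_bw(unsigned int swath_width_px,
				double bytes_per_pixel,
				unsigned int h_total,
				double pixel_clock_mhz,
				double v_ratio)
{
	double line_time_us = h_total / pixel_clock_mhz;

	/* e.g. 3840 px * 4 Bpp / (4400 / 594.0) us * 1.0 ~= 2074 MB/s */
	return swath_width_px * bytes_per_pixel / line_time_us * v_ratio;
}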
CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present; - - // output - CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l; - CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c; - CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0]; - CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1]; - CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2]; - CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3]; - CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY; - CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC; - CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma; - CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma; - CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte; - CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; - CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; - CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l; - CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c; - CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled; - CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b; - CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5; - CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte; - CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0]; - CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0]; - - // Calculate DET size, swath height here. 
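/*
 * CalculateSwathAndDETConfiguration (called just below) decides the
 * per-plane DET slices. As a rough, simplified model only (the real
 * routine also weighs swath heights, unbounded requests and mcache),
 * the budget can be split in proportion to each plane's read bandwidth
 * at segment granularity and clamped to the nominal per-plane size.
 * Assumes at least one plane with nonzero bandwidth; all names here
 * are illustrative.
 */
static void sketch_split_det(const double *read_bw, unsigned int num_planes,
			     unsigned int det_budget_kb,
			     unsigned int segment_kb,
			     unsigned int nom_det_kb,
			     unsigned int *det_kb /* out */)
{
	double total_bw = 0.0;

	for (unsigned int k = 0; k < num_planes; k++)
		total_bw += read_bw[k];

	for (unsigned int k = 0; k < num_planes; k++) {
		unsigned int share =
			(unsigned int)(det_budget_kb * read_bw[k] / total_bw);

		share -= share % segment_kb;	/* segment granularity */
		det_kb[k] = share > nom_det_kb ? nom_det_kb : share;
	}
}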
- CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); - - // DSC Delay - for (k = 0; k < s->num_active_planes; ++k) { - mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable, - mode_lib->mp.ODMMode[k], - mode_lib->ip.maximum_dsc_bits_per_component, - s->OutputBpp[k], - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, - cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), - s->PixelClockBackEnd[k]); - } - - // Prefetch - if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) { - for (k = 0; k < s->num_active_planes; ++k) - mode_lib->mp.SurfaceSizeInTheMALL[k] = 0; - } else { - CalculateSurfaceSizeInMall( - display_cfg, - s->num_active_planes, - mode_lib->soc.mall_allocated_for_dcn_mbytes, - mode_lib->mp.BytePerPixelY, - mode_lib->mp.BytePerPixelC, - mode_lib->mp.Read256BlockWidthY, - mode_lib->mp.Read256BlockWidthC, - mode_lib->mp.Read256BlockHeightY, - mode_lib->mp.Read256BlockHeightC, - mode_lib->mp.MacroTileWidthY, - mode_lib->mp.MacroTileWidthC, - mode_lib->mp.MacroTileHeightY, - mode_lib->mp.MacroTileHeightC, - - /* Output */ - mode_lib->mp.SurfaceSizeInTheMALL, - &s->dummy_boolean[0]); /* bool *ExceededMALLSize */ - } - - for (k = 0; k < s->num_active_planes; ++k) { - s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k]; - s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; - s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; - s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; - s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k]; - s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k]; - s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k]; - s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k]; - s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k]; - s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k]; - s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k]; - s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k]; - s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; - s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; - s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; - s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; - s->SurfaceParameters[k].SurfaceTiling = 
display_cfg->plane_descriptors[k].surface.tiling; - s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k]; - s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k]; - s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; - s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; - s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; - s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch; - s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch; - s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; - s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; - s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; - s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; - s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; - s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame; - s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k]; - s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k]; - s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch; - s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch; - } - - CalculateVMRowAndSwath_params->display_cfg = display_cfg; - CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes; - CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters; - CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL; - CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; - CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma; - CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes; - CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY; - CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; - CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes; - CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present; - - // output - CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; - CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub; - CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub; - CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height; - CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma; - CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = 
mode_lib->mp.dpte_row_height_linear; - CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma; - CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes; - CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; - CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY; - CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY; - CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY; - CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC; - CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC; - CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC; - CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y; - CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y; - CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c; - CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c; - CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l; - CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c; - CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY; - CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC; - CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY; - CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC; - CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY; - CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC; - CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; - CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow; - CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l; - CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c; - CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes; - CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame; - CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip; - CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss; - CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE; - CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE; - CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1]; - CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw; - CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes; - CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l; - CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c; - CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width; - CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height; - CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width; - CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height; - CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l; - 
CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma; - CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma; - CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma; - CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma; - CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c; - - CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params); - - memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params)); - if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) { - for (k = 0; k < s->num_active_planes; k++) { - mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0; - mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0; - mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0; - mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0; - mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0; - mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0; - } - } else { - for (k = 0; k < s->num_active_planes; k++) { - calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; - calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count; - calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes; - calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes; - calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes; - calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; - calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; - - calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format; - calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle); - calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; - calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling; - calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall; - - calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; - calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; - calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; - calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; - calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k]; - calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k]; - calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k]; - calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k]; - calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k]; - calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k]; - - 
calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; - calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; - calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; - calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; - calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k]; - calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k]; - calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k]; - calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k]; - calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k]; - calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k]; - - // output - calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k]; - calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k]; - calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k]; - calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k]; - - calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k]; - calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k]; - calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->mp.mcache_row_bytes_per_channel_l[k]; - calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k]; - calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k]; - - calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k]; - calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k]; - calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->mp.mcache_row_bytes_per_channel_c[k]; - calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k]; - calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k]; - - calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k]; - calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k]; - calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k]; - calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params); - } - - calculate_mall_bw_overhead_factor( - mode_lib->mp.mall_prefetch_sdp_overhead_factor, - mode_lib->mp.mall_prefetch_dram_overhead_factor, - - // input - display_cfg, - s->num_active_planes); - } - - // Calculate all the bandwidth availabe - calculate_bandwidth_available( - mode_lib->mp.avg_bandwidth_available_min, - mode_lib->mp.avg_bandwidth_available, - mode_lib->mp.urg_bandwidth_available_min, - mode_lib->mp.urg_bandwidth_available, - mode_lib->mp.urg_bandwidth_available_vm_only, - mode_lib->mp.urg_bandwidth_available_pixel_and_vm, - - &mode_lib->soc, - display_cfg->hostvm_enable, - mode_lib->mp.Dcfclk, - mode_lib->mp.FabricClock, - mode_lib->mp.dram_bw_mbps); - - - calculate_hostvm_inefficiency_factor( - 
&s->HostVMInefficiencyFactor, - &s->HostVMInefficiencyFactorPrefetch, - - display_cfg->gpuvm_enable, - display_cfg->hostvm_enable, - mode_lib->ip.remote_iommu_outstanding_translations, - mode_lib->soc.max_outstanding_reqs, - mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], - mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); - - s->TotalDCCActiveDPP = 0; - s->TotalActiveDPP = 0; - for (k = 0; k < s->num_active_planes; ++k) { - s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k]; - if (display_cfg->plane_descriptors[k].surface.dcc.enable) - s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k]; - } - // Calculate tdlut schedule related terms - for (k = 0; k <= s->num_active_planes - 1; k++) { - calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk; - calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; - calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; - calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; - calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; - calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; - calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; - - // output - calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; - calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; - calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; - calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; - calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; - calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; - calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; - calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); - } - - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) - s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); - - CalculateExtraLatency( - display_cfg, - mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, - s->ReorderingBytes, - mode_lib->mp.Dcfclk, - mode_lib->mp.FabricClock, - mode_lib->ip.pixel_chunk_size_kbytes, - mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active], - s->num_active_planes, - mode_lib->mp.NoOfDPP, - mode_lib->mp.dpte_group_bytes, - s->tdlut_bytes_per_group, - s->HostVMInefficiencyFactor, - s->HostVMInefficiencyFactorPrefetch, - mode_lib->soc.hostvm_min_page_size_kbytes, - mode_lib->soc.qos_parameters.qos_type, - !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), - mode_lib->soc.max_outstanding_reqs, - mode_lib->mp.request_size_bytes_luma, - mode_lib->mp.request_size_bytes_chroma, - 
mode_lib->ip.meta_chunk_size_kbytes, - mode_lib->ip.dchub_arb_to_ret_delay, - mode_lib->mp.TripToMemory, - mode_lib->ip.hostvm_mode, - - // output - &mode_lib->mp.ExtraLatency, - &mode_lib->mp.ExtraLatency_sr, - &mode_lib->mp.ExtraLatencyPrefetch); - - mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep; - - for (k = 0; k < s->num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { - mode_lib->mp.WritebackDelay[k] = - mode_lib->soc.qos_parameters.writeback.base_latency_us - + CalculateWriteBackDelay( - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk; - } else - mode_lib->mp.WritebackDelay[k] = 0; - } - - /* VActive bytes to fetch for UCLK P-State */ - calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg; - calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present; - - calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = s->num_active_planes; - calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->mp.NoOfDPP; - calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = mode_lib->mp.meta_row_height; - calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma; - calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l; - calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c; - calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->mp.dpte_row_height; - calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->mp.dpte_row_height_chroma; - calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l; - calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c; - calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->mp.BytePerPixelY; - calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->mp.BytePerPixelC; - calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->mp.SwathWidthY; - calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->mp.SwathWidthC; - calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->mp.SwathHeightY; - 
calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->mp.SwathHeightC; - calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; - - /* outputs */ - calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l; - calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c; - - calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params); - - /* Excess VActive bandwidth required to fill DET */ - calculate_excess_vactive_bandwidth_required( - display_cfg, - s->num_active_planes, - s->pstate_bytes_required_l, - s->pstate_bytes_required_c, - /* outputs */ - mode_lib->mp.excess_vactive_fill_bw_l, - mode_lib->mp.excess_vactive_fill_bw_c); - - mode_lib->mp.UrgentLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, - mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, - mode_lib->mp.FabricClock, - mode_lib->mp.uclk_freq_mhz, - mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); - - mode_lib->mp.TripToMemory = CalculateTripToMemory( - mode_lib->mp.UrgentLatency, - mode_lib->mp.FabricClock, - mode_lib->mp.uclk_freq_mhz, - mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); - - mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); - - mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory( - mode_lib->mp.UrgentLatency, - mode_lib->mp.FabricClock, - mode_lib->mp.uclk_freq_mhz, - mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); - - for (k = 0; k < s->num_active_planes; ++k) { - bool cursor_not_enough_urgent_latency_hiding = false; - s->line_times[k] = 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - - s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; - - s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, - mode_lib->mp.NoOfDPP[k], - display_cfg->plane_descriptors[k].composition.viewport.plane0.width, - display_cfg->plane_descriptors[k].composition.viewport.plane0.height, - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, - display_cfg->plane_descriptors[k].composition.rotation_angle); - - s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, - mode_lib->mp.NoOfDPP[k], - display_cfg->plane_descriptors[k].composition.viewport.plane1.width, - display_cfg->plane_descriptors[k].composition.viewport.plane1.height, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, - display_cfg->plane_descriptors[k].composition.rotation_angle); - - if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { - calculate_cursor_req_attributes( - display_cfg->plane_descriptors[k].cursor.cursor_width, - display_cfg->plane_descriptors[k].cursor.cursor_bpp, - - // output - &s->cursor_lines_per_chunk[k], - &s->cursor_bytes_per_line[k], - &s->cursor_bytes_per_chunk[k], - &s->cursor_bytes[k]); - - calculate_cursor_urgent_burst_factor( - mode_lib->ip.cursor_buffer_size, - display_cfg->plane_descriptors[k].cursor.cursor_width, - s->cursor_bytes_per_chunk[k], - s->cursor_lines_per_chunk[k], - s->line_times[k], - mode_lib->mp.UrgentLatency, - - // output - &mode_lib->mp.UrgentBurstFactorCursor[k], - &cursor_not_enough_urgent_latency_hiding); - } - mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k]; - - CalculateUrgentBurstFactor( - &display_cfg->plane_descriptors[k], - mode_lib->mp.swath_width_luma_ub[k], - mode_lib->mp.swath_width_chroma_ub[k], - mode_lib->mp.SwathHeightY[k], - mode_lib->mp.SwathHeightC[k], - s->line_times[k], - mode_lib->mp.UrgentLatency, - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, - mode_lib->mp.BytePerPixelInDETY[k], - mode_lib->mp.BytePerPixelInDETC[k], - mode_lib->mp.DETBufferSizeY[k], - mode_lib->mp.DETBufferSizeC[k], - - /* output */ - &mode_lib->mp.UrgentBurstFactorLuma[k], - &mode_lib->mp.UrgentBurstFactorChroma[k], - &mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); - - mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding; - } - - for (k = 0; k < s->num_active_planes; ++k) { - s->MaxVStartupLines[k] = CalculateMaxVStartup( - mode_lib->ip.ptoi_supported, - mode_lib->ip.vblank_nom_default_us, - &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing, - mode_lib->mp.WritebackDelay[k]); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); - DML_LOG_VERBOSE("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]); -#endif - } - - s->immediate_flip_required = false; - for (k = 0; k < s->num_active_planes; ++k) { - s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip; - } 
-#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); -#endif - - if (s->num_active_planes > 1) { - CheckGlobalPrefetchAdmissibility_params->num_active_planes = s->num_active_planes; - CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; - CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; - CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; - CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; - CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; - CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->mp.SwathHeightY; - CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->mp.SwathHeightC; - CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; - CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte; - CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->mp.DETBufferSizeY; - CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->mp.DETBufferSizeC; - CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; - CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; - CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; - CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = 0; // don't care - CheckGlobalPrefetchAdmissibility_params->Tpre_oto = 0; // don't care - CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; - CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = mode_lib->mp.Dcfclk; - CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; - CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->mp.dst_y_prefetch; - - // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible - CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->dummy_boolean[0]; - CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; - CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); // dont care about the check output for mode programming - } - - { - s->DestinationLineTimesForPrefetchLessThan2 = false; - s->VRatioPrefetchMoreThanMax = false; - - DML_LOG_VERBOSE("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__); - - for (k = 0; k < s->num_active_planes; ++k) { - struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; - - DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); - mode_lib->mp.TWait[k] = CalculateTWait( - display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, - mode_lib->mp.UrgentLatency, - mode_lib->mp.TripToMemory, - !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? 
- get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); - - myPipe->Dppclk = mode_lib->mp.Dppclk[k]; - myPipe->Dispclk = mode_lib->mp.Dispclk; - myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep; - myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k]; - myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; - myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; - myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; - myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; - myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; - myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k]; - myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k]; - myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k]; - myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k]; - myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; - myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; - myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; - myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; - myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; - myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; - myPipe->ODMMode = mode_lib->mp.ODMMode[k]; - myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; - myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k]; - myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k]; - myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); -#endif - CalculatePrefetchSchedule_params->display_cfg = display_cfg; - CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; - CalculatePrefetchSchedule_params->myPipe = myPipe; - CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k]; - CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; - CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; - CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; - CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; - CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; - CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); - 
CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; - CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; - CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k]; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; - CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; - CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; - CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; - CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; - CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->mp.UrgentLatency; - CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch; - CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc; - CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k]; - CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k]; - CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k]; - CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k]; - CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k]; - CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k]; - CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k]; - CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k]; - CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->mp.SwathHeightY[k]; - CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k]; - CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k]; - CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory; - CalculatePrefetchSchedule_params->Turg = mode_lib->mp.UrgentLatency; - CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; - CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; - CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; - CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; - CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; - CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); - CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; - CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; - CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; - CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; - CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k]; - CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = 
mode_lib->mp.mall_prefetch_sdp_overhead_factor[k]; - CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; - CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->mp.vactive_sw_bw_l[k]; - CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->mp.vactive_sw_bw_c[k]; - - // output - CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k]; - CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k]; - CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k]; - CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k]; - CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k]; - CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k]; - CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k]; - CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]; - CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]; - CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &s->dummy_single_array[0][k]; - CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]; - CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k]; - CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k]; - CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k]; - CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k]; - CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k]; - CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k]; - CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; - CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; - CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; - CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; - CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; - CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; - CalculatePrefetchSchedule_params->VUpdateOffsetPix = &mode_lib->mp.VUpdateOffsetPix[k]; - CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k]; - CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k]; - CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k]; - CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; - CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; - CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; - CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0]; - - mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); - - if (s->impacted_dst_y_pre[k] > 0) - mode_lib->mp.impacted_prefetch_margin_us[k] = (mode_lib->mp.dst_y_prefetch[k] - s->impacted_dst_y_pre[k]) * s->line_times[k]; - else - mode_lib->mp.impacted_prefetch_margin_us[k] = 0; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); -#endif - mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k]; - } // for k - - 
mode_lib->mp.PrefetchModeSupported = true; - for (k = 0; k < s->num_active_planes; ++k) { - if (mode_lib->mp.NoTimeToPrefetch[k] == true || - mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] || - mode_lib->mp.DSTYAfterScaler[k] > 8) { - DML_LOG_VERBOSE("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]); - mode_lib->mp.PrefetchModeSupported = false; - } - if (mode_lib->mp.dst_y_prefetch[k] < 2) - s->DestinationLineTimesForPrefetchLessThan2 = true; - - if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || - mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { - s->VRatioPrefetchMoreThanMax = true; - DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); - DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); - DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); - } - - if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) { - DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); - mode_lib->mp.PrefetchModeSupported = false; - } - } - - if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) { - DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); - DML_LOG_VERBOSE("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); - mode_lib->mp.PrefetchModeSupported = false; - } - - DML_LOG_VERBOSE("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__, - mode_lib->mp.PrefetchModeSupported ? 
"" : "NOT ", CalculatePrefetchSchedule_params->VStartup); - - // Prefetch schedule OK, now check prefetch bw - if (mode_lib->mp.PrefetchModeSupported == true) { - for (k = 0; k < s->num_active_planes; ++k) { - double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - CalculateUrgentBurstFactor( - &display_cfg->plane_descriptors[k], - mode_lib->mp.swath_width_luma_ub[k], - mode_lib->mp.swath_width_chroma_ub[k], - mode_lib->mp.SwathHeightY[k], - mode_lib->mp.SwathHeightC[k], - line_time_us, - mode_lib->mp.UrgentLatency, - mode_lib->mp.VRatioPrefetchY[k], - mode_lib->mp.VRatioPrefetchC[k], - mode_lib->mp.BytePerPixelInDETY[k], - mode_lib->mp.BytePerPixelInDETC[k], - mode_lib->mp.DETBufferSizeY[k], - mode_lib->mp.DETBufferSizeC[k], - /* Output */ - &mode_lib->mp.UrgentBurstFactorLumaPre[k], - &mode_lib->mp.UrgentBurstFactorChromaPre[k], - &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]); - - DML_LOG_VERBOSE("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - - DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]); - DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]); -#endif - } - - for (k = 0; k <= s->num_active_planes - 1; k++) - mode_lib->mp.final_flip_bw[k] = 0; - - calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->mp.urg_vactive_bandwidth_required; - calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required; - calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->mp.urg_bandwidth_required_qual; - calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required; - calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; - 
calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0; - - calculate_peak_bandwidth_params->display_cfg = display_cfg; - calculate_peak_bandwidth_params->inc_flip_bw = 0; - calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes; - calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1; - calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor; - calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor; - - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; - calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; - calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0]; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; - calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; - calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; - calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw; - calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw; - calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw; - calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw; - calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma; - calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma; - calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre; - - calculate_peak_bandwidth_required( - &mode_lib->scratch, - calculate_peak_bandwidth_params); - - // Check urg peak bandwidth against available urg bw - // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active) - check_urgent_bandwidth_support( - &mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth - &mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall - &s->dummy_boolean[1], // vactive bw ok - &mode_lib->mp.PrefetchModeSupported, // prefetch bw ok - - mode_lib->soc.mall_allocated_for_dcn_mbytes, - mode_lib->mp.non_urg_bandwidth_required, - mode_lib->mp.urg_vactive_bandwidth_required, - mode_lib->mp.urg_bandwidth_required, - 
mode_lib->mp.urg_bandwidth_available); - - if (!mode_lib->mp.PrefetchModeSupported) - DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__); - - for (k = 0; k < s->num_active_planes; ++k) { - if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) { - DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); - mode_lib->mp.PrefetchModeSupported = false; - } - } - } // prefetch schedule ok - - // Prefetch schedule and prefetch bw ok, now check flip bw - if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw - - mode_lib->mp.BandwidthAvailableForImmediateFlip = - get_bandwidth_available_for_immediate_flip( - dml2_core_internal_soc_state_sys_active, - mode_lib->mp.urg_bandwidth_required_qual, // no flip - mode_lib->mp.urg_bandwidth_available); - mode_lib->mp.TotImmediateFlipBytes = 0; - for (k = 0; k < s->num_active_planes; ++k) { - if (display_cfg->plane_descriptors[k].immediate_flip) { - s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(s->HostVMInefficiencyFactor, - mode_lib->mp.vm_bytes[k], - mode_lib->mp.PixelPTEBytesPerRow[k], - mode_lib->mp.meta_row_bytes[k]); - } else { - s->per_pipe_flip_bytes[k] = 0; - } - mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k]; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k = %u\n", __func__, k); - DML_LOG_VERBOSE("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]); - DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]); - DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]); - DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]); - DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes); -#endif - } - for (k = 0; k < s->num_active_planes; ++k) { - CalculateFlipSchedule( - &mode_lib->scratch, - display_cfg->plane_descriptors[k].immediate_flip, - 0, // use_lb_flip_bw - s->HostVMInefficiencyFactor, - s->Tvm_trips_flip[k], - s->Tr0_trips_flip[k], - s->Tvm_trips_flip_rounded[k], - s->Tr0_trips_flip_rounded[k], - display_cfg->gpuvm_enable, - mode_lib->mp.vm_bytes[k], - mode_lib->mp.PixelPTEBytesPerRow[k], - mode_lib->mp.BandwidthAvailableForImmediateFlip, - mode_lib->mp.TotImmediateFlipBytes, - display_cfg->plane_descriptors[k].pixel_format, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, - mode_lib->mp.Tno_bw[k], - mode_lib->mp.dpte_row_height[k], - mode_lib->mp.dpte_row_height_chroma[k], - mode_lib->mp.use_one_row_for_frame_flip[k], - mode_lib->ip.max_flip_time_us, - mode_lib->ip.max_flip_time_lines, - s->per_pipe_flip_bytes[k], - mode_lib->mp.meta_row_bytes[k], - mode_lib->mp.meta_row_height[k], - mode_lib->mp.meta_row_height_chroma[k], - mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, - - // Output - &mode_lib->mp.dst_y_per_vm_flip[k], - &mode_lib->mp.dst_y_per_row_flip[k], - &mode_lib->mp.final_flip_bw[k], - &mode_lib->mp.ImmediateFlipSupportedForPipe[k]); - } - - calculate_peak_bandwidth_params->urg_vactive_bandwidth_required 
= s->dummy_bw; - calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required_flip; - calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; - calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required_flip; - calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; - calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0; - - calculate_peak_bandwidth_params->display_cfg = display_cfg; - calculate_peak_bandwidth_params->inc_flip_bw = 1; - calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes; - calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1; - calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor; - calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor; - - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; - calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; - calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; - calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; - calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; - calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw; - calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw; - calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw; - calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0]; - calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw; - calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma; - calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma; - calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre; - - calculate_peak_bandwidth_required( - &mode_lib->scratch, - calculate_peak_bandwidth_params); - - calculate_immediate_flip_bandwidth_support( - &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip - &mode_lib->mp.ImmediateFlipSupported, // bool* 
flip_bandwidth_support_ok - - dml2_core_internal_soc_state_sys_active, - mode_lib->mp.urg_bandwidth_required_flip, - mode_lib->mp.non_urg_bandwidth_required_flip, - mode_lib->mp.urg_bandwidth_available); - - if (!mode_lib->mp.ImmediateFlipSupported) - DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for flip!\n", __func__); - - for (k = 0; k < s->num_active_planes; ++k) { - if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) { - mode_lib->mp.ImmediateFlipSupported = false; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k); -#endif - } - } - } else { // flip or prefetch not supported - mode_lib->mp.ImmediateFlipSupported = false; - } - - // consider flip support okay if the flip bw is ok or (when the user doesn't require an iflip and there is no host vm) - must_support_iflip = display_cfg->hostvm_enable || s->immediate_flip_required; - mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported)); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported); - for (k = 0; k < s->num_active_planes; ++k) - DML_LOG_VERBOSE("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); - DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable); - DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported); - DML_LOG_VERBOSE("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); -#endif - DML_LOG_VERBOSE("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]); - } - - for (k = 0; k < s->num_active_planes; ++k) - DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); - - if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) { - DML_LOG_VERBOSE("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__); - } else { - DML_LOG_VERBOSE("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__); - - // DCC Configuration - for (k = 0; k < s->num_active_planes; ++k) { -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); -#endif - CalculateDCCConfiguration( - display_cfg->plane_descriptors[k].surface.dcc.enable, - display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown, - display_cfg->plane_descriptors[k].pixel_format, - display_cfg->plane_descriptors[k].surface.plane0.width, - display_cfg->plane_descriptors[k].surface.plane1.width, - display_cfg->plane_descriptors[k].surface.plane0.height, - display_cfg->plane_descriptors[k].surface.plane1.height, - s->NomDETInKByte, - mode_lib->mp.Read256BlockHeightY[k], - mode_lib->mp.Read256BlockHeightC[k], - display_cfg->plane_descriptors[k].surface.tiling, - mode_lib->mp.BytePerPixelY[k], - mode_lib->mp.BytePerPixelC[k], - mode_lib->mp.BytePerPixelInDETY[k], - mode_lib->mp.BytePerPixelInDETC[k], - display_cfg->plane_descriptors[k].composition.rotation_angle, - - /* Output */ - &mode_lib->mp.RequestLuma[k], - &mode_lib->mp.RequestChroma[k], - &mode_lib->mp.DCCYMaxUncompressedBlock[k], - &mode_lib->mp.DCCCMaxUncompressedBlock[k], - &mode_lib->mp.DCCYMaxCompressedBlock[k], - &mode_lib->mp.DCCCMaxCompressedBlock[k], - 
&mode_lib->mp.DCCYIndependentBlock[k], - &mode_lib->mp.DCCCIndependentBlock[k]); - } - - //Watermarks and NB P-State/DRAM Clock Change Support - s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency; - s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency; - s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr; - s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; - s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; - s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; - s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; - s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; - s->mmSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; - s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; - s->mmSOCParameters.USRRetrainingLatency = 0; - s->mmSOCParameters.SMNLatency = 0; - s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index); - s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->mp.uclk_freq_mhz, mode_lib->mp.FabricClock, in_out_params->min_clk_index); - s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->mp.FabricClock; - s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; - - CalculateWatermarks_params->display_cfg = display_cfg; - CalculateWatermarks_params->USRRetrainingRequired = false; - CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes; - CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; - CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; - CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; - CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk; - CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; - CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; - CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; - CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters; - CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; - CalculateWatermarks_params->SOCCLK = s->SOCCLK; - CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep; - CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; - CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; - CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY; - CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC; - CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY; - CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC; - CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; - CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC; - 
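/*
 * Editor's aside, a minimal sketch (the 300-cycle and 1000 MHz figures are
 * assumptions for illustration, not values from this patch): the
 * mmSOCParameters conversions above turn QoS figures expressed in clock
 * cycles into microseconds by dividing by the clock in MHz, since
 * cycles / (cycles per us) = us.
 */
static inline double cycles_to_us_sketch(double cycles, double clk_mhz)
{
	return cycles / clk_mhz;	/* e.g. 300 cycles / 1000 MHz = 0.3 us */
}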
CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler; - CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler; - CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; - CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte; - CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height; - CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma; - CalculateWatermarks_params->DPPPerSurface = mode_lib->mp.NoOfDPP; - - // Output - CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark; - CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport; - CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported; - CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported; - CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL; - CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport; - CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported; - CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported; - CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport; - CalculateWatermarks_params->g6_temp_read_support = &mode_lib->mp.g6_temp_read_support; - CalculateWatermarks_params->VActiveLatencyHidingMargin = 0; - CalculateWatermarks_params->VActiveLatencyHidingUs = 0; - - CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); - - for (k = 0; k < s->num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { - mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); - mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark); - } else { - mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0; - mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0; - } - } - - calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines); - - DML_LOG_VERBOSE("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); - DML_LOG_VERBOSE("DML::%s: DEBUG PixelClock = %ld kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); - - //Display Pipeline Delivery Time in Prefetch, Groups - CalculatePixelDeliveryTimes( - display_cfg, - cfg_support_info, - s->num_active_planes, - mode_lib->mp.VRatioPrefetchY, - mode_lib->mp.VRatioPrefetchC, - mode_lib->mp.swath_width_luma_ub, 
- mode_lib->mp.swath_width_chroma_ub, - mode_lib->mp.PSCL_THROUGHPUT, - mode_lib->mp.PSCL_THROUGHPUT_CHROMA, - mode_lib->mp.Dppclk, - mode_lib->mp.BytePerPixelC, - mode_lib->mp.req_per_swath_ub_l, - mode_lib->mp.req_per_swath_ub_c, - - /* Output */ - mode_lib->mp.DisplayPipeLineDeliveryTimeLuma, - mode_lib->mp.DisplayPipeLineDeliveryTimeChroma, - mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch, - mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch, - mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma, - mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma, - mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch, - mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch); - - CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch; - CalculateMetaAndPTETimes_params->display_cfg = display_cfg; - CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes; - CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame; - CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank; - CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip; - CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY; - CalculateMetaAndPTETimes_params->BytePerPixelC = mode_lib->mp.BytePerPixelC; - CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height; - CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma; - CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; - CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY; - CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC; - CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY; - CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY; - CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC; - CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC; - CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub; - CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub; - CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub; - CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present; - - CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes; - CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes; - CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width; - CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma; - CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height; - CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma; - CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width; - CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma; - CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height; - CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma; - - CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group; - CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = 
mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L; - CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C; - CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma; - CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma; - CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma; - CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma; - CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma; - CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma; - CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L; - CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C; - CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal; - CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal; - CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank; - CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank; - CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip; - CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = mode_lib->mp.TimePerChromaMetaChunkFlip; - - CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params); - - CalculateVMGroupAndRequestTimes( - display_cfg, - s->num_active_planes, - mode_lib->mp.BytePerPixelC, - mode_lib->mp.dst_y_per_vm_vblank, - mode_lib->mp.dst_y_per_vm_flip, - mode_lib->mp.dpte_row_width_luma_ub, - mode_lib->mp.dpte_row_width_chroma_ub, - mode_lib->mp.vm_group_bytes, - mode_lib->mp.dpde0_bytes_per_frame_ub_l, - mode_lib->mp.dpde0_bytes_per_frame_ub_c, - s->tdlut_pte_bytes_per_frame, - mode_lib->mp.meta_pte_bytes_per_frame_ub_l, - mode_lib->mp.meta_pte_bytes_per_frame_ub_c, - mode_lib->ip.dcn_mrq_present, - - /* Output */ - mode_lib->mp.TimePerVMGroupVBlank, - mode_lib->mp.TimePerVMGroupFlip, - mode_lib->mp.TimePerVMRequestVBlank, - mode_lib->mp.TimePerVMRequestFlip); - - // VStartup Adjustment - for (k = 0; k < s->num_active_planes; ++k) { - bool isInterlaceTiming; - - mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency; - if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable) - mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k]; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); -#endif - s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); - DML_LOG_VERBOSE("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, 
mode_lib->mp.MinTTUVBlank[k]); -#endif - - mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin; - if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) { - mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin; - } - - isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported); - - // The actual positioning of the vstartup - mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]); - - s->dlg_vblank_start = ((isInterlaceTiming ? math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) : - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); - s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0; - s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k]; - - if (s->blank_lines_remaining < 0) { - DML_LOG_VERBOSE("ERROR: Vstartup is larger than vblank!?\n"); - s->blank_lines_remaining = 0; - DML_ASSERT(0); - } - mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup; - - // debug only - if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + mode_lib->mp.VReadyOffsetPix[k]) / (double) display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <= - (isInterlaceTiming ? 
- math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) : - (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) { - mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true; - } else { - mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false; - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, HTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total); - DML_LOG_VERBOSE("DML::%s: k=%u, VTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); - DML_LOG_VERBOSE("DML::%s: k=%u, VActive = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active); - DML_LOG_VERBOSE("DML::%s: k=%u, VFrontPorch = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); - DML_LOG_VERBOSE("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]); -#endif - } - - //Maximum Bandwidth Used - mode_lib->mp.TotalWRBandwidth = 0; - for (k = 0; k < display_cfg->num_streams; ++k) { - s->WRBandwidth = 0; - if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) { - s->WRBandwidth = display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_height - * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_width / - (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height - / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000)) - * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 
4.0 : 8.0); - mode_lib->mp.TotalWRBandwidth = mode_lib->mp.TotalWRBandwidth + s->WRBandwidth; - } - } - - mode_lib->mp.TotalDataReadBandwidth = 0; - for (k = 0; k < s->num_active_planes; ++k) { - mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k]; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); - DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); - DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); -#endif - } - - CalculateStutterEfficiency_params->display_cfg = display_cfg; - CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte; - CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; - CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries; - CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries; - CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes; - CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes; - CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes; - CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth; - CalculateStutterEfficiency_params->DCFCLK = mode_lib->mp.Dcfclk; - CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active]; - CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b; - CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs; - CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; - CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; - CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; - CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark; - CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark; - CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; - CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank; - CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP; - CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; - CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY; - CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; - CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY; - CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY; - CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC; - CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY; - CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY; - CalculateStutterEfficiency_params->BlockHeight256BytesC = 
mode_lib->mp.Read256BlockHeightC; - CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC; - CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock; - CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock; - CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.vactive_sw_bw_l; - CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.vactive_sw_bw_c; - CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; - CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw; - CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present; - - // output - CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank; - CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency; - CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame; - CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank; - CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency; - CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; - CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod; - CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; - - // Stutter Efficiency - CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params); - -#ifdef __DML_VBA_ALLOW_DELTA__ - // Calculate z8 stutter eff assuming 0 reserved space - CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0; - CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0; - - CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase; - CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase; - CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase; - CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase; - - // Stutter Efficiency - CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params); -#else - mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank; - mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency; - mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; - mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod; -#endif - } // PrefetchAndImmediateFlipSupported - - max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0; - min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz; - mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles; - mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles); - 
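/*
 * Editor's aside, a sketch with assumed inputs (the 8 KB chunk and 40 cycles
 * of return latency are illustrative only): dcfclk_deep_sleep_hysteresis
 * above is 3/4 of the pixel chunk counted in 64-byte returns, minus the
 * worst-case return latency in DCFCLK cycles, floored at 32.
 */
static inline unsigned int deep_sleep_hysteresis_sketch(void)
{
	unsigned int returns = 8 * 1024 * 3 / 4 / 64;	/* 96 returns */
	unsigned int latency_cycles = 40;		/* hypothetical */
	unsigned int diff = returns > latency_cycles ? returns - latency_cycles : 0;

	return diff > 32 ? diff : 32;	/* max(32, 96 - 40) = 56, < 256 as asserted below */
}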
DML_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz); - DML_LOG_VERBOSE("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz); - DML_LOG_VERBOSE("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz); - DML_LOG_VERBOSE("DML::%s: min_return_uclk_cycles = %ld\n", __func__, min_return_uclk_cycles); - DML_LOG_VERBOSE("DML::%s: min_return_fclk_cycles = %ld\n", __func__, min_return_fclk_cycles); - DML_LOG_VERBOSE("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles); - DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis); - DML_LOG_VERBOSE("DML::%s: --- END --- \n", __func__); -#endif - return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported); -} - -bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) -{ - DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__); - bool result = dml_core_mode_programming(in_out_params); - - DML_LOG_VERBOSE("DML::%s: result = %0d\n", __func__, result); - DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__); - return result; -} - -void dml2_core_calcs_get_dpte_row_height( - unsigned int *dpte_row_height, - struct dml2_core_internal_display_mode_lib *mode_lib, - bool is_plane1, - enum dml2_source_format_class SourcePixelFormat, - enum dml2_swizzle_mode SurfaceTiling, - enum dml2_rotation_angle ScanDirection, - unsigned int pitch, - unsigned int GPUVMMinPageSizeKBytes) -{ - unsigned int BytePerPixelY; - unsigned int BytePerPixelC; - double BytePerPixelInDETY; - double BytePerPixelInDETC; - unsigned int BlockHeight256BytesY; - unsigned int BlockHeight256BytesC; - unsigned int BlockWidth256BytesY; - unsigned int BlockWidth256BytesC; - unsigned int MacroTileWidthY; - unsigned int MacroTileWidthC; - unsigned int MacroTileHeightY; - unsigned int MacroTileHeightC; - bool surf_linear_128_l = false; - bool surf_linear_128_c = false; - - CalculateBytePerPixelAndBlockSizes( - SourcePixelFormat, - SurfaceTiling, - pitch, - pitch, - - /* Output */ - &BytePerPixelY, - &BytePerPixelC, - &BytePerPixelInDETY, - &BytePerPixelInDETC, - &BlockHeight256BytesY, - &BlockHeight256BytesC, - &BlockWidth256BytesY, - &BlockWidth256BytesC, - &MacroTileHeightY, - &MacroTileHeightC, - &MacroTileWidthY, - &MacroTileWidthC, - &surf_linear_128_l, - &surf_linear_128_c); - - unsigned int BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY; - unsigned int BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY; - unsigned int BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY; - unsigned int MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY; - unsigned int MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY; - unsigned int PTEBufferSizeInRequests = is_plane1 ? 
mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML: %s: is_plane1 = %u\n", __func__, is_plane1); - DML_LOG_VERBOSE("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel); - DML_LOG_VERBOSE("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); - DML_LOG_VERBOSE("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); - DML_LOG_VERBOSE("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); - DML_LOG_VERBOSE("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); - DML_LOG_VERBOSE("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); - DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); - DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); - DML_LOG_VERBOSE("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); -#endif - unsigned int dummy_integer[21]; - - mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportStationary = 0; - mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCEnable = 0; - mode_lib->scratch.calculate_vm_and_row_bytes_params.NumberOfDPPs = 1; - mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockHeight256Bytes = BlockHeight256Bytes; - mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockWidth256Bytes = BlockWidth256Bytes; - mode_lib->scratch.calculate_vm_and_row_bytes_params.SourcePixelFormat = SourcePixelFormat; - mode_lib->scratch.calculate_vm_and_row_bytes_params.SurfaceTiling = SurfaceTiling; - mode_lib->scratch.calculate_vm_and_row_bytes_params.BytePerPixel = BytePerPixel; - mode_lib->scratch.calculate_vm_and_row_bytes_params.RotationAngle = ScanDirection; - mode_lib->scratch.calculate_vm_and_row_bytes_params.SwathWidth = 0; - mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportHeight = 0; - mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportXStart = 0; - mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportYStart = 0; - mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMEnable = 1; - mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = 4; - mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = GPUVMMinPageSizeKBytes; - mode_lib->scratch.calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = PTEBufferSizeInRequests; - mode_lib->scratch.calculate_vm_and_row_bytes_params.Pitch = pitch; - mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileWidth = MacroTileWidth; - mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileHeight = MacroTileHeight; - mode_lib->scratch.calculate_vm_and_row_bytes_params.is_phantom = 0; - mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCMetaPitch = 0; - mode_lib->scratch.calculate_vm_and_row_bytes_params.mrq_present = 0; - - mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &dummy_integer[1]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &dummy_integer[2]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub = &dummy_integer[3]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height = dpte_row_height; - mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_linear = &dummy_integer[4]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = 
&dummy_integer[5]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &dummy_integer[6]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &dummy_integer[7]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_width = &dummy_integer[8]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_height = &dummy_integer[9]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &dummy_integer[11]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &dummy_integer[12]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.PTERequestSize = &dummy_integer[13]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &dummy_integer[14]; - - mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_bytes = &dummy_integer[15]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestWidth = &dummy_integer[16]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestHeight = &dummy_integer[17]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_width = &dummy_integer[18]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_height = &dummy_integer[19]; - mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &dummy_integer[20]; - - // just supply enough parameters to calculate dpte - CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); -#endif -} - -static bool is_dual_plane(enum dml2_source_format_class source_format) -{ - bool ret_val = false; - - if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) - ret_val = true; - - return ret_val; -} - -static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) -{ - unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; - return plane_idx; -} - -static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs) -{ - double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? 
(double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; - - wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); - wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); - wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); - wm_regs->sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); - wm_regs->sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); - wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); - wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); - wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); - wm_regs->usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); - wm_regs->refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.UrgentLatency * refclk_freq_in_mhz); - wm_regs->refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.MetaTripToMemory * refclk_freq_in_mhz); - wm_regs->frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000); - wm_regs->frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000); - wm_regs->frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000); -} - -static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) -{ - if (a == 0) - return 0; - - return (math_log2_approx(a) - subtrahend); -} - -void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p) -{ - int dst_x_offset = (int) ((p->cursor_x_position + (p->cursor_stereo_en == 0 ? 0 : math_max2(p->cursor_primary_offset, p->cursor_secondary_offset)) - - (p->cursor_hotspot_x * (p->cursor_2x_magnify == 0 ? 1 : 2))) * p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio); - cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? 
dst_x_offset : 0); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position); - DML_LOG_VERBOSE("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz); - DML_LOG_VERBOSE("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz); - DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset); - DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset); -#endif - - cursor_dlg_regs->chunk_hdl_adjust = 3; - cursor_dlg_regs->dst_y_offset = 0; - - cursor_dlg_regs->qos_level_fixed = 8; - cursor_dlg_regs->qos_ramp_disable = 0; -} - -static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, - const struct dml2_display_cfg *display_cfg, - const struct dml2_core_internal_display_mode_lib *mode_lib, - unsigned int pipe_idx) -{ - unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); - enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format; - enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling; - bool dual_plane = is_dual_plane((enum dml2_source_format_class)(source_format)); - - unsigned int pixel_chunk_bytes = 0; - unsigned int min_pixel_chunk_bytes = 0; - unsigned int dpte_group_bytes = 0; - unsigned int mpte_group_bytes = 0; - - unsigned int p1_pixel_chunk_bytes = 0; - unsigned int p1_min_pixel_chunk_bytes = 0; - unsigned int p1_dpte_group_bytes = 0; - unsigned int p1_mpte_group_bytes = 0; - - unsigned int detile_buf_plane1_addr = 0; - unsigned int detile_buf_size_in_bytes; - double stored_swath_l_bytes; - double stored_swath_c_bytes; - bool is_phantom_pipe; - - DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); - - pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024); - min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes); - - if (pixel_chunk_bytes == 64 * 1024) - min_pixel_chunk_bytes = 0; - - dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx)); - mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx)); - - p1_pixel_chunk_bytes = pixel_chunk_bytes; - p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes; - p1_dpte_group_bytes = dpte_group_bytes; - p1_mpte_group_bytes = mpte_group_bytes; - - if (source_format == dml2_rgbe_alpha) - p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024); - - rq_regs->unbounded_request_enabled = dml_get_unbounded_request_enabled(mode_lib); - rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10); - rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10); - - if (min_pixel_chunk_bytes == 0) - rq_regs->rq_regs_l.min_chunk_size = 0; - else - rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1); - - if (p1_min_pixel_chunk_bytes == 0) - rq_regs->rq_regs_c.min_chunk_size = 0; - else - rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1); - - rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6); - rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6); - rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6); - rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6); - - 
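/*
 * Editor's aside, a sketch (the 8 KB chunk is an assumed example): the chunk
 * and group size registers above hold log2 of the byte size minus a fixed
 * bias; log_and_substract_if_non_zero() merely guards the log2 against a
 * zero input. With the bias of 10 used for chunk_size, an 8 KB pixel chunk
 * encodes as log2(8192) - 10 = 3.
 */
static inline unsigned int chunk_size_reg_sketch(unsigned int bytes)
{
	unsigned int log2 = 0;

	if (bytes == 0)
		return 0;
	while (bytes >>= 1)	/* integer log2, exact for powers of two */
		log2++;
	return log2 - 10;	/* 8192 -> 13 - 10 = 3 */
}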
detile_buf_size_in_bytes = (unsigned int)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024); - - if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) { - unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx)); -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear); -#endif - DML_ASSERT(p0_pte_row_height_linear >= 8); - - rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3; - if (dual_plane) { - unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx)); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear); -#endif - if (sw_mode == dml2_sw_linear) { - DML_ASSERT(p1_pte_row_height_linear >= 8); - } - rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3; - } - } else { - rq_regs->rq_regs_l.pte_row_height_linear = 0; - rq_regs->rq_regs_c.pte_row_height_linear = 0; - } - - rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_l(mode_lib, pipe_idx), 0); - rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_c(mode_lib, pipe_idx), 0); - - // FIXME_DCN4, programming guide has dGPU condition - if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb - rq_regs->drq_expansion_mode = 0; - } else { - rq_regs->drq_expansion_mode = 2; - } - rq_regs->prq_expansion_mode = 1; - rq_regs->crq_expansion_mode = 1; - rq_regs->mrq_expansion_mode = 1; - - stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx); - stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx); - is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx); - - // Note: detile_buf_plane1_addr is in unit of 1KB - if (dual_plane) { - if (is_phantom_pipe) { - detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma - } else { - if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { - detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); -#endif - } else { - detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); -#endif - } - } - } - rq_regs->plane1_base_address = detile_buf_plane1_addr; - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); - DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); - DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); - DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); - DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); - DML_LOG_VERBOSE("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); -#endif - //DML_LOG_VERBOSE_rq_regs_st(rq_regs); - DML_LOG_VERBOSE("DML_DLG::%s: Calculation for 
pipe[%d] done\n", __func__, pipe_idx); -} - -static void rq_dlg_get_dlg_reg( - struct dml2_core_internal_scratch *s, - struct dml2_display_dlg_regs *disp_dlg_regs, - struct dml2_display_ttu_regs *disp_ttu_regs, - const struct dml2_display_cfg *display_cfg, - const struct dml2_core_internal_display_mode_lib *mode_lib, - const unsigned int pipe_idx) -{ - struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals; - - memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals)); - - DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); - - l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); - DML_ASSERT(l->plane_idx < DML2_MAX_PLANES); - - l->source_format = dml2_444_8; - l->odm_mode = dml2_odm_mode_bypass; - l->dual_plane = false; - l->htotal = 0; - l->hactive = 0; - l->hblank_end = 0; - l->vblank_end = 0; - l->interlaced = false; - l->pclk_freq_in_mhz = 0.0; - l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; - l->ref_freq_to_pix_freq = 0.0; - - if (l->plane_idx < DML2_MAX_PLANES) { - - l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing; - l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format; - l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx]; - - l->dual_plane = is_dual_plane(l->source_format); - - l->htotal = l->timing->h_total; - l->hactive = l->timing->h_active; - l->hblank_end = l->timing->h_blank_end; - l->vblank_end = l->timing->v_blank_end; - l->interlaced = l->timing->interlaced; - l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000; - l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz; - - DML_LOG_VERBOSE("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx); - DML_LOG_VERBOSE("DML_DLG: %s: htotal = %d\n", __func__, l->htotal); - DML_LOG_VERBOSE("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz); - DML_LOG_VERBOSE("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz); - DML_LOG_VERBOSE("DML_DLG: %s: soc.refclk_mhz = %d\n", __func__, mode_lib->soc.dchub_refclk_mhz); - DML_LOG_VERBOSE("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz); - DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); - DML_LOG_VERBOSE("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced); - - DML_ASSERT(l->refclk_freq_in_mhz != 0); - DML_ASSERT(l->pclk_freq_in_mhz != 0); - DML_ASSERT(l->ref_freq_to_pix_freq < 4.0); - - // Need to figure out which side of odm combine we're in - // Assume the pipe instances under the same plane are in order - - if (l->odm_mode == dml2_odm_mode_bypass) { - disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq); - } else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) { - // find out how many pipes are in this plane - l->num_active_pipes = mode_lib->mp.num_active_pipes; - l->first_pipe_idx_in_plane = DML2_MAX_PLANES; - l->pipe_idx_in_combine = 0; // pipe index within the plane - l->odm_combine_factor = 2; - - if (l->odm_mode == dml2_odm_mode_combine_3to1) - l->odm_combine_factor = 3; - else if (l->odm_mode == dml2_odm_mode_combine_4to1) - l->odm_combine_factor = 4; - - for (unsigned int i = 0; i < l->num_active_pipes; i++) { 
- if (dml_get_plane_idx(mode_lib, i) == l->plane_idx) { - if (i < l->first_pipe_idx_in_plane) { - l->first_pipe_idx_in_plane = i; - } - } - } - l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 uses pipes 0, 1, and plane 1 uses pipes 2, 3, etc.) - - disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq); - DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); - DML_LOG_VERBOSE("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane); - DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine); - DML_LOG_VERBOSE("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor); - } - DML_LOG_VERBOSE("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); - - DML_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13)); - - disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19)); - disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8)); - disp_dlg_regs->dlg_vblank_end = l->interlaced ? (l->vblank_end / 2) : l->vblank_end; // 15 bits - - l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]]; - l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]); - - DML_LOG_VERBOSE("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank); - DML_LOG_VERBOSE("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start); - DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); - - l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]); - disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0; - - DML_LOG_VERBOSE("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); - - l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); - l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); - - DML_LOG_VERBOSE("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler); - DML_LOG_VERBOSE("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler); - - l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]]; - l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; - l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; - l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]]; - l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]]; - - DML_LOG_VERBOSE("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch); - DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip); - DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip); - DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank); - DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, 
l->dst_y_per_row_vblank); - - if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) { - DML_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank)); - } - - l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]]; - l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]]; - - DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l); - DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c); - - // Active - l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - - DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l); - DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l); - - l->refcyc_per_line_delivery_pre_c = 0.0; - l->refcyc_per_line_delivery_c = 0.0; - - if (l->dual_plane) { - l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - - DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c); - DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c); - } - - disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); - disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); - - l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - - DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l); - DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l); - - l->refcyc_per_req_delivery_pre_c = 0.0; - l->refcyc_per_req_delivery_c = 0.0; - if (l->dual_plane) { - l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - - DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c); - DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c); - } - - // TTU - Cursor - DML_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1); - - // Assign to register structures - disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2)); - DML_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18)); - - disp_dlg_regs->dst_y_after_scaler = 
l->dst_y_after_scaler; // in terms of line - disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk - disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2)); - disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2)); - disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2)); - disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2)); - disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2)); - - disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19)); - disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19)); - - DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); - DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); - DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); - DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); - - disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); - disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); - disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10)); - disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10)); - - l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]]; - l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]]; - l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - - disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2)); - disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2)); - - disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l); - disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c); - 
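The conversions just above pack real-valued line counts and ratios into unsigned register fields as plain binary fixed point: the dst_y_* fields are scaled by 2^2 (two fractional bits), vratio_prefetch and ref_freq_to_pix_freq by 2^19, and the per-request VM times by 2^10, all followed by a truncating cast. A minimal standalone sketch of that convention, assuming nothing beyond the scaling visible in the code (pack_fixed is a hypothetical name, not part of DML):

#include <stdio.h>

/* Hypothetical helper mirroring the (unsigned int)(x * math_pow(2, n)) idiom above. */
static unsigned int pack_fixed(double value, unsigned int frac_bits)
{
	return (unsigned int)(value * (double)(1u << frac_bits)); /* truncates, does not round */
}

int main(void)
{
	printf("0x%x\n", pack_fixed(9.75, 2));	/* dst_y-style U.2:   9.75 * 4    = 0x27    */
	printf("0x%x\n", pack_fixed(1.5, 19));	/* vratio-style U.19: 1.5  * 2^19 = 0xc0000 */
	return 0;
}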
disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l); - disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int)(l->refcyc_per_pte_group_vblank_c); - disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l); - disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c); - disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1); - disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1); - disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1); - disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1); - - l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]]; - l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]]; - l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - - disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2)); - disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2)); - disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int)(l->refcyc_per_meta_chunk_nom_l); - disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c); - disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l); - disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c); - disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l); - disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c); - - disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group); - disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off - - disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10)); - disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10)); - disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10)); - disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10)); - disp_ttu_regs->qos_level_low_wm = 0; - - disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq); - - disp_ttu_regs->qos_level_flip = 14; - disp_ttu_regs->qos_level_fixed_l = 8; - disp_ttu_regs->qos_level_fixed_c = 8; - disp_ttu_regs->qos_ramp_disable_l = 
0; - disp_ttu_regs->qos_ramp_disable_c = 0; - disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz); - - // CHECK for HW registers' range, DML_ASSERT or clamp - DML_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13)); - DML_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13)); - DML_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13)); - DML_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13)); - if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23)) - disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1); - - if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23)) - disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1); - - if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23)) - disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1); - - if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23)) - disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1); - - - DML_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); - DML_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13)); - - if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) { - DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1); - l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1; - } - if (l->dual_plane) { - if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) { - DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1); - l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1; - } - } - - if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23)) - disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1); - if (l->dual_plane) { - if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23)) - disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1); - } - DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13)); - if (l->dual_plane) { - DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13)); - } - - DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13)); - DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13)); - DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13)); - DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13)); - DML_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14)); - DML_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14)); - DML_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24)); - - DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); - - } -} - -static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param) -{ - double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? 
(double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; - - arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs; - arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max - arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4; - arb_param->sdpif_request_rate_limit = arb_param->sdpif_request_rate_limit < 96 ? 96 : arb_param->sdpif_request_rate_limit; - arb_param->sat_level_us = 60; - arb_param->hvm_max_qos_commit_threshold = 0xf; - arb_param->hvm_min_req_outstand_commit_threshold = 0xa; - arb_param->compbuf_reserved_space_kbytes = dml_get_compbuf_reserved_space_64b(mode_lib) * 64 / 1024; - arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.compressed_buffer_segment_size_in_kbytes; - arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib); - arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib); - arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz); - -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); - DML_LOG_VERBOSE("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit); - DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes); - DML_LOG_VERBOSE("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req); - DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis); -#endif - -} - -void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out) -{ - rq_dlg_get_wm_regs(display_cfg, mode_lib, out); -} - -void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out) -{ - rq_dlg_get_arb_params(display_cfg, mode_lib, out); -} - -void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg, - struct dml2_core_internal_display_mode_lib *mode_lib, - struct dml2_dchub_per_pipe_register_set *out, int pipe_index) -{ - rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index); - rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index); - out->det_size = dml_get_det_buffer_size_kbytes(mode_lib, pipe_index) / mode_lib->ip.config_return_buffer_segment_size_in_kbytes; -} - -void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index) -{ - out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index); - out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); - out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); - out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); - out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index); -} - -void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct 
dml2_per_stream_programming *out, int pipe_index) -{ - dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index); -} - -void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, - const struct display_configuation_with_meta *display_cfg, - struct dmub_cmd_fams2_global_config *fams2_global_config) -{ - fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required; - - if (fams2_global_config->features.bits.enable) { - fams2_global_config->features.bits.enable_stall_recovery = true; - fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START; - - fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us; - fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us; - fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us; - fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us; - - fams2_global_config->num_streams = display_cfg->display_config.num_streams; - } -} - -void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, - const struct display_configuation_with_meta *display_cfg, - union dmub_cmd_fams2_config *fams2_base_programming, - union dmub_cmd_fams2_config *fams2_sub_programming, - enum dml2_pstate_method pstate_method, - int plane_index) -{ - const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index]; - const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index]; - const struct dml2_pstate_meta *stream_pstate_meta = &display_cfg->stage3.stream_pstate_meta[plane_descriptor->stream_index]; - - struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base; - union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state; - - unsigned int i; - - if (display_cfg->display_config.overrides.all_streams_blanked) { - /* stream is blanked, so do nothing */ - return; - } - - /* from display configuration */ - base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; - base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; - base_programming->vblank_start = (uint16_t)(stream_pstate_meta->nom_vtotal - - stream_descriptor->timing.v_front_porch); - base_programming->vblank_end = (uint16_t)(stream_pstate_meta->nom_vtotal - - stream_descriptor->timing.v_front_porch - - stream_descriptor->timing.v_active); - base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled; - - /* from meta */ - base_programming->otg_vline_time_ns = - (unsigned int)(stream_pstate_meta->otg_vline_time_us * 1000.0); - base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_pstate_meta->scheduling_delay_otg_vlines; - base_programming->contention_delay_otg_vlines = (uint8_t)stream_pstate_meta->contention_delay_otg_vlines; - base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_pstate_meta->vertical_interrupt_ack_delay_otg_vlines; - base_programming->drr_keepout_otg_vline = (uint16_t)(stream_pstate_meta->nom_vtotal - - stream_descriptor->timing.v_front_porch - - stream_pstate_meta->method_drr.programming_delay_otg_vlines); - base_programming->allow_to_target_delay_otg_vlines = 
(uint8_t)stream_pstate_meta->allow_to_target_delay_otg_vlines; - base_programming->max_vtotal = (uint16_t)stream_pstate_meta->max_vtotal; - - /* from core */ - base_programming->config.bits.min_ttu_vblank_usable = true; - for (i = 0; i < display_cfg->display_config.num_planes; i++) { - /* check if all planes support p-state in blank */ - if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index && - mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) { - base_programming->config.bits.min_ttu_vblank_usable = false; - break; - } - } - - switch (pstate_method) { - case dml2_pstate_method_vactive: - case dml2_pstate_method_fw_vactive_drr: - /* legacy vactive */ - base_programming->type = FAMS2_STREAM_TYPE_VACTIVE; - sub_programming->legacy.vactive_det_fill_delay_otg_vlines = - (uint8_t)stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; - base_programming->allow_start_otg_vline = - (uint16_t)stream_pstate_meta->method_vactive.common.allow_start_otg_vline; - base_programming->allow_end_otg_vline = - (uint16_t)stream_pstate_meta->method_vactive.common.allow_end_otg_vline; - base_programming->config.bits.clamp_vtotal_min = true; - break; - case dml2_pstate_method_vblank: - case dml2_pstate_method_fw_vblank_drr: - /* legacy vblank */ - base_programming->type = FAMS2_STREAM_TYPE_VBLANK; - base_programming->allow_start_otg_vline = - (uint16_t)stream_pstate_meta->method_vblank.common.allow_start_otg_vline; - base_programming->allow_end_otg_vline = - (uint16_t)stream_pstate_meta->method_vblank.common.allow_end_otg_vline; - base_programming->config.bits.clamp_vtotal_min = true; - break; - case dml2_pstate_method_fw_drr: - /* drr */ - base_programming->type = FAMS2_STREAM_TYPE_DRR; - sub_programming->drr.programming_delay_otg_vlines = - (uint8_t)stream_pstate_meta->method_drr.programming_delay_otg_vlines; - sub_programming->drr.nom_stretched_vtotal = - (uint16_t)stream_pstate_meta->method_drr.stretched_vtotal; - base_programming->allow_start_otg_vline = - (uint16_t)stream_pstate_meta->method_drr.common.allow_start_otg_vline; - base_programming->allow_end_otg_vline = - (uint16_t)stream_pstate_meta->method_drr.common.allow_end_otg_vline; - /* drr only clamps to vtotal min for single display */ - base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1; - sub_programming->drr.only_stretch_if_required = true; - break; - case dml2_pstate_method_fw_svp: - case dml2_pstate_method_fw_svp_drr: - /* subvp */ - base_programming->type = FAMS2_STREAM_TYPE_SUBVP; - sub_programming->subvp.vratio_numerator = - (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0); - sub_programming->subvp.vratio_denominator = 1000; - sub_programming->subvp.programming_delay_otg_vlines = - (uint8_t)stream_pstate_meta->method_subvp.programming_delay_otg_vlines; - sub_programming->subvp.prefetch_to_mall_otg_vlines = - (uint8_t)stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines; - sub_programming->subvp.phantom_vtotal = - (uint16_t)stream_pstate_meta->method_subvp.phantom_vtotal; - sub_programming->subvp.phantom_vactive = - (uint16_t)stream_pstate_meta->method_subvp.phantom_vactive; - sub_programming->subvp.config.bits.is_multi_planar = - plane_descriptor->surface.plane1.height > 0; - sub_programming->subvp.config.bits.is_yuv420 = - plane_descriptor->pixel_format == dml2_420_8 || - plane_descriptor->pixel_format == dml2_420_10 || - plane_descriptor->pixel_format == 
dml2_420_12; - - base_programming->allow_start_otg_vline = - (uint16_t)stream_pstate_meta->method_subvp.common.allow_start_otg_vline; - base_programming->allow_end_otg_vline = - (uint16_t)stream_pstate_meta->method_subvp.common.allow_end_otg_vline; - base_programming->config.bits.clamp_vtotal_min = true; - break; - case dml2_pstate_method_reserved_hw: - case dml2_pstate_method_reserved_fw: - case dml2_pstate_method_reserved_fw_drr_clamped: - case dml2_pstate_method_reserved_fw_drr_var: - case dml2_pstate_method_na: - case dml2_pstate_method_count: - default: - /* this should never happen */ - break; - } -} - -void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx) -{ - unsigned int n; - - out->num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, plane_idx); - out->num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, plane_idx); - out->shift_granularity.p0 = dml_get_plane_mcache_shift_granularity_plane0(mode_lib, plane_idx); - out->shift_granularity.p1 = dml_get_plane_mcache_shift_granularity_plane1(mode_lib, plane_idx); - - for (n = 0; n < out->num_mcaches_plane0; n++) - out->mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, plane_idx, n); - - for (n = 0; n < out->num_mcaches_plane1; n++) - out->mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, plane_idx, n); - - out->last_slice_sharing.mall_comb_mcache_p0 = dml_get_plane_mall_comb_mcache_l(mode_lib, plane_idx); - out->last_slice_sharing.mall_comb_mcache_p1 = dml_get_plane_mall_comb_mcache_c(mode_lib, plane_idx); - out->last_slice_sharing.plane0_plane1 = dml_get_plane_lc_comb_mcache(mode_lib, plane_idx); - out->informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, plane_idx); - out->informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, plane_idx); - - out->valid = true; -} - -void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index) -{ - *out = dml_get_surface_size_in_mall_bytes(mode_lib, pipe_index); -} - -void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx) -{ - out->mall_svp_size_requirement_ways = 0; - - out->nominal_vblank_pstate_latency_hiding_us = - (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.h_total / - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]); - - out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx]; - - out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx]; - - out->dram_change_vactive_det_fill_delay_us = (unsigned int)math_ceil(mode_lib->ms.dram_change_vactive_det_fill_delay_us[plane_idx]); -} - -void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index) -{ - double phantom_processing_delay_pix; - unsigned int phantom_processing_delay_lines; - unsigned int phantom_min_v_active_lines; - unsigned int phantom_v_active_lines; - unsigned int phantom_v_startup_lines; - 
unsigned int phantom_v_blank_lines; - unsigned int main_v_blank_lines; - unsigned int rem; - - phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) * - ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000)); - phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total); - dml2_core_div_rem(phantom_processing_delay_pix, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total, - &rem); - if (rem) - phantom_processing_delay_lines++; - - phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); - phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) / - display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio); - phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines; - - // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank) - phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1; - main_v_blank_lines = display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_active; - if (phantom_v_blank_lines > main_v_blank_lines) - phantom_v_blank_lines = main_v_blank_lines; - - out->phantom_v_active = phantom_v_active_lines; - // phantom_vtotal = vactive + vblank - out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines; - - out->phantom_min_v_active = phantom_min_v_active_lines; - out->phantom_v_startup = phantom_v_startup_lines; - - out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; -#if defined(__DML_VBA_DEBUG__) - DML_LOG_VERBOSE("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us); - DML_LOG_VERBOSE("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us); - DML_LOG_VERBOSE("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines); - DML_LOG_VERBOSE("DML::%s: vblank_reserved_time_us = %u\n", __func__, out->vblank_reserved_time_us); -#endif -} - -void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out) -{ - unsigned int k, n; - - out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport; - out->informative.mode_support_info.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupport; - out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport; - out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport; - out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport; - out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420; - out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false; - out->informative.mode_support_info.DSC422NativeNotSupported = 
mode_lib->ms.support.DSC422NativeNotSupported; - out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion; - out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated; - out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated; - out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP; - out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink; - out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO; - out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport; - out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport; - out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support; - out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport; - out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport; - out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport; - out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface; - out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false; - out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe; - out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen; - out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState; - out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize; - out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits; - out->informative.mode_support_info.temp_read_or_ppt_support = mode_lib->ms.support.temp_read_or_ppt_support; - out->informative.mode_support_info.g6_temp_read_support = mode_lib->ms.support.g6_temp_read_support; - - out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots; - out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits; - out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices; - out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport; - out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported; - out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported; - out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport; - - out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport; - out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport; - out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = 
mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance; - out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded; - out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded; - - out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport; - out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport; - out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport; - - out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported; - out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported; - out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported; - out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support; - out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport; - out->informative.mode_support_info.NumberOfTDLUT33cubeSupport = mode_lib->ms.support.NumberOfTDLUT33cubeSupport; - out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport; - out->informative.mode_support_info.qos_bandwidth_support = mode_lib->ms.support.qos_bandwidth_support; - out->informative.mode_support_info.dcfclk_support = mode_lib->ms.support.dcfclk_support; - - for (k = 0; k < out->display_config.num_planes; k++) { - - out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k]; - out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k]; - out->informative.mode_support_info.ODMMode[k] = mode_lib->ms.support.ODMMode[k]; - out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k]; - out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k]; - out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k]; - out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k]; - out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k]; - - if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown) - out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown; - else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp) - out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp; - else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp) - out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp; - else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0) - out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0; - else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi) - out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi; - else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl) - out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl; - - if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_unknown) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown; - else if 
(mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr20) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_8x4; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_16x4) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_16x4; - else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_20x4) - out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_20x4; - - out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k]; - out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k]; - } - - out->informative.watermarks.urgent_us = dml_get_wm_urgent(mode_lib); - out->informative.watermarks.writeback_urgent_us = dml_get_wm_writeback_urgent(mode_lib); - out->informative.watermarks.writeback_pstate_us = dml_get_wm_writeback_dram_clock_change(mode_lib); - out->informative.watermarks.writeback_fclk_pstate_us = dml_get_wm_writeback_fclk_change(mode_lib); - - out->informative.watermarks.cstate_exit_us = dml_get_wm_stutter_exit(mode_lib); - out->informative.watermarks.cstate_enter_plus_exit_us = dml_get_wm_stutter_enter_exit(mode_lib); - out->informative.watermarks.z8_cstate_exit_us = dml_get_wm_z8_stutter_exit(mode_lib); - out->informative.watermarks.z8_cstate_enter_plus_exit_us = dml_get_wm_z8_stutter_enter_exit(mode_lib); - out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib); - 
out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib); - out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib); - out->informative.watermarks.temp_read_or_ppt_watermark_us = dml_get_wm_temp_read_or_ppt(mode_lib); - - out->informative.mall.total_surface_size_in_mall_bytes = 0; - out->informative.dpp.total_num_dpps_required = 0; - for (k = 0; k < out->display_config.num_planes; ++k) { - out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k]; - out->informative.dpp.total_num_dpps_required += mode_lib->mp.NoOfDPP[k]; - } - - out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk; - out->informative.qos.urgent_latency_us = dml_get_urgent_latency(mode_lib); - - out->informative.qos.max_urgent_latency_us = dml_get_max_urgent_latency_us(mode_lib); - out->informative.qos.avg_non_urgent_latency_us = dml_get_avg_non_urgent_latency_us(mode_lib); - out->informative.qos.avg_urgent_latency_us = dml_get_avg_urgent_latency_us(mode_lib); - - out->informative.qos.wm_memory_trip_us = dml_get_wm_memory_trip(mode_lib); - out->informative.qos.meta_trip_memory_us = dml_get_meta_trip_memory_us(mode_lib); - out->informative.qos.fraction_of_urgent_bandwidth = dml_get_fraction_of_urgent_bandwidth(mode_lib); - out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(mode_lib); - out->informative.qos.fraction_of_urgent_bandwidth_mall = dml_get_fraction_of_urgent_bandwidth_mall(mode_lib); - - out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_required_sdp(mode_lib); - out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_required_dram(mode_lib); - out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_required_sdp(mode_lib); - out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_required_dram(mode_lib); - - out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_available_sdp(mode_lib); - out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_available_dram(mode_lib); - out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_available_sdp(mode_lib); - out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_available_dram(mode_lib); - - out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_available_sdp(mode_lib); - out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_available_dram(mode_lib); - out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps = dml_get_sys_active_urg_bw_available_dram_vm_only(mode_lib); - - out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_available_sdp(mode_lib); - out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram(mode_lib); - out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram_vm_only(mode_lib); - - out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp(mode_lib); - out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram(mode_lib); - 
out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp(mode_lib); - out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram(mode_lib); - - out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp(mode_lib); - out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram(mode_lib); - out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp(mode_lib); - out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram(mode_lib); - - out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp_flip(mode_lib); - out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram_flip(mode_lib); - out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp_flip(mode_lib); - out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram_flip(mode_lib); - - out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp_flip(mode_lib); - out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram_flip(mode_lib); - out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp_flip(mode_lib); - out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram_flip(mode_lib); - - out->informative.crb.comp_buffer_size_kbytes = dml_get_comp_buffer_size_kbytes(mode_lib); - out->informative.crb.UnboundedRequestEnabled = dml_get_unbounded_request_enabled(mode_lib); - - out->informative.crb.compbuf_reserved_space_64b = dml_get_compbuf_reserved_space_64b(mode_lib); - out->informative.misc.hw_debug5 = dml_get_hw_debug5(mode_lib); - out->informative.misc.dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib); - - out->informative.power_management.stutter_efficiency = dml_get_stutter_efficiency_no_vblank(mode_lib); - out->informative.power_management.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib); - out->informative.power_management.stutter_num_bursts = dml_get_stutter_num_bursts(mode_lib); - - out->informative.power_management.z8.stutter_efficiency = dml_get_stutter_efficiency_z8(mode_lib); - out->informative.power_management.z8.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib); - out->informative.power_management.z8.stutter_num_bursts = dml_get_stutter_num_bursts_z8(mode_lib); - out->informative.power_management.z8.stutter_period = dml_get_stutter_period(mode_lib); - - out->informative.power_management.z8.bestcase.stutter_efficiency = dml_get_stutter_efficiency_z8_bestcase(mode_lib); - out->informative.power_management.z8.bestcase.stutter_num_bursts = dml_get_stutter_num_bursts_z8_bestcase(mode_lib); - out->informative.power_management.z8.bestcase.stutter_period = dml_get_stutter_period_bestcase(mode_lib); - - out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib); - - out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib); - - 
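The per-plane loop that follows derives an informative PrefetchMode from whether the plane's reserved vblank time covers the SoC blackout latencies. Restated as a hypothetical standalone helper (name invented; units as in the original, reserved time in ns and latencies in us), the ladder is:

static int classify_prefetch_mode(double reserved_vblank_ns,
				  double dram_blackout_us,
				  double fclk_blackout_us,
				  double stutter_enter_plus_exit_us)
{
	if (reserved_vblank_ns >= 1000.0 * dram_blackout_us &&
	    reserved_vblank_ns >= 1000.0 * fclk_blackout_us &&
	    reserved_vblank_ns >= 1000.0 * stutter_enter_plus_exit_us)
		return 0;	/* vblank hides DRAM p-state, FCLK p-state and stutter */
	if (reserved_vblank_ns >= 1000.0 * fclk_blackout_us &&
	    reserved_vblank_ns >= 1000.0 * stutter_enter_plus_exit_us)
		return 1;	/* FCLK p-state and stutter only */
	if (reserved_vblank_ns >= 1000.0 * stutter_enter_plus_exit_us)
		return 2;	/* stutter only */
	return 3;		/* none of the latencies fit in the reserved time */
}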
out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib); - - out->informative.misc.LowestPrefetchMargin = 10 * 1000 * 1000; - - for (k = 0; k < out->display_config.num_planes; k++) { - - if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us) - && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us) - && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)) - out->informative.misc.PrefetchMode[k] = 0; - else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us) - && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)) - out->informative.misc.PrefetchMode[k] = 1; - else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us) - out->informative.misc.PrefetchMode[k] = 2; - else - out->informative.misc.PrefetchMode[k] = 3; - - out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k]; - out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k]; - out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k]; - out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k]; - out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k]; - out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k]; - out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k]; - out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k]; - out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k]; - out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k]; - out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k]; - out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k]; - out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k]; - out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k]; - out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k]; - out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k]; - out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k]; - out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k]; - out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k]; - out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k]; - out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]; - out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k]; - out->informative.plane_info[k].swath_width_plane0 = 
mode_lib->mp.SwathWidthY[k]; - out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k]; - out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k]; - out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k]; - out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k]; - out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k]; - out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k]; - out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k]; - out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k]; - out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k]; - out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k]; - out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k]; - out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k]; - out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k]; - out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k]; - out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k]; - out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k]; - out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k]; - out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k]; - out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k]; - out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k]; - out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k]; - - out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k]; - out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k]; - out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k]; - out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k]; - out->informative.misc.time_per_pte_group_vblank_luma[k] = mode_lib->mp.time_per_pte_group_vblank_luma[k]; - out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k]; - out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k]; - out->informative.misc.time_per_pte_group_flip_chroma[k] = mode_lib->mp.time_per_pte_group_flip_chroma[k]; - out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k]; - out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k]; - out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k]; - out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k]; - out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k]; - out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k]; - out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k]; - out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k]; - out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = 
mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k]; - out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k]; - out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k]; - - out->informative.misc.WritebackRequiredBandwidth = mode_lib->mp.TotalWRBandwidth / 1000.0; - out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k]; - out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k]; - out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k]; - out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k]; - out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k]; - out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k]; - out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k]; - - if (mode_lib->mp.impacted_prefetch_margin_us[k] < out->informative.misc.LowestPrefetchMargin) - out->informative.misc.LowestPrefetchMargin = mode_lib->mp.impacted_prefetch_margin_us[k]; - } - - // For this DV informative layer, all pipes in the same planes will just use the same id - // will have the optimization and helper layer later on - // only work when we can have high "mcache" that fit everything without thrashing the cache - for (k = 0; k < out->display_config.num_planes; k++) { - out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, k); - out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, k); - - for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) { - out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, k, n); - out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k; - } - - out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, k); - out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, k); - - for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) { - out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, k, n); - out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k; - } - } - out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib); - - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { - if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 - / mode_lib->ms.support.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { - out->informative.misc.ROBUrgencyAvoidance = true; - } else { - out->informative.misc.ROBUrgencyAvoidance = false; - } - } else { - out->informative.misc.ROBUrgencyAvoidance = true; - } -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h 
b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h deleted file mode 100644 index 27ef0e096b25..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_CORE_DCN4_CALCS_H__ -#define __DML2_CORE_DCN4_CALCS_H__ - -#include "dml2_core_shared_types.h" - -struct dml2_dchub_watermark_regs; -struct dml2_display_arb_regs; -struct dml2_per_stream_programming; -struct dml2_dchub_per_pipe_register_set; -struct core_plane_support_info; -struct core_stream_support_info; -struct dml2_cursor_dlg_regs; -struct display_configuation_with_meta; - -unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params); -bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params); -void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out); -void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out); -void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *dml2_display_cfg, struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_per_pipe_register_set *out, int pipe_index); -void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index); -void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index); -void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_index); -void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_index); -void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out); -void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index); -void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index); -void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, union dmub_cmd_fams2_config *fams2_base_programming, union dmub_cmd_fams2_config *fams2_sub_programming, enum dml2_pstate_method pstate_method, int plane_index); -void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config); - -void dml2_core_calcs_get_dpte_row_height(unsigned int *dpte_row_height, struct dml2_core_internal_display_mode_lib *mode_lib, bool is_plane1, enum dml2_source_format_class SourcePixelFormat, enum dml2_swizzle_mode SurfaceTiling, enum dml2_rotation_angle ScanDirection, unsigned int pitch, unsigned int 
GPUVMMinPageSizeKBytes); -void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p); -const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); -const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c deleted file mode 100644 index cc4f0663c6d6..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dml2_core_factory.h" -#include "dml2_core_dcn4.h" -#include "dml2_external_lib_deps.h" - -bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance *out) -{ - bool result = false; - - if (!out) - return false; - - memset(out, 0, sizeof(struct dml2_core_instance)); - - out->project_id = project_id; - - switch (project_id) { - case dml2_project_dcn4x_stage1: - result = false; - break; - case dml2_project_dcn4x_stage2: - case dml2_project_dcn4x_stage2_auto_drr_svp: - out->initialize = &core_dcn4_initialize; - out->mode_support = &core_dcn4_mode_support; - out->mode_programming = &core_dcn4_mode_programming; - out->populate_informative = &core_dcn4_populate_informative; - out->calculate_mcache_allocation = &core_dcn4_calculate_mcache_allocation; - result = true; - break; - case dml2_project_invalid: - default: - break; - } - - return result; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h deleted file mode 100644 index 411c514fe65c..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_CORE_FACTORY_H__ -#define __DML2_CORE_FACTORY_H__ - -#include "dml2_internal_shared_types.h" -#include "dml_top_types.h" - -bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance *out); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h deleted file mode 100644 index 051c31ec2f0e..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ /dev/null @@ -1,2335 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
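For context on the factory deleted above: dml2_core_create() zeroes the instance, records the project id, and wires the initialize/mode_support/mode_programming/populate_informative/calculate_mcache_allocation entry points only for the dcn4x stage2 projects, returning false for everything else. A minimal caller sketch (hypothetical helper name; it assumes only the fields visible in the hunk above):

    #include "dml2_core_factory.h"

    /* Hypothetical: probe whether a project id has a usable core backend. */
    static bool core_backend_available(enum dml2_project_id project_id)
    {
            struct dml2_core_instance core;

            /* Per the hunk above, only dml2_project_dcn4x_stage2 and
             * dml2_project_dcn4x_stage2_auto_drr_svp populate the ops;
             * every other id makes dml2_core_create() return false. */
            if (!dml2_core_create(project_id, &core))
                    return false;

            return core.initialize && core.mode_support && core.mode_programming;
    }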
- -#ifndef __DML2_CORE_SHARED_TYPES_H__ -#define __DML2_CORE_SHARED_TYPES_H__ - -#include "dml2_external_lib_deps.h" -#include "dml_top_display_cfg_types.h" -#include "dml_top_types.h" - -#define __DML_VBA_DEBUG__ -#define __DML2_CALCS_MAX_VRATIO_PRE_OTO__ 4.0 // 0); - return dividend / divisor; - -} - -const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) -{ - switch (bw_type) { - case (dml2_core_internal_bw_sdp): - return("dml2_core_internal_bw_sdp"); - case (dml2_core_internal_bw_dram): - return("dml2_core_internal_bw_dram"); - case (dml2_core_internal_bw_max): - return("dml2_core_internal_bw_max"); - default: - return("dml2_core_internal_bw_unknown"); - } -} - -bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) -{ - bool val = false; - - switch (source_format) { - case dml2_444_8: - val = 0; - break; - case dml2_444_16: - val = 0; - break; - case dml2_444_32: - val = 0; - break; - case dml2_444_64: - val = 0; - break; - case dml2_420_8: - val = 1; - break; - case dml2_420_10: - val = 1; - break; - case dml2_420_12: - val = 1; - break; - case dml2_rgbe_alpha: - val = 0; - break; - case dml2_rgbe: - val = 0; - break; - case dml2_mono_8: - val = 0; - break; - case dml2_mono_16: - val = 0; - break; - case dml2_422_planar_8: - val = 0; - break; - case dml2_422_planar_10: - val = 0; - break; - case dml2_422_planar_12: - val = 0; - break; - case dml2_422_packed_8: - val = 0; - break; - case dml2_422_packed_10: - val = 0; - break; - case dml2_422_packed_12: - val = 0; - break; - default: - DML_ASSERT(0); - break; - } - return val; -} - -bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format) -{ - bool val = false; - - switch (source_format) { - case dml2_444_8: - val = 0; - break; - case dml2_444_16: - val = 0; - break; - case dml2_444_32: - val = 0; - break; - case dml2_444_64: - val = 0; - break; - case dml2_420_8: - val = 0; - break; - case dml2_420_10: - val = 0; - break; - case dml2_420_12: - val = 0; - break; - case dml2_rgbe_alpha: - val = 0; - break; - case dml2_rgbe: - val = 0; - break; - case dml2_mono_8: - val = 0; - break; - case dml2_mono_16: - val = 0; - break; - case dml2_422_planar_8: - val = 1; - break; - case dml2_422_planar_10: - val = 1; - break; - case dml2_422_planar_12: - val = 1; - break; - case dml2_422_packed_8: - val = 0; - break; - case dml2_422_packed_10: - val = 0; - break; - case dml2_422_packed_12: - val = 0; - break; - default: - DML_ASSERT(0); - break; - } - return val; -} - -bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format) -{ - bool val = false; - - switch (source_format) { - case dml2_444_8: - val = 0; - break; - case dml2_444_16: - val = 0; - break; - case dml2_444_32: - val = 0; - break; - case dml2_444_64: - val = 0; - break; - case dml2_420_8: - val = 0; - break; - case dml2_420_10: - val = 0; - break; - case dml2_420_12: - val = 0; - break; - case dml2_rgbe_alpha: - val = 0; - break; - case dml2_rgbe: - val = 0; - break; - case dml2_mono_8: - val = 0; - break; - case dml2_mono_16: - val = 0; - break; - case dml2_422_planar_8: - val = 0; - break; - case dml2_422_planar_10: - val = 0; - break; - case dml2_422_planar_12: - val = 0; - break; - case dml2_422_packed_8: - val = 1; - break; - case dml2_422_packed_10: - val = 1; - break; - case dml2_422_packed_12: - val = 1; - break; - default: - DML_ASSERT(0); - break; - } - return val; -} - -void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool 
fail_only) -{ - DML_LOG_VERBOSE("DML: ===================================== \n"); - DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); - if (!fail_only || support->ScaleRatioAndTapsSupport == 0) - DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); - if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) - DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); - if (!fail_only || support->ViewportSizeSupport == 0) - DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); - if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) - DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); - if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) - DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); - if (!fail_only || support->BPPForMultistreamNotIndicated == 1) - DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); - if (!fail_only || support->MultistreamWithHDMIOreDP == 1) - DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); - if (!fail_only || support->ExceededMultistreamSlots == 1) - DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); - if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) - DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); - if (!fail_only || support->NotEnoughLanesForMSO == 1) - DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); - if (!fail_only || support->P2IWith420 == 1) - DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); - if (!fail_only || support->DSC422NativeNotSupported == 1) - DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); - if (!fail_only || support->DSCSlicesODMModeSupported == 0) - DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); - if (!fail_only || support->NotEnoughDSCUnits == 1) - DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); - if (!fail_only || support->NotEnoughDSCSlices == 1) - DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); - if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); - if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); - if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) - DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); - if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); - if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) - DML_LOG_VERBOSE("DML: support: 
DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); - if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); - if (!fail_only || support->ROBSupport == 0) - DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); - if (!fail_only || support->OutstandingRequestsSupport == 0) - DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); - if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) - DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); - if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) - DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); - if (!fail_only || support->TotalAvailablePipesSupport == 0) - DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); - if (!fail_only || support->NumberOfOTGSupport == 0) - DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); - if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); - if (!fail_only || support->NumberOfDP2p0Support == 0) - DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); - if (!fail_only || support->EnoughWritebackUnits == 0) - DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); - if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); - if (!fail_only || support->WritebackLatencySupport == 0) - DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); - if (!fail_only || support->CursorSupport == 0) - DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); - if (!fail_only || support->PitchSupport == 0) - DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); - if (!fail_only || support->ViewportExceedsSurface == 1) - DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); - if (!fail_only || support->PrefetchSupported == 0) - DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); - if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); - if (!fail_only || support->AvgBandwidthSupport == 0) - DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); - if (!fail_only || support->DynamicMetadataSupported == 0) - DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); - if (!fail_only || support->VRatioInPrefetchSupported == 0) - DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); - if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); - if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) - DML_LOG_VERBOSE("DML: support: 
DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); - if (!fail_only || support->ExceededMALLSize == 1) - DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); - if (!fail_only || support->g6_temp_read_support == 0) - DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); - if (!fail_only || support->ImmediateFlipSupport == 0) - DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); - if (!fail_only || support->LinkCapacitySupport == 0) - DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); - - if (!fail_only || support->ModeSupport == 0) - DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport); - DML_LOG_VERBOSE("DML: ===================================== \n"); -} - -const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) -{ - switch (dml2_core_internal_soc_state_type) { - case (dml2_core_internal_soc_state_sys_idle): - return("dml2_core_internal_soc_state_sys_idle"); - case (dml2_core_internal_soc_state_sys_active): - return("dml2_core_internal_soc_state_sys_active"); - case (dml2_core_internal_soc_state_svp_prefetch): - return("dml2_core_internal_soc_state_svp_prefetch"); - case dml2_core_internal_soc_state_max: - default: - return("dml2_core_internal_soc_state_unknown"); - } -} - - -void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) -{ - for (unsigned int k = 0; k < display_cfg->num_planes; k++) { - double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { - switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { - case dml2_444: - out_bpp[k] = bpc * 3; - break; - case dml2_s422: - out_bpp[k] = bpc * 2; - break; - case dml2_n422: - out_bpp[k] = bpc * 2; - break; - case dml2_420: - default: - out_bpp[k] = bpc * 1.5; - break; - } - } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { - out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; - } else { - out_bpp[k] = 0; - } -#ifdef __DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); - DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); - DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); -#endif - } -} - -unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up) -{ - unsigned int remainder; - - if (multiple == 0) - return num; - - remainder = num % multiple; - if (remainder == 0) - return num; - - if (up) - return (num + multiple - remainder); - else - return (num - remainder); -} - -unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) -{ - unsigned int num_active_pipes = 0; - - for (unsigned int k = 0; k < num_planes; k++) { - num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; - } - -#ifdef 
__DML_VBA_DEBUG__ - DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); -#endif - return num_active_pipes; -} - -void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) -{ - unsigned int pipe_idx = 0; - - for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { - pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; - } - - for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { - for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { - pipe_plane[pipe_idx] = plane_idx; - pipe_idx++; - } - } -} - -bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) -{ - bool is_phantom = false; - - if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || - plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { - is_phantom = true; - } - - return is_phantom; -} - -unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) -{ - - if (sw_mode == dml2_sw_linear) - return 256; - else if (sw_mode == dml2_sw_256b_2d) - return 256; - else if (sw_mode == dml2_sw_4kb_2d) - return 4096; - else if (sw_mode == dml2_sw_64kb_2d) - return 65536; - else if (sw_mode == dml2_sw_256kb_2d) - return 262144; - else if (sw_mode == dml2_gfx11_sw_linear) - return 256; - else if (sw_mode == dml2_gfx11_sw_64kb_d) - return 65536; - else if (sw_mode == dml2_gfx11_sw_64kb_d_t) - return 65536; - else if (sw_mode == dml2_gfx11_sw_64kb_d_x) - return 65536; - else if (sw_mode == dml2_gfx11_sw_64kb_r_x) - return 65536; - else if (sw_mode == dml2_gfx11_sw_256kb_d_x) - return 262144; - else if (sw_mode == dml2_gfx11_sw_256kb_r_x) - return 262144; - else { - DML_ASSERT(0); - return 256; - }; -} - -bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) -{ - return (byte_per_pixel != 2); -} - -bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode) -{ - return sw_mode == dml2_sw_linear; -}; - - -bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan) -{ - bool is_vert = false; - if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { - is_vert = true; - } else { - is_vert = false; - } - return is_vert; -} - -int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) -{ - int unsigned version = 0; - - if (sw_mode == dml2_sw_linear || - sw_mode == dml2_sw_256b_2d || - sw_mode == dml2_sw_4kb_2d || - sw_mode == dml2_sw_64kb_2d || - sw_mode == dml2_sw_256kb_2d) - version = 12; - else if (sw_mode == dml2_gfx11_sw_linear || - sw_mode == dml2_gfx11_sw_64kb_d || - sw_mode == dml2_gfx11_sw_64kb_d_t || - sw_mode == dml2_gfx11_sw_64kb_d_x || - sw_mode == dml2_gfx11_sw_64kb_r_x || - sw_mode == dml2_gfx11_sw_256kb_d_x || - sw_mode == dml2_gfx11_sw_256kb_r_x) - version = 11; - else { - DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! 
val=%u\n", sw_mode); - DML_ASSERT(0); - } - - return version; -} - -unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params) -{ - unsigned int i; - unsigned int index = 0; - - for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { - DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); - - if (i == 0) - index = 0; - else - index = i - 1; - - if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz || - per_uclk_dpm_params[i].minimum_uclk_khz == 0) { - break; - } - } -#if defined(__DML_VBA_DEBUG__) - DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); - DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index); -#endif - return index; -} - -unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) -{ - unsigned int i; - bool clk_entry_found = false; - - for (i = 0; i < clk_table->uclk.num_clk_values; i++) { - DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]); - - if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { - clk_entry_found = true; - break; - } - } - - if (!clk_entry_found) - DML_ASSERT(clk_entry_found); -#if defined(__DML_VBA_DEBUG__) - DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); - DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i); -#endif - return i; -} - -bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format) -{ - bool ret_val = false; - - if (dml2_core_utils_is_420(source_format) || dml2_core_utils_is_422_planar(source_format) || (source_format == dml2_rgbe_alpha)) - ret_val = true; - - return ret_val; -} - -unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) -{ - if (a == 0) - return 0; - - return (math_log2_approx(a) - subtrahend); -} - -static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, - const struct dml2_implicit_svp_meta *meta) -{ - memcpy(phantom, main, sizeof(struct dml2_stream_parameters)); - - phantom->timing.v_total = meta->v_total; - phantom->timing.v_active = meta->v_active; - phantom->timing.v_front_porch = meta->v_front_porch; - phantom->timing.v_blank_end = phantom->timing.v_total - phantom->timing.v_front_porch - phantom->timing.v_active; - phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; - phantom->timing.drr_config.enabled = false; -} - -static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, - const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) -{ - memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); - - phantom->stream_index = phantom_stream_index; - phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; - phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; - phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( - (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), - (double)main->composition.viewport.plane0.height); - phantom->composition.viewport.plane1.height = (long int unsigned) 
math_min2(math_ceil2( - (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), - (double)main->composition.viewport.plane1.height); - phantom->immediate_flip = false; - phantom->dynamic_meta_data.enable = false; - phantom->cursor.num_cursors = 0; - phantom->cursor.cursor_width = 0; - phantom->tdlut.setup_for_tdlut = false; -} - -void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, - struct dml2_core_scratch *scratch) -{ - unsigned int stream_index, plane_index; - const struct dml2_plane_parameters *main_plane; - const struct dml2_stream_parameters *main_stream; - const struct dml2_stream_parameters *phantom_stream; - - memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); - memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); - memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); - memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); - - if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) - return; - - /* disable unbounded requesting for all planes until stage 3 has been performed */ - if (!display_cfg->stage3.performed) { - svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true; - svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false; - } - // Create the phantom streams - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - main_stream = &display_cfg->display_config.stream_descriptors[stream_index]; - scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; - scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index; - - if (display_cfg->stage3.stream_svp_meta[stream_index].valid) { - // Create the phantom stream - create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], - main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]); - - // Associate this phantom stream to the main stream - scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index; - scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams; - - // Increment num streams - svp_expanded_display_cfg->num_streams++; - } - } - - // Create the phantom planes - for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { - main_plane = &display_cfg->display_config.plane_descriptors[plane_index]; - - if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { - main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index]; - phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]]; - create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], - main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream); - - // Associate this phantom plane to the main plane - scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index; - scratch->main_plane_index_to_phantom_plane_index[plane_index] = 
svp_expanded_display_cfg->num_planes; - - // Increment num planes - svp_expanded_display_cfg->num_planes++; - - // Adjust the main plane settings - svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe; - } - } -} - -bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor) -{ - switch (stream_descriptor->output.output_encoder) { - case dml2_dp: - case dml2_dp2p0: - case dml2_edp: - case dml2_hdmi: - case dml2_hdmifrl: - return true; - case dml2_none: - default: - return false; - } -} -bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor) -{ - switch (stream_descriptor->output.output_encoder) { - case dml2_dp: - case dml2_dp2p0: - case dml2_edp: - case dml2_hdmifrl: - return true; - case dml2_hdmi: - case dml2_none: - default: - return false; - } -} - - -bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) -{ - switch (stream_descriptor->output.output_encoder) { - case dml2_dp: - case dml2_edp: - return true; - case dml2_dp2p0: - case dml2_hdmi: - case dml2_hdmifrl: - case dml2_none: - default: - return false; - } -} - -bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) -{ - switch (stream_descriptor->output.output_encoder) { - case dml2_dp2p0: - return true; - case dml2_dp: - case dml2_edp: - case dml2_hdmi: - case dml2_hdmifrl: - case dml2_none: - default: - return false; - } -} - -bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) -{ - return dml2_core_utils_is_dio_dp_encoder(stream_descriptor) - || dml2_core_utils_is_hpo_dp_encoder(stream_descriptor); -} - - -bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate) -{ - switch (rate) { - case dml2_dp_rate_hbr: - case dml2_dp_rate_hbr2: - case dml2_dp_rate_hbr3: - return true; - case dml2_dp_rate_na: - case dml2_dp_rate_uhbr10: - case dml2_dp_rate_uhbr13p5: - case dml2_dp_rate_uhbr20: - default: - return false; - } -} - -bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate) -{ - switch (rate) { - case dml2_dp_rate_uhbr10: - case dml2_dp_rate_uhbr13p5: - case dml2_dp_rate_uhbr20: - return true; - case dml2_dp_rate_hbr: - case dml2_dp_rate_hbr2: - case dml2_dp_rate_hbr3: - case dml2_dp_rate_na: - default: - return false; - } -} - -bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode) -{ - switch (odm_mode) { - case dml2_odm_mode_split_1to2: - case dml2_odm_mode_mso_1to2: - case dml2_odm_mode_mso_1to4: - return true; - case dml2_odm_mode_auto: - case dml2_odm_mode_bypass: - case dml2_odm_mode_combine_2to1: - case dml2_odm_mode_combine_3to1: - case dml2_odm_mode_combine_4to1: - default: - return false; - } -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h deleted file mode 100644 index 95f0d017add4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
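The dml2_core_utils_is_dp_8b_10b_link_rate()/dml2_core_utils_is_dp_128b_132b_link_rate() predicates removed above partition the DP rates by channel coding: HBR/HBR2/HBR3 use 8b/10b, UHBR10/13.5/20 use 128b/132b. A small sketch of how such predicates compose (hypothetical function name; the 8/10 and 128/132 figures are the standard DP coding efficiencies, not values taken from this file):

    /* Hypothetical: fraction of raw link bits that carry payload data. */
    static double dp_coding_efficiency(enum dml2_output_link_dp_rate rate)
    {
            if (dml2_core_utils_is_dp_8b_10b_link_rate(rate))
                    return 8.0 / 10.0;      /* dml2_dp_rate_hbr, _hbr2, _hbr3 */
            if (dml2_core_utils_is_dp_128b_132b_link_rate(rate))
                    return 128.0 / 132.0;   /* dml2_dp_rate_uhbr10, _uhbr13p5, _uhbr20 */
            return 0.0;                     /* dml2_dp_rate_na / unknown */
    }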
- -#ifndef __DML2_CORE_UTILS_H__ -#define __DML2_CORE_UTILS_H__ -#include "dml2_internal_shared_types.h" -#include "dml2_debug.h" -#include "lib_float_math.h" - -double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder); -const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); -bool dml2_core_utils_is_420(enum dml2_source_format_class source_format); -bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format); -bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format); -void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only); -const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); -void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg); -unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up); -unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info); -void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane); -bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg); -unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); -bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); -bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan); -bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode); -int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode); -unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params); -unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table); -bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format); -unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend); -void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, - struct dml2_core_scratch *scratch); -bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor); -bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor); -bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); -bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); -bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); -bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate); -bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate); -bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode); - -#endif /* __DML2_CORE_UTILS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c deleted file mode 100644 index 22969a533a7b..000000000000 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ /dev/null @@ -1,785 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dml2_dpmm_dcn4.h" -#include "dml2_internal_shared_types.h" -#include "dml_top_types.h" -#include "lib_float_math.h" - -static double dram_bw_kbps_to_uclk_khz(unsigned long long bandwidth_kbps, const struct dml2_dram_params *dram_config) -{ - double uclk_khz = 0; - unsigned long uclk_mbytes_per_tick = 0; - - uclk_mbytes_per_tick = dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock; - - uclk_khz = (double)bandwidth_kbps / uclk_mbytes_per_tick; - - return uclk_khz; -} - -static void get_minimum_clocks_for_latency(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, - double *uclk, - double *fclk, - double *dcfclk) -{ - int min_clock_index_for_latency; - - if (in_out->display_cfg->stage3.success) - min_clock_index_for_latency = in_out->display_cfg->stage3.min_clk_index_for_latency; - else - min_clock_index_for_latency = in_out->display_cfg->stage1.min_clk_index_for_latency; - - *dcfclk = in_out->min_clk_table->dram_bw_table.entries[min_clock_index_for_latency].min_dcfclk_khz; - *fclk = in_out->min_clk_table->dram_bw_table.entries[min_clock_index_for_latency].min_fclk_khz; - *uclk = dram_bw_kbps_to_uclk_khz(in_out->min_clk_table->dram_bw_table.entries[min_clock_index_for_latency].pre_derate_dram_bw_kbps, - &in_out->soc_bb->clk_table.dram_config); -} - -static unsigned long dml_round_up(double a) -{ - if (a - (unsigned long)a > 0) { - return ((unsigned long)a) + 1; - } - return (unsigned long)a; -} - -static void calculate_system_active_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) -{ - double min_uclk_avg, min_uclk_urgent, min_uclk_bw; - double min_fclk_avg, min_fclk_urgent, min_fclk_bw; - double min_dcfclk_avg, min_dcfclk_urgent, min_dcfclk_bw; - double min_uclk_latency, min_fclk_latency, min_dcfclk_latency; - const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; - - min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); - min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100); - - min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); - if (in_out->display_cfg->display_config.hostvm_enable) - min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100); - else - min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100); - - min_uclk_bw = min_uclk_urgent > min_uclk_avg ? 
min_uclk_urgent : min_uclk_avg; - - min_fclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; - min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100); - - min_fclk_urgent = (double)mode_support_result->global.active.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; - min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100); - - min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg; - - min_dcfclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; - min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100); - - min_dcfclk_urgent = (double)mode_support_result->global.active.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; - min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100); - - min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg; - - get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - - in_out->programming->min_clocks.dcn4x.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); - in_out->programming->min_clocks.dcn4x.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); - in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); -} - -static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) -{ - double min_uclk_avg, min_uclk_urgent, min_uclk_bw; - double min_fclk_avg, min_fclk_urgent, min_fclk_bw; - double min_dcfclk_avg, min_dcfclk_urgent, min_dcfclk_bw; - double min_fclk_latency, min_dcfclk_latency; - double min_uclk_latency; - const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; - - /* assumes DF throttling is enabled */ - min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); - min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100); - - min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); - min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100); - - min_uclk_bw = min_uclk_urgent > min_uclk_avg ? 
min_uclk_urgent : min_uclk_avg; - - min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; - min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100); - - min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; - min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100); - - min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg; - - min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; - min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100); - - min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; - min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100); - - min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg; - - get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - - in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); - in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); - in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); - - /* assumes DF throttling is disabled */ - min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); - min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100); - - min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); - min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100); - - min_uclk_bw = min_uclk_urgent > min_uclk_avg ? min_uclk_urgent : min_uclk_avg; - - min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; - min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100); - - min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; - min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100); - - min_fclk_bw = min_fclk_urgent > min_fclk_avg ? 
min_fclk_urgent : min_fclk_avg; - - min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; - min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100); - - min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; - min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100); - - min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg; - - get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - - in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); - in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); - in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); -} - -static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) -{ - double min_uclk_avg; - double min_fclk_avg; - double min_dcfclk_avg; - double min_uclk_latency, min_fclk_latency, min_dcfclk_latency; - const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; - - min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); - min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.dram_derate_percent_pixel / 100); - - min_fclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; - min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.fclk_derate_percent / 100); - - min_dcfclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; - min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.dcfclk_derate_percent / 100); - - get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - - in_out->programming->min_clocks.dcn4x.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency); - in_out->programming->min_clocks.dcn4x.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? min_fclk_avg : min_fclk_latency); - in_out->programming->min_clocks.dcn4x.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? 
min_dcfclk_avg : min_dcfclk_latency); -} - -static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double margin, unsigned long vco_freq_khz, unsigned long *rounded_khz, uint32_t *divider_id) -{ - enum dentist_divider_range { - DFS_DIVIDER_RANGE_1_START = 8, /* 2.00 */ - DFS_DIVIDER_RANGE_1_STEP = 1, /* 0.25 */ - DFS_DIVIDER_RANGE_2_START = 64, /* 16.00 */ - DFS_DIVIDER_RANGE_2_STEP = 2, /* 0.50 */ - DFS_DIVIDER_RANGE_3_START = 128, /* 32.00 */ - DFS_DIVIDER_RANGE_3_STEP = 4, /* 1.00 */ - DFS_DIVIDER_RANGE_4_START = 248, /* 62.00 */ - DFS_DIVIDER_RANGE_4_STEP = 264, /* 66.00 */ - DFS_DIVIDER_RANGE_SCALE_FACTOR = 4 - }; - - enum DFS_base_divider_id { - DFS_BASE_DID_1 = 0x08, - DFS_BASE_DID_2 = 0x40, - DFS_BASE_DID_3 = 0x60, - DFS_BASE_DID_4 = 0x7e, - DFS_MAX_DID = 0x7f - }; - - unsigned int divider; - - if (clock_khz < 1 || vco_freq_khz < 1 || clock_khz > vco_freq_khz) - return false; - - clock_khz *= 1.0 + margin; - - divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); - - /* we want to floor here to get higher clock than required rather than lower */ - if (divider < DFS_DIVIDER_RANGE_2_START) { - if (divider < DFS_DIVIDER_RANGE_1_START) - *divider_id = DFS_BASE_DID_1; - else - *divider_id = DFS_BASE_DID_1 + ((divider - DFS_DIVIDER_RANGE_1_START) / DFS_DIVIDER_RANGE_1_STEP); - } else if (divider < DFS_DIVIDER_RANGE_3_START) { - *divider_id = DFS_BASE_DID_2 + ((divider - DFS_DIVIDER_RANGE_2_START) / DFS_DIVIDER_RANGE_2_STEP); - } else if (divider < DFS_DIVIDER_RANGE_4_START) { - *divider_id = DFS_BASE_DID_3 + ((divider - DFS_DIVIDER_RANGE_3_START) / DFS_DIVIDER_RANGE_3_STEP); - } else { - *divider_id = DFS_BASE_DID_4 + ((divider - DFS_DIVIDER_RANGE_4_START) / DFS_DIVIDER_RANGE_4_STEP); - if (*divider_id > DFS_MAX_DID) - *divider_id = DFS_MAX_DID; - } - - *rounded_khz = vco_freq_khz * DFS_DIVIDER_RANGE_SCALE_FACTOR / divider; - - return true; -} - -static bool round_to_non_dfs_granularity(unsigned long dispclk_khz, unsigned long dpprefclk_khz, unsigned long dtbrefclk_khz, - unsigned long *rounded_dispclk_khz, unsigned long *rounded_dpprefclk_khz, unsigned long *rounded_dtbrefclk_khz) -{ - unsigned long pll_frequency_khz; - - pll_frequency_khz = (unsigned long) math_max2(600000, math_ceil2(math_max3(dispclk_khz, dpprefclk_khz, dtbrefclk_khz), 1000)); - - *rounded_dispclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dispclk_khz, 32); - - *rounded_dpprefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dpprefclk_khz, 32); - - if (dtbrefclk_khz > 0) { - *rounded_dtbrefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dtbrefclk_khz, 32); - } else { - *rounded_dtbrefclk_khz = 0; - } - - return true; -} - -static bool round_up_and_copy_to_next_dpm(unsigned long min_value, unsigned long *rounded_value, const struct dml2_clk_table *clock_table) -{ - bool result = false; - int index = 0; - - if (clock_table->num_clk_values > 2) { - while (index < clock_table->num_clk_values && clock_table->clk_values_khz[index] < min_value) - index++; - - if (index < clock_table->num_clk_values) { - *rounded_value = clock_table->clk_values_khz[index]; - result = true; - } - } else if (clock_table->clk_values_khz[clock_table->num_clk_values - 1] >= min_value) { - *rounded_value = min_value; - result = true; - } - return result; -} - -static bool round_up_to_next_dpm(unsigned long *clock_value, const struct dml2_clk_table *clock_table) -{ - return 
round_up_and_copy_to_next_dpm(*clock_value, clock_value, clock_table); -} - -static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table) -{ - bool result; - - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.dcfclk_khz, &state_table->dcfclk); - if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.fclk_khz, &state_table->fclk); - if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.uclk_khz, &state_table->uclk); - - if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz, &state_table->dcfclk); - if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz, &state_table->fclk); - if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz, &state_table->uclk); - - if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.dcfclk_khz, &state_table->dcfclk); - if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.fclk_khz, &state_table->fclk); - if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk); - - /* these clocks are optional, so they can fail to map, in which case map all to 0 */ - if (result) { - if (!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz, &state_table->dcfclk) || - !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz, &state_table->fclk) || - !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz, &state_table->uclk)) { - display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = 0; - display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = 0; - display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = 0; - } - } - - return result; -} - -static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table) -{ - bool result; - int index; - - result = false; - for (index = 0; index < state_table->uclk.num_clk_values; index++) { - if (display_cfg->min_clocks.dcn4x.active.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4x.active.fclk_khz <= state_table->fclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4x.active.uclk_khz <= state_table->uclk.clk_values_khz[index]) { - display_cfg->min_clocks.dcn4x.active.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4x.active.fclk_khz = state_table->fclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4x.active.uclk_khz = state_table->uclk.clk_values_khz[index]; - result = true; - break; - } - } - - if (result) { - result = false; - for (index = 0; index < state_table->uclk.num_clk_values; index++) { - if (display_cfg->min_clocks.dcn4x.idle.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4x.idle.fclk_khz <= state_table->fclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4x.idle.uclk_khz <= state_table->uclk.clk_values_khz[index]) { - display_cfg->min_clocks.dcn4x.idle.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4x.idle.fclk_khz = state_table->fclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4x.idle.uclk_khz = state_table->uclk.clk_values_khz[index]; - result = true; - 
break; - } - } - } - - // SVP is not supported on any coarse grained SoCs - display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = 0; - display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz = 0; - display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz = 0; - - return result; -} - -static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mode_support_result, struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table) -{ - bool result = false; - bool dcfclk_fine_grained = false, fclk_fine_grained = false, clock_state_count_identical = false; - unsigned int i; - - if (!state_table || !display_cfg) - return false; - - if (state_table->dcfclk.num_clk_values == 2) { - dcfclk_fine_grained = true; - } - - if (state_table->fclk.num_clk_values == 2) { - fclk_fine_grained = true; - } - - if (state_table->fclk.num_clk_values == state_table->dcfclk.num_clk_values && - state_table->fclk.num_clk_values == state_table->uclk.num_clk_values) { - clock_state_count_identical = true; - } - - if (dcfclk_fine_grained || fclk_fine_grained || !clock_state_count_identical) - result = map_soc_min_clocks_to_dpm_fine_grained(display_cfg, state_table); - else - result = map_soc_min_clocks_to_dpm_coarse_grained(display_cfg, state_table); - - if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dispclk_khz, &state_table->dispclk); - - for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - if (result) - result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4x.dppclk_khz, &state_table->dppclk); - } - - for (i = 0; i < display_cfg->display_config.num_streams; i++) { - if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dscclk_khz, &state_table->dscclk); - if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dtbclk_khz, &state_table->dtbclk); - if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.phyclk_khz, &state_table->phyclk); - } - - if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dpprefclk_khz, &state_table->dppclk); - - if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dtbrefclk_khz, &state_table->dtbclk); - - return result; -} - -static bool are_timings_trivially_synchronizable(struct dml2_display_cfg *display_config, int mask) -{ - unsigned int i; - bool identical = true; - bool contains_drr = false; - unsigned int remap_array[DML2_MAX_PLANES]; - unsigned int remap_array_size = 0; - - // Create a remap array to enable simple iteration through only masked stream indicies - for (i = 0; i < display_config->num_streams; i++) { - if (mask & (0x1 << i)) { - remap_array[remap_array_size++] = i; - } - } - - // 0 or 1 display is always trivially synchronizable - if (remap_array_size <= 1) - return true; - - // Check that all displays timings are the same - for (i = 1; i < remap_array_size; i++) { - if (memcmp(&display_config->stream_descriptors[remap_array[i - 1]].timing, &display_config->stream_descriptors[remap_array[i]].timing, sizeof(struct dml2_timing_cfg))) { - identical = false; - break; - } - } - - // Check if any displays are drr - for (i = 0; i < remap_array_size; i++) { - if (display_config->stream_descriptors[remap_array[i]].timing.drr_config.enabled) { - contains_drr = true; 
- break; - } - } - - // Trivial sync is possible if all displays are identical and none are DRR - return !contains_drr && identical; -} - -static int find_smallest_idle_time_in_vblank_us(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int mask) -{ - unsigned int i; - int min_idle_us = 0; - unsigned int remap_array[DML2_MAX_PLANES]; - unsigned int remap_array_size = 0; - const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; - - // Create a remap array to enable simple iteration through only masked stream indicies - for (i = 0; i < in_out->programming->display_config.num_streams; i++) { - if (mask & (0x1 << i)) { - remap_array[remap_array_size++] = i; - } - } - - if (remap_array_size == 0) - return 0; - - min_idle_us = mode_support_result->cfg_support_info.stream_support_info[remap_array[0]].vblank_reserved_time_us; - - for (i = 1; i < remap_array_size; i++) { - if (min_idle_us > mode_support_result->cfg_support_info.stream_support_info[remap_array[i]].vblank_reserved_time_us) - min_idle_us = mode_support_result->cfg_support_info.stream_support_info[remap_array[i]].vblank_reserved_time_us; - } - - return min_idle_us; -} - -static bool determine_power_management_features_with_vblank_only(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) -{ - int min_idle_us; - - if (are_timings_trivially_synchronizable(&in_out->programming->display_config, 0xF)) { - min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, 0xF); - - if (min_idle_us >= in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->uclk_pstate_supported = true; - - if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us) - in_out->programming->fclk_pstate_supported = true; - } - - return true; -} - -static int get_displays_without_vactive_margin_mask(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int latency_hiding_requirement_us) -{ - unsigned int i; - int displays_without_vactive_margin_mask = 0x0; - const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; - - for (i = 0; i < in_out->programming->display_config.num_planes; i++) { - if (mode_support_result->cfg_support_info.plane_support_info[i].active_latency_hiding_us - < latency_hiding_requirement_us) - displays_without_vactive_margin_mask |= (0x1 << i); - } - - return displays_without_vactive_margin_mask; -} - -static int get_displays_with_fams_mask(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int latency_hiding_requirement_us) -{ - unsigned int i; - int displays_with_fams_mask = 0x0; - - for (i = 0; i < in_out->programming->display_config.num_planes; i++) { - if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config != dml2_svp_mode_override_auto) - displays_with_fams_mask |= (0x1 << i); - } - - return displays_with_fams_mask; -} - -static bool determine_power_management_features_with_vactive_and_vblank(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) -{ - int displays_without_vactive_margin_mask = 0x0; - int min_idle_us = 0; - - if (in_out->programming->uclk_pstate_supported == false) { - displays_without_vactive_margin_mask = - get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)); - - if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) { - min_idle_us = 
find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask); - - if (min_idle_us >= in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->uclk_pstate_supported = true; - } - } - - if (in_out->programming->fclk_pstate_supported == false) { - displays_without_vactive_margin_mask = - get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.fclk_change_blackout_us)); - - if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) { - min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask); - - if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us) - in_out->programming->fclk_pstate_supported = true; - } - } - - return true; -} - -static bool determine_power_management_features_with_fams(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) -{ - int displays_without_vactive_margin_mask = 0x0; - int displays_without_fams_mask = 0x0; - - displays_without_vactive_margin_mask = - get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)); - - displays_without_fams_mask = - get_displays_with_fams_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)); - - if ((displays_without_vactive_margin_mask & ~displays_without_fams_mask) == 0) - in_out->programming->uclk_pstate_supported = true; - - return true; -} - -static void clamp_uclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) -{ - in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4x.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; -} - -static void clamp_fclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) -{ - in_out->programming->min_clocks.dcn4x.active.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4x.idle.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; -} - -static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) -{ - int i; - bool result; - double dispclk_khz; - const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; - - calculate_system_active_minimums(in_out); - calculate_svp_prefetch_minimums(in_out); - calculate_idle_minimums(in_out); - - // In NV4, there's no support for FCLK or DCFCLK DPM change before SVP prefetch starts, therefore - // active minimums must be boosted to prefetch minimums - if (in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4x.active.uclk_khz) - in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz; - - if (in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4x.active.fclk_khz) - in_out->programming->min_clocks.dcn4x.active.fclk_khz = 
in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz; - - if (in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4x.active.dcfclk_khz) - in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz; - - // need some massaging for the dispclk ramping cases: - dispclk_khz = mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0) * (1.0 + in_out->ip->dispclk_ramp_margin_percent / 100.0); - // ramping margin should not make dispclk exceed the maximum dispclk speed: - dispclk_khz = math_min2(dispclk_khz, in_out->min_clk_table->max_clocks_khz.dispclk); - // but still the required dispclk can be more than the maximum dispclk speed: - dispclk_khz = math_max2(dispclk_khz, mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); - - // DPP Ref is always set to max of all DPP clocks - for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - if (in_out->programming->min_clocks.dcn4x.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) - in_out->programming->min_clocks.dcn4x.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; - } - in_out->programming->min_clocks.dcn4x.dpprefclk_khz = (unsigned long) (in_out->programming->min_clocks.dcn4x.dpprefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); - - // DTB Ref is always set to max of all DTB clocks - for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - if (in_out->programming->min_clocks.dcn4x.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) - in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; - } - in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); - - if (in_out->soc_bb->no_dfs) { - round_to_non_dfs_granularity((unsigned long)dispclk_khz, in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, - &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz); - } else { - add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); - - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 0.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); - - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, 0.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); - } - - - for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 - * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); - } - - 
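/*
 * [Editor's note -- illustrative sketch, not part of this patch.]
 * The per-pipe loop above snaps each DPPCLK request onto the 255-step
 * divider derived from DPPREFCLK, after applying downspread margin. A
 * standalone userspace rendering of that rounding (names hypothetical,
 * libc ceil() standing in for the driver's math_ceil2()):
 */
#include <math.h>

static unsigned long quantize_dppclk_khz(unsigned long dppclk_req_khz,
					 unsigned long dpprefclk_khz,
					 double downspread_pct)
{
	/* Apply downspread margin to the requested clock. */
	double target_khz = dppclk_req_khz * (1.0 + downspread_pct / 100.0);
	/* Round the divider ratio up to the next 1/255 step so the
	 * programmed clock never falls below the requirement. */
	double steps = ceil(target_khz * 255.0 / (double)dpprefclk_khz);

	return (unsigned long)(dpprefclk_khz / 255.0 * steps);
}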
in_out->programming->min_clocks.dcn4x.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; - in_out->programming->min_clocks.dcn4x.socclk_khz = mode_support_result->global.socclk_khz; - - result = map_min_clocks_to_dpm(mode_support_result, in_out->programming, &in_out->soc_bb->clk_table); - - // By default, all power management features are not enabled - in_out->programming->fclk_pstate_supported = false; - in_out->programming->uclk_pstate_supported = false; - - return result; -} - -bool dpmm_dcn3_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) -{ - bool result; - - result = map_mode_to_soc_dpm(in_out); - - // Check if any can be enabled by nominal vblank idle time - determine_power_management_features_with_vblank_only(in_out); - - // Check if any can be enabled in vactive/vblank - determine_power_management_features_with_vactive_and_vblank(in_out); - - // Check if any can be enabled via fams - determine_power_management_features_with_fams(in_out); - - if (in_out->programming->uclk_pstate_supported == false) - clamp_uclk_to_max(in_out); - - if (in_out->programming->fclk_pstate_supported == false) - clamp_fclk_to_max(in_out); - - return result; -} - -bool dpmm_dcn4_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) -{ - bool result; - int displays_without_vactive_margin_mask = 0x0; - int min_idle_us = 0; - - result = map_mode_to_soc_dpm(in_out); - - if (in_out->display_cfg->stage3.success) - in_out->programming->uclk_pstate_supported = true; - - displays_without_vactive_margin_mask = - get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.fclk_change_blackout_us)); - - if (displays_without_vactive_margin_mask == 0) { - in_out->programming->fclk_pstate_supported = true; - } else { - if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) { - min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask); - - if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us) - in_out->programming->fclk_pstate_supported = true; - } - } - - if (in_out->programming->uclk_pstate_supported == false) - clamp_uclk_to_max(in_out); - - if (in_out->programming->fclk_pstate_supported == false) - clamp_fclk_to_max(in_out); - - min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, 0xFF); - if (in_out->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && - min_idle_us >= in_out->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) - in_out->programming->stutter.supported_in_blank = true; - else - in_out->programming->stutter.supported_in_blank = false; - - // TODO: Fix me Sam - if (in_out->soc_bb->power_management_parameters.z8_min_idle_time > 0 && - in_out->programming->informative.power_management.z8.stutter_period >= in_out->soc_bb->power_management_parameters.z8_min_idle_time) - in_out->programming->z8_stutter.meets_eco = true; - else - in_out->programming->z8_stutter.meets_eco = false; - - if (in_out->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && - min_idle_us >= in_out->soc_bb->power_management_parameters.z8_stutter_exit_latency_us) - in_out->programming->z8_stutter.supported_in_blank = true; - else - in_out->programming->z8_stutter.supported_in_blank = false; - - return result; -} - -bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out) -{ - const struct 
dml2_display_cfg *display_cfg = &in_out->display_cfg->display_config; - const struct dml2_core_internal_display_mode_lib *mode_lib = &in_out->core->clean_me_up.mode_lib; - struct dml2_dchub_global_register_set *dchubbub_regs = &in_out->programming->global_regs; - - double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; - - /* set A */ - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000); - - /* set B */ - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].uclk_pstate = (int 
unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000); - - dchubbub_regs->num_watermark_sets = 2; - - return true; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h deleted file mode 100644 index e7b58f2efda4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_DPMM_DCN4_H__ -#define __DML2_DPMM_DCN4_H__ - -#include "dml2_internal_shared_types.h" - -bool dpmm_dcn3_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); -bool dpmm_dcn4_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); -bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c deleted file mode 100644 index dfd01440737d..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c +++ /dev/null @@ -1,50 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
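/*
 * [Editor's note -- illustrative sketch, not part of this patch.]
 * dpmm_dcn4_map_watermarks() above converts each watermark time into
 * DCHUB refclk cycles. Because refclk_freq_in_mhz is cycles per
 * microsecond, the conversion is a single multiply; e.g. a 10 us
 * DRAM-change watermark at a 400 MHz refclk programs 4000 cycles.
 */
static inline unsigned int watermark_us_to_refclk_cycles(double watermark_us,
							 double refclk_mhz)
{
	/* 1 MHz is one cycle per microsecond, so us * MHz = cycles. */
	return (unsigned int)(watermark_us * refclk_mhz);
}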
- -#include "dml2_dpmm_factory.h" -#include "dml2_dpmm_dcn4.h" -#include "dml2_external_lib_deps.h" - -static bool dummy_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) -{ - return true; -} - -static bool dummy_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out) -{ - return true; -} - -bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance *out) -{ - bool result = false; - - if (!out) - return false; - - memset(out, 0, sizeof(struct dml2_dpmm_instance)); - - switch (project_id) { - case dml2_project_dcn4x_stage1: - out->map_mode_to_soc_dpm = &dummy_map_mode_to_soc_dpm; - out->map_watermarks = &dummy_map_watermarks; - result = true; - break; - case dml2_project_dcn4x_stage2: - out->map_mode_to_soc_dpm = &dpmm_dcn3_map_mode_to_soc_dpm; - out->map_watermarks = &dummy_map_watermarks; - result = true; - break; - case dml2_project_dcn4x_stage2_auto_drr_svp: - out->map_mode_to_soc_dpm = &dpmm_dcn4_map_mode_to_soc_dpm; - out->map_watermarks = &dpmm_dcn4_map_watermarks; - result = true; - break; - case dml2_project_invalid: - default: - break; - } - - return result; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h deleted file mode 100644 index 20ba2e446f1d..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_DPMM_FACTORY_H__ -#define __DML2_DPMM_FACTORY_H__ - -#include "dml2_internal_shared_types.h" -#include "dml_top_types.h" - -bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance *out); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c deleted file mode 100644 index a265f254152c..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c +++ /dev/null @@ -1,198 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
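/*
 * [Editor's note -- illustrative sketch, not part of this patch.]
 * The dpmm factory deleted above only binds function pointers; a caller
 * would drive it roughly as below (the run_dpmm() wrapper and a fully
 * populated params struct are hypothetical):
 */
static bool run_dpmm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *params)
{
	struct dml2_dpmm_instance dpmm;

	/* Stage-2 projects get the real mapper; stage 1 gets a no-op stub. */
	if (!dml2_dpmm_create(dml2_project_dcn4x_stage2, &dpmm))
		return false;

	return dpmm.map_mode_to_soc_dpm(params);
}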
- -#include "dml2_mcg_dcn4.h" -#include "dml_top_soc_parameter_types.h" - -static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table); - -bool mcg_dcn4_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out) -{ - return build_min_clock_table(in_out->soc_bb, in_out->min_clk_table); -} - -static unsigned long long uclk_to_dram_bw_kbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config) -{ - unsigned long long bw_kbps = 0; - - bw_kbps = (unsigned long long) uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock; - - return bw_kbps; -} - -static unsigned long round_up_to_quantized_values(unsigned long value, const unsigned long *quantized_values, int num_quantized_values) -{ - int i; - - if (!quantized_values) - return 0; - - for (i = 0; i < num_quantized_values; i++) { - if (quantized_values[i] > value) - return quantized_values[i]; - } - - return 0; -} - -static bool build_min_clk_table_fine_grained(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table) -{ - bool dcfclk_fine_grained = false, fclk_fine_grained = false; - - int i; - unsigned int j; - - unsigned long min_dcfclk_khz = 0; - unsigned long min_fclk_khz = 0; - unsigned long prev_100, cur_50; - - if (soc_bb->clk_table.dcfclk.num_clk_values == 2) { - dcfclk_fine_grained = true; - } - - if (soc_bb->clk_table.fclk.num_clk_values == 2) { - fclk_fine_grained = true; - } - - min_dcfclk_khz = soc_bb->clk_table.dcfclk.clk_values_khz[0]; - min_fclk_khz = soc_bb->clk_table.fclk.clk_values_khz[0]; - - // First calculate the table for "balanced" bandwidths across UCLK/FCLK - for (i = 0; i < soc_bb->clk_table.uclk.num_clk_values; i++) { - min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps = uclk_to_dram_bw_kbps(soc_bb->clk_table.uclk.clk_values_khz[i], &soc_bb->clk_table.dram_config); - - min_table->dram_bw_table.entries[i].min_fclk_khz = (unsigned long)((((double)min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps * soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100) / ((double)soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100)) / soc_bb->fabric_datapath_to_dcn_data_return_bytes); - } - min_table->dram_bw_table.num_entries = soc_bb->clk_table.uclk.num_clk_values; - - // To create the minimum table, effectively shift "up" all the dcfclk/fclk entries by 1, and then replace the lowest entry with min fclk/dcfclk - for (i = min_table->dram_bw_table.num_entries - 1; i > 0; i--) { - prev_100 = min_table->dram_bw_table.entries[i - 1].min_fclk_khz; - cur_50 = min_table->dram_bw_table.entries[i].min_fclk_khz / 2; - min_table->dram_bw_table.entries[i].min_fclk_khz = prev_100 > cur_50 ?
prev_100 : cur_50; - - if (!fclk_fine_grained) { - min_table->dram_bw_table.entries[i].min_fclk_khz = round_up_to_quantized_values(min_table->dram_bw_table.entries[i].min_fclk_khz, soc_bb->clk_table.fclk.clk_values_khz, soc_bb->clk_table.fclk.num_clk_values); - } - } - min_table->dram_bw_table.entries[0].min_fclk_khz /= 2; - - // Clamp to minimums and maximums - for (i = 0; i < (int)min_table->dram_bw_table.num_entries; i++) { - if (min_table->dram_bw_table.entries[i].min_dcfclk_khz < min_dcfclk_khz) - min_table->dram_bw_table.entries[i].min_dcfclk_khz = min_dcfclk_khz; - - if (min_table->dram_bw_table.entries[i].min_fclk_khz < min_fclk_khz) - min_table->dram_bw_table.entries[i].min_fclk_khz = min_fclk_khz; - - if (soc_bb->max_fclk_for_uclk_dpm_khz > 0 && - min_table->dram_bw_table.entries[i].min_fclk_khz > soc_bb->max_fclk_for_uclk_dpm_khz) - min_table->dram_bw_table.entries[i].min_fclk_khz = soc_bb->max_fclk_for_uclk_dpm_khz; - - min_table->dram_bw_table.entries[i].min_dcfclk_khz = - min_table->dram_bw_table.entries[i].min_fclk_khz * - soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent; - - min_table->dram_bw_table.entries[i].min_dcfclk_khz = - min_table->dram_bw_table.entries[i].min_dcfclk_khz * soc_bb->fabric_datapath_to_dcn_data_return_bytes / soc_bb->return_bus_width_bytes; - - if (!dcfclk_fine_grained) { - min_table->dram_bw_table.entries[i].min_dcfclk_khz = round_up_to_quantized_values(min_table->dram_bw_table.entries[i].min_dcfclk_khz, soc_bb->clk_table.dcfclk.clk_values_khz, soc_bb->clk_table.dcfclk.num_clk_values); - } - } - - // Prune states which are invalid (some clocks exceed maximum) - for (i = 0; i < (int)min_table->dram_bw_table.num_entries; i++) { - if (min_table->dram_bw_table.entries[i].min_dcfclk_khz > min_table->max_clocks_khz.dcfclk || - min_table->dram_bw_table.entries[i].min_fclk_khz > min_table->max_clocks_khz.fclk) { - min_table->dram_bw_table.num_entries = i; - break; - } - } - - // Prune duplicate states - for (i = 0; i < (int)min_table->dram_bw_table.num_entries - 1; i++) { - if (min_table->dram_bw_table.entries[i].min_dcfclk_khz == min_table->dram_bw_table.entries[i + 1].min_dcfclk_khz && - min_table->dram_bw_table.entries[i].min_fclk_khz == min_table->dram_bw_table.entries[i + 1].min_fclk_khz && - min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps == min_table->dram_bw_table.entries[i + 1].pre_derate_dram_bw_kbps) { - - // i + 1 is the same state as i, so shift everything - for (j = i + 1; j < min_table->dram_bw_table.num_entries; j++) { - min_table->dram_bw_table.entries[j].min_dcfclk_khz = min_table->dram_bw_table.entries[j + 1].min_dcfclk_khz; - min_table->dram_bw_table.entries[j].min_fclk_khz = min_table->dram_bw_table.entries[j + 1].min_fclk_khz; - min_table->dram_bw_table.entries[j].pre_derate_dram_bw_kbps = min_table->dram_bw_table.entries[j + 1].pre_derate_dram_bw_kbps; - } - min_table->dram_bw_table.num_entries--; - } - } - - return true; -} - -static bool build_min_clk_table_coarse_grained(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table) -{ - int i; - - for (i = 0; i < soc_bb->clk_table.uclk.num_clk_values; i++) { - min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps = uclk_to_dram_bw_kbps(soc_bb->clk_table.uclk.clk_values_khz[i], &soc_bb->clk_table.dram_config); - min_table->dram_bw_table.entries[i].min_dcfclk_khz = soc_bb->clk_table.dcfclk.clk_values_khz[i]; - 
min_table->dram_bw_table.entries[i].min_fclk_khz = soc_bb->clk_table.fclk.clk_values_khz[i]; - } - min_table->dram_bw_table.num_entries = soc_bb->clk_table.uclk.num_clk_values; - - return true; -} - -static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table) -{ - bool result; - bool dcfclk_fine_grained = false, fclk_fine_grained = false, clock_state_count_equal = false; - - if (!soc_bb || !min_table) - return false; - - if (soc_bb->clk_table.dcfclk.num_clk_values < 2 || soc_bb->clk_table.fclk.num_clk_values < 2) - return false; - - if (soc_bb->clk_table.uclk.num_clk_values > DML_MCG_MAX_CLK_TABLE_SIZE) - return false; - - if (soc_bb->clk_table.dcfclk.num_clk_values == 2) { - dcfclk_fine_grained = true; - } - - if (soc_bb->clk_table.fclk.num_clk_values == 2) { - fclk_fine_grained = true; - } - - if (soc_bb->clk_table.fclk.num_clk_values == soc_bb->clk_table.dcfclk.num_clk_values && - soc_bb->clk_table.fclk.num_clk_values == soc_bb->clk_table.uclk.num_clk_values) - clock_state_count_equal = true; - - min_table->fixed_clocks_khz.amclk = 0; - min_table->fixed_clocks_khz.dprefclk = soc_bb->dprefclk_mhz * 1000; - min_table->fixed_clocks_khz.pcierefclk = soc_bb->pcie_refclk_mhz * 1000; - min_table->fixed_clocks_khz.dchubrefclk = soc_bb->dchub_refclk_mhz * 1000; - min_table->fixed_clocks_khz.xtalclk = soc_bb->xtalclk_mhz * 1000; - - min_table->max_clocks_khz.dispclk = soc_bb->clk_table.dispclk.clk_values_khz[soc_bb->clk_table.dispclk.num_clk_values - 1]; - min_table->max_clocks_khz.dppclk = soc_bb->clk_table.dppclk.clk_values_khz[soc_bb->clk_table.dppclk.num_clk_values - 1]; - min_table->max_clocks_khz.dscclk = soc_bb->clk_table.dscclk.clk_values_khz[soc_bb->clk_table.dscclk.num_clk_values - 1]; - min_table->max_clocks_khz.dtbclk = soc_bb->clk_table.dtbclk.clk_values_khz[soc_bb->clk_table.dtbclk.num_clk_values - 1]; - min_table->max_clocks_khz.phyclk = soc_bb->clk_table.phyclk.clk_values_khz[soc_bb->clk_table.phyclk.num_clk_values - 1]; - - min_table->max_ss_clocks_khz.dispclk = (unsigned int)((double)min_table->max_clocks_khz.dispclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); - min_table->max_ss_clocks_khz.dppclk = (unsigned int)((double)min_table->max_clocks_khz.dppclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); - min_table->max_ss_clocks_khz.dtbclk = (unsigned int)((double)min_table->max_clocks_khz.dtbclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); - - min_table->max_clocks_khz.dcfclk = soc_bb->clk_table.dcfclk.clk_values_khz[soc_bb->clk_table.dcfclk.num_clk_values - 1]; - min_table->max_clocks_khz.fclk = soc_bb->clk_table.fclk.clk_values_khz[soc_bb->clk_table.fclk.num_clk_values - 1]; - - if (dcfclk_fine_grained || fclk_fine_grained || !clock_state_count_equal) - result = build_min_clk_table_fine_grained(soc_bb, min_table); - else - result = build_min_clk_table_coarse_grained(soc_bb, min_table); - - return result; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h deleted file mode 100644 index 02da6f45cbf7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
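/*
 * [Editor's note -- worked example, not part of this patch.]
 * uclk_to_dram_bw_kbps() above multiplies UCLK by the DRAM topology:
 * a 1,000,000 kHz (1 GHz) UCLK with 2 channels, 8 bytes per channel and
 * 2 transactions per clock yields 1,000,000 * 2 * 8 * 2 = 32,000,000 in
 * the table's pre_derate_dram_bw_kbps units; the fine-grained builder
 * then derives each entry's minimum FCLK from the derated value.
 */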
- -#ifndef __DML2_MCG_DCN4_H__ -#define __DML2_MCG_DCN4_H__ - -#include "dml2_internal_shared_types.h" - -bool mcg_dcn4_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out); -bool mcg_dcn4_unit_test(void); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c deleted file mode 100644 index c60b8fe90819..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dml2_mcg_factory.h" -#include "dml2_mcg_dcn4.h" -#include "dml2_external_lib_deps.h" - -static bool dummy_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out) -{ - return true; -} - -bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance *out) -{ - bool result = false; - - if (!out) - return false; - - memset(out, 0, sizeof(struct dml2_mcg_instance)); - - switch (project_id) { - case dml2_project_dcn4x_stage1: - out->build_min_clock_table = &dummy_build_min_clock_table; - result = true; - break; - case dml2_project_dcn4x_stage2: - case dml2_project_dcn4x_stage2_auto_drr_svp: - out->build_min_clock_table = &mcg_dcn4_build_min_clock_table; - result = true; - break; - case dml2_project_invalid: - default: - break; - } - - return result; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h deleted file mode 100644 index ad307deca3b0..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_MCG_FACTORY_H__ -#define __DML2_MCG_FACTORY_H__ - -#include "dml2_internal_shared_types.h" -#include "dml_top_types.h" - -bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance *out); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c deleted file mode 100644 index 1b9579a32ff2..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ /dev/null @@ -1,706 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
- -#include "dml2_pmo_factory.h" -#include "dml2_pmo_dcn3.h" - -static void sort(double *list_a, int list_a_size) -{ - // Simple in-place bubble sort: order list_a[] ascending - for (int i = 0; i < list_a_size - 1; i++) { - // Swap adjacent out-of-order elements so larger values sink toward the end - for (int j = i; j < list_a_size - 1; j++) { - if (list_a[j] > list_a[j + 1]) - swap(list_a[j], list_a[j + 1]); - } - } -} - -static double get_max_reserved_time_on_all_planes_with_stream_index(struct display_configuation_with_meta *config, unsigned int stream_index) -{ - struct dml2_plane_parameters *plane_descriptor; - long max_reserved_time_ns = 0; - - for (unsigned int i = 0; i < config->display_config.num_planes; i++) { - plane_descriptor = &config->display_config.plane_descriptors[i]; - - if (plane_descriptor->stream_index == stream_index) - if (plane_descriptor->overrides.reserved_vblank_time_ns > max_reserved_time_ns) - max_reserved_time_ns = plane_descriptor->overrides.reserved_vblank_time_ns; - } - - return (max_reserved_time_ns / 1000.0); -} - - -static void set_reserved_time_on_all_planes_with_stream_index(struct display_configuation_with_meta *config, unsigned int stream_index, double reserved_time_us) -{ - struct dml2_plane_parameters *plane_descriptor; - - for (unsigned int i = 0; i < config->display_config.num_planes; i++) { - plane_descriptor = &config->display_config.plane_descriptors[i]; - - if (plane_descriptor->stream_index == stream_index) - plane_descriptor->overrides.reserved_vblank_time_ns = (long int)(reserved_time_us * 1000); - } -} - -static void remove_duplicates(double *list_a, int *list_a_size) -{ - int j = 0; - - if (*list_a_size == 0) - return; - - for (int i = 1; i < *list_a_size; i++) { - if (list_a[j] != list_a[i]) { - j++; - list_a[j] = list_a[i]; - } - } - - *list_a_size = j + 1; -} - -static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) -{ - if (*mpc_combine_factor < limit) { - (*mpc_combine_factor)++; - return true; - } - - return false; -} - -static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, - int free_pipes) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i; - bool result = true; - - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - // For pipes that failed dcc mcache check, we want to increase the pipe count. - // The logic for doing this depends on how many pipes are already being used, - // and whether it's mpcc or odm combine. - if (!in_out->dcc_mcache_supported[i]) { - // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 - if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { - in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = - in_out->cfg_support_info->plane_support_info[i].dpps_used; - // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. - if (free_pipes > 0) { - if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, - pmo->mpc_combine_limit)) { - // We've reached max pipes allocatable to a single plane, so we fail. - result = false; - break; - } else { - // Successfully added another pipe to this failing plane. - free_pipes--; - } - } else { - // No free pipes to add.
- result = false; - break; - } - } else { - // If the stream of this plane needs ODM combine, no further optimization can be done. - result = false; - break; - } - } - } - - return result; -} - -static bool iterate_to_next_candidiate(struct dml2_pmo_instance *pmo, int size) -{ - int borrow_from, i; - bool success = false; - - if (pmo->scratch.pmo_dcn3.current_candidate[0] > 0) { - pmo->scratch.pmo_dcn3.current_candidate[0]--; - success = true; - } else { - for (borrow_from = 1; borrow_from < size && pmo->scratch.pmo_dcn3.current_candidate[borrow_from] == 0; borrow_from++) - ; - - if (borrow_from < size) { - pmo->scratch.pmo_dcn3.current_candidate[borrow_from]--; - for (i = 0; i < borrow_from; i++) { - pmo->scratch.pmo_dcn3.current_candidate[i] = pmo->scratch.pmo_dcn3.reserved_time_candidates_count[i] - 1; - } - - success = true; - } - } - - return success; -} - -static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) -{ - bool result = true; - - if (*odm_mode == dml2_odm_mode_auto) { - switch (odms_calculated) { - case 1: - *odm_mode = dml2_odm_mode_bypass; - break; - case 2: - *odm_mode = dml2_odm_mode_combine_2to1; - break; - case 3: - *odm_mode = dml2_odm_mode_combine_3to1; - break; - case 4: - *odm_mode = dml2_odm_mode_combine_4to1; - break; - default: - result = false; - break; - } - } - - if (result) { - if (*odm_mode == dml2_odm_mode_bypass) { - *odm_mode = dml2_odm_mode_combine_2to1; - } else if (*odm_mode == dml2_odm_mode_combine_2to1) { - *odm_mode = dml2_odm_mode_combine_3to1; - } else if (*odm_mode == dml2_odm_mode_combine_3to1) { - *odm_mode = dml2_odm_mode_combine_4to1; - } else { - result = false; - } - } - - return result; -} - -static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index) -{ - unsigned int i, count; - - count = 0; - for (i = 0; i < display_cfg->num_planes; i++) { - if (display_cfg->plane_descriptors[i].stream_index == stream_index) - count++; - } - - return count; -} - -static bool are_timings_trivially_synchronizable(struct display_configuation_with_meta *display_config, int mask) -{ - unsigned int i; - bool identical = true; - bool contains_drr = false; - unsigned int remap_array[DML2_MAX_PLANES]; - unsigned int remap_array_size = 0; - - // Create a remap array to enable simple iteration through only masked stream indices - for (i = 0; i < display_config->display_config.num_streams; i++) { - if (mask & (0x1 << i)) { - remap_array[remap_array_size++] = i; - } - } - - // 0 or 1 display is always trivially synchronizable - if (remap_array_size <= 1) - return true; - - for (i = 1; i < remap_array_size; i++) { - if (memcmp(&display_config->display_config.stream_descriptors[remap_array[i - 1]].timing, - &display_config->display_config.stream_descriptors[remap_array[i]].timing, - sizeof(struct dml2_timing_cfg))) { - identical = false; - break; - } - } - - for (i = 0; i < remap_array_size; i++) { - if (display_config->display_config.stream_descriptors[remap_array[i]].timing.drr_config.enabled) { - contains_drr = true; - break; - } - } - - return !contains_drr && identical; -} - -bool pmo_dcn3_initialize(struct dml2_pmo_initialize_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - pmo->soc_bb = in_out->soc_bb; - pmo->ip_caps = in_out->ip_caps; - pmo->mpc_combine_limit = 2; - pmo->odm_combine_limit = 4; - pmo->mcg_clock_table_size = in_out->mcg_clock_table_size; - - pmo->options = in_out->options; - - return true; -} - -static bool
is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator) -{ - /* - * Htotal, Hblank start/end, and Hsync start/end must all be divisible - * in order for the horizontal timing params to be considered divisible - * by the given denominator. Hsync start is always 0. - */ - unsigned long h_blank_start = timing->h_total - timing->h_front_porch; - - return (timing->h_total % denominator == 0) && - (h_blank_start % denominator == 0) && - (timing->h_blank_end % denominator == 0) && - (timing->h_sync_width % denominator == 0); -} - -static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type) -{ - switch (encoder_type) { - case dml2_dp: - case dml2_edp: - case dml2_dp2p0: - case dml2_none: - return true; - case dml2_hdmi: - case dml2_hdmifrl: - default: - return false; - } -} - -bool pmo_dcn3_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) -{ - unsigned int i; - const struct dml2_display_cfg *display_config = - &in_out->base_display_config->display_config; - const struct dml2_core_mode_support_result *mode_support_result = - &in_out->base_display_config->mode_support_result; - - if (in_out->instance->options->disable_dyn_odm || - (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) - return false; - - for (i = 0; i < display_config->num_planes; i++) - /* - * vmin optimization is required to be seamlessly switched off - * at any time when the new configuration is no longer - * supported. However switching from ODM combine to MPC combine - * is not always seamless. When there are not enough free pipes, we - * will have to use the same secondary OPP heads as secondary - * DPP pipes in MPC combine in the new state. This transition is - * expected to cause glitches. To avoid the transition, we only - * allow vmin optimization if the stream's base configuration - * doesn't require MPC combine. This condition checks if MPC - * combine is enabled. If so, do not optimize the stream. - */ - if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && - mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; - - for (i = 0; i < display_config->num_streams; i++) { - if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && - in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - /* - * ODM Combine requires horizontal timing divisible by 2 so each - * ODM segment has the same size. - */ - else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - /* - * Our hardware supports seamless ODM transitions for DP encoders - * only.
- */ - else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - } - - return true; -} - -bool pmo_dcn3_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) -{ - bool is_vmin = true; - - if (in_out->vmin_limits->dispclk_khz > 0 && - in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) - is_vmin = false; - - return is_vmin; -} - -static int find_highest_odm_load_stream_index( - const struct dml2_display_cfg *display_config, - const struct dml2_core_mode_support_result *mode_support_result) -{ - unsigned int i; - int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; - - for (i = 0; i < display_config->num_streams; i++) { - if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0) - odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz - / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; - else - odm_load = 0; - - if (odm_load > highest_odm_load) { - highest_odm_load_index = i; - highest_odm_load = odm_load; - } - } - - return highest_odm_load_index; -} - -bool pmo_dcn3_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) -{ - int stream_index; - const struct dml2_display_cfg *display_config = - &in_out->base_display_config->display_config; - const struct dml2_core_mode_support_result *mode_support_result = - &in_out->base_display_config->mode_support_result; - unsigned int odms_used; - struct dml2_stream_parameters *stream_descriptor; - bool optimizable = false; - - /* - * highest odm load stream must be optimizable to continue as dispclk is - * bounded by it. - */ - stream_index = find_highest_odm_load_stream_index(display_config, - mode_support_result); - - if (stream_index < 0 || - in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) - return false; - - odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; - if ((int)odms_used >= in_out->instance->odm_combine_limit) - return false; - - memcpy(in_out->optimized_display_config, - in_out->base_display_config, - sizeof(struct display_configuation_with_meta)); - - stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; - while (!optimizable && increase_odm_combine_factor( - &stream_descriptor->overrides.odm_mode, - odms_used)) { - switch (stream_descriptor->overrides.odm_mode) { - case dml2_odm_mode_combine_2to1: - optimizable = true; - break; - case dml2_odm_mode_combine_3to1: - /* - * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of - * actual pixel rate. Therefore horizontal timing must - * be divisible by 4. - */ - if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { - /* - * DSC h slice count must be divisible - * by 3. - */ - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) - optimizable = true; - } else { - optimizable = true; - } - } - break; - case dml2_odm_mode_combine_4to1: - /* - * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of - * actual pixel rate. Therefore horizontal timing must - * be divisible by 4. 
- */ - if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { - /* - * DSC h slice count must be divisible - * by 4. - */ - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0) - optimizable = true; - } else { - optimizable = true; - } - } - break; - case dml2_odm_mode_auto: - case dml2_odm_mode_bypass: - case dml2_odm_mode_split_1to2: - case dml2_odm_mode_mso_1to2: - case dml2_odm_mode_mso_1to4: - default: - break; - } - } - - return optimizable; -} - -bool pmo_dcn3_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i, used_pipes, free_pipes, planes_on_stream; - bool result; - - if (in_out->display_config != in_out->optimized_display_cfg) { - memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg)); - } - - // Count the number of free pipes, and check if any odm combine is in use. - used_pipes = 0; - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used; - } - free_pipes = pmo->ip_caps->pipe_count - used_pipes; - - // Optimization loop - // The goal here is to add more pipes to any planes - // which are failing mcache admissibility - result = true; - - // The optimization logic depends on whether ODM combine is enabled, and the stream count. - if (in_out->optimized_display_cfg->num_streams > 1) { - // If there are multiple streams, we can only optimize mcache failures on planes - // which are not ODM combined. - - result = optimize_dcc_mcache_no_odm(in_out, free_pipes); - } else if (in_out->optimized_display_cfg->num_streams == 1) { - // In single stream cases, we still optimize mcache failures when there's ODM combine with some - // additional logic. - - if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) { - // If ODM combine is enabled, then the logic is to increase ODM combine factor. - - // Optimization for streams with > 1 ODM combine factor is only supported for single display. - planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0); - - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - // For pipes that failed dcc mcache check, we want to increase the pipe count. - // The logic for doing this depends on how many pipes are already being used, - // and whether it's mpcc or odm combine. - if (!in_out->dcc_mcache_supported[i]) { - // Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream. - if (free_pipes >= planes_on_stream) { - if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode, - in_out->cfg_support_info->plane_support_info[i].dpps_used)) { - result = false; - } else { - break; - } - } else { - result = false; - break; - } - } - } - } else { - // If ODM combine is not enabled, then we can actually use the same logic as before.
- - result = optimize_dcc_mcache_no_odm(in_out, free_pipes); - } - } else { - result = true; - } - - return result; -} - -bool pmo_dcn3_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3; - const struct dml2_stream_parameters *stream_descriptor; - const struct dml2_plane_parameters *plane_descriptor; - unsigned int stream_index, plane_index, candidate_count; - double min_reserved_vblank_time = 0; - int fclk_twait_needed_mask = 0x0; - int uclk_twait_needed_mask = 0x0; - - state->performed = true; - state->min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency; - pmo->scratch.pmo_dcn3.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; - pmo->scratch.pmo_dcn3.max_latency_index = pmo->mcg_clock_table_size - 1; - pmo->scratch.pmo_dcn3.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; - - pmo->scratch.pmo_dcn3.stream_mask = 0xF; - - for (plane_index = 0; plane_index < in_out->base_display_config->display_config.num_planes; plane_index++) { - plane_descriptor = &in_out->base_display_config->display_config.plane_descriptors[plane_index]; - stream_descriptor = &in_out->base_display_config->display_config.stream_descriptors[plane_descriptor->stream_index]; - - if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[plane_index].active_latency_hiding_us < - in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us && - stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate == dml2_twait_budgeting_setting_if_needed) - uclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); - - if (stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate == dml2_twait_budgeting_setting_try) - uclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); - - if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[plane_index].active_latency_hiding_us < - in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us && - stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate == dml2_twait_budgeting_setting_if_needed) - fclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); - - if (stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate == dml2_twait_budgeting_setting_try) - fclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); - - if (plane_descriptor->overrides.legacy_svp_config != dml2_svp_mode_override_auto) { - pmo->scratch.pmo_dcn3.stream_mask &= ~(0x1 << plane_descriptor->stream_index); - } - } - - for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { - stream_descriptor = &in_out->base_display_config->display_config.stream_descriptors[stream_index]; - - // The absolute minimum required time is the minimum of all the required budgets - /* - if (stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate - == dml2_twait_budgeting_setting_require) - - if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { - min_reserved_vblank_time = max_double2(min_reserved_vblank_time, - in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us); - } - - if (stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate - == dml2_twait_budgeting_setting_require) { - - if 
(are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { - min_reserved_vblank_time = max_double2(min_reserved_vblank_time, - in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us); - } - } - - if (stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit - == dml2_twait_budgeting_setting_require) - min_reserved_vblank_time = max_double2(min_reserved_vblank_time, - in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us); - */ - - min_reserved_vblank_time = get_max_reserved_time_on_all_planes_with_stream_index(in_out->base_display_config, stream_index); - - // Insert the absolute minimum into the array - candidate_count = 1; - pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][0] = min_reserved_vblank_time; - pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] = candidate_count; - - if (!(pmo->scratch.pmo_dcn3.stream_mask & (0x1 << stream_index))) - continue; - - // For every optional feature, we create a candidate for it only if it's larger than the minimum. - if ((fclk_twait_needed_mask & (0x1 << stream_index)) && - in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us > min_reserved_vblank_time) { - - if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { - pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] = - in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us; - } - } - - if ((uclk_twait_needed_mask & (0x1 << stream_index)) && - in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us > min_reserved_vblank_time) { - - if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { - pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] = - in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; - } - } - - if ((stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit == dml2_twait_budgeting_setting_try || - stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit == dml2_twait_budgeting_setting_if_needed) && - in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > min_reserved_vblank_time) { - - pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] = - in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us; - } - - pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] = candidate_count; - - // Finally sort the array of candidates - sort(pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index], - pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index]); - - remove_duplicates(pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index], - &pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index]); - - pmo->scratch.pmo_dcn3.current_candidate[stream_index] = - pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] - 1; - } - - return true; -} - -bool pmo_dcn3_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i, stream_index; - - for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { - stream_index = in_out->base_display_config->display_config.plane_descriptors[i].stream_index; - - if
(in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < - pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][pmo->scratch.pmo_dcn3.current_candidate[stream_index]] * 1000) { - return false; - } - } - - return true; -} - -bool pmo_dcn3_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - unsigned int stream_index; - bool success = false; - bool reached_end; - - memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); - - if (in_out->last_candidate_failed) { - if (pmo->scratch.pmo_dcn3.cur_latency_index < pmo->scratch.pmo_dcn3.max_latency_index) { - // If we haven't tried all the clock bounds to support this state, try a higher one - pmo->scratch.pmo_dcn3.cur_latency_index++; - - success = true; - } else { - // If there's nothing higher to try, then we have to have a smaller candidate - reached_end = !iterate_to_next_candidiate(pmo, in_out->optimized_display_config->display_config.num_streams); - - if (!reached_end) { - pmo->scratch.pmo_dcn3.cur_latency_index = pmo->scratch.pmo_dcn3.min_latency_index; - success = true; - } - } - } else { - success = true; - } - - if (success) { - in_out->optimized_display_config->stage3.min_clk_index_for_latency = pmo->scratch.pmo_dcn3.cur_latency_index; - - for (stream_index = 0; stream_index < in_out->optimized_display_config->display_config.num_streams; stream_index++) { - set_reserved_time_on_all_planes_with_stream_index(in_out->optimized_display_config, stream_index, - pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][pmo->scratch.pmo_dcn3.current_candidate[stream_index]]); - } - } - - return success; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h deleted file mode 100644 index f00bd9e72a86..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h +++ /dev/null @@ -1,22 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_PMO_DCN3_H__ -#define __DML2_PMO_DCN3_H__ - -#include "dml2_internal_shared_types.h" - -bool pmo_dcn3_initialize(struct dml2_pmo_initialize_in_out *in_out); - -bool pmo_dcn3_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); - -bool pmo_dcn3_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); -bool pmo_dcn3_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); -bool pmo_dcn3_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); - -bool pmo_dcn3_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); -bool pmo_dcn3_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); -bool pmo_dcn3_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c deleted file mode 100644 index 5769c2638f9a..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ /dev/null @@ -1,2394 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc.
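/*
 * [Editor's note -- worked example, not part of this patch.]
 * iterate_to_next_candidiate() above walks the per-stream candidate
 * indices like a mixed-radix odometer counting down: decrement stream
 * 0's index; on underflow, borrow from the next stream holding a
 * non-zero index and reset every lower stream to its maximum. With
 * candidate counts {3, 2} the visit order is (2,1) (1,1) (0,1) (2,0)
 * (1,0) (0,0), after which it reports failure and the optimizer stops.
 */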
- -#include "dml2_pmo_factory.h" -#include "dml2_debug.h" -#include "lib_float_math.h" -#include "dml2_pmo_dcn4_fams2.h" - -static const double MIN_VACTIVE_MARGIN_PCT = 0.25; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding -static const double MIN_BLANK_STUTTER_FACTOR = 3.0; - -static const struct dml2_pmo_pstate_strategy base_strategy_list_1_display[] = { - // VActive Preferred - { - .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - - // Then SVP - { - .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - - // Then VBlank - { - .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = false, - }, - - // Then DRR - { - .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - - // Finally VBlank, but allow base clocks for latency to increase - /* - { - .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - */ -}; - -static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / sizeof(struct dml2_pmo_pstate_strategy); - -static const struct dml2_pmo_pstate_strategy base_strategy_list_2_display[] = { - // VActive only is preferred - { - .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - - // Then VActive + VBlank - { - .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = false, - }, - - // Then VBlank only - { - .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = false, - }, - - // Then SVP + VBlank - { - .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = false, - }, - - // Then SVP + DRR - { - .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - - // Then SVP + SVP - { - .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - - // Then DRR + VActive - { - .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - - // Then DRR + DRR - { - .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - - // Finally VBlank, but allow base clocks for latency to increase - /* - { - .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - */ -}; - -static const int 
base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / sizeof(struct dml2_pmo_pstate_strategy); - -static const struct dml2_pmo_pstate_strategy base_strategy_list_3_display[] = { - // All VActive - { - .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - - // VActive + 1 VBlank - { - .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na }, - .allow_state_increase = false, - }, - - // All VBlank - { - .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na }, - .allow_state_increase = false, - }, - - // All DRR - { - .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - - // All VBlank, with state increase allowed - /* - { - .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na }, - .allow_state_increase = true, - }, - */ -}; - -static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / sizeof(struct dml2_pmo_pstate_strategy); - -static const struct dml2_pmo_pstate_strategy base_strategy_list_4_display[] = { - // All VActive - { - .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive }, - .allow_state_increase = true, - }, - - // VActive + 1 VBlank - { - .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank }, - .allow_state_increase = false, - }, - - // All Vblank - { - .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank }, - .allow_state_increase = false, - }, - - // All DRR - { - .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr }, - .allow_state_increase = true, - }, - - // All VBlank, with state increase allowed - /* - { - .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank }, - .allow_state_increase = true, - }, - */ -}; - -static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / sizeof(struct dml2_pmo_pstate_strategy); - - -static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) -{ - bool result = true; - - if (*odm_mode == dml2_odm_mode_auto) { - switch (odms_calculated) { - case 1: - *odm_mode = dml2_odm_mode_bypass; - break; - case 2: - *odm_mode = dml2_odm_mode_combine_2to1; - break; - case 3: - *odm_mode = dml2_odm_mode_combine_3to1; - break; - case 4: - *odm_mode = dml2_odm_mode_combine_4to1; - break; - default: - result = false; - break; - } - } - - if (result) { - if (*odm_mode == dml2_odm_mode_bypass) { - *odm_mode = dml2_odm_mode_combine_2to1; - } else if (*odm_mode == dml2_odm_mode_combine_2to1) { - *odm_mode = dml2_odm_mode_combine_3to1; - } else if (*odm_mode == dml2_odm_mode_combine_3to1) { - *odm_mode = dml2_odm_mode_combine_4to1; - } else { - result = false; - } - } - - return result; -} - -static bool increase_mpc_combine_factor(unsigned int 
*mpc_combine_factor, unsigned int limit) -{ - if (*mpc_combine_factor < limit) { - (*mpc_combine_factor)++; - return true; - } - - return false; -} - -static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index) -{ - unsigned int i, count; - - count = 0; - for (i = 0; i < display_cfg->num_planes; i++) { - if (display_cfg->plane_descriptors[i].stream_index == stream_index) - count++; - } - - return count; -} - -static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, - int free_pipes) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i; - bool result = true; - - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - // For pipes that failed dcc mcache check, we want to increase the pipe count. - // The logic for doing this depends on how many pipes are already being used, - // and whether it's mpcc or odm combine. - if (!in_out->dcc_mcache_supported[i]) { - // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 - if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { - in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = - in_out->cfg_support_info->plane_support_info[i].dpps_used; - // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. - if (free_pipes > 0) { - if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, - pmo->mpc_combine_limit)) { - // We've reached max pipes allocatable to a single plane, so we fail. - result = false; - break; - } else { - // Successfully added another pipe to this failing plane. - free_pipes--; - } - } else { - // No free pipes to add. - result = false; - break; - } - } else { - // If the stream of this plane needs ODM combine, no further optimization can be done. - result = false; - break; - } - } - } - - return result; -} - -bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i, used_pipes, free_pipes, planes_on_stream; - bool result; - - if (in_out->display_config != in_out->optimized_display_cfg) { - memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg)); - } - - // Count number of free pipes, and check if any odm combine is in use. - used_pipes = 0; - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used; - } - free_pipes = pmo->ip_caps->pipe_count - used_pipes; - - // Optimization loop - // The goal here is to add more pipes to any planes - // which are failing mcache admissibility - result = true; - - // The optimization logic depends on whether ODM combine is enabled, and the stream count. - if (in_out->optimized_display_cfg->num_streams > 1 || in_out->instance->options->disable_dyn_odm) { - // If there are multiple streams, we can only optimize mcache failures on planes - // which are not ODM combined. - - result = optimize_dcc_mcache_no_odm(in_out, free_pipes); - } else if (in_out->optimized_display_cfg->num_streams == 1) { - // In single stream cases, we still optimize mcache failures when there's ODM combine with some - // additional logic. 
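// Illustrative numbers for the pipe accounting above: with
// ip_caps->pipe_count = 4 and two planes using one DPP each, used_pipes = 2
// and free_pipes = 2; a plane that fails the mcache check can then take one
// extra pipe (mpcc_combine_factor 1 -> 2), consuming one free pipe per
// retry until free_pipes is exhausted.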
- - if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) { - // If ODM combine is enabled, then the logic is to increase ODM combine factor. - - // Optimization for streams with > 1 ODM combine factor is only supported for single display. - planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0); - - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - // For pipes that failed dcc mcache check, we want to increase the pipe count. - // The logic for doing this depends on how many pipes are already being used, - // and whether it's mpcc or odm combine. - if (!in_out->dcc_mcache_supported[i]) { - // Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream. - if (free_pipes >= planes_on_stream) { - if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode, - in_out->cfg_support_info->plane_support_info[i].dpps_used)) { - result = false; - } else { - break; - } - } else { - result = false; - break; - } - } - } - } else { - // If ODM combine is not enabled, then we can actually use the same logic as before. - - result = optimize_dcc_mcache_no_odm(in_out, free_pipes); - } - } else { - result = true; - } - - return result; -} - -static enum dml2_pstate_method convert_strategy_to_drr_variant(const enum dml2_pstate_method base_strategy) -{ - enum dml2_pstate_method variant_strategy = 0; - - switch (base_strategy) { - case dml2_pstate_method_vactive: - variant_strategy = dml2_pstate_method_fw_vactive_drr; - break; - case dml2_pstate_method_vblank: - variant_strategy = dml2_pstate_method_fw_vblank_drr; - break; - case dml2_pstate_method_fw_svp: - variant_strategy = dml2_pstate_method_fw_svp_drr; - break; - case dml2_pstate_method_fw_vactive_drr: - case dml2_pstate_method_fw_vblank_drr: - case dml2_pstate_method_fw_svp_drr: - case dml2_pstate_method_fw_drr: - case dml2_pstate_method_reserved_hw: - case dml2_pstate_method_reserved_fw: - case dml2_pstate_method_reserved_fw_drr_clamped: - case dml2_pstate_method_reserved_fw_drr_var: - case dml2_pstate_method_count: - case dml2_pstate_method_na: - default: - /* no variant for this mode */ - variant_strategy = base_strategy; - } - - return variant_strategy; -} - -static struct dml2_pmo_pstate_strategy *get_expanded_strategy_list(struct dml2_pmo_init_data *init_data, int stream_count) -{ - struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; - - switch (stream_count) { - case 1: - expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_1_display; - break; - case 2: - expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_2_display; - break; - case 3: - expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_3_display; - break; - case 4: - expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_4_display; - break; - default: - break; - } - - return expanded_strategy_list; -} - -static unsigned int get_num_expanded_strategies( - struct dml2_pmo_init_data *init_data, - int stream_count) -{ - return init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]; -} - -static void insert_strategy_into_expanded_list( - const struct dml2_pmo_pstate_strategy *per_stream_pstate_strategy, - const int stream_count, - struct dml2_pmo_pstate_strategy *expanded_strategy_list, - unsigned int *num_expanded_strategies) -{ - if (expanded_strategy_list && num_expanded_strategies) { - memcpy(&expanded_strategy_list[*num_expanded_strategies], per_stream_pstate_strategy, 
sizeof(struct dml2_pmo_pstate_strategy)); - - (*num_expanded_strategies)++; - } -} - -static void expand_base_strategy( - const struct dml2_pmo_pstate_strategy *base_strategy, - const unsigned int stream_count, - struct dml2_pmo_pstate_strategy *expanded_strategy_list, - unsigned int *num_expanded_strategies) -{ - bool skip_to_next_stream; - bool expanded_strategy_added; - bool skip_iteration; - unsigned int i, j; - unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - unsigned int stream_iteration_indices[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - struct dml2_pmo_pstate_strategy cur_strategy_list = { 0 }; - - /* determine number of displays per method */ - for (i = 0; i < stream_count; i++) { - /* increment the count of the earliest index with the same method */ - for (j = 0; j < stream_count; j++) { - if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { - num_streams_per_method[j] = num_streams_per_method[j] + 1; - break; - } - } - } - - cur_strategy_list.allow_state_increase = base_strategy->allow_state_increase; - - i = 0; - /* uses a while loop instead of recursion to build permutations of base strategy */ - while (stream_iteration_indices[0] < stream_count) { - skip_to_next_stream = false; - expanded_strategy_added = false; - skip_iteration = false; - - /* determine what to do for this iteration */ - if (stream_iteration_indices[i] < stream_count && num_streams_per_method[stream_iteration_indices[i]] != 0) { - /* decrement count and assign method */ - cur_strategy_list.per_stream_pstate_method[i] = base_strategy->per_stream_pstate_method[stream_iteration_indices[i]]; - num_streams_per_method[stream_iteration_indices[i]] -= 1; - - if (i >= stream_count - 1) { - /* insert into strategy list */ - insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, expanded_strategy_list, num_expanded_strategies); - expanded_strategy_added = true; - } else { - /* skip to next stream */ - skip_to_next_stream = true; - } - } else { - skip_iteration = true; - } - - /* prepare for next iteration */ - if (skip_to_next_stream) { - i++; - } else { - /* restore count */ - if (!skip_iteration) { - num_streams_per_method[stream_iteration_indices[i]] += 1; - } - - /* increment iteration count */ - stream_iteration_indices[i]++; - - /* if iterations are complete, or last stream was reached */ - if ((stream_iteration_indices[i] >= stream_count || expanded_strategy_added) && i > 0) { - /* reset per stream index, decrement i */ - stream_iteration_indices[i] = 0; - i--; - - /* restore previous stream's count and increment index */ - num_streams_per_method[stream_iteration_indices[i]] += 1; - stream_iteration_indices[i]++; - } - } - } -} - - -static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_strategy, - const struct dml2_pmo_pstate_strategy *variant_strategy, - const unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS], - const unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS], - const unsigned int stream_count) -{ - bool valid = true; - unsigned int i; - - /* check all restrictions are met */ - for (i = 0; i < stream_count; i++) { - /* vblank + vblank_drr variants are invalid */ - if (base_strategy->per_stream_pstate_method[i] == dml2_pstate_method_vblank && - ((num_streams_per_base_method[i] > 0 && num_streams_per_variant_method[i] > 0) || - num_streams_per_variant_method[i] > 1)) { - valid = false; - break; - } - } - - return valid; -} - -static void expand_variant_strategy( - const struct 
dml2_pmo_pstate_strategy *base_strategy, - const unsigned int stream_count, - const bool should_permute, - struct dml2_pmo_pstate_strategy *expanded_strategy_list, - unsigned int *num_expanded_strategies) -{ - bool variant_found; - unsigned int i, j; - unsigned int method_index; - unsigned int stream_index; - unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - enum dml2_pstate_method per_stream_variant_method[DML2_MAX_PLANES]; - struct dml2_pmo_pstate_strategy variant_strategy = { 0 }; - - /* determine number of displays per method */ - for (i = 0; i < stream_count; i++) { - /* increment the count of the earliest index with the same method */ - for (j = 0; j < stream_count; j++) { - if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { - num_streams_per_method[j] = num_streams_per_method[j] + 1; - break; - } - } - - per_stream_variant_method[i] = convert_strategy_to_drr_variant(base_strategy->per_stream_pstate_method[i]); - } - memcpy(num_streams_per_base_method, num_streams_per_method, sizeof(unsigned int) * PMO_DCN4_MAX_DISPLAYS); - - memcpy(&variant_strategy, base_strategy, sizeof(struct dml2_pmo_pstate_strategy)); - - method_index = 0; - /* uses a while loop instead of recursion to build permutations of base strategy */ - while (num_streams_per_base_method[0] > 0 || method_index != 0) { - if (method_index == stream_count) { - /* construct variant strategy */ - variant_found = false; - stream_index = 0; - - for (i = 0; i < stream_count; i++) { - for (j = 0; j < num_streams_per_base_method[i]; j++) { - variant_strategy.per_stream_pstate_method[stream_index++] = base_strategy->per_stream_pstate_method[i]; - } - - for (j = 0; j < num_streams_per_variant_method[i]; j++) { - variant_strategy.per_stream_pstate_method[stream_index++] = per_stream_variant_method[i]; - if (base_strategy->per_stream_pstate_method[i] != per_stream_variant_method[i]) { - variant_found = true; - } - } - } - - if (variant_found && is_variant_method_valid(base_strategy, &variant_strategy, num_streams_per_base_method, num_streams_per_variant_method, stream_count)) { - if (should_permute) { - /* permutations are permitted, proceed to expand */ - expand_base_strategy(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies); - } else { - /* no permutations allowed, so add to list now */ - insert_strategy_into_expanded_list(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies); - } - } - - /* rollback to earliest method with bases remaining */ - for (method_index = stream_count - 1; method_index > 0; method_index--) { - if (num_streams_per_base_method[method_index]) { - /* bases remaining */ - break; - } else { - /* reset counters */ - num_streams_per_base_method[method_index] = num_streams_per_method[method_index]; - num_streams_per_variant_method[method_index] = 0; - } - } - } - - if (num_streams_per_base_method[method_index]) { - num_streams_per_base_method[method_index]--; - num_streams_per_variant_method[method_index]++; - - method_index++; - } else if (method_index != 0) { - method_index++; - } - } -} - -void pmo_dcn4_fams2_expand_base_pstate_strategies( - const struct dml2_pmo_pstate_strategy *base_strategies_list, - const unsigned int num_base_strategies, - const unsigned int stream_count, - struct dml2_pmo_pstate_strategy *expanded_strategy_list, - 
unsigned int *num_expanded_strategies) -{ - unsigned int i; - - /* expand every explicit base strategy (except all DRR) */ - for (i = 0; i < num_base_strategies; i++) { - expand_base_strategy(&base_strategies_list[i], stream_count, expanded_strategy_list, num_expanded_strategies); - expand_variant_strategy(&base_strategies_list[i], stream_count, true, expanded_strategy_list, num_expanded_strategies); - } -} - -bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) -{ - int i = 0; - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int base_list_size = 0; - const struct dml2_pmo_pstate_strategy *base_list = NULL; - unsigned int *expanded_list_size = NULL; - struct dml2_pmo_pstate_strategy *expanded_list = NULL; - - pmo->soc_bb = in_out->soc_bb; - pmo->ip_caps = in_out->ip_caps; - pmo->mpc_combine_limit = 2; - pmo->odm_combine_limit = 4; - pmo->mcg_clock_table_size = in_out->mcg_clock_table_size; - - pmo->fams_params.v2.subvp.refresh_rate_limit_max = 175; - pmo->fams_params.v2.subvp.refresh_rate_limit_min = 0; - pmo->fams_params.v2.drr.refresh_rate_limit_max = 1000; - pmo->fams_params.v2.drr.refresh_rate_limit_min = 119; - - pmo->options = in_out->options; - - /* generate permutations of p-state configs from base strategy list */ - for (i = 0; i < PMO_DCN4_MAX_DISPLAYS; i++) { - switch (i+1) { - case 1: - if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { - base_list = pmo->options->override_strategy_lists[i]; - base_list_size = pmo->options->num_override_strategies_per_list[i]; - } else { - base_list = base_strategy_list_1_display; - base_list_size = base_strategy_list_1_display_size; - } - - expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; - expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_1_display; - - break; - case 2: - if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { - base_list = pmo->options->override_strategy_lists[i]; - base_list_size = pmo->options->num_override_strategies_per_list[i]; - } else { - base_list = base_strategy_list_2_display; - base_list_size = base_strategy_list_2_display_size; - } - - expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; - expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_2_display; - - break; - case 3: - if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { - base_list = pmo->options->override_strategy_lists[i]; - base_list_size = pmo->options->num_override_strategies_per_list[i]; - } else { - base_list = base_strategy_list_3_display; - base_list_size = base_strategy_list_3_display_size; - } - - expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; - expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_3_display; - - break; - case 4: - if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { - base_list = pmo->options->override_strategy_lists[i]; - base_list_size = pmo->options->num_override_strategies_per_list[i]; - } else { - base_list = base_strategy_list_4_display; - base_list_size = base_strategy_list_4_display_size; - } - - expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; - expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_4_display; - - break; - } - - DML_ASSERT(base_list_size <= PMO_DCN4_MAX_BASE_STRATEGIES); - - /* populate list */ - 
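/* For example, with stream_count == 2 a base entry of
 * { vactive, vblank } expands into both orderings ({ vactive, vblank } and
 * { vblank, vactive }), and expand_variant_strategy() additionally emits
 * DRR variants such as { fw_vactive_drr, fw_vblank_drr } where
 * is_variant_method_valid() permits them. */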
pmo_dcn4_fams2_expand_base_pstate_strategies( - base_list, - base_list_size, - i + 1, - expanded_list, - expanded_list_size); - } - - return true; -} - -static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator) -{ - /* - * Htotal, Hblank start/end, and Hsync start/end all must be divisible - * in order for the horizontal timing params to be considered divisible - * by 2. Hsync start is always 0. - */ - unsigned long h_blank_start = timing->h_total - timing->h_front_porch; - - return (timing->h_total % denominator == 0) && - (h_blank_start % denominator == 0) && - (timing->h_blank_end % denominator == 0) && - (timing->h_sync_width % denominator == 0); -} - -static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type) -{ - switch (encoder_type) { - case dml2_dp: - case dml2_edp: - case dml2_dp2p0: - case dml2_none: - return true; - case dml2_hdmi: - case dml2_hdmifrl: - default: - return false; - } -} - -bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) -{ - unsigned int i; - const struct dml2_display_cfg *display_config = - &in_out->base_display_config->display_config; - const struct dml2_core_mode_support_result *mode_support_result = - &in_out->base_display_config->mode_support_result; - struct dml2_optimization_stage4_state *state = - &in_out->base_display_config->stage4; - - if (in_out->instance->options->disable_dyn_odm || - (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) - return false; - - for (i = 0; i < display_config->num_planes; i++) - /* - * vmin optimization is required to be seamlessly switched off - * at any time when the new configuration is no longer - * supported. However, switching from ODM combine to MPC combine - * is not always seamless. When there are not enough free pipes, - * we will have to use the same secondary OPP heads as secondary - * DPP pipes in MPC combine in the new state. This transition is - * expected to cause glitches. To avoid the transition, we only - * allow vmin optimization if the stream's base configuration - * doesn't require MPC combine. This condition checks if MPC - * combine is enabled. If so, do not optimize the stream. - */ - if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && - mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - state->unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; - - for (i = 0; i < display_config->num_streams; i++) { - if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - state->unoptimizable_streams[i] = true; - else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && - in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - state->unoptimizable_streams[i] = true; - /* - * ODM Combine requires horizontal timing divisible by 2 so each - * ODM segment has the same size. - */ - else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - state->unoptimizable_streams[i] = true; - /* - * Our hardware supports seamless ODM transitions for DP encoders - * only. 
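 * (HDMI and HDMI-FRL streams therefore fail the is_dp_encoder() check
 * below and are marked unoptimizable.)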
- */ - else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - state->unoptimizable_streams[i] = true; - } - - state->performed = true; - - return true; -} - -bool pmo_dcn4_fams2_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) -{ - bool is_vmin = true; - - if (in_out->vmin_limits->dispclk_khz > 0 && - in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) - is_vmin = false; - - return is_vmin; -} - -static int find_highest_odm_load_stream_index( - const struct dml2_display_cfg *display_config, - const struct dml2_core_mode_support_result *mode_support_result) -{ - unsigned int i; - int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; - - for (i = 0; i < display_config->num_streams; i++) { - if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0) - odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz - / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; - else - odm_load = 0; - - if (odm_load > highest_odm_load) { - highest_odm_load_index = i; - highest_odm_load = odm_load; - } - } - - return highest_odm_load_index; -} - -bool pmo_dcn4_fams2_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) -{ - int stream_index; - const struct dml2_display_cfg *display_config = - &in_out->base_display_config->display_config; - const struct dml2_core_mode_support_result *mode_support_result = - &in_out->base_display_config->mode_support_result; - unsigned int odms_used; - struct dml2_stream_parameters *stream_descriptor; - bool optimizable = false; - - /* - * highest odm load stream must be optimizable to continue as dispclk is - * bounded by it. - */ - stream_index = find_highest_odm_load_stream_index(display_config, - mode_support_result); - - if (stream_index < 0 || - in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) - return false; - - odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; - if ((int)odms_used >= in_out->instance->odm_combine_limit) - return false; - - memcpy(in_out->optimized_display_config, - in_out->base_display_config, - sizeof(struct display_configuation_with_meta)); - - stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; - while (!optimizable && increase_odm_combine_factor( - &stream_descriptor->overrides.odm_mode, - odms_used)) { - switch (stream_descriptor->overrides.odm_mode) { - case dml2_odm_mode_combine_2to1: - optimizable = true; - break; - case dml2_odm_mode_combine_3to1: - /* - * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of - * actual pixel rate. Therefore horizontal timing must - * be divisible by 4. - */ - if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { - /* - * DSC h slice count must be divisible - * by 3. - */ - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) - optimizable = true; - } else { - optimizable = true; - } - } - break; - case dml2_odm_mode_combine_4to1: - /* - * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of - * actual pixel rate. Therefore horizontal timing must - * be divisible by 4. 
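 * For example, h_total = 4400 is divisible by 4 and can be considered
 * for 4:1, while h_total = 4398 cannot.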
- */ - if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { - /* - * DSC h slice count must be divisible - * by 4. - */ - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0) - optimizable = true; - } else { - optimizable = true; - } - } - break; - case dml2_odm_mode_auto: - case dml2_odm_mode_bypass: - case dml2_odm_mode_split_1to2: - case dml2_odm_mode_mso_1to2: - case dml2_odm_mode_mso_1to4: - default: - break; - } - } - - return optimizable; -} - -static void set_bit_in_bitfield(unsigned int *bit_field, unsigned int bit_offset) -{ - *bit_field = *bit_field | (0x1 << bit_offset); -} - -static bool is_bit_set_in_bitfield(unsigned int bit_field, unsigned int bit_offset) -{ - if (bit_field & (0x1 << bit_offset)) - return true; - - return false; -} - -static void build_synchronized_timing_groups( - struct dml2_pmo_instance *pmo, - struct display_configuation_with_meta *display_config) -{ - unsigned int i, j; - struct dml2_timing_cfg *master_timing; - - unsigned int stream_mapped_mask = 0; - unsigned int num_timing_groups = 0; - unsigned int timing_group_idx = 0; - struct dml2_pmo_scratch *s = &pmo->scratch; - - /* clear all group masks */ - memset(s->pmo_dcn4.synchronized_timing_group_masks, 0, sizeof(s->pmo_dcn4.synchronized_timing_group_masks)); - memset(s->pmo_dcn4.group_is_drr_enabled, 0, sizeof(s->pmo_dcn4.group_is_drr_enabled)); - memset(s->pmo_dcn4.group_is_drr_active, 0, sizeof(s->pmo_dcn4.group_is_drr_active)); - memset(s->pmo_dcn4.group_line_time_us, 0, sizeof(s->pmo_dcn4.group_line_time_us)); - s->pmo_dcn4.num_timing_groups = 0; - - for (i = 0; i < display_config->display_config.num_streams; i++) { - master_timing = &display_config->display_config.stream_descriptors[i].timing; - - /* only need to build a group if this stream is not in a group already */ - if (is_bit_set_in_bitfield(stream_mapped_mask, i)) { - continue; - } - set_bit_in_bitfield(&stream_mapped_mask, i); - timing_group_idx = num_timing_groups; - num_timing_groups++; - - /* trivially set default timing group to itself */ - set_bit_in_bitfield(&s->pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i); - s->pmo_dcn4.group_line_time_us[timing_group_idx] = (double)master_timing->h_total / master_timing->pixel_clock_khz * 1000.0; - - /* if drr is in use, timing is not synchronizable */ - if (master_timing->drr_config.enabled) { - s->pmo_dcn4.group_is_drr_enabled[timing_group_idx] = true; - s->pmo_dcn4.group_is_drr_active[timing_group_idx] = !master_timing->drr_config.disallowed && - (master_timing->drr_config.drr_active_fixed || master_timing->drr_config.drr_active_variable); - continue; - } - - /* find synchronizable timing groups */ - for (j = i + 1; j < display_config->display_config.num_streams; j++) { - if (memcmp(master_timing, - &display_config->display_config.stream_descriptors[j].timing, - sizeof(struct dml2_timing_cfg)) == 0) { - set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j); - set_bit_in_bitfield(&stream_mapped_mask, j); - } - } - } - - s->pmo_dcn4.num_timing_groups = num_timing_groups; -} - -static bool all_timings_support_vactive(const struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_config, - unsigned int mask) -{ - unsigned int i; - bool valid = true; - - // Create a remap array to enable simple iteration through only masked stream 
indices - for (i = 0; i < display_config->display_config.num_streams; i++) { - if (is_bit_set_in_bitfield(mask, i)) { - /* check if stream has enough vactive margin */ - valid &= is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.stream_vactive_capability_mask, i); - } - } - - return valid; -} - -static bool all_timings_support_vblank(const struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_config, - unsigned int mask) -{ - unsigned int i; - - bool synchronizable = true; - - /* find first vblank stream index and compare the timing group mask */ - for (i = 0; i < display_config->display_config.num_streams; i++) { - if (is_bit_set_in_bitfield(mask, i)) { - if (mask != pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[i]) { - /* vblank streams are not synchronizable */ - synchronizable = false; - } - break; - } - } - - return synchronizable; -} - -static unsigned int calc_svp_microschedule(const struct dml2_pstate_meta *pstate_meta) -{ - return pstate_meta->contention_delay_otg_vlines + - pstate_meta->method_subvp.programming_delay_otg_vlines + - pstate_meta->method_subvp.phantom_vtotal + - pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + - pstate_meta->blackout_otg_vlines; -} - -static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_config, - unsigned int mask) -{ - unsigned int i; - for (i = 0; i < DML2_MAX_PLANES; i++) { - const struct dml2_stream_parameters *stream_descriptor; - const struct dml2_pstate_meta *stream_pstate_meta; - - if (is_bit_set_in_bitfield(mask, i)) { - stream_descriptor = &display_config->display_config.stream_descriptors[i]; - stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[i]; - - if (!stream_descriptor->timing.drr_config.enabled) - return false; - - /* cannot support required vtotal */ - if (stream_pstate_meta->method_drr.stretched_vtotal > stream_pstate_meta->max_vtotal) { - return false; - } - - /* check refresh rate is within bounds */ - if (stream_pstate_meta->nom_refresh_rate_hz < pmo->fams_params.v2.drr.refresh_rate_limit_min || - stream_pstate_meta->nom_refresh_rate_hz > pmo->fams_params.v2.drr.refresh_rate_limit_max) { - return false; - } - - /* check required stretch is allowed */ - if (stream_descriptor->timing.drr_config.max_instant_vtotal_delta > 0 && - stream_pstate_meta->method_drr.stretched_vtotal - stream_pstate_meta->nom_vtotal > stream_descriptor->timing.drr_config.max_instant_vtotal_delta) { - return false; - } - } - } - - return true; -} - -static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_config, - unsigned int mask) -{ - const struct dml2_stream_parameters *stream_descriptor; - const struct dml2_plane_parameters *plane_descriptor; - const struct dml2_pstate_meta *stream_pstate_meta; - unsigned int microschedule_vlines; - unsigned int i; - unsigned int mcaches_per_plane; - unsigned int total_mcaches_required = 0; - - unsigned int num_planes_per_stream[DML2_MAX_PLANES] = { 0 }; - - /* confirm the timing is not a centered timing */ - for (i = 0; i < display_config->display_config.num_planes; i++) { - plane_descriptor = &display_config->display_config.plane_descriptors[i]; - mcaches_per_plane = 0; - - if (plane_descriptor->surface.dcc.enable) { - mcaches_per_plane += display_config->stage2.mcache_allocations[i].num_mcaches_plane0 + - display_config->stage2.mcache_allocations[i].num_mcaches_plane1 - - 
(display_config->stage2.mcache_allocations[i].last_slice_sharing.plane0_plane1 ? 1 : 0); - } - - if (is_bit_set_in_bitfield(mask, (unsigned char)plane_descriptor->stream_index)) { - num_planes_per_stream[plane_descriptor->stream_index]++; - - /* check recout height covers entire otg vactive, and single plane */ - if (num_planes_per_stream[plane_descriptor->stream_index] > 1 || - !plane_descriptor->composition.rect_out_height_spans_vactive || - plane_descriptor->composition.rotation_angle != dml2_rotation_0) { - return false; - } - - /* phantom requires same number of mcaches as main */ - if (plane_descriptor->surface.dcc.enable) { - mcaches_per_plane *= 2; - } - } - total_mcaches_required += mcaches_per_plane; - } - - if (total_mcaches_required > pmo->soc_bb->num_dcc_mcaches) { - /* too many mcaches required */ - return false; - } - - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(mask, i)) { - stream_descriptor = &display_config->display_config.stream_descriptors[i]; - stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[i]; - - if (stream_descriptor->overrides.disable_subvp) { - return false; - } - - microschedule_vlines = calc_svp_microschedule(&pmo->scratch.pmo_dcn4.stream_pstate_meta[i]); - - /* block if using an interlaced timing */ - if (stream_descriptor->timing.interlaced) { - return false; - } - - /* 1) svp main stream's vactive must be able to fit the microschedule - * 2) refresh rate must be within the allowed bounds - */ - if (microschedule_vlines >= stream_descriptor->timing.v_active || - (stream_pstate_meta->nom_refresh_rate_hz < pmo->fams_params.v2.subvp.refresh_rate_limit_min || - stream_pstate_meta->nom_refresh_rate_hz > pmo->fams_params.v2.subvp.refresh_rate_limit_max)) { - return false; - } - } - } - - return true; -} - -static void insert_into_candidate_list(const struct dml2_pmo_pstate_strategy *pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) -{ - scratch->pmo_dcn4.pstate_strategy_candidates[scratch->pmo_dcn4.num_pstate_candidates] = *pstate_strategy; - scratch->pmo_dcn4.num_pstate_candidates++; -} - -static enum dml2_pstate_method uclk_pstate_strategy_override_to_pstate_method(const enum dml2_uclk_pstate_change_strategy override_strategy) -{ - enum dml2_pstate_method method = dml2_pstate_method_na; - - switch (override_strategy) { - case dml2_uclk_pstate_change_strategy_force_vactive: - method = dml2_pstate_method_vactive; - break; - case dml2_uclk_pstate_change_strategy_force_vblank: - method = dml2_pstate_method_vblank; - break; - case dml2_uclk_pstate_change_strategy_force_drr: - method = dml2_pstate_method_fw_drr; - break; - case dml2_uclk_pstate_change_strategy_force_mall_svp: - method = dml2_pstate_method_fw_svp; - break; - case dml2_uclk_pstate_change_strategy_force_mall_full_frame: - case dml2_uclk_pstate_change_strategy_auto: - default: - method = dml2_pstate_method_na; - } - - return method; -} - -static enum dml2_uclk_pstate_change_strategy pstate_method_to_uclk_pstate_strategy_override(const enum dml2_pstate_method method) -{ - enum dml2_uclk_pstate_change_strategy override_strategy = dml2_uclk_pstate_change_strategy_auto; - - switch (method) { - case dml2_pstate_method_vactive: - case dml2_pstate_method_fw_vactive_drr: - override_strategy = dml2_uclk_pstate_change_strategy_force_vactive; - break; - case dml2_pstate_method_vblank: - case dml2_pstate_method_fw_vblank_drr: - override_strategy = dml2_uclk_pstate_change_strategy_force_vblank; - break; - case dml2_pstate_method_fw_svp: - case 
dml2_pstate_method_fw_svp_drr: - override_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; - break; - case dml2_pstate_method_fw_drr: - override_strategy = dml2_uclk_pstate_change_strategy_force_drr; - break; - case dml2_pstate_method_reserved_hw: - case dml2_pstate_method_reserved_fw: - case dml2_pstate_method_reserved_fw_drr_clamped: - case dml2_pstate_method_reserved_fw_drr_var: - case dml2_pstate_method_count: - case dml2_pstate_method_na: - default: - override_strategy = dml2_uclk_pstate_change_strategy_auto; - } - - return override_strategy; -} - -static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pstate_method method) -{ - unsigned int i; - - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(plane_mask, i)) { - if (display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto && - display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != pstate_method_to_uclk_pstate_strategy_override(method)) - return false; - } - } - - return true; -} - -static void build_method_scheduling_params( - struct dml2_pstate_per_method_common_meta *stream_method_pstate_meta, - struct dml2_pstate_meta *stream_pstate_meta) -{ - stream_method_pstate_meta->allow_time_us = - (double)((int)stream_method_pstate_meta->allow_end_otg_vline - (int)stream_method_pstate_meta->allow_start_otg_vline) * - stream_pstate_meta->otg_vline_time_us; - if (stream_method_pstate_meta->allow_time_us >= stream_method_pstate_meta->period_us) { - /* when allow wave overlaps an entire frame, it is always schedulable (DRR can do this)*/ - stream_method_pstate_meta->disallow_time_us = 0.0; - } else { - stream_method_pstate_meta->disallow_time_us = - stream_method_pstate_meta->period_us - stream_method_pstate_meta->allow_time_us; - } -} - -static struct dml2_pstate_per_method_common_meta *get_per_method_common_meta( - struct dml2_pmo_instance *pmo, - enum dml2_pstate_method stream_pstate_method, - int stream_idx) -{ - struct dml2_pstate_per_method_common_meta *stream_method_pstate_meta = NULL; - - switch (stream_pstate_method) { - case dml2_pstate_method_vactive: - case dml2_pstate_method_fw_vactive_drr: - stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_vactive.common; - break; - case dml2_pstate_method_vblank: - case dml2_pstate_method_fw_vblank_drr: - stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_vblank.common; - break; - case dml2_pstate_method_fw_svp: - case dml2_pstate_method_fw_svp_drr: - stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_subvp.common; - break; - case dml2_pstate_method_fw_drr: - stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_drr.common; - break; - case dml2_pstate_method_reserved_hw: - case dml2_pstate_method_reserved_fw: - case dml2_pstate_method_reserved_fw_drr_clamped: - case dml2_pstate_method_reserved_fw_drr_var: - case dml2_pstate_method_count: - case dml2_pstate_method_na: - default: - stream_method_pstate_meta = NULL; - } - - return stream_method_pstate_meta; -} - -static bool is_timing_group_schedulable( - struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, - const struct dml2_pmo_pstate_strategy *pstate_strategy, - const unsigned int timing_group_idx, - struct dml2_pstate_per_method_common_meta 
*group_pstate_meta) -{ - unsigned int i; - struct dml2_pstate_per_method_common_meta *stream_method_pstate_meta; - - unsigned int base_stream_idx = 0; - struct dml2_pmo_scratch *s = &pmo->scratch; - - /* find base stream idx */ - for (base_stream_idx = 0; base_stream_idx < display_cfg->display_config.num_streams; base_stream_idx++) { - if (is_bit_set_in_bitfield(s->pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], base_stream_idx)) { - /* master stream found */ - break; - } - } - - /* init allow start and end lines for timing group */ - stream_method_pstate_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[base_stream_idx], base_stream_idx); - if (!stream_method_pstate_meta) - return false; - - group_pstate_meta->allow_start_otg_vline = stream_method_pstate_meta->allow_start_otg_vline; - group_pstate_meta->allow_end_otg_vline = stream_method_pstate_meta->allow_end_otg_vline; - group_pstate_meta->period_us = stream_method_pstate_meta->period_us; - for (i = base_stream_idx + 1; i < display_cfg->display_config.num_streams; i++) { - if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { - stream_method_pstate_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); - if (!stream_method_pstate_meta) - continue; - - if (group_pstate_meta->allow_start_otg_vline < stream_method_pstate_meta->allow_start_otg_vline) { - /* set group allow start to larger otg vline */ - group_pstate_meta->allow_start_otg_vline = stream_method_pstate_meta->allow_start_otg_vline; - } - - if (group_pstate_meta->allow_end_otg_vline > stream_method_pstate_meta->allow_end_otg_vline) { - /* set group allow end to smaller otg vline */ - group_pstate_meta->allow_end_otg_vline = stream_method_pstate_meta->allow_end_otg_vline; - } - - /* check waveform still has positive width */ - if (group_pstate_meta->allow_start_otg_vline >= group_pstate_meta->allow_end_otg_vline) { - /* timing group is not schedulable */ - return false; - } - } - } - - /* calculate the rest of the meta */ - build_method_scheduling_params(group_pstate_meta, &pmo->scratch.pmo_dcn4.stream_pstate_meta[base_stream_idx]); - - return group_pstate_meta->allow_time_us > 0.0 && - group_pstate_meta->disallow_time_us < pmo->ip_caps->fams2.max_allow_delay_us; -} - -static bool is_config_schedulable( - struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, - const struct dml2_pmo_pstate_strategy *pstate_strategy) -{ - unsigned int i, j; - bool schedulable; - struct dml2_pmo_scratch *s = &pmo->scratch; - - double max_allow_delay_us = 0.0; - - memset(s->pmo_dcn4.group_common_pstate_meta, 0, sizeof(s->pmo_dcn4.group_common_pstate_meta)); - memset(s->pmo_dcn4.sorted_group_gtl_disallow_index, 0, sizeof(unsigned int) * DML2_MAX_PLANES); - - /* search for a general solution to the schedule */ - - /* STAGE 0: Early return for special cases */ - if (display_cfg->display_config.num_streams == 0) { - return true; - } - - /* STAGE 1: confirm allow waves overlap for synchronizable streams */ - schedulable = true; - for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { - s->pmo_dcn4.sorted_group_gtl_disallow_index[i] = i; - s->pmo_dcn4.sorted_group_gtl_period_index[i] = i; - if (!is_timing_group_schedulable(pmo, display_cfg, pstate_strategy, i, &s->pmo_dcn4.group_common_pstate_meta[i])) { - /* synchronized timing group was not schedulable */ - schedulable = false; - break; - } - max_allow_delay_us += 
s->pmo_dcn4.group_common_pstate_meta[i].disallow_time_us; - } - - if ((schedulable && s->pmo_dcn4.num_timing_groups <= 1) || !schedulable) { - /* 1. the only timing group was schedulable, so early pass - * 2. one of the timing groups was not schedulable, so early fail */ - return schedulable; - } - - /* STAGE 2: Check allow can't be masked entirely by other disallows */ - schedulable = true; - - /* sort disallow times from greatest to least */ - for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { - bool swapped = false; - - for (j = 0; j < s->pmo_dcn4.num_timing_groups - 1; j++) { - double j_disallow_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j]].disallow_time_us; - double jp1_disallow_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]].disallow_time_us; - if (j_disallow_us < jp1_disallow_us) { - /* swap as A < B */ - swap(s->pmo_dcn4.sorted_group_gtl_disallow_index[j], - s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]); - swapped = true; - } - } - - /* sorted, exit early */ - if (!swapped) - break; - } - - /* Check worst case disallow region occurs in the middle of allow for the - * other display, or when >2 streams continue to halve the remaining allow time. - */ - for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { - if (s->pmo_dcn4.group_common_pstate_meta[i].disallow_time_us <= 0.0) { - /* this timing group always allows */ - continue; - } - - double max_allow_time_us = s->pmo_dcn4.group_common_pstate_meta[i].allow_time_us; - for (j = 0; j < s->pmo_dcn4.num_timing_groups; j++) { - unsigned int sorted_j = s->pmo_dcn4.sorted_group_gtl_disallow_index[j]; - /* stream can't overlap itself */ - if (i != sorted_j && s->pmo_dcn4.group_common_pstate_meta[sorted_j].disallow_time_us > 0.0) { - max_allow_time_us = math_min2( - s->pmo_dcn4.group_common_pstate_meta[sorted_j].allow_time_us, - (max_allow_time_us - s->pmo_dcn4.group_common_pstate_meta[sorted_j].disallow_time_us) / 2); - - if (max_allow_time_us < 0.0) { - /* failed exit early */ - break; - } - } - } - - if (max_allow_time_us <= 0.0) { - /* not enough time for microschedule in the worst case */ - schedulable = false; - break; - } - } - - if (schedulable && max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) { - return true; - } - - /* STAGE 3: check larger allow can fit period of all other streams */ - schedulable = true; - - /* sort periods from greatest to least */ - for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { - bool swapped = false; - - for (j = 0; j < s->pmo_dcn4.num_timing_groups - 1; j++) { - double j_period_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j]].period_us; - double jp1_period_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]].period_us; - if (j_period_us < jp1_period_us) { - /* swap as A < B */ - swap(s->pmo_dcn4.sorted_group_gtl_period_index[j], - s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]); - swapped = true; - } - } - - /* sorted, exit early */ - if (!swapped) - break; - } - - /* check larger allow can fit period of all other streams */ - for (i = 0; i < s->pmo_dcn4.num_timing_groups - 1; i++) { - unsigned int sorted_i = s->pmo_dcn4.sorted_group_gtl_period_index[i]; - unsigned int sorted_ip1 = s->pmo_dcn4.sorted_group_gtl_period_index[i + 1]; - - if (s->pmo_dcn4.group_common_pstate_meta[sorted_i].allow_time_us < s->pmo_dcn4.group_common_pstate_meta[sorted_ip1].period_us || - (s->pmo_dcn4.group_is_drr_enabled[sorted_ip1] && 
s->pmo_dcn4.group_is_drr_active[sorted_ip1])) { - schedulable = false; - break; - } - } - - if (schedulable && max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) { - return true; - } - - /* STAGE 4: When using HW exclusive modes, check disallow alignments are within allowed threshold */ - if (s->pmo_dcn4.num_timing_groups == 2 && - !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[0]) && - !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[1])) { - double period_ratio; - double max_shift_us; - double shift_per_period; - - /* default period_0 > period_1 */ - unsigned int lrg_idx = 0; - unsigned int sml_idx = 1; - if (s->pmo_dcn4.group_common_pstate_meta[0].period_us < s->pmo_dcn4.group_common_pstate_meta[1].period_us) { - /* period_0 < period_1 */ - lrg_idx = 1; - sml_idx = 0; - } - period_ratio = s->pmo_dcn4.group_common_pstate_meta[lrg_idx].period_us / s->pmo_dcn4.group_common_pstate_meta[sml_idx].period_us; - shift_per_period = s->pmo_dcn4.group_common_pstate_meta[sml_idx].period_us * (period_ratio - math_floor(period_ratio)); - max_shift_us = s->pmo_dcn4.group_common_pstate_meta[lrg_idx].disallow_time_us - s->pmo_dcn4.group_common_pstate_meta[sml_idx].allow_time_us; - max_allow_delay_us = max_shift_us / shift_per_period * s->pmo_dcn4.group_common_pstate_meta[lrg_idx].period_us; - - if (shift_per_period > 0.0 && - shift_per_period < s->pmo_dcn4.group_common_pstate_meta[lrg_idx].allow_time_us + s->pmo_dcn4.group_common_pstate_meta[sml_idx].allow_time_us && - max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) { - schedulable = true; - } - } - - return schedulable; -} - -static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, - const enum dml2_pstate_method stream_pstate_method, - unsigned int stream_index) -{ - const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[stream_index]; - bool strategy_matches_drr_requirements = true; - - /* check if strategy is compatible with stream drr capability and strategy */ - if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && - display_cfg->display_config.num_streams > 1 && - stream_descriptor->timing.drr_config.enabled && - (stream_descriptor->timing.drr_config.drr_active_fixed || stream_descriptor->timing.drr_config.drr_active_variable)) { - /* DRR is active, so config may become unschedulable */ - strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && - is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && - stream_descriptor->timing.drr_config.enabled && - stream_descriptor->timing.drr_config.drr_active_variable) { - /* DRR is variable, fw exclusive methods require DRR to be clamped */ - strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && - pmo->options->disable_drr_var_when_var_active && - stream_descriptor->timing.drr_config.enabled && - stream_descriptor->timing.drr_config.drr_active_variable) { - /* DRR variable is active, but policy blocks DRR for p-state when this happens */ - strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && - (pmo->options->disable_drr_var || - !stream_descriptor->timing.drr_config.enabled || - 
stream_descriptor->timing.drr_config.disallowed)) { - /* DRR variable strategies are disallowed due to settings or policy */ - strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method) && - (pmo->options->disable_drr_clamped || - (!stream_descriptor->timing.drr_config.enabled || - (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable)) || - (pmo->options->disable_drr_clamped_when_var_active && - stream_descriptor->timing.drr_config.enabled && - stream_descriptor->timing.drr_config.drr_active_variable))) { - /* DRR fixed strategies are disallowed due to settings or policy */ - strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && - pmo->options->disable_fams2) { - /* FW modes require FAMS2 */ - strategy_matches_drr_requirements = false; - } - - return strategy_matches_drr_requirements; -} - -static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, - const struct dml2_pmo_pstate_strategy *pstate_strategy) -{ - struct dml2_pmo_scratch *s = &pmo->scratch; - - unsigned int stream_index = 0; - - unsigned int svp_count = 0; - unsigned int svp_stream_mask = 0; - unsigned int drr_count = 0; - unsigned int drr_stream_mask = 0; - unsigned int vactive_count = 0; - unsigned int vactive_stream_mask = 0; - unsigned int vblank_count = 0; - unsigned int vblank_stream_mask = 0; - - bool strategy_matches_forced_requirements = true; - bool strategy_matches_drr_requirements = true; - - // Tabulate everything - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - - if (!all_planes_match_method(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], - pstate_strategy->per_stream_pstate_method[stream_index])) { - strategy_matches_forced_requirements = false; - break; - } - - strategy_matches_drr_requirements &= - stream_matches_drr_policy(pmo, display_cfg, pstate_strategy->per_stream_pstate_method[stream_index], stream_index); - - if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp || - pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { - svp_count++; - set_bit_in_bitfield(&svp_stream_mask, stream_index); - } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { - drr_count++; - set_bit_in_bitfield(&drr_stream_mask, stream_index); - } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive || - pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { - vactive_count++; - set_bit_in_bitfield(&vactive_stream_mask, stream_index); - } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank || - pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { - vblank_count++; - set_bit_in_bitfield(&vblank_stream_mask, stream_index); - } - } - - if (!strategy_matches_forced_requirements || !strategy_matches_drr_requirements) - return false; - - if (vactive_count > 0 && !all_timings_support_vactive(pmo, display_cfg, vactive_stream_mask)) - return false; - - if (vblank_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vblank(pmo, display_cfg, vblank_stream_mask))) - return false; - - if 
(drr_count > 0 && (pmo->options->disable_drr_var || !all_timings_support_drr(pmo, display_cfg, drr_stream_mask))) - return false; - - if (svp_count > 0 && (pmo->options->disable_svp || !all_timings_support_svp(pmo, display_cfg, svp_stream_mask))) - return false; - - return is_config_schedulable(pmo, display_cfg, pstate_strategy); -} - -static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) -{ - unsigned int i; - int min_vactive_margin_us = 0xFFFFFFF; - - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(plane_mask, i)) { - if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active < min_vactive_margin_us) - min_vactive_margin_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active; - } - } - - return min_vactive_margin_us; -} - -static unsigned int get_vactive_det_fill_latency_delay_us(const struct display_configuation_with_meta *display_cfg, int plane_mask) -{ - unsigned char i; - unsigned int max_vactive_fill_us = 0; - - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(plane_mask, i)) { - if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_vactive_det_fill_delay_us > max_vactive_fill_us) - max_vactive_fill_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_vactive_det_fill_delay_us; - } - } - - return max_vactive_fill_us; -} - -static void build_pstate_meta_per_stream(struct dml2_pmo_instance *pmo, - struct display_configuation_with_meta *display_config, - int stream_index) -{ - const struct dml2_ip_capabilities *ip_caps = pmo->ip_caps; - const struct dml2_stream_parameters *stream_descriptor = &display_config->display_config.stream_descriptors[stream_index]; - const struct core_stream_support_info *stream_info = &display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index]; - const struct dml2_timing_cfg *timing = &stream_descriptor->timing; - struct dml2_pstate_meta *stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index]; - - /* worst case all other streams require some programming at the same time, 0 if only 1 stream */ - unsigned int contention_delay_us = (ip_caps->fams2.vertical_interrupt_ack_delay_us + - (unsigned int)math_max3(ip_caps->fams2.subvp_programming_delay_us, ip_caps->fams2.drr_programming_delay_us, ip_caps->fams2.allow_programming_delay_us)) * - (display_config->display_config.num_streams - 1); - - /* common */ - stream_pstate_meta->valid = true; - stream_pstate_meta->otg_vline_time_us = (double)timing->h_total / timing->pixel_clock_khz * 1000.0; - stream_pstate_meta->nom_vtotal = stream_descriptor->timing.vblank_nom + stream_descriptor->timing.v_active; - stream_pstate_meta->nom_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 / - (stream_pstate_meta->nom_vtotal * timing->h_total); - stream_pstate_meta->nom_frame_time_us = - (double)stream_pstate_meta->nom_vtotal * stream_pstate_meta->otg_vline_time_us; - stream_pstate_meta->vblank_start = timing->v_blank_end + timing->v_active; - - if (stream_descriptor->timing.drr_config.enabled == true) { - if (stream_descriptor->timing.drr_config.min_refresh_uhz != 0.0) { - stream_pstate_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz / - ((double)stream_descriptor->timing.drr_config.min_refresh_uhz * stream_descriptor->timing.h_total) * 1e9); - } 
else { - /* assume min of 48Hz */ - stream_pstate_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz / - (48000000.0 * stream_descriptor->timing.h_total) * 1e9); - } - } else { - stream_pstate_meta->max_vtotal = stream_pstate_meta->nom_vtotal; - } - stream_pstate_meta->min_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 / - (stream_pstate_meta->max_vtotal * timing->h_total); - stream_pstate_meta->max_frame_time_us = - (double)stream_pstate_meta->max_vtotal * stream_pstate_meta->otg_vline_time_us; - - stream_pstate_meta->scheduling_delay_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.scheduling_delay_us / stream_pstate_meta->otg_vline_time_us); - stream_pstate_meta->vertical_interrupt_ack_delay_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.vertical_interrupt_ack_delay_us / stream_pstate_meta->otg_vline_time_us); - stream_pstate_meta->contention_delay_otg_vlines = - (unsigned int)math_ceil(contention_delay_us / stream_pstate_meta->otg_vline_time_us); - /* worst case allow to target needs to account for all streams' allow events overlapping, and 1 line for error */ - stream_pstate_meta->allow_to_target_delay_otg_vlines = - (unsigned int)(math_ceil((ip_caps->fams2.vertical_interrupt_ack_delay_us + contention_delay_us + ip_caps->fams2.allow_programming_delay_us) / stream_pstate_meta->otg_vline_time_us)) + 1; - stream_pstate_meta->min_allow_width_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.min_allow_width_us / stream_pstate_meta->otg_vline_time_us); - /* this value should account for urgent latency */ - stream_pstate_meta->blackout_otg_vlines = - (unsigned int)math_ceil(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us / - stream_pstate_meta->otg_vline_time_us); - - /* scheduling params should be built based on the worst case for allow_time:disallow_time */ - - /* vactive */ - if (display_config->display_config.num_streams == 1) { - /* for single stream, guarantee at least an instant of allow */ - stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = (unsigned int)math_floor( - math_max2(0.0, - timing->v_active - math_max2(1.0, stream_pstate_meta->min_allow_width_otg_vlines) - stream_pstate_meta->blackout_otg_vlines)); - } else { - /* for multi stream, bound to a max fill time defined by IP caps */ - stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = - (unsigned int)math_floor((double)ip_caps->max_vactive_det_fill_delay_us / stream_pstate_meta->otg_vline_time_us); - } - stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us = stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines * stream_pstate_meta->otg_vline_time_us; - - if (stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us > 0.0) { - stream_pstate_meta->method_vactive.common.allow_start_otg_vline = - timing->v_blank_end + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; - stream_pstate_meta->method_vactive.common.allow_end_otg_vline = - stream_pstate_meta->vblank_start - - stream_pstate_meta->blackout_otg_vlines; - } else { - stream_pstate_meta->method_vactive.common.allow_start_otg_vline = 0; - stream_pstate_meta->method_vactive.common.allow_end_otg_vline = 0; - } - stream_pstate_meta->method_vactive.common.period_us = stream_pstate_meta->nom_frame_time_us; - build_method_scheduling_params(&stream_pstate_meta->method_vactive.common, stream_pstate_meta); - - /* vblank */ - stream_pstate_meta->method_vblank.common.allow_start_otg_vline = 
stream_pstate_meta->vblank_start; - stream_pstate_meta->method_vblank.common.allow_end_otg_vline = - stream_pstate_meta->method_vblank.common.allow_start_otg_vline + 1; - stream_pstate_meta->method_vblank.common.period_us = stream_pstate_meta->nom_frame_time_us; - build_method_scheduling_params(&stream_pstate_meta->method_vblank.common, stream_pstate_meta); - - /* subvp */ - stream_pstate_meta->method_subvp.programming_delay_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.subvp_programming_delay_us / stream_pstate_meta->otg_vline_time_us); - stream_pstate_meta->method_subvp.df_throttle_delay_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.subvp_df_throttle_delay_us / stream_pstate_meta->otg_vline_time_us); - stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.subvp_prefetch_to_mall_delay_us / stream_pstate_meta->otg_vline_time_us); - stream_pstate_meta->method_subvp.phantom_vactive = - stream_pstate_meta->allow_to_target_delay_otg_vlines + - stream_pstate_meta->min_allow_width_otg_vlines + - stream_info->phantom_min_v_active; - stream_pstate_meta->method_subvp.phantom_vfp = - stream_pstate_meta->method_subvp.df_throttle_delay_otg_vlines; - /* phantom vtotal = v_bp(vstartup) + v_sync(1) + v_fp(throttle_delay) + v_active(allow_to_target + min_allow + min_vactive)*/ - stream_pstate_meta->method_subvp.phantom_vtotal = - stream_info->phantom_v_startup + - stream_pstate_meta->method_subvp.phantom_vfp + - 1 + - stream_pstate_meta->method_subvp.df_throttle_delay_otg_vlines + - stream_pstate_meta->method_subvp.phantom_vactive; - stream_pstate_meta->method_subvp.common.allow_start_otg_vline = - stream_descriptor->timing.v_blank_end + - stream_pstate_meta->contention_delay_otg_vlines + - stream_pstate_meta->method_subvp.programming_delay_otg_vlines + - stream_pstate_meta->method_subvp.phantom_vtotal + - stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + - stream_pstate_meta->allow_to_target_delay_otg_vlines; - stream_pstate_meta->method_subvp.common.allow_end_otg_vline = - stream_pstate_meta->vblank_start - - stream_pstate_meta->blackout_otg_vlines; - stream_pstate_meta->method_subvp.common.period_us = stream_pstate_meta->nom_frame_time_us; - build_method_scheduling_params(&stream_pstate_meta->method_subvp.common, stream_pstate_meta); - - /* drr */ - stream_pstate_meta->method_drr.programming_delay_otg_vlines = - (unsigned int)math_ceil(ip_caps->fams2.drr_programming_delay_us / stream_pstate_meta->otg_vline_time_us); - stream_pstate_meta->method_drr.common.allow_start_otg_vline = - stream_pstate_meta->vblank_start + - stream_pstate_meta->allow_to_target_delay_otg_vlines; - stream_pstate_meta->method_drr.common.period_us = stream_pstate_meta->nom_frame_time_us; - if (display_config->display_config.num_streams <= 1) { - /* only need to stretch vblank for blackout time */ - stream_pstate_meta->method_drr.stretched_vtotal = - stream_pstate_meta->nom_vtotal + - stream_pstate_meta->allow_to_target_delay_otg_vlines + - stream_pstate_meta->min_allow_width_otg_vlines + - stream_pstate_meta->blackout_otg_vlines; - } else { - /* multi display needs to always be schedulable */ - stream_pstate_meta->method_drr.stretched_vtotal = - stream_pstate_meta->nom_vtotal * 2 + - stream_pstate_meta->allow_to_target_delay_otg_vlines + - stream_pstate_meta->min_allow_width_otg_vlines + - stream_pstate_meta->blackout_otg_vlines; - } - stream_pstate_meta->method_drr.common.allow_end_otg_vline = - 
stream_pstate_meta->method_drr.stretched_vtotal - - stream_pstate_meta->blackout_otg_vlines; - build_method_scheduling_params(&stream_pstate_meta->method_drr.common, stream_pstate_meta); -} - -static void build_subvp_meta_per_stream(struct dml2_pmo_instance *pmo, - struct display_configuation_with_meta *display_config, - int stream_index) -{ - struct dml2_implicit_svp_meta *stream_svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; - struct dml2_pstate_meta *stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index]; - - stream_svp_meta->valid = true; - - /* PMO FAMS2 precalculates these values */ - stream_svp_meta->v_active = stream_pstate_meta->method_subvp.phantom_vactive; - stream_svp_meta->v_front_porch = stream_pstate_meta->method_subvp.phantom_vfp; - stream_svp_meta->v_total = stream_pstate_meta->method_subvp.phantom_vtotal; -} - -bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3; - struct dml2_pmo_scratch *s = &pmo->scratch; - - struct display_configuation_with_meta *display_config; - const struct dml2_plane_parameters *plane_descriptor; - const struct dml2_pmo_pstate_strategy *strategy_list = NULL; - struct dml2_pmo_pstate_strategy override_base_strategy = { 0 }; - unsigned int strategy_list_size = 0; - unsigned int plane_index, stream_index, i; - bool build_override_strategy = true; - - state->performed = true; - in_out->base_display_config->stage3.min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency; - - display_config = in_out->base_display_config; - display_config->display_config.overrides.enable_subvp_implicit_pmo = true; - - memset(s, 0, sizeof(struct dml2_pmo_scratch)); - - if (display_config->display_config.overrides.all_streams_blanked) { - return true; - } - - pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; - pmo->scratch.pmo_dcn4.max_latency_index = pmo->mcg_clock_table_size; - pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; - - // First build the stream plane mask (array of bitfields indexed by stream, indicating plane mapping) - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - plane_descriptor = &display_config->display_config.plane_descriptors[plane_index]; - - set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index); - - state->pstate_switch_modes[plane_index] = dml2_pstate_method_vactive; - - build_override_strategy &= plane_descriptor->overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto; - override_base_strategy.per_stream_pstate_method[plane_descriptor->stream_index] = - uclk_pstate_strategy_override_to_pstate_method(plane_descriptor->overrides.uclk_pstate_change_strategy); - } - - // Figure out which streams can do vactive, and also build up implicit SVP and FAMS2 meta - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (get_vactive_pstate_margin(display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) >= (int)(MIN_VACTIVE_MARGIN_PCT * pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us)) - set_bit_in_bitfield(&s->pmo_dcn4.stream_vactive_capability_mask, stream_index); - - /* FAMS2 meta */ -
build_pstate_meta_per_stream(pmo, display_config, stream_index); - - /* SVP meta */ - build_subvp_meta_per_stream(pmo, display_config, stream_index); - } - - /* get synchronized timing groups */ - build_synchronized_timing_groups(pmo, display_config); - - if (build_override_strategy) { - /* build expanded override strategy list (no permutations) */ - override_base_strategy.allow_state_increase = true; - s->pmo_dcn4.num_expanded_override_strategies = 0; - insert_strategy_into_expanded_list(&override_base_strategy, - display_config->display_config.num_streams, - s->pmo_dcn4.expanded_override_strategy_list, - &s->pmo_dcn4.num_expanded_override_strategies); - expand_variant_strategy(&override_base_strategy, - display_config->display_config.num_streams, - false, - s->pmo_dcn4.expanded_override_strategy_list, - &s->pmo_dcn4.num_expanded_override_strategies); - - /* use override strategy list */ - strategy_list = s->pmo_dcn4.expanded_override_strategy_list; - strategy_list_size = s->pmo_dcn4.num_expanded_override_strategies; - } else { - /* use predefined strategy list */ - strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams); - strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams); - } - - if (!strategy_list || strategy_list_size == 0) - return false; - - s->pmo_dcn4.num_pstate_candidates = 0; - - for (i = 0; i < strategy_list_size && s->pmo_dcn4.num_pstate_candidates < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { - if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, &strategy_list[i])) { - insert_into_candidate_list(&strategy_list[i], display_config->display_config.num_streams, s); - } - } - - if (s->pmo_dcn4.num_pstate_candidates > 0) { - s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.num_pstate_candidates-1].allow_state_increase = true; - s->pmo_dcn4.cur_pstate_candidate = -1; - return true; - } else { - return false; - } -} - -static void reset_display_configuration(struct display_configuation_with_meta *display_config) -{ - unsigned int plane_index; - unsigned int stream_index; - struct dml2_plane_parameters *plane; - - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - display_config->stage3.stream_svp_meta[stream_index].valid = false; - - display_config->display_config.stream_descriptors[stream_index].overrides.minimize_active_latency_hiding = false; - display_config->display_config.overrides.best_effort_min_active_latency_hiding_us = 0; - } - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Unset SubVP - plane->overrides.legacy_svp_config = dml2_svp_mode_override_auto; - - // Remove reserve time - plane->overrides.reserved_vblank_time_ns = 0; - - // Reset strategy to auto - plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_na; - } -} - -static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *display_config, - struct dml2_pmo_instance *pmo, - int plane_mask) -{ - unsigned int plane_index; - struct dml2_plane_parameters *plane; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - plane = 
&display_config->display_config.plane_descriptors[plane_index]; - - plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_force_drr; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_drr; - - } - } -} - -static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta *display_config, - struct dml2_pmo_instance *pmo, - int plane_mask) -{ - struct dml2_pmo_scratch *scratch = &pmo->scratch; - - unsigned int plane_index; - int stream_index = -1; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp; - } - } - - if (stream_index >= 0) { - memcpy(&display_config->stage3.stream_svp_meta[stream_index], - &scratch->pmo_dcn4.stream_svp_meta[stream_index], - sizeof(struct dml2_implicit_svp_meta)); - } -} - -static void setup_planes_for_svp_drr_by_mask(struct display_configuation_with_meta *display_config, - struct dml2_pmo_instance *pmo, - int plane_mask) -{ - struct dml2_pmo_scratch *scratch = &pmo->scratch; - - unsigned int plane_index; - int stream_index = -1; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp_drr; - } - } - - if (stream_index >= 0) { - memcpy(&display_config->stage3.stream_svp_meta[stream_index], - &scratch->pmo_dcn4.stream_svp_meta[stream_index], - sizeof(struct dml2_implicit_svp_meta)); - } -} - -static void setup_planes_for_vblank_by_mask(struct display_configuation_with_meta *display_config, - struct dml2_pmo_instance *pmo, - int plane_mask) -{ - unsigned int plane_index; - struct dml2_plane_parameters *plane; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - plane->overrides.reserved_vblank_time_ns = (long)math_max2(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000.0, - plane->overrides.reserved_vblank_time_ns); - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vblank; - - } - } -} - -static void setup_planes_for_vblank_drr_by_mask(struct display_configuation_with_meta *display_config, - struct dml2_pmo_instance *pmo, - int plane_mask) -{ - unsigned int plane_index; - struct dml2_plane_parameters *plane; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000); - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vblank_drr; - } - } -} - -static void setup_planes_for_vactive_by_mask(struct display_configuation_with_meta *display_config, - struct dml2_pmo_instance *pmo, - int plane_mask) -{ - unsigned int plane_index; - unsigned int stream_index; - - for (plane_index = 
0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - stream_index = display_config->display_config.plane_descriptors[plane_index].stream_index; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vactive; - - if (!pmo->options->disable_vactive_det_fill_bw_pad) { - display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us = - (unsigned int)math_floor(pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index].method_vactive.max_vactive_det_fill_delay_us); - } - } - } -} - -static void setup_planes_for_vactive_drr_by_mask(struct display_configuation_with_meta *display_config, - struct dml2_pmo_instance *pmo, - int plane_mask) -{ - unsigned int plane_index; - unsigned int stream_index; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - stream_index = display_config->display_config.plane_descriptors[plane_index].stream_index; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vactive_drr; - - if (!pmo->options->disable_vactive_det_fill_bw_pad) { - display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us = - (unsigned int)math_floor(pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index].method_vactive.max_vactive_det_fill_delay_us); - } - } - } -} - -static bool setup_display_config(struct display_configuation_with_meta *display_config, struct dml2_pmo_instance *pmo, int strategy_index) -{ - struct dml2_pmo_scratch *scratch = &pmo->scratch; - - bool fams2_required = false; - bool success = true; - unsigned int stream_index; - - reset_display_configuration(display_config); - - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - - if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_na) { - success = false; - break; - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive) { - setup_planes_for_vactive_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank) { - setup_planes_for_vblank_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp) { - fams2_required = true; - setup_planes_for_svp_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { - fams2_required = true; - setup_planes_for_vactive_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { - fams2_required = true; - setup_planes_for_vblank_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if 
(scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { - fams2_required = true; - setup_planes_for_svp_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { - fams2_required = true; - setup_planes_for_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } - } - - /* copy FAMS2 meta */ - if (success) { - display_config->stage3.fams2_required = fams2_required; - memcpy(&display_config->stage3.stream_pstate_meta, - &scratch->pmo_dcn4.stream_pstate_meta, - sizeof(struct dml2_pstate_meta) * DML2_MAX_PLANES); - } - - return success; -} - -static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask) -{ - int min_time_us = 0xFFFFFF; - unsigned int plane_index = 0; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - if (min_time_us > (display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000)) - min_time_us = display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; - } - } - return min_time_us; -} - -bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) -{ - bool p_state_supported = true; - unsigned int stream_index; - struct dml2_pmo_scratch *s = &in_out->instance->scratch; - - int MIN_VACTIVE_MARGIN_VBLANK = 0; - int MIN_VACTIVE_MARGIN_DRR = 0; - int REQUIRED_RESERVED_TIME = 0; - - if (in_out->base_display_config->display_config.overrides.all_streams_blanked) { - return true; - } - - MIN_VACTIVE_MARGIN_VBLANK = INT_MIN; - MIN_VACTIVE_MARGIN_DRR = INT_MIN; - REQUIRED_RESERVED_TIME = (int)in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; - - if (s->pmo_dcn4.cur_pstate_candidate < 0) - return false; - - for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { - struct dml2_pstate_meta *stream_pstate_meta = &s->pmo_dcn4.stream_pstate_meta[stream_index]; - - if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive || - s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { - if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) || - get_vactive_det_fill_latency_delay_us(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank || - s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { - if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < - 
REQUIRED_RESERVED_TIME || - get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp || - s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { - if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { - if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pstate_method_fw_drr) || - get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_na) { - p_state_supported = false; - break; - } - } - - return p_state_supported; -} - -bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) -{ - bool success = false; - struct dml2_pmo_scratch *s = &in_out->instance->scratch; - - memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); - - if (in_out->last_candidate_failed) { - if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].allow_state_increase && - s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index - 1) { - s->pmo_dcn4.cur_latency_index++; - - success = true; - } - } - - if (!success) { - s->pmo_dcn4.cur_latency_index = s->pmo_dcn4.min_latency_index; - s->pmo_dcn4.cur_pstate_candidate++; - - if (s->pmo_dcn4.cur_pstate_candidate < s->pmo_dcn4.num_pstate_candidates) { - success = true; - } - } - - if (success) { - in_out->optimized_display_config->stage3.min_clk_index_for_latency = s->pmo_dcn4.cur_latency_index; - setup_display_config(in_out->optimized_display_config, in_out->instance, in_out->instance->scratch.pmo_dcn4.cur_pstate_candidate); - } - - return success; -} - -bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out) -{ - bool success = true; - struct dml2_pmo_instance *pmo = in_out->instance; - bool stutter_period_meets_z8_eco = true; - bool z8_stutter_optimization_too_expensive = false; - bool stutter_optimization_too_expensive = false; - double line_time_us, vblank_nom_time_us; - - unsigned int i; - - if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && - pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && - pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us < pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) - return false; // Unexpected SoCBB setup - - for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { - if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[i].active_latency_hiding_us < - pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us + pmo->soc_bb->power_management_parameters.z8_min_idle_time) { - stutter_period_meets_z8_eco = 
false; - break; - } - } - - for (i = 0; i < in_out->base_display_config->display_config.num_streams; i++) { - line_time_us = (double)in_out->base_display_config->display_config.stream_descriptors[i].timing.h_total / (in_out->base_display_config->display_config.stream_descriptors[i].timing.pixel_clock_khz * 1000) * 1000000; - vblank_nom_time_us = line_time_us * in_out->base_display_config->display_config.stream_descriptors[i].timing.vblank_nom; - - if (vblank_nom_time_us < pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * MIN_BLANK_STUTTER_FACTOR) { - z8_stutter_optimization_too_expensive = true; - break; - } - - if (vblank_nom_time_us < pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * MIN_BLANK_STUTTER_FACTOR) { - stutter_optimization_too_expensive = true; - break; - } - } - - pmo->scratch.pmo_dcn4.num_stutter_candidates = 0; - pmo->scratch.pmo_dcn4.cur_stutter_candidate = 0; - - if (stutter_period_meets_z8_eco && !z8_stutter_optimization_too_expensive) { - if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0) { - pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.num_stutter_candidates] = (unsigned int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us; - pmo->scratch.pmo_dcn4.num_stutter_candidates++; - pmo->scratch.pmo_dcn4.z8_vblank_optimizable = true; - } - } else { - pmo->scratch.pmo_dcn4.z8_vblank_optimizable = false; - } - - if (!stutter_optimization_too_expensive && pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0) { - pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.num_stutter_candidates] = (unsigned int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us; - pmo->scratch.pmo_dcn4.num_stutter_candidates++; - } - - if (pmo->scratch.pmo_dcn4.num_stutter_candidates == 0) - success = false; - - return success; -} - -bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out) -{ - bool success = true; - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i; - - for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { - if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && - pmo->scratch.pmo_dcn4.z8_vblank_optimizable && - in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * 1000) { - success = false; - break; - } - if (pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && - in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * 1000) { - success = false; - break; - } - } - - return success; -} - -bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out) -{ - bool success = false; - struct dml2_pmo_instance *pmo = in_out->instance; - unsigned int i; - - memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); - - if (!in_out->last_candidate_failed) { - if (pmo->scratch.pmo_dcn4.cur_stutter_candidate < pmo->scratch.pmo_dcn4.num_stutter_candidates) { - for (i = 0; i < in_out->optimized_display_config->display_config.num_planes; i++) { - /* take the max of the current and the optimal reserved time */ - 
in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns = - (long)math_max2(pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate] * 1000, - in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns); - } - - success = true; - } - } - - return success; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h deleted file mode 100644 index 6baab7ad6ecc..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_PMO_FAMS2_DCN4_H__ -#define __DML2_PMO_FAMS2_DCN4_H__ - -#include "dml2_internal_shared_types.h" - -bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out); - -bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); - -bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); -bool pmo_dcn4_fams2_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); -bool pmo_dcn4_fams2_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); - -bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); -bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); -bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); - -bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out); -bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out); -bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out); - -void pmo_dcn4_fams2_expand_base_pstate_strategies( - const struct dml2_pmo_pstate_strategy *base_strategies_list, - const unsigned int num_base_strategies, - const unsigned int stream_count, - struct dml2_pmo_pstate_strategy *expanded_strategy_list, - unsigned int *num_expanded_strategies); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c deleted file mode 100644 index 55d2464365d0..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
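The pmo_dcn4_fams2_* entry points declared in the header above come in init/test/optimize triples. A minimal caller-side sketch of that protocol, with hypothetical pmo/display_cfg/optimized_cfg pointers (the real driver loop, including the mode-support revalidation between steps, is dml2_top_optimization_perform_optimization_phase() further down in this series):

	struct dml2_pmo_init_for_pstate_support_in_out init = { 0 };
	struct dml2_pmo_test_for_pstate_support_in_out test = { 0 };
	struct dml2_pmo_optimize_for_pstate_support_in_out opt = { 0 };

	init.instance = pmo;                    /* hypothetical instance pointer */
	init.base_display_config = display_cfg; /* hypothetical base config */

	if (pmo_dcn4_fams2_init_for_pstate_support(&init)) {
		test.instance = pmo;
		test.base_display_config = display_cfg;
		opt.instance = pmo;
		opt.base_display_config = display_cfg;
		opt.optimized_display_config = optimized_cfg;

		/* walk p-state candidates until one passes or the list runs out;
		 * a real caller re-runs mode support on each optimized config */
		while (!pmo_dcn4_fams2_test_for_pstate_support(&test)) {
			opt.last_candidate_failed = true;
			if (!pmo_dcn4_fams2_optimize_for_pstate_support(&opt))
				break;
		}
	}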
- -#include "dml2_pmo_factory.h" -#include "dml2_pmo_dcn4_fams2.h" -#include "dml2_pmo_dcn3.h" -#include "dml2_external_lib_deps.h" - -static bool dummy_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out) -{ - return false; -} - -static bool dummy_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out) -{ - return true; -} - -static bool dummy_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out) -{ - return false; -} - -bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance *out) -{ - bool result = false; - - if (!out) - return false; - - memset(out, 0, sizeof(struct dml2_pmo_instance)); - - switch (project_id) { - case dml2_project_dcn4x_stage1: - out->initialize = pmo_dcn4_fams2_initialize; - out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache; - result = true; - break; - case dml2_project_dcn4x_stage2: - out->initialize = pmo_dcn3_initialize; - - out->optimize_dcc_mcache = pmo_dcn3_optimize_dcc_mcache; - - out->init_for_vmin = pmo_dcn3_init_for_vmin; - out->test_for_vmin = pmo_dcn3_test_for_vmin; - out->optimize_for_vmin = pmo_dcn3_optimize_for_vmin; - - out->init_for_uclk_pstate = pmo_dcn3_init_for_pstate_support; - out->test_for_uclk_pstate = pmo_dcn3_test_for_pstate_support; - out->optimize_for_uclk_pstate = pmo_dcn3_optimize_for_pstate_support; - - out->init_for_stutter = dummy_init_for_stutter; - out->test_for_stutter = dummy_test_for_stutter; - out->optimize_for_stutter = dummy_optimize_for_stutter; - - result = true; - break; - case dml2_project_dcn4x_stage2_auto_drr_svp: - out->initialize = pmo_dcn4_fams2_initialize; - - out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache; - - out->init_for_vmin = pmo_dcn4_fams2_init_for_vmin; - out->test_for_vmin = pmo_dcn4_fams2_test_for_vmin; - out->optimize_for_vmin = pmo_dcn4_fams2_optimize_for_vmin; - - out->init_for_uclk_pstate = pmo_dcn4_fams2_init_for_pstate_support; - out->test_for_uclk_pstate = pmo_dcn4_fams2_test_for_pstate_support; - out->optimize_for_uclk_pstate = pmo_dcn4_fams2_optimize_for_pstate_support; - - out->init_for_stutter = pmo_dcn4_fams2_init_for_stutter; - out->test_for_stutter = pmo_dcn4_fams2_test_for_stutter; - out->optimize_for_stutter = pmo_dcn4_fams2_optimize_for_stutter; - result = true; - break; - case dml2_project_invalid: - default: - break; - } - - return result; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h deleted file mode 100644 index 7218de1824cc..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_PMO_FACTORY_H__ -#define __DML2_PMO_FACTORY_H__ - -#include "dml2_internal_shared_types.h" -#include "dml_top_types.h" - -bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance *out); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c deleted file mode 100644 index e17b5ceba447..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c +++ /dev/null @@ -1,147 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
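The deleted factory is a plain vtable selector: dml2_pmo_create() zeroes the instance, then wires either the dcn3 or the dcn4 FAMS2 implementation by project id, with the stutter hooks stubbed out for dcn4x_stage2. A hedged usage sketch, assuming only the types and enums visible in the hunk:

	struct dml2_pmo_instance pmo;

	/* returns false for dml2_project_invalid or an unrecognized id */
	if (dml2_pmo_create(dml2_project_dcn4x_stage2_auto_drr_svp, &pmo)) {
		/* every subsequent call dispatches through the populated vtable,
		 * e.g. pmo.init_for_uclk_pstate(), pmo.test_for_stutter(), ... */
	}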
- -#include "lib_float_math.h" - -#define ASSERT(condition) - -#define isNaN(number) ((number) != (number)) - - /* - * NOTE: - * This file is gcc-parseable HW gospel, coming straight from HW engineers. - * - * It doesn't adhere to Linux kernel style and sometimes will do things in odd - * ways. Unless there is something clearly wrong with it the code should - * remain as-is as it provides us with a guarantee from HW that it is correct. - */ - -double math_mod(const double arg1, const double arg2) -{ - if (isNaN(arg1)) - return arg2; - if (isNaN(arg2)) - return arg1; - return arg1 - arg1 * ((int)(arg1 / arg2)); -} - -double math_min2(const double arg1, const double arg2) -{ - if (isNaN(arg1)) - return arg2; - if (isNaN(arg2)) - return arg1; - return arg1 < arg2 ? arg1 : arg2; -} - -double math_max2(const double arg1, const double arg2) -{ - if (isNaN(arg1)) - return arg2; - if (isNaN(arg2)) - return arg1; - return arg1 > arg2 ? arg1 : arg2; -} - -double math_floor2(const double arg, const double significance) -{ - ASSERT(significance != 0); - - return ((int)(arg / significance)) * significance; -} - -double math_floor(const double arg) -{ - return ((int)(arg)); -} - -double math_ceil(const double arg) -{ - return (int)(arg + 0.99999); -} - -double math_ceil2(const double arg, const double significance) -{ - return ((int)(arg / significance + 0.99999)) * significance; -} - -double math_max3(double v1, double v2, double v3) -{ - return v3 > math_max2(v1, v2) ? v3 : math_max2(v1, v2); -} - -double math_max4(double v1, double v2, double v3, double v4) -{ - return v4 > math_max3(v1, v2, v3) ? v4 : math_max3(v1, v2, v3); -} - -double math_max5(double v1, double v2, double v3, double v4, double v5) -{ - return math_max3(v1, v2, v3) > math_max2(v4, v5) ? math_max3(v1, v2, v3) : math_max2(v4, v5); -} - -float math_pow(float a, float exp) -{ - double temp; - if ((int)exp == 0) - return 1; - temp = math_pow(a, (float)((int)(exp / 2))); - if (((int)exp % 2) == 0) { - return (float)(temp * temp); - } else { - if ((int)exp > 0) - return (float)(a * temp * temp); - else - return (float)((temp * temp) / a); - } -} - -double math_fabs(double a) -{ - if (a > 0) - return (a); - else - return (-a); -} - -float math_log(float a, float b) -{ - int *const exp_ptr = (int *)(&a); - int x = *exp_ptr; - const int log_2 = ((x >> 23) & 255) - 128; - x &= ~(255 << 23); - x += 127 << 23; - *exp_ptr = x; - - a = ((-1.0f / 3) * a + 2) * a - 2.0f / 3; - - if (b > 2.00001 || b < 1.99999) - return (a + log_2) / math_log(b, 2); - else - return (a + log_2); -} - -float math_log2(float a) -{ - return math_log(a, 2.0); -} - -// approximate log2 value of an input -// - precise if the input is a power of 2, else the approximation will be an integer = floor(actual_log2) -unsigned int math_log2_approx(unsigned int a) -{ - unsigned int log2_val = 0; - while (a > 1) { - a = a >> 1; - log2_val++; - } - return log2_val; -} - -double math_round(double a) -{ - const double round_pt = 0.5; - - return math_floor(a + round_pt); -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h deleted file mode 100644 index e13b0c5939b0..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc.
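Two quirks of the float-math library above are easy to miss: math_ceil() is not a true ceiling but (int)(arg + 0.99999), so fractional parts of 1e-5 or less are absorbed instead of rounded up, and math_log2_approx() floors to the nearest power of two. Illustrative values (the expected results in the comments are editor-checked, not from the deleted file):

	math_ceil(2.0);      /* == 2: the 0.99999 epsilon keeps exact integers stable */
	math_ceil(2.000001); /* == 2: sub-epsilon fractional parts are absorbed */
	math_ceil(2.5);      /* == 3 */

	math_log2_approx(8); /* == 3: exact for powers of two */
	math_log2_approx(9); /* == 3: floor(log2(9)), not rounded */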
- -#ifndef __LIB_FLOAT_MATH_H__ -#define __LIB_FLOAT_MATH_H__ - -double math_mod(const double arg1, const double arg2); -double math_min2(const double arg1, const double arg2); -double math_max2(const double arg1, const double arg2); -double math_floor2(const double arg, const double significance); -double math_floor(const double arg); -double math_ceil(const double arg); -double math_ceil2(const double arg, const double significance); -double math_max3(double v1, double v2, double v3); -double math_max4(double v1, double v2, double v3, double v4); -double math_max5(double v1, double v2, double v3, double v4, double v5); -float math_pow(float a, float exp); -double math_fabs(double a); -float math_log(float a, float b); -float math_log2(float a); -unsigned int math_log2_approx(unsigned int a); -double math_round(double a); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c deleted file mode 100644 index 5a33e2f357f4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c +++ /dev/null @@ -1,49 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dml_top.h" -#include "dml2_internal_shared_types.h" -#include "dml2_top_soc15.h" - -unsigned int dml2_get_instance_size_bytes(void) -{ - return sizeof(struct dml2_instance); -} - -bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out) -{ - switch (in_out->options.project_id) { - case dml2_project_dcn4x_stage1: - case dml2_project_dcn4x_stage2: - case dml2_project_dcn4x_stage2_auto_drr_svp: - return dml2_top_soc15_initialize_instance(in_out); - case dml2_project_invalid: - default: - return false; - } -} - -bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) -{ - if (!in_out->dml2_instance->funcs.check_mode_supported) - return false; - - return in_out->dml2_instance->funcs.check_mode_supported(in_out); -} - -bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) -{ - if (!in_out->dml2_instance->funcs.build_mode_programming) - return false; - - return in_out->dml2_instance->funcs.build_mode_programming(in_out); -} - -bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out) -{ - if (!in_out->dml2_instance->funcs.build_mcache_programming) - return false; - - return in_out->dml2_instance->funcs.build_mcache_programming(in_out); -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c deleted file mode 100644 index 5e14d85821e2..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dml2_top_legacy.h" -#include "dml2_top_soc15.h" -#include "dml2_core_factory.h" -#include "dml2_pmo_factory.h" -#include "display_mode_core_structs.h" - diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h deleted file mode 100644 index 14d0ae03dce6..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
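The deleted dml2_top_interfaces.c is a thin dispatch layer: initialization routes every dcn4x project id to the soc15 implementation, and later calls forward through the funcs table stored in the instance. A hedged sketch of the expected call order (any field wiring beyond project_id and dml2_instance is an assumption):

	/* the caller allocates the opaque instance itself, using this size */
	unsigned int size = dml2_get_instance_size_bytes();

	struct dml2_initialize_instance_in_out init = { 0 };
	init.options.project_id = dml2_project_dcn4x_stage1; /* any dcn4x id maps to soc15 */

	if (dml2_initialize_instance(&init)) {
		struct dml2_check_mode_supported_in_out check = { 0 };
		/* check.dml2_instance points at the initialized instance; the
		 * shim forwards to funcs.check_mode_supported() */
		dml2_check_mode_supported(&check);
	}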
- -#ifndef __DML2_TOP_LEGACY_H__ -#define __DML2_TOP_LEGACY_H__ -#include "dml2_internal_shared_types.h" -bool dml2_top_legacy_initialize_instance(struct dml2_initialize_instance_in_out *in_out); -#endif /* __DML2_TOP_LEGACY_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c deleted file mode 100644 index 4a7c4c62111e..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c +++ /dev/null @@ -1,1170 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dml2_top_soc15.h" -#include "dml2_mcg_factory.h" -#include "dml2_dpmm_factory.h" -#include "dml2_core_factory.h" -#include "dml2_pmo_factory.h" -#include "lib_float_math.h" -#include "dml2_debug.h" -static void setup_unoptimized_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) -{ - memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); - out->stage1.min_clk_index_for_latency = dml->min_clk_table.dram_bw_table.num_entries - 1; //dml->min_clk_table.clean_me_up.soc_bb.num_states - 1; -} - -static void setup_speculative_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) -{ - memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); - out->stage1.min_clk_index_for_latency = 0; -} - -static void copy_display_configuration_with_meta(struct display_configuation_with_meta *dst, const struct display_configuation_with_meta *src) -{ - memcpy(dst, src, sizeof(struct display_configuation_with_meta)); -} - -static bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params) -{ - struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; - - state->performed = true; - - return true; -} - -static bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; - - return state->min_clk_index_for_latency == 0; -} - -static bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params) -{ - bool result = false; - - if (params->display_config->stage1.min_clk_index_for_latency > 0) { - copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); - params->optimized_display_config->stage1.min_clk_index_for_latency--; - result = true; - } - - return result; -} - -static bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_test_function_locals *l = params->locals; - bool mcache_success = false; - bool result = false; - - memset(l, 0, sizeof(struct dml2_optimization_test_function_locals)); - - l->test_mcache.calc_mcache_count_params.dml2_instance = params->dml; - l->test_mcache.calc_mcache_count_params.display_config = ¶ms->display_config->display_config; - l->test_mcache.calc_mcache_count_params.mcache_allocations = params->display_config->stage2.mcache_allocations; - - result = dml2_top_mcache_calc_mcache_count_and_offsets(&l->test_mcache.calc_mcache_count_params); // use core to get the basic mcache_allocations - - if (result) { - 
l->test_mcache.assign_global_mcache_ids_params.allocations = params->display_config->stage2.mcache_allocations; - l->test_mcache.assign_global_mcache_ids_params.num_allocations = params->display_config->display_config.num_planes; - - dml2_top_mcache_assign_global_mcache_ids(&l->test_mcache.assign_global_mcache_ids_params); - - l->test_mcache.validate_admissibility_params.dml2_instance = params->dml; - l->test_mcache.validate_admissibility_params.display_cfg = ¶ms->display_config->display_config; - l->test_mcache.validate_admissibility_params.mcache_allocations = params->display_config->stage2.mcache_allocations; - l->test_mcache.validate_admissibility_params.cfg_support_info = ¶ms->display_config->mode_support_result.cfg_support_info; - - mcache_success = dml2_top_mcache_validate_admissability(&l->test_mcache.validate_admissibility_params); // also find the shift to make mcache allocation works - - memcpy(params->display_config->stage2.per_plane_mcache_support, l->test_mcache.validate_admissibility_params.per_plane_status, sizeof(bool) * DML2_MAX_PLANES); - } - - return mcache_success; -} - -static bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params) -{ - struct dml2_optimization_optimize_function_locals *l = params->locals; - bool optimize_success = false; - - if (params->last_candidate_supported == false) - return false; - - copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); - - l->optimize_mcache.optimize_mcache_params.instance = ¶ms->dml->pmo_instance; - l->optimize_mcache.optimize_mcache_params.dcc_mcache_supported = params->display_config->stage2.per_plane_mcache_support; - l->optimize_mcache.optimize_mcache_params.display_config = ¶ms->display_config->display_config; - l->optimize_mcache.optimize_mcache_params.optimized_display_cfg = ¶ms->optimized_display_config->display_config; - l->optimize_mcache.optimize_mcache_params.cfg_support_info = ¶ms->optimized_display_config->mode_support_result.cfg_support_info; - - optimize_success = params->dml->pmo_instance.optimize_dcc_mcache(&l->optimize_mcache.optimize_mcache_params); - - return optimize_success; -} - -static bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params) -{ - struct dml2_optimization_init_function_locals *l = params->locals; - - l->vmin.init_params.instance = ¶ms->dml->pmo_instance; - l->vmin.init_params.base_display_config = params->display_config; - return params->dml->pmo_instance.init_for_vmin(&l->vmin.init_params); -} - -static bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_test_function_locals *l = params->locals; - - l->test_vmin.pmo_test_vmin_params.instance = ¶ms->dml->pmo_instance; - l->test_vmin.pmo_test_vmin_params.display_config = params->display_config; - l->test_vmin.pmo_test_vmin_params.vmin_limits = ¶ms->dml->soc_bbox.vmin_limit; - return params->dml->pmo_instance.test_for_vmin(&l->test_vmin.pmo_test_vmin_params); -} - -static bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params) -{ - struct dml2_optimization_optimize_function_locals *l = params->locals; - - if (params->last_candidate_supported == false) - return false; - - l->optimize_vmin.pmo_optimize_vmin_params.instance = ¶ms->dml->pmo_instance; - l->optimize_vmin.pmo_optimize_vmin_params.base_display_config = params->display_config; - 
l->optimize_vmin.pmo_optimize_vmin_params.optimized_display_config = params->optimized_display_config; - return params->dml->pmo_instance.optimize_for_vmin(&l->optimize_vmin.pmo_optimize_vmin_params); -} - -static bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params) -{ - struct dml2_optimization_init_function_locals *l = params->locals; - - l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; - l->uclk_pstate.init_params.base_display_config = params->display_config; - - return params->dml->pmo_instance.init_for_uclk_pstate(&l->uclk_pstate.init_params); -} - -static bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_test_function_locals *l = params->locals; - - l->uclk_pstate.test_params.instance = ¶ms->dml->pmo_instance; - l->uclk_pstate.test_params.base_display_config = params->display_config; - - return params->dml->pmo_instance.test_for_uclk_pstate(&l->uclk_pstate.test_params); -} - -static bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params) -{ - struct dml2_optimization_optimize_function_locals *l = params->locals; - - l->uclk_pstate.optimize_params.instance = ¶ms->dml->pmo_instance; - l->uclk_pstate.optimize_params.base_display_config = params->display_config; - l->uclk_pstate.optimize_params.optimized_display_config = params->optimized_display_config; - l->uclk_pstate.optimize_params.last_candidate_failed = !params->last_candidate_supported; - - return params->dml->pmo_instance.optimize_for_uclk_pstate(&l->uclk_pstate.optimize_params); -} - -static bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params) -{ - struct dml2_optimization_init_function_locals *l = params->locals; - - l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; - l->uclk_pstate.init_params.base_display_config = params->display_config; - - return params->dml->pmo_instance.init_for_stutter(&l->stutter.stutter_params); -} - -static bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_test_function_locals *l = params->locals; - - l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; - l->stutter.stutter_params.base_display_config = params->display_config; - return params->dml->pmo_instance.test_for_stutter(&l->stutter.stutter_params); -} - -static bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params) -{ - struct dml2_optimization_optimize_function_locals *l = params->locals; - - l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; - l->stutter.stutter_params.base_display_config = params->display_config; - l->stutter.stutter_params.optimized_display_config = params->optimized_display_config; - l->stutter.stutter_params.last_candidate_failed = !params->last_candidate_supported; - return params->dml->pmo_instance.optimize_for_stutter(&l->stutter.stutter_params); -} - -static bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) -{ - bool test_passed = false; - bool optimize_succeeded = true; - bool candidate_validation_passed = true; - struct optimization_init_function_params init_params = { 0 }; - struct optimization_test_function_params test_params = { 0 }; - struct optimization_optimize_function_params 
optimize_params = { 0 }; - - if (!params->dml || - !params->optimize_function || - !params->test_function || - !params->display_config || - !params->optimized_display_config) - return false; - - copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); - - init_params.locals = &l->init_function_locals; - init_params.dml = params->dml; - init_params.display_config = &l->cur_candidate_display_cfg; - - if (params->init_function && !params->init_function(&init_params)) - return false; - - test_params.locals = &l->test_function_locals; - test_params.dml = params->dml; - test_params.display_config = &l->cur_candidate_display_cfg; - - test_passed = params->test_function(&test_params); - - while (!test_passed && optimize_succeeded) { - memset(&optimize_params, 0, sizeof(struct optimization_optimize_function_params)); - - optimize_params.locals = &l->optimize_function_locals; - optimize_params.dml = params->dml; - optimize_params.display_config = &l->cur_candidate_display_cfg; - optimize_params.optimized_display_config = &l->next_candidate_display_cfg; - optimize_params.last_candidate_supported = candidate_validation_passed; - - optimize_succeeded = params->optimize_function(&optimize_params); - - if (optimize_succeeded) { - l->mode_support_params.instance = &params->dml->core_instance; - l->mode_support_params.display_cfg = &l->next_candidate_display_cfg; - l->mode_support_params.min_clk_table = &params->dml->min_clk_table; - - if (l->next_candidate_display_cfg.stage3.performed) - l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage3.min_clk_index_for_latency; - else - l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage1.min_clk_index_for_latency; - candidate_validation_passed = params->dml->core_instance.mode_support(&l->mode_support_params); - l->next_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; - } - - if (optimize_succeeded && candidate_validation_passed) { - memset(&test_params, 0, sizeof(struct optimization_test_function_params)); - test_params.locals = &l->test_function_locals; - test_params.dml = params->dml; - test_params.display_config = &l->next_candidate_display_cfg; - test_passed = params->test_function(&test_params); - - copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, &l->next_candidate_display_cfg); - - // If optimization is not all or nothing, then store partial progress in output - if (!params->all_or_nothing) - copy_display_configuration_with_meta(params->optimized_display_config, &l->next_candidate_display_cfg); - } - } - - if (test_passed) - copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); - - return test_passed; -} - -static bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) -{ - int highest_state, lowest_state, cur_state; - bool supported = false; - - if (!params->dml || - !params->optimize_function || - !params->test_function || - !params->display_config || - !params->optimized_display_config) - return false; - - copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); - highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency; - lowest_state = 0; - - while (highest_state > lowest_state) { - cur_state = (highest_state + lowest_state) / 2; - - l->mode_support_params.instance = &params->dml->core_instance; - l->mode_support_params.display_cfg =
&l->cur_candidate_display_cfg; - l->mode_support_params.min_clk_table = &params->dml->min_clk_table; - l->mode_support_params.min_clk_index = cur_state; - supported = params->dml->core_instance.mode_support(&l->mode_support_params); - - if (supported) { - l->cur_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; - highest_state = cur_state; - } else { - lowest_state = cur_state + 1; - } - } - l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency = lowest_state; - - copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); - - return true; -} - -/* -* Takes an input set of mcache boundaries and finds the appropriate setting of cache programming. -* Returns true if a valid set of programming can be made, and false otherwise. "Valid" means -* that the horizontal viewport does not span more than 2 cache slices. -* -* It can also optionally apply a constant shift to all the cache boundaries. -*/ -static const uint32_t MCACHE_ID_UNASSIGNED = 0xF; -static const uint32_t SPLIT_LOCATION_UNDEFINED = 0xFFFF; - -static bool calculate_first_second_splitting(const int *mcache_boundaries, int num_boundaries, int shift, - int pipe_h_vp_start, int pipe_h_vp_end, int *first_offset, int *second_offset) -{ - const int MAX_VP = 0xFFFFFF; - int left_cache_id; - int right_cache_id; - int range_start; - int range_end; - bool success = false; - - if (num_boundaries <= 1) { - if (first_offset && second_offset) { - *first_offset = 0; - *second_offset = -1; - } - success = true; - return success; - } else { - range_start = 0; - for (left_cache_id = 0; left_cache_id < num_boundaries; left_cache_id++) { - range_end = mcache_boundaries[left_cache_id] - shift - 1; - - if (range_start <= pipe_h_vp_start && pipe_h_vp_start <= range_end) - break; - - range_start = range_end + 1; - } - - range_end = MAX_VP; - for (right_cache_id = num_boundaries - 1; right_cache_id >= -1; right_cache_id--) { - if (right_cache_id >= 0) - range_start = mcache_boundaries[right_cache_id] - shift; - else - range_start = 0; - - if (range_start <= pipe_h_vp_end && pipe_h_vp_end <= range_end) { - break; - } - range_end = range_start - 1; - } - right_cache_id = (right_cache_id + 1) % num_boundaries; - - if (right_cache_id == left_cache_id) { - if (first_offset && second_offset) { - *first_offset = left_cache_id; - *second_offset = -1; - } - success = true; - } else if (right_cache_id == (left_cache_id + 1) % num_boundaries) { - if (first_offset && second_offset) { - *first_offset = left_cache_id; - *second_offset = right_cache_id; - } - success = true; - } - } - - return success; -} - -/* -* For a given set of pipe start/end x positions, checks to see if it can support the input mcache splitting. -* It also attempts to "optimize" by finding a shift if the default 0 shift does not work.
-*/ -static bool find_shift_for_valid_cache_id_assignment(int *mcache_boundaries, unsigned int num_boundaries, - int *pipe_vp_startx, int *pipe_vp_endx, unsigned int pipe_count, int shift_granularity, int *shift) -{ - int max_shift = 0xFFFF; - unsigned int pipe_index; - unsigned int i, slice_width; - bool success = false; - - for (i = 0; i < num_boundaries; i++) { - if (i == 0) - slice_width = mcache_boundaries[i]; - else - slice_width = mcache_boundaries[i] - mcache_boundaries[i - 1]; - - if (max_shift > (int)slice_width) { - max_shift = slice_width; - } - } - - for (*shift = 0; *shift <= max_shift; *shift += shift_granularity) { - success = true; - for (pipe_index = 0; pipe_index < pipe_count; pipe_index++) { - if (!calculate_first_second_splitting(mcache_boundaries, num_boundaries, *shift, - pipe_vp_startx[pipe_index], pipe_vp_endx[pipe_index], 0, 0)) { - success = false; - break; - } - } - if (success) - break; - } - - return success; -} - -/* -* Counts the number of elements inside input array within the given span length. -* Formally, what is the size of the largest subset of the array where the largest and smallest element -* differ no more than the span. -*/ -static unsigned int count_elements_in_span(int *array, unsigned int array_size, unsigned int span) -{ - unsigned int i; - unsigned int span_start_value; - unsigned int span_start_index; - unsigned int greatest_element_count; - - if (array_size == 0) - return 1; - - if (span == 0) - return array_size > 0 ? 1 : 0; - - span_start_value = 0; - span_start_index = 0; - greatest_element_count = 0; - - while (span_start_index < array_size) { - for (i = span_start_index; i < array_size; i++) { - if (array[i] - span_start_value <= span) { - if (i - span_start_index + 1 > greatest_element_count) { - greatest_element_count = i - span_start_index + 1; - } - } else - break; - } - - span_start_index++; - - if (span_start_index < array_size) { - span_start_value = array[span_start_index - 1] + 1; - } - } - - return greatest_element_count; -} - -static bool calculate_h_split_for_scaling_transform(int full_vp_width, int h_active, int num_pipes, - enum dml2_scaling_transform scaling_transform, int *pipe_vp_x_start, int *pipe_vp_x_end) -{ - int i, slice_width; - const char MAX_SCL_VP_OVERLAP = 3; - bool success = false; - - switch (scaling_transform) { - case dml2_scaling_transform_centered: - case dml2_scaling_transform_aspect_ratio: - case dml2_scaling_transform_fullscreen: - slice_width = full_vp_width / num_pipes; - for (i = 0; i < num_pipes; i++) { - pipe_vp_x_start[i] = i * slice_width; - pipe_vp_x_end[i] = (i + 1) * slice_width - 1; - - if (pipe_vp_x_start[i] < MAX_SCL_VP_OVERLAP) - pipe_vp_x_start[i] = 0; - else - pipe_vp_x_start[i] -= MAX_SCL_VP_OVERLAP; - - if (pipe_vp_x_end[i] > full_vp_width - MAX_SCL_VP_OVERLAP - 1) - pipe_vp_x_end[i] = full_vp_width - 1; - else - pipe_vp_x_end[i] += MAX_SCL_VP_OVERLAP; - } - break; - case dml2_scaling_transform_explicit: - default: - success = false; - break; - } - - return success; -} - -bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params) -{ - struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; - struct dml2_top_mcache_validate_admissability_locals *l = &dml->scratch.mcache_validate_admissability_locals; - - const int MAX_PIXEL_OVERLAP = 6; - int max_per_pipe_vp_p0 = 0; - int max_per_pipe_vp_p1 = 0; - int temp, p0shift, p1shift; - unsigned int plane_index = 0; - unsigned int i; - unsigned int odm_combine_factor; - unsigned int 
mpc_combine_factor; - unsigned int num_dpps; - unsigned int num_boundaries; - enum dml2_scaling_transform scaling_transform; - const struct dml2_plane_parameters *plane; - const struct dml2_stream_parameters *stream; - - bool p0pass = false; - bool p1pass = false; - bool all_pass = true; - - for (plane_index = 0; plane_index < params->display_cfg->num_planes; plane_index++) { - if (!params->display_cfg->plane_descriptors[plane_index].surface.dcc.enable) - continue; - - plane = &params->display_cfg->plane_descriptors[plane_index]; - stream = &params->display_cfg->stream_descriptors[plane->stream_index]; - - num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; - - if (odm_combine_factor == 1) - num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used; - else - mpc_combine_factor = 1; - - if (odm_combine_factor > 1) { - max_per_pipe_vp_p0 = plane->surface.plane0.width; - temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor); - - if (temp < max_per_pipe_vp_p0) - max_per_pipe_vp_p0 = temp; - - max_per_pipe_vp_p1 = plane->surface.plane1.width; - temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane1.h_ratio * stream->timing.h_active / odm_combine_factor); - - if (temp < max_per_pipe_vp_p1) - max_per_pipe_vp_p1 = temp; - } else { - max_per_pipe_vp_p0 = plane->surface.plane0.width / mpc_combine_factor; - max_per_pipe_vp_p1 = plane->surface.plane1.width / mpc_combine_factor; - } - - max_per_pipe_vp_p0 += 2 * MAX_PIXEL_OVERLAP; - max_per_pipe_vp_p1 += MAX_PIXEL_OVERLAP; - - p0shift = 0; - p1shift = 0; - - // The last element in the unshifted boundary array will always be the first pixel outside the - // plane, which means there's no mcache associated with it, so -1 - num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1; - if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, - num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) { - p0pass = true; - } - num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ?
0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1; - if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, - num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) { - p1pass = true; - } - - if (!p0pass || !p1pass) { - if (odm_combine_factor > 1) { - num_dpps = odm_combine_factor; - scaling_transform = plane->composition.scaling_transform; - } else { - num_dpps = mpc_combine_factor; - scaling_transform = dml2_scaling_transform_fullscreen; - } - - if (!p0pass) { - if (plane->composition.viewport.stationary) { - calculate_h_split_for_scaling_transform(plane->surface.plane0.width, - stream->timing.h_active, num_dpps, scaling_transform, - &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]); - p0pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, - params->mcache_allocations[plane_index].num_mcaches_plane0, - &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index], num_dpps, - params->mcache_allocations[plane_index].shift_granularity.p0, &p0shift); - } - } - if (!p1pass) { - if (plane->composition.viewport.stationary) { - calculate_h_split_for_scaling_transform(plane->surface.plane1.width, - stream->timing.h_active, num_dpps, scaling_transform, - &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]); - p1pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, - params->mcache_allocations[plane_index].num_mcaches_plane1, - &l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index], num_dpps, - params->mcache_allocations[plane_index].shift_granularity.p1, &p1shift); - } - } - } - - if (p0pass && p1pass) { - for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane0; i++) { - params->mcache_allocations[plane_index].mcache_x_offsets_plane0[i] -= p0shift; - } - for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane1; i++) { - params->mcache_allocations[plane_index].mcache_x_offsets_plane1[i] -= p1shift; - } - } - - params->per_plane_status[plane_index] = p0pass && p1pass; - all_pass &= p0pass && p1pass; - } - - return all_pass; -} - -static void reset_mcache_allocations(struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs) -{ - // Initialize all entries to special valid MCache ID and special valid split coordinate - per_plane_pipe_mcache_regs->main.p0.mcache_id_first = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->main.p0.mcache_id_second = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->main.p0.split_location = SPLIT_LOCATION_UNDEFINED; - - per_plane_pipe_mcache_regs->mall.p0.mcache_id_first = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->mall.p0.mcache_id_second = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->mall.p0.split_location = SPLIT_LOCATION_UNDEFINED; - - per_plane_pipe_mcache_regs->main.p1.mcache_id_first = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->main.p1.mcache_id_second = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->main.p1.split_location = SPLIT_LOCATION_UNDEFINED; - - per_plane_pipe_mcache_regs->mall.p1.mcache_id_first = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->mall.p1.mcache_id_second = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->mall.p1.split_location = SPLIT_LOCATION_UNDEFINED; -} - -void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params) -{ - int i; - 
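/*
 * A worked example of the admissibility rule applied above: take
 * hypothetical plane0 mcache offsets {256, 512, 768} (the final offset is
 * the first pixel past the plane, so only {256, 512} count as boundaries)
 * and a per-pipe viewport span of 300 pixels. count_elements_in_span()
 * finds at most one boundary in any 300-pixel window (256 in [0, 300];
 * 512 in [257, 557]), so the count is 1 and, together with
 * num_boundaries <= num_dpps, the plane passes: every viewport crosses at
 * most one boundary, i.e. spans at most two slices. Widen the span to 600
 * and the window [0, 600] already contains both 256 and 512, so the count
 * is 2 and a shift has to be searched for via
 * find_shift_for_valid_cache_id_assignment() instead.
 */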
unsigned int j; - int next_unused_cache_id = 0; - - for (i = 0; i < params->num_allocations; i++) { - if (!params->allocations[i].valid) - continue; - - for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) { - params->allocations[i].global_mcache_ids_plane0[j] = next_unused_cache_id++; - } - for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) { - params->allocations[i].global_mcache_ids_plane1[j] = next_unused_cache_id++; - } - - // The "pseudo-last" slice is always wrapped around - params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0] = - params->allocations[i].global_mcache_ids_plane0[0]; - params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1] = - params->allocations[i].global_mcache_ids_plane1[0]; - - // If we need dedicated caches for mall requesting, then we assign them here. - if (params->allocations[i].requires_dedicated_mall_mcache) { - for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) { - params->allocations[i].global_mcache_ids_mall_plane0[j] = next_unused_cache_id++; - } - for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) { - params->allocations[i].global_mcache_ids_mall_plane1[j] = next_unused_cache_id++; - } - - // The "pseudo-last" slice is always wrapped around - params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0] = - params->allocations[i].global_mcache_ids_mall_plane0[0]; - params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1] = - params->allocations[i].global_mcache_ids_mall_plane1[0]; - } - - // If P0 and P1 are sharing caches, then the largest mcache IDs for p0 and p1 can be the same; - // since mcache IDs are always ascending, the largest mcache ID of p1 should be the - // largest mcache ID of p0 - if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 && - params->allocations[i].last_slice_sharing.plane0_plane1) { - params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1] = - params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1]; - } - - // If we need dedicated caches, handle last slice sharing - if (params->allocations[i].requires_dedicated_mall_mcache) { - if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 && - params->allocations[i].last_slice_sharing.plane0_plane1) { - params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] = - params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1]; - } - // If mall_comb_mcache_l is set then it means that the largest mcache ID for MALL p0 can be the same as regular read p0 - if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p0) { - params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1] = - params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1]; - } - // If mall_comb_mcache_c is set then it means that the largest mcache ID for MALL p1 can be the same as regular - // read p1 (which can be the same as regular read p0 if plane0_plane1 is also set) - if (params->allocations[i].num_mcaches_plane1 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p1) { -
params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] = - params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1]; - } - } - - // If you don't need dedicated mall mcaches, the mall mcache assignments are identical to the normal requesting - if (!params->allocations[i].requires_dedicated_mall_mcache) { - memcpy(params->allocations[i].global_mcache_ids_mall_plane0, params->allocations[i].global_mcache_ids_plane0, - sizeof(params->allocations[i].global_mcache_ids_mall_plane0)); - memcpy(params->allocations[i].global_mcache_ids_mall_plane1, params->allocations[i].global_mcache_ids_plane1, - sizeof(params->allocations[i].global_mcache_ids_mall_plane1)); - } - } -} - -bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params) -{ - struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; - struct dml2_top_mcache_verify_mcache_size_locals *l = &dml->scratch.mcache_verify_mcache_size_locals; - - unsigned int total_mcaches_required; - unsigned int i; - bool result = false; - - if (dml->soc_bbox.num_dcc_mcaches == 0) { - return true; - } - - total_mcaches_required = 0; - l->calc_mcache_params.instance = &dml->core_instance; - for (i = 0; i < params->display_config->num_planes; i++) { - if (!params->display_config->plane_descriptors[i].surface.dcc.enable) { - memset(&params->mcache_allocations[i], 0, sizeof(struct dml2_mcache_surface_allocation)); - continue; - } - - l->calc_mcache_params.plane_descriptor = &params->display_config->plane_descriptors[i]; - l->calc_mcache_params.mcache_allocation = &params->mcache_allocations[i]; - l->calc_mcache_params.plane_index = i; - - if (!dml->core_instance.calculate_mcache_allocation(&l->calc_mcache_params)) { - result = false; - break; - } - - if (params->mcache_allocations[i].valid) { - total_mcaches_required += params->mcache_allocations[i].num_mcaches_plane0 + params->mcache_allocations[i].num_mcaches_plane1; - if (params->mcache_allocations[i].last_slice_sharing.plane0_plane1) - total_mcaches_required--; - } - } - DML_LOG_VERBOSE("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); - - if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) { - result = false; - } else { - result = true; - } - - return result; -} - -static bool dml2_top_soc15_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) -{ - struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; - struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals; - struct dml2_display_cfg_programming *dpmm_programming = &dml->dpmm_instance.dpmm_scratch.programming; - - bool result = false; - bool mcache_success = false; - memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming)); - - setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); - - l->mode_support_params.instance = &dml->core_instance; - l->mode_support_params.display_cfg = &l->base_display_config_with_meta; - l->mode_support_params.min_clk_table = &dml->min_clk_table; - l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; - result = dml->core_instance.mode_support(&l->mode_support_params); - l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; - - if (result) { - struct optimization_phase_params mcache_phase = { - .dml = dml,
- .display_config = &l->base_display_config_with_meta, - .test_function = dml2_top_optimization_test_function_mcache, - .optimize_function = dml2_top_optimization_optimize_function_mcache, - .optimized_display_config = &l->optimized_display_config_with_meta, - .all_or_nothing = false, - }; - mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &mcache_phase); - } - - /* - * Call DPMM to map all requirements to minimum clock state - */ - if (result) { - l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; - l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; - l->dppm_map_mode_params.programming = dpmm_programming; - l->dppm_map_mode_params.soc_bb = &dml->soc_bbox; - l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip; - result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params); - } - - in_out->is_supported = mcache_success; - result = result && in_out->is_supported; - - return result; -} - -static bool dml2_top_soc15_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) -{ - struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; - struct dml2_build_mode_programming_locals *l = &dml->scratch.build_mode_programming_locals; - - bool result = false; - bool mcache_success = false; - bool uclk_pstate_success = false; - bool vmin_success = false; - bool stutter_success = false; - - memset(l, 0, sizeof(struct dml2_build_mode_programming_locals)); - memset(in_out->programming, 0, sizeof(struct dml2_display_cfg_programming)); - - memcpy(&in_out->programming->display_config, in_out->display_config, sizeof(struct dml2_display_cfg)); - - setup_speculative_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); - - l->mode_support_params.instance = &dml->core_instance; - l->mode_support_params.display_cfg = &l->base_display_config_with_meta; - l->mode_support_params.min_clk_table = &dml->min_clk_table; - l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; - result = dml->core_instance.mode_support(&l->mode_support_params); - - l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; - - if (!result) { - setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); - - l->mode_support_params.instance = &dml->core_instance; - l->mode_support_params.display_cfg = &l->base_display_config_with_meta; - l->mode_support_params.min_clk_table = &dml->min_clk_table; - l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; - result = dml->core_instance.mode_support(&l->mode_support_params); - l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; - - if (!result) { - l->informative_params.instance = &dml->core_instance; - l->informative_params.programming = in_out->programming; - l->informative_params.mode_is_supported = false; - dml->core_instance.populate_informative(&l->informative_params); - - return false; - } - - /* - * Phase 1: Determine minimum clocks to satisfy latency requirements for this mode - */ - memset(&l->min_clock_for_latency_phase, 0, sizeof(struct optimization_phase_params)); - l->min_clock_for_latency_phase.dml = dml; - l->min_clock_for_latency_phase.display_config = &l->base_display_config_with_meta; - l->min_clock_for_latency_phase.init_function = 
dml2_top_optimization_init_function_min_clk_for_latency; - l->min_clock_for_latency_phase.test_function = dml2_top_optimization_test_function_min_clk_for_latency; - l->min_clock_for_latency_phase.optimize_function = dml2_top_optimization_optimize_function_min_clk_for_latency; - l->min_clock_for_latency_phase.optimized_display_config = &l->optimized_display_config_with_meta; - l->min_clock_for_latency_phase.all_or_nothing = false; - - dml2_top_optimization_perform_optimization_phase_1(&l->optimization_phase_locals, &l->min_clock_for_latency_phase); - - memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - } - - /* - * Phase 2: Satisfy DCC mcache requirements - */ - memset(&l->mcache_phase, 0, sizeof(struct optimization_phase_params)); - l->mcache_phase.dml = dml; - l->mcache_phase.display_config = &l->base_display_config_with_meta; - l->mcache_phase.test_function = dml2_top_optimization_test_function_mcache; - l->mcache_phase.optimize_function = dml2_top_optimization_optimize_function_mcache; - l->mcache_phase.optimized_display_config = &l->optimized_display_config_with_meta; - l->mcache_phase.all_or_nothing = true; - - mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->mcache_phase); - - if (!mcache_success) { - l->informative_params.instance = &dml->core_instance; - l->informative_params.programming = in_out->programming; - l->informative_params.mode_is_supported = false; - - dml->core_instance.populate_informative(&l->informative_params); - - in_out->programming->informative.failed_mcache_validation = true; - return false; - } - - memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - - /* - * Phase 3: Optimize for Pstate - */ - memset(&l->uclk_pstate_phase, 0, sizeof(struct optimization_phase_params)); - l->uclk_pstate_phase.dml = dml; - l->uclk_pstate_phase.display_config = &l->base_display_config_with_meta; - l->uclk_pstate_phase.init_function = dml2_top_optimization_init_function_uclk_pstate; - l->uclk_pstate_phase.test_function = dml2_top_optimization_test_function_uclk_pstate; - l->uclk_pstate_phase.optimize_function = dml2_top_optimization_optimize_function_uclk_pstate; - l->uclk_pstate_phase.optimized_display_config = &l->optimized_display_config_with_meta; - l->uclk_pstate_phase.all_or_nothing = true; - - uclk_pstate_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->uclk_pstate_phase); - - if (uclk_pstate_success) { - memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage3.success = true; - } - - /* - * Phase 4: Optimize for Vmin - */ - memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params)); - l->vmin_phase.dml = dml; - l->vmin_phase.display_config = &l->base_display_config_with_meta; - l->vmin_phase.init_function = dml2_top_optimization_init_function_vmin; - l->vmin_phase.test_function = dml2_top_optimization_test_function_vmin; - l->vmin_phase.optimize_function = dml2_top_optimization_optimize_function_vmin; - l->vmin_phase.optimized_display_config = &l->optimized_display_config_with_meta; - l->vmin_phase.all_or_nothing = false; - - vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); - - if (l->optimized_display_config_with_meta.stage4.performed) 
{ - /* - * when performed is true, the optimization has been applied to - * optimized_display_config_with_meta and it has passed mode - * support. However, it may or may not pass the test function to - * reach actual Vmin. As long as the voltage is optimized, there is - * still a power benefit even if it doesn't reach the Vmin level, - * so in this case we still copy this optimization into the base - * display config. - */ - memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage4.success = vmin_success; - } - - /* - * Phase 5: Optimize for Stutter - */ - memset(&l->stutter_phase, 0, sizeof(struct optimization_phase_params)); - l->stutter_phase.dml = dml; - l->stutter_phase.display_config = &l->base_display_config_with_meta; - l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter; - l->stutter_phase.test_function = dml2_top_optimization_test_function_stutter; - l->stutter_phase.optimize_function = dml2_top_optimization_optimize_function_stutter; - l->stutter_phase.optimized_display_config = &l->optimized_display_config_with_meta; - l->stutter_phase.all_or_nothing = true; - - stutter_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->stutter_phase); - - if (stutter_success) { - memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage5.success = true; - } - - /* - * Call DPMM to map all requirements to minimum clock state - */ - if (result) { - l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; - l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; - l->dppm_map_mode_params.programming = in_out->programming; - l->dppm_map_mode_params.soc_bb = &dml->soc_bbox; - l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip; - result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params); - if (!result) - in_out->programming->informative.failed_dpmm = true; - } - - if (result) { - l->mode_programming_params.instance = &dml->core_instance; - l->mode_programming_params.display_cfg = &l->base_display_config_with_meta; - l->mode_programming_params.cfg_support_info = &l->base_display_config_with_meta.mode_support_result.cfg_support_info; - l->mode_programming_params.programming = in_out->programming; - result = dml->core_instance.mode_programming(&l->mode_programming_params); - if (!result) - in_out->programming->informative.failed_mode_programming = true; - } - - if (result) { - l->dppm_map_watermarks_params.core = &dml->core_instance; - l->dppm_map_watermarks_params.display_cfg = &l->base_display_config_with_meta; - l->dppm_map_watermarks_params.programming = in_out->programming; - result = dml->dpmm_instance.map_watermarks(&l->dppm_map_watermarks_params); - } - - l->informative_params.instance = &dml->core_instance; - l->informative_params.programming = in_out->programming; - l->informative_params.mode_is_supported = result; - - dml->core_instance.populate_informative(&l->informative_params); - - return result; -} - -bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params) -{ - bool success = true; - int config_index, pipe_index; - int first_offset, second_offset; - int free_per_plane_reg_index = 0; - - memset(params->per_plane_pipe_mcache_regs, 0, DML2_MAX_PLANES * DML2_MAX_DCN_PIPES * sizeof(struct dml2_hubp_pipe_mcache_regs *)); -
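/*
 * Condensed control flow of dml2_top_soc15_build_mode_programming() above,
 * eliding error paths and informative logging:
 *
 *	speculative mode support
 *	-> on failure: unoptimized mode support, then phase 1 binary-searches
 *	   the lowest min-clock state that still satisfies latency
 *	-> phase 2: mcache admissibility (all_or_nothing = true)
 *	-> phase 3: uclk p-state (all_or_nothing = true, may fail gracefully)
 *	-> phase 4: vmin (kept whenever stage4.performed, even short of Vmin)
 *	-> phase 5: stutter (all_or_nothing = true, may fail gracefully)
 *	-> DPMM clock mapping -> core mode programming -> watermark mapping
 *
 * Each accepted phase copies its result back into
 * base_display_config_with_meta before the next phase starts from it.
 */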
- for (config_index = 0; config_index < params->num_configurations; config_index++) { - for (pipe_index = 0; pipe_index < params->mcache_configurations[config_index].num_pipes; pipe_index++) { - // Allocate storage for the mcache regs - params->per_plane_pipe_mcache_regs[config_index][pipe_index] = &params->mcache_regs_set[free_per_plane_reg_index++]; - - reset_mcache_allocations(params->per_plane_pipe_mcache_regs[config_index][pipe_index]); - - if (params->mcache_configurations[config_index].plane_descriptor->surface.dcc.enable) { - // P0 always enabled - if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0, - params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane0, - 0, - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start, - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start + - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_width - 1, - &first_offset, &second_offset)) { - success = false; - break; - } - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_first = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[first_offset]; - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_first = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[first_offset]; - - if (second_offset >= 0) { - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_second = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[second_offset]; - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.split_location = - params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_second = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[second_offset]; - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.split_location = - params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; - } - - // Populate P1 if enabled - if (params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1_enabled) { - if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1, - params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane1, - 0, - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start, - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start + - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_width - 1, - &first_offset, &second_offset)) { - success = false; - break; - } - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_first = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[first_offset]; - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_first = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[first_offset]; - - if (second_offset >= 0) { -
params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_second = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[second_offset]; - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.split_location = - params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_second = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[second_offset]; - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.split_location = - params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; - } - } - } - } - } - - return success; -} - -static const struct dml2_top_funcs soc15_funcs = { - .check_mode_supported = dml2_top_soc15_check_mode_supported, - .build_mode_programming = dml2_top_soc15_build_mode_programming, - .build_mcache_programming = dml2_top_soc15_build_mcache_programming, -}; - -bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out) -{ - struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; - struct dml2_initialize_instance_locals *l = &dml->scratch.initialize_instance_locals; - struct dml2_core_initialize_in_out core_init_params = { 0 }; - struct dml2_mcg_build_min_clock_table_params_in_out mcg_build_min_clk_params = { 0 }; - struct dml2_pmo_initialize_in_out pmo_init_params = { 0 }; - bool result = false; - - memset(l, 0, sizeof(struct dml2_initialize_instance_locals)); - memset(dml, 0, sizeof(struct dml2_instance)); - - memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); - memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb)); - - dml->project_id = in_out->options.project_id; - dml->pmo_options = in_out->options.pmo_options; - - // Initialize All Components - result = dml2_mcg_create(in_out->options.project_id, &dml->mcg_instance); - - if (result) - result = dml2_dpmm_create(in_out->options.project_id, &dml->dpmm_instance); - - if (result) - result = dml2_core_create(in_out->options.project_id, &dml->core_instance); - - if (result) { - mcg_build_min_clk_params.soc_bb = &in_out->soc_bb; - mcg_build_min_clk_params.min_clk_table = &dml->min_clk_table; - result = dml->mcg_instance.build_min_clock_table(&mcg_build_min_clk_params); - } - - if (result) { - core_init_params.project_id = in_out->options.project_id; - core_init_params.instance = &dml->core_instance; - core_init_params.minimum_clock_table = &dml->min_clk_table; - core_init_params.explicit_ip_bb = in_out->overrides.explicit_ip_bb; - core_init_params.explicit_ip_bb_size = in_out->overrides.explicit_ip_bb_size; - core_init_params.ip_caps = &in_out->ip_caps; - core_init_params.soc_bb = &in_out->soc_bb; - result = dml->core_instance.initialize(&core_init_params); - - if (core_init_params.explicit_ip_bb && core_init_params.explicit_ip_bb_size > 0) { - memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); - } - } - - if (result) - result = dml2_pmo_create(in_out->options.project_id, &dml->pmo_instance); - - if (result) { - pmo_init_params.instance = &dml->pmo_instance; - pmo_init_params.soc_bb = &dml->soc_bbox; - pmo_init_params.ip_caps = &dml->ip_caps; - pmo_init_params.mcg_clock_table_size = dml->min_clk_table.dram_bw_table.num_entries; - pmo_init_params.options = &dml->pmo_options; - 
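/*
 * A minimal bring-up sketch for dml2_top_soc15_initialize_instance(); the
 * caller-side names here are hypothetical, not part of the interface:
 *
 *	struct dml2_initialize_instance_in_out init = { 0 };
 *
 *	init.dml2_instance = my_dml_storage;   // caller-owned instance memory
 *	init.options.project_id = my_project;  // selects mcg/dpmm/core/pmo
 *	init.options.pmo_options = my_pmo_opts;
 *	init.soc_bb = *my_soc_bb;              // copied into dml->soc_bbox
 *	init.ip_caps = *my_ip_caps;            // copied into dml->ip_caps
 *
 *	if (!dml2_top_soc15_initialize_instance(&init))
 *		return false; // component creation or table build failed
 */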
dml->pmo_instance.initialize(&pmo_init_params); - } - dml->funcs = soc15_funcs; - return result; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h deleted file mode 100644 index 53bd8602f9ef..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_TOP_SOC15_H__ -#define __DML2_TOP_SOC15_H__ -#include "dml2_internal_shared_types.h" -bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out); - -bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params); -void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params); -bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params); -bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params); -#endif /* __DML2_TOP_SOC15_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h deleted file mode 100644 index 611c80f4f1bf..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h +++ /dev/null @@ -1,189 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_DEBUG_H__ -#define __DML2_DEBUG_H__ - -#include "os_types.h" -#define DML_ASSERT(condition) ASSERT(condition) -#define DML_LOG_LEVEL_DEFAULT DML_LOG_LEVEL_WARN -#define DML_LOG_INTERNAL(fmt, ...) dm_output_to_console(fmt, ## __VA_ARGS__) - -/* private helper macros */ -#define _BOOL_FORMAT(field) "%s", field ? 
"true" : "false" -#define _UINT_FORMAT(field) "%u", field -#define _INT_FORMAT(field) "%d", field -#define _DOUBLE_FORMAT(field) "%lf", field -#define _ELEMENT_FUNC "function" -#define _ELEMENT_COMP_IF "component_interface" -#define _ELEMENT_TOP_IF "top_interface" -#define _LOG_ENTRY(element) do { \ - DML_LOG_INTERNAL("<"element" name=\""); \ - DML_LOG_INTERNAL(__func__); \ - DML_LOG_INTERNAL("\">\n"); \ -} while (0) -#define _LOG_EXIT(element) DML_LOG_INTERNAL("\n") -#define _LOG_SCALAR(field, format) do { \ - DML_LOG_INTERNAL(#field" = "format(field)); \ - DML_LOG_INTERNAL("\n"); \ -} while (0) -#define _LOG_ARRAY(field, size, format) do { \ - DML_LOG_INTERNAL(#field " = ["); \ - for (int _i = 0; _i < (int) size; _i++) { \ - DML_LOG_INTERNAL(format(field[_i])); \ - if (_i + 1 == (int) size) \ - DML_LOG_INTERNAL("]\n"); \ - else \ - DML_LOG_INTERNAL(", "); \ -}} while (0) -#define _LOG_2D_ARRAY(field, size0, size1, format) do { \ - DML_LOG_INTERNAL(#field" = ["); \ - for (int _i = 0; _i < (int) size0; _i++) { \ - DML_LOG_INTERNAL("\n\t["); \ - for (int _j = 0; _j < (int) size1; _j++) { \ - DML_LOG_INTERNAL(format(field[_i][_j])); \ - if (_j + 1 == (int) size1) \ - DML_LOG_INTERNAL("]"); \ - else \ - DML_LOG_INTERNAL(", "); \ - } \ - if (_i + 1 == (int) size0) \ - DML_LOG_INTERNAL("]\n"); \ - else \ - DML_LOG_INTERNAL(", "); \ - } \ -} while (0) -#define _LOG_3D_ARRAY(field, size0, size1, size2, format) do { \ - DML_LOG_INTERNAL(#field" = ["); \ - for (int _i = 0; _i < (int) size0; _i++) { \ - DML_LOG_INTERNAL("\n\t["); \ - for (int _j = 0; _j < (int) size1; _j++) { \ - DML_LOG_INTERNAL("["); \ - for (int _k = 0; _k < (int) size2; _k++) { \ - DML_LOG_INTERNAL(format(field[_i][_j][_k])); \ - if (_k + 1 == (int) size2) \ - DML_LOG_INTERNAL("]"); \ - else \ - DML_LOG_INTERNAL(", "); \ - } \ - if (_j + 1 == (int) size1) \ - DML_LOG_INTERNAL("]"); \ - else \ - DML_LOG_INTERNAL(", "); \ - } \ - if (_i + 1 == (int) size0) \ - DML_LOG_INTERNAL("]\n"); \ - else \ - DML_LOG_INTERNAL(", "); \ - } \ -} while (0) - -/* fatal errors for unrecoverable DML states until a full reset */ -#define DML_LOG_LEVEL_FATAL 0 -/* unexpected but recoverable failures inside DML */ -#define DML_LOG_LEVEL_ERROR 1 -/* unexpected inputs or events to DML */ -#define DML_LOG_LEVEL_WARN 2 -/* high level tracing of DML interfaces */ -#define DML_LOG_LEVEL_INFO 3 -/* tracing of DML internal executions */ -#define DML_LOG_LEVEL_DEBUG 4 -/* detailed tracing of DML calculation procedure */ -#define DML_LOG_LEVEL_VERBOSE 5 - -#ifndef DML_LOG_LEVEL -#define DML_LOG_LEVEL DML_LOG_LEVEL_DEFAULT -#endif /* #ifndef DML_LOG_LEVEL */ - -/* public macros for DML_LOG_LEVEL_FATAL and up */ -#define DML_LOG_FATAL(fmt, ...) DML_LOG_INTERNAL("[DML FATAL] " fmt, ## __VA_ARGS__) - -/* public macros for DML_LOG_LEVEL_ERROR and up */ -#if DML_LOG_LEVEL >= DML_LOG_LEVEL_ERROR -#define DML_LOG_ERROR(fmt, ...) DML_LOG_INTERNAL("[DML ERROR] "fmt, ## __VA_ARGS__) -#define DML_ASSERT_MSG(condition, fmt, ...) \ - do { \ - if (!(condition)) { \ - DML_LOG_ERROR("ASSERT hit in %s line %d\n", __func__, __LINE__); \ - DML_LOG_ERROR(fmt, ## __VA_ARGS__); \ - DML_ASSERT(condition); \ - } \ - } while (0) -#else -#define DML_LOG_ERROR(fmt, ...) ((void)0) -#define DML_ASSERT_MSG(condition, fmt, ...) ((void)0) -#endif - -/* public macros for DML_LOG_LEVEL_WARN and up */ -#if DML_LOG_LEVEL >= DML_LOG_LEVEL_WARN -#define DML_LOG_WARN(fmt, ...) DML_LOG_INTERNAL("[DML WARN] "fmt, ## __VA_ARGS__) -#else -#define DML_LOG_WARN(fmt, ...) 
((void)0) -#endif - -/* public macros for DML_LOG_LEVEL_INFO and up */ -#if DML_LOG_LEVEL >= DML_LOG_LEVEL_INFO -#define DML_LOG_INFO(fmt, ...) DML_LOG_INTERNAL("[DML INFO] "fmt, ## __VA_ARGS__) -#define DML_LOG_TOP_IF_ENTER() _LOG_ENTRY(_ELEMENT_TOP_IF) -#define DML_LOG_TOP_IF_EXIT() _LOG_EXIT(_ELEMENT_TOP_IF) -#else -#define DML_LOG_INFO(fmt, ...) ((void)0) -#define DML_LOG_TOP_IF_ENTER() ((void)0) -#define DML_LOG_TOP_IF_EXIT() ((void)0) -#endif - -/* public macros for DML_LOG_LEVEL_DEBUG and up */ -#if DML_LOG_LEVEL >= DML_LOG_LEVEL_DEBUG -#define DML_LOG_DEBUG(fmt, ...) DML_LOG_INTERNAL(fmt, ## __VA_ARGS__) -#define DML_LOG_COMP_IF_ENTER() _LOG_ENTRY(_ELEMENT_COMP_IF) -#define DML_LOG_COMP_IF_EXIT() _LOG_EXIT(_ELEMENT_COMP_IF) -#define DML_LOG_FUNC_ENTER() _LOG_ENTRY(_ELEMENT_FUNC) -#define DML_LOG_FUNC_EXIT() _LOG_EXIT(_ELEMENT_FUNC) -#define DML_LOG_DEBUG_BOOL(field) _LOG_SCALAR(field, _BOOL_FORMAT) -#define DML_LOG_DEBUG_UINT(field) _LOG_SCALAR(field, _UINT_FORMAT) -#define DML_LOG_DEBUG_INT(field) _LOG_SCALAR(field, _INT_FORMAT) -#define DML_LOG_DEBUG_DOUBLE(field) _LOG_SCALAR(field, _DOUBLE_FORMAT) -#define DML_LOG_DEBUG_ARRAY_BOOL(field, size) _LOG_ARRAY(field, size, _BOOL_FORMAT) -#define DML_LOG_DEBUG_ARRAY_UINT(field, size) _LOG_ARRAY(field, size, _UINT_FORMAT) -#define DML_LOG_DEBUG_ARRAY_INT(field, size) _LOG_ARRAY(field, size, _INT_FORMAT) -#define DML_LOG_DEBUG_ARRAY_DOUBLE(field, size) _LOG_ARRAY(field, size, _DOUBLE_FORMAT) -#define DML_LOG_DEBUG_2D_ARRAY_BOOL(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _BOOL_FORMAT) -#define DML_LOG_DEBUG_2D_ARRAY_UINT(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _UINT_FORMAT) -#define DML_LOG_DEBUG_2D_ARRAY_INT(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _INT_FORMAT) -#define DML_LOG_DEBUG_2D_ARRAY_DOUBLE(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _DOUBLE_FORMAT) -#define DML_LOG_DEBUG_3D_ARRAY_BOOL(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _BOOL_FORMAT) -#define DML_LOG_DEBUG_3D_ARRAY_UINT(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _UINT_FORMAT) -#define DML_LOG_DEBUG_3D_ARRAY_INT(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _INT_FORMAT) -#define DML_LOG_DEBUG_3D_ARRAY_DOUBLE(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _DOUBLE_FORMAT) -#else -#define DML_LOG_DEBUG(fmt, ...) 
((void)0) -#define DML_LOG_COMP_IF_ENTER() ((void)0) -#define DML_LOG_COMP_IF_EXIT() ((void)0) -#define DML_LOG_FUNC_ENTER() ((void)0) -#define DML_LOG_FUNC_EXIT() ((void)0) -#define DML_LOG_DEBUG_BOOL(field) ((void)0) -#define DML_LOG_DEBUG_UINT(field) ((void)0) -#define DML_LOG_DEBUG_INT(field) ((void)0) -#define DML_LOG_DEBUG_DOUBLE(field) ((void)0) -#define DML_LOG_DEBUG_ARRAY_BOOL(field, size) ((void)0) -#define DML_LOG_DEBUG_ARRAY_UINT(field, size) ((void)0) -#define DML_LOG_DEBUG_ARRAY_INT(field, size) ((void)0) -#define DML_LOG_DEBUG_ARRAY_DOUBLE(field, size) ((void)0) -#define DML_LOG_DEBUG_2D_ARRAY_BOOL(field, size0, size1) ((void)0) -#define DML_LOG_DEBUG_2D_ARRAY_UINT(field, size0, size1) ((void)0) -#define DML_LOG_DEBUG_2D_ARRAY_INT(field, size0, size1) ((void)0) -#define DML_LOG_DEBUG_2D_ARRAY_DOUBLE(field, size0, size1) ((void)0) -#define DML_LOG_DEBUG_3D_ARRAY_BOOL(field, size0, size1, size2) ((void)0) -#define DML_LOG_DEBUG_3D_ARRAY_UINT(field, size0, size1, size2) ((void)0) -#define DML_LOG_DEBUG_3D_ARRAY_INT(field, size0, size1, size2) ((void)0) -#define DML_LOG_DEBUG_3D_ARRAY_DOUBLE(field, size0, size1, size2) ((void)0) -#endif - -/* public macros for DML_LOG_LEVEL_VERBOSE */ -#if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE -#define DML_LOG_VERBOSE(fmt, ...) DML_LOG_INTERNAL(fmt, ## __VA_ARGS__) -#else -#define DML_LOG_VERBOSE(fmt, ...) ((void)0) -#endif /* #if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE */ -#endif /* __DML2_DEBUG_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h deleted file mode 100644 index 9f562f0c4797..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h +++ /dev/null @@ -1,1004 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
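/*
 * Usage sketch for the logging macros in dml2_debug.h above: the level is
 * chosen at compile time (e.g. building with
 * -DDML_LOG_LEVEL=DML_LOG_LEVEL_DEBUG), and the helpers expand to
 * printf-style output via DML_LOG_INTERNAL():
 *
 *	unsigned int odms[2] = { 2, 1 };
 *
 *	DML_LOG_FUNC_ENTER();               // <function name="my_func">
 *	DML_LOG_DEBUG_UINT(odms[0]);        // odms[0] = 2
 *	DML_LOG_DEBUG_ARRAY_UINT(odms, 2);  // odms = [2, 1]
 *	DML_LOG_WARN("late by %d us\n", 5); // [DML WARN] late by 5 us
 *	DML_LOG_FUNC_EXIT();                // </function>
 *
 * At lower levels the compiled-out variants become ((void)0), so disabled
 * tracing has no runtime cost.
 */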
- -#ifndef __DML2_INTERNAL_SHARED_TYPES_H__ -#define __DML2_INTERNAL_SHARED_TYPES_H__ - -#include "dml2_external_lib_deps.h" -#include "dml_top_types.h" -#include "dml2_core_shared_types.h" -/* -* DML2 MCG Types and Interfaces -*/ - -#define DML_MCG_MAX_CLK_TABLE_SIZE 20 - -struct dram_bw_to_min_clk_table_entry { - unsigned long long pre_derate_dram_bw_kbps; - unsigned long min_fclk_khz; - unsigned long min_dcfclk_khz; -}; - -struct dml2_mcg_dram_bw_to_min_clk_table { - struct dram_bw_to_min_clk_table_entry entries[DML_MCG_MAX_CLK_TABLE_SIZE]; - - unsigned int num_entries; -}; - -struct dml2_mcg_min_clock_table { - struct { - unsigned int dispclk; - unsigned int dppclk; - unsigned int dscclk; - unsigned int dtbclk; - unsigned int phyclk; - unsigned int fclk; - unsigned int dcfclk; - } max_clocks_khz; - - struct { - unsigned int dispclk; - unsigned int dppclk; - unsigned int dtbclk; - } max_ss_clocks_khz; - - struct { - unsigned int dprefclk; - unsigned int xtalclk; - unsigned int pcierefclk; - unsigned int dchubrefclk; - unsigned int amclk; - } fixed_clocks_khz; - - struct dml2_mcg_dram_bw_to_min_clk_table dram_bw_table; -}; - -struct dml2_mcg_build_min_clock_table_params_in_out { - /* - * Input - */ - struct dml2_soc_bb *soc_bb; - struct { - bool perform_pseudo_build; - } clean_me_up; - - /* - * Output - */ - struct dml2_mcg_min_clock_table *min_clk_table; -}; -struct dml2_mcg_instance { - bool (*build_min_clock_table)(struct dml2_mcg_build_min_clock_table_params_in_out *in_out); -}; - -/* -* DML2 DPMM Types and Interfaces -*/ - -struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out { - /* - * Input - */ - struct dml2_core_ip_params *ip; - struct dml2_soc_bb *soc_bb; - struct dml2_mcg_min_clock_table *min_clk_table; - const struct display_configuation_with_meta *display_cfg; - struct { - bool perform_pseudo_map; - struct dml2_core_internal_soc_bb *soc_bb; - } clean_me_up; - - /* - * Output - */ - struct dml2_display_cfg_programming *programming; -}; - -struct dml2_dpmm_map_watermarks_params_in_out { - /* - * Input - */ - const struct display_configuation_with_meta *display_cfg; - const struct dml2_core_instance *core; - - /* - * Output - */ - struct dml2_display_cfg_programming *programming; -}; - -struct dml2_dpmm_scratch { - struct dml2_display_cfg_programming programming; -}; - -struct dml2_dpmm_instance { - bool (*map_mode_to_soc_dpm)(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); - bool (*map_watermarks)(struct dml2_dpmm_map_watermarks_params_in_out *in_out); - - struct dml2_dpmm_scratch dpmm_scratch; -}; - -/* -* DML2 Core Types and Interfaces -*/ - -struct dml2_core_initialize_in_out { - enum dml2_project_id project_id; - struct dml2_core_instance *instance; - struct dml2_soc_bb *soc_bb; - struct dml2_ip_capabilities *ip_caps; - - struct dml2_mcg_min_clock_table *minimum_clock_table; - - void *explicit_ip_bb; - unsigned int explicit_ip_bb_size; - - // FIXME_STAGE2 can remove but dcn3 version still need this - struct { - struct soc_bounding_box_st *soc_bb; - struct soc_states_st *soc_states; - } legacy; -}; - -struct core_bandwidth_requirements { - int urgent_bandwidth_kbytes_per_sec; - int average_bandwidth_kbytes_per_sec; -}; - -struct core_plane_support_info { - int dpps_used; - int dram_change_latency_hiding_margin_in_active; - int active_latency_hiding_us; - int mall_svp_size_requirement_ways; - int nominal_vblank_pstate_latency_hiding_us; - unsigned int dram_change_vactive_det_fill_delay_us; -}; - -struct core_stream_support_info { - unsigned int odms_used; - 
unsigned int num_odm_output_segments; // for odm split mode (e.g. a value of 2 for odm_mode_mso_1to2) - - /* FAMS2 SubVP support info */ - unsigned int phantom_min_v_active; - unsigned int phantom_v_startup; - - unsigned int phantom_v_active; - unsigned int phantom_v_total; - int vblank_reserved_time_us; - int num_dsc_slices; - bool dsc_enable; -}; - -struct core_display_cfg_support_info { - bool is_supported; - - struct core_stream_support_info stream_support_info[DML2_MAX_PLANES]; - struct core_plane_support_info plane_support_info[DML2_MAX_PLANES]; - - struct { - struct dml2_core_internal_mode_support_info support_info; - } clean_me_up; -}; - -struct dml2_core_mode_support_result { - struct { - struct { - unsigned long urgent_bw_sdp_kbps; - unsigned long average_bw_sdp_kbps; - unsigned long urgent_bw_dram_kbps; - unsigned long average_bw_dram_kbps; - unsigned long dcfclk_khz; - unsigned long fclk_khz; - } svp_prefetch; - - struct { - unsigned long urgent_bw_sdp_kbps; - unsigned long average_bw_sdp_kbps; - unsigned long urgent_bw_dram_kbps; - unsigned long average_bw_dram_kbps; - unsigned long dcfclk_khz; - unsigned long fclk_khz; - } active; - - unsigned int dispclk_khz; - unsigned int dpprefclk_khz; - unsigned int dtbrefclk_khz; - unsigned int dcfclk_deepsleep_khz; - unsigned int socclk_khz; - - unsigned int uclk_pstate_supported; - unsigned int fclk_pstate_supported; - } global; - - struct { - unsigned int dscclk_khz; - unsigned int dtbclk_khz; - unsigned int phyclk_khz; - } per_stream[DML2_MAX_PLANES]; - - struct { - unsigned int dppclk_khz; - unsigned int mall_svp_allocation_mblks; - unsigned int mall_full_frame_allocation_mblks; - } per_plane[DML2_MAX_PLANES]; - - struct core_display_cfg_support_info cfg_support_info; -}; - -struct dml2_optimization_stage1_state { - bool performed; - bool success; - - int min_clk_index_for_latency; -}; - -struct dml2_optimization_stage2_state { - bool performed; - bool success; - - // Whether or not each plane supports mcache - // The number of valid elements == display_cfg.num_planes - // The indexing of pstate_switch_modes matches plane_descriptors[] - bool per_plane_mcache_support[DML2_MAX_PLANES]; - struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; -}; - -#define DML2_PMO_LEGACY_PREFETCH_MAX_TWAIT_OPTIONS 8 -#define DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE 10 -#define DML2_PMO_STUTTER_CANDIDATE_LIST_SIZE 3 - -struct dml2_implicit_svp_meta { - bool valid; - unsigned long v_active; - unsigned long v_total; - unsigned long v_front_porch; -}; - -struct dml2_pstate_per_method_common_meta { - /* generic params */ - unsigned int allow_start_otg_vline; - unsigned int allow_end_otg_vline; - /* scheduling params */ - double allow_time_us; - double disallow_time_us; - double period_us; -}; - -struct dml2_pstate_meta { - bool valid; - double otg_vline_time_us; - unsigned int scheduling_delay_otg_vlines; - unsigned int vertical_interrupt_ack_delay_otg_vlines; - unsigned int allow_to_target_delay_otg_vlines; - unsigned int contention_delay_otg_vlines; - unsigned int min_allow_width_otg_vlines; - unsigned int nom_vtotal; - unsigned int vblank_start; - double nom_refresh_rate_hz; - double nom_frame_time_us; - unsigned int max_vtotal; - double min_refresh_rate_hz; - double max_frame_time_us; - unsigned int blackout_otg_vlines; - struct { - double max_vactive_det_fill_delay_us; - unsigned int max_vactive_det_fill_delay_otg_vlines; - struct dml2_pstate_per_method_common_meta common; - } method_vactive; - struct { - struct 
dml2_pstate_per_method_common_meta common; - } method_vblank; - struct { - unsigned int programming_delay_otg_vlines; - unsigned int df_throttle_delay_otg_vlines; - unsigned int prefetch_to_mall_delay_otg_vlines; - unsigned long phantom_vactive; - unsigned long phantom_vfp; - unsigned long phantom_vtotal; - struct dml2_pstate_per_method_common_meta common; - } method_subvp; - struct { - unsigned int programming_delay_otg_vlines; - unsigned int stretched_vtotal; - struct dml2_pstate_per_method_common_meta common; - } method_drr; -}; - -/* mask of synchronized timings by stream index */ -struct dml2_pmo_synchronized_timing_groups { - unsigned int num_timing_groups; - unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES]; - bool group_is_drr_enabled[DML2_MAX_PLANES]; - bool group_is_drr_active[DML2_MAX_PLANES]; - double group_line_time_us[DML2_MAX_PLANES]; -}; - -struct dml2_optimization_stage3_state { - bool performed; - bool success; - - // The pstate support mode for each plane - // The number of valid elements == display_cfg.num_planes - // The indexing of pstate_switch_modes matches plane_descriptors[] - enum dml2_pstate_method pstate_switch_modes[DML2_MAX_PLANES]; - - // Meta-data for implicit SVP generation, indexed by stream index - struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES]; - - // Meta-data for FAMS2 - bool fams2_required; - struct dml2_pstate_meta stream_pstate_meta[DML2_MAX_PLANES]; - - int min_clk_index_for_latency; -}; - -struct dml2_optimization_stage4_state { - bool performed; - bool success; - bool unoptimizable_streams[DML2_MAX_DCN_PIPES]; -}; - -struct dml2_optimization_stage5_state { - bool performed; - bool success; - - bool optimal_reserved_time_in_vblank_us; - bool vblank_includes_z8_optimization; -}; - -struct display_configuation_with_meta { - struct dml2_display_cfg display_config; - - struct dml2_core_mode_support_result mode_support_result; - - // Stage 1 = Min Clocks for Latency - struct dml2_optimization_stage1_state stage1; - - // Stage 2 = MCache - struct dml2_optimization_stage2_state stage2; - - // Stage 3 = UCLK PState - struct dml2_optimization_stage3_state stage3; - - // Stage 4 = Vmin - struct dml2_optimization_stage4_state stage4; - - // Stage 5 = Stutter - struct dml2_optimization_stage5_state stage5; -}; - -struct dml2_pmo_pstate_strategy { - enum dml2_pstate_method per_stream_pstate_method[DML2_MAX_PLANES]; - bool allow_state_increase; -}; -struct dml2_core_mode_support_in_out { - /* - * Inputs - */ - struct dml2_core_instance *instance; - const struct display_configuation_with_meta *display_cfg; - - struct dml2_mcg_min_clock_table *min_clk_table; - int min_clk_index; - /* - * Outputs - */ - struct dml2_core_mode_support_result mode_support_result; - - struct { - // Inputs - struct dml_display_cfg_st *display_cfg; - - // Outputs - struct dml_mode_support_info_st *support_info; - unsigned int out_lowest_state_idx; - unsigned int min_fclk_khz; - unsigned int min_dcfclk_khz; - unsigned int min_dram_speed_mts; - unsigned int min_socclk_khz; - unsigned int min_dscclk_khz; - unsigned int min_dtbclk_khz; - unsigned int min_phyclk_khz; - } legacy; -}; - -struct dml2_core_mode_programming_in_out { - /* - * Inputs - */ - struct dml2_core_instance *instance; - const struct display_configuation_with_meta *display_cfg; - const struct core_display_cfg_support_info *cfg_support_info; - /* - * Outputs (also Input the clk freq are also from programming struct) - */ - struct dml2_display_cfg_programming *programming; - -}; - -struct 
dml2_core_populate_informative_in_out { - /* - * Inputs - */ - struct dml2_core_instance *instance; - - // If this is set, then the mode was supported, and mode programming - // was successfully run. - // Otherwise, mode programming was not run, because mode support failed. - bool mode_is_supported; - - /* - * Outputs - */ - struct dml2_display_cfg_programming *programming; -}; - -struct dml2_calculate_mcache_allocation_in_out { - /* - * Inputs - */ - struct dml2_core_instance *instance; - const struct dml2_plane_parameters *plane_descriptor; - unsigned int plane_index; - - /* - * Outputs - */ - struct dml2_mcache_surface_allocation *mcache_allocation; -}; - -struct dml2_core_internal_state_inputs { - unsigned int dummy; -}; - -struct dml2_core_internal_state_intermediates { - unsigned int dummy; -}; - -struct dml2_core_mode_support_locals { - union { - struct dml2_core_calcs_mode_support_ex mode_support_ex_params; - }; - struct dml2_display_cfg svp_expanded_display_cfg; - struct dml2_calculate_mcache_allocation_in_out calc_mcache_allocation_params; -}; - -struct dml2_core_mode_programming_locals { - union { - struct dml2_core_calcs_mode_programming_ex mode_programming_ex_params; - }; - struct dml2_display_cfg svp_expanded_display_cfg; -}; - -struct dml2_core_scratch { - struct dml2_core_mode_support_locals mode_support_locals; - struct dml2_core_mode_programming_locals mode_programming_locals; - int main_stream_index_from_svp_stream_index[DML2_MAX_PLANES]; - int svp_stream_index_from_main_stream_index[DML2_MAX_PLANES]; - int main_plane_index_to_phantom_plane_index[DML2_MAX_PLANES]; - int phantom_plane_index_to_main_plane_index[DML2_MAX_PLANES]; -}; - -struct dml2_core_instance { - enum dml2_project_id project_id; - struct dml2_mcg_min_clock_table *minimum_clock_table; - struct dml2_core_internal_state_inputs inputs; - struct dml2_core_internal_state_intermediates intermediates; - - struct dml2_core_scratch scratch; - - bool (*initialize)(struct dml2_core_initialize_in_out *in_out); - bool (*mode_support)(struct dml2_core_mode_support_in_out *in_out); - bool (*mode_programming)(struct dml2_core_mode_programming_in_out *in_out); - bool (*populate_informative)(struct dml2_core_populate_informative_in_out *in_out); - bool (*calculate_mcache_allocation)(struct dml2_calculate_mcache_allocation_in_out *in_out); - - struct { - struct dml2_core_internal_display_mode_lib mode_lib; - } clean_me_up; -}; - -/* -* DML2 PMO Types and Interfaces -*/ - -struct dml2_pmo_initialize_in_out { - /* - * Input - */ - struct dml2_pmo_instance *instance; - struct dml2_soc_bb *soc_bb; - struct dml2_ip_capabilities *ip_caps; - struct dml2_pmo_options *options; - int mcg_clock_table_size; -}; - -struct dml2_pmo_optimize_dcc_mcache_in_out { - /* - * Input - */ - struct dml2_pmo_instance *instance; - const struct dml2_display_cfg *display_config; - bool *dcc_mcache_supported; - struct core_display_cfg_support_info *cfg_support_info; - - /* - * Output - */ - struct dml2_display_cfg *optimized_display_cfg; -}; - -struct dml2_pmo_init_for_vmin_in_out { - /* - * Input - */ - struct dml2_pmo_instance *instance; - struct display_configuation_with_meta *base_display_config; -}; - -struct dml2_pmo_test_for_vmin_in_out { - /* - * Input - */ - struct dml2_pmo_instance *instance; - const struct display_configuation_with_meta *display_config; - const struct dml2_soc_vmin_clock_limits *vmin_limits; -}; - -struct dml2_pmo_optimize_for_vmin_in_out { - /* - * Input - */ - struct dml2_pmo_instance *instance; - struct 
display_configuation_with_meta *base_display_config; - - /* - * Output - */ - struct display_configuation_with_meta *optimized_display_config; -}; - -struct dml2_pmo_init_for_pstate_support_in_out { - /* - * Input - */ - struct dml2_pmo_instance *instance; - struct display_configuation_with_meta *base_display_config; -}; - -struct dml2_pmo_test_for_pstate_support_in_out { - /* - * Input - */ - struct dml2_pmo_instance *instance; - struct display_configuation_with_meta *base_display_config; -}; - -struct dml2_pmo_optimize_for_pstate_support_in_out { - /* - * Input - */ - struct dml2_pmo_instance *instance; - struct display_configuation_with_meta *base_display_config; - bool last_candidate_failed; - - /* - * Output - */ - struct display_configuation_with_meta *optimized_display_config; -}; - -struct dml2_pmo_init_for_stutter_in_out { - /* - * Input - */ - struct dml2_pmo_instance *instance; - struct display_configuation_with_meta *base_display_config; -}; - -struct dml2_pmo_test_for_stutter_in_out { - /* - * Input - */ - struct dml2_pmo_instance *instance; - struct display_configuation_with_meta *base_display_config; -}; - -struct dml2_pmo_optimize_for_stutter_in_out { - /* - * Input - */ - struct dml2_pmo_instance *instance; - struct display_configuation_with_meta *base_display_config; - bool last_candidate_failed; - - /* - * Output - */ - struct display_configuation_with_meta *optimized_display_config; -}; - -#define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw - dml2_pstate_method_na + 1)) - 1) << dml2_pstate_method_na) -#define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr) -#define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_clamped - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr) -#define PMO_DRR_VAR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_drr + 1)) - 1) << dml2_pstate_method_fw_drr) -#define PMO_FW_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_svp + 1)) - 1) << dml2_pstate_method_fw_svp) - -#define PMO_DCN4_MAX_DISPLAYS 4 -#define PMO_DCN4_MAX_NUM_VARIANTS 2 -#define PMO_DCN4_MAX_BASE_STRATEGIES 10 - -struct dml2_scheduling_check_locals { - struct dml2_pstate_per_method_common_meta group_common_pstate_meta[DML2_MAX_PLANES]; - unsigned int sorted_group_gtl_disallow_index[DML2_MAX_PLANES]; - unsigned int sorted_group_gtl_period_index[DML2_MAX_PLANES]; -}; - -struct dml2_pmo_scratch { - union { - struct { - double reserved_time_candidates[DML2_MAX_PLANES][DML2_PMO_LEGACY_PREFETCH_MAX_TWAIT_OPTIONS]; - int reserved_time_candidates_count[DML2_MAX_PLANES]; - int current_candidate[DML2_MAX_PLANES]; - int min_latency_index; - int max_latency_index; - int cur_latency_index; - int stream_mask; - } pmo_dcn3; - struct { - struct dml2_pmo_pstate_strategy expanded_override_strategy_list[2 * 2 * 2 * 2]; - unsigned int num_expanded_override_strategies; - struct dml2_pmo_pstate_strategy pstate_strategy_candidates[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; - int num_pstate_candidates; - int cur_pstate_candidate; - - unsigned int stream_plane_mask[DML2_MAX_PLANES]; - - unsigned int stream_vactive_capability_mask; - - int min_latency_index; - int max_latency_index; - int cur_latency_index; - - // Stores all the implicit SVP meta information indexed by stream index of the display - // configuration under inspection, built at 
optimization stage init - struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES]; - struct dml2_pstate_meta stream_pstate_meta[DML2_MAX_PLANES]; - - unsigned int optimal_vblank_reserved_time_for_stutter_us[DML2_PMO_STUTTER_CANDIDATE_LIST_SIZE]; - unsigned int num_stutter_candidates; - unsigned int cur_stutter_candidate; - bool z8_vblank_optimizable; - - /* mask of synchronized timings by stream index */ - unsigned int num_timing_groups; - unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES]; - bool group_is_drr_enabled[DML2_MAX_PLANES]; - bool group_is_drr_active[DML2_MAX_PLANES]; - double group_line_time_us[DML2_MAX_PLANES]; - - /* scheduling check locals */ - struct dml2_pstate_per_method_common_meta group_common_pstate_meta[DML2_MAX_PLANES]; - unsigned int sorted_group_gtl_disallow_index[DML2_MAX_PLANES]; - unsigned int sorted_group_gtl_period_index[DML2_MAX_PLANES]; - double group_phase_offset[DML2_MAX_PLANES]; - } pmo_dcn4; - }; -}; - -struct dml2_pmo_init_data { - union { - struct { - /* populated once during initialization */ - struct dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2]; - struct dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 4 * 4]; - struct dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 6 * 6]; - struct dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 8 * 8 * 8 * 8]; - unsigned int num_expanded_strategies_per_list[PMO_DCN4_MAX_DISPLAYS]; - } pmo_dcn4; - }; -}; - -struct dml2_pmo_instance { - struct dml2_soc_bb *soc_bb; - struct dml2_ip_capabilities *ip_caps; - - struct dml2_pmo_options *options; - - int disp_clk_vmin_threshold; - int mpc_combine_limit; - int odm_combine_limit; - int mcg_clock_table_size; - union { - struct { - struct { - int prefetch_end_to_mall_start_us; - int fw_processing_delay_us; - int refresh_rate_limit_min; - int refresh_rate_limit_max; - } subvp; - } v1; - struct { - struct { - int refresh_rate_limit_min; - int refresh_rate_limit_max; - } subvp; - struct { - int refresh_rate_limit_min; - int refresh_rate_limit_max; - } drr; - } v2; - } fams_params; - - bool (*initialize)(struct dml2_pmo_initialize_in_out *in_out); - bool (*optimize_dcc_mcache)(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); - - bool (*init_for_vmin)(struct dml2_pmo_init_for_vmin_in_out *in_out); - bool (*test_for_vmin)(struct dml2_pmo_test_for_vmin_in_out *in_out); - bool (*optimize_for_vmin)(struct dml2_pmo_optimize_for_vmin_in_out *in_out); - - bool (*init_for_uclk_pstate)(struct dml2_pmo_init_for_pstate_support_in_out *in_out); - bool (*test_for_uclk_pstate)(struct dml2_pmo_test_for_pstate_support_in_out *in_out); - bool (*optimize_for_uclk_pstate)(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); - - bool (*init_for_stutter)(struct dml2_pmo_init_for_stutter_in_out *in_out); - bool (*test_for_stutter)(struct dml2_pmo_test_for_stutter_in_out *in_out); - bool (*optimize_for_stutter)(struct dml2_pmo_optimize_for_stutter_in_out *in_out); - - struct dml2_pmo_init_data init_data; - struct dml2_pmo_scratch scratch; -}; - -/* -* DML2 MCache Types -*/ - -struct top_mcache_validate_admissability_in_out { - struct dml2_instance *dml2_instance; - - const struct dml2_display_cfg *display_cfg; - const struct core_display_cfg_support_info *cfg_support_info; - struct dml2_mcache_surface_allocation *mcache_allocations; - - bool per_plane_status[DML2_MAX_PLANES]; - - struct { - const struct 
dml_mode_support_info_st *mode_support_info; - } legacy; -}; - -struct top_mcache_assign_ids_in_out { - /* - * Input - */ - const struct dml2_mcache_surface_allocation *mcache_allocations; - int plane_count; - - int per_pipe_viewport_x_start[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; - int per_pipe_viewport_x_end[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; - int pipe_count_per_plane[DML2_MAX_PLANES]; - - struct dml2_display_mcache_regs *current_mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; //One set per pipe/hubp - - /* - * Output - */ - struct dml2_display_mcache_regs mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; //One set per pipe/hubp - struct dml2_build_mcache_programming_in_out *mcache_programming; -}; - -struct top_mcache_calc_mcache_count_and_offsets_in_out { - /* - * Inputs - */ - struct dml2_instance *dml2_instance; - const struct dml2_display_cfg *display_config; - - /* - * Outputs - */ - struct dml2_mcache_surface_allocation *mcache_allocations; -}; - -struct top_mcache_assign_global_mcache_ids_in_out { - /* - * Inputs/Outputs - */ - struct dml2_mcache_surface_allocation *allocations; - int num_allocations; -}; - -/* -* DML2 Top Types -*/ - -struct dml2_initialize_instance_locals { - int dummy; -}; - -struct dml2_optimization_init_function_locals { - union { - struct { - struct dml2_pmo_init_for_pstate_support_in_out init_params; - } uclk_pstate; - struct { - struct dml2_pmo_init_for_stutter_in_out stutter_params; - } stutter; - struct { - struct dml2_pmo_init_for_vmin_in_out init_params; - } vmin; - }; -}; - -struct dml2_optimization_test_function_locals { - union { - struct { - struct top_mcache_calc_mcache_count_and_offsets_in_out calc_mcache_count_params; - struct top_mcache_assign_global_mcache_ids_in_out assign_global_mcache_ids_params; - struct top_mcache_validate_admissability_in_out validate_admissibility_params; - } test_mcache; - struct { - struct dml2_pmo_test_for_vmin_in_out pmo_test_vmin_params; - } test_vmin; - struct { - struct dml2_pmo_test_for_pstate_support_in_out test_params; - } uclk_pstate; - struct { - struct dml2_pmo_test_for_stutter_in_out stutter_params; - } stutter; - }; -}; - -struct dml2_optimization_optimize_function_locals { - union { - struct { - struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params; - } optimize_mcache; - struct { - struct dml2_pmo_optimize_for_vmin_in_out pmo_optimize_vmin_params; - } optimize_vmin; - struct { - struct dml2_pmo_optimize_for_pstate_support_in_out optimize_params; - } uclk_pstate; - struct { - struct dml2_pmo_optimize_for_stutter_in_out stutter_params; - } stutter; - }; -}; - -struct dml2_optimization_phase_locals { - struct display_configuation_with_meta cur_candidate_display_cfg; - struct display_configuation_with_meta next_candidate_display_cfg; - struct dml2_core_mode_support_in_out mode_support_params; - struct dml2_optimization_init_function_locals init_function_locals; - struct dml2_optimization_test_function_locals test_function_locals; - struct dml2_optimization_optimize_function_locals optimize_function_locals; -}; - -struct dml2_check_mode_supported_locals { - struct dml2_display_cfg display_cfg_working_copy; - struct dml2_core_mode_support_in_out mode_support_params; - struct dml2_optimization_phase_locals optimization_phase_locals; - struct display_configuation_with_meta base_display_config_with_meta; - struct display_configuation_with_meta optimized_display_config_with_meta; - struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out dppm_map_mode_params; -}; - -struct 
optimization_init_function_params {
-	struct dml2_optimization_init_function_locals *locals;
-	struct dml2_instance *dml;
-	struct display_configuation_with_meta *display_config;
-};
-
-struct optimization_test_function_params {
-	struct dml2_optimization_test_function_locals *locals;
-	struct dml2_instance *dml;
-	struct display_configuation_with_meta *display_config;
-};
-
-struct optimization_optimize_function_params {
-	bool last_candidate_supported;
-	struct dml2_optimization_optimize_function_locals *locals;
-	struct dml2_instance *dml;
-	struct display_configuation_with_meta *display_config;
-	struct display_configuation_with_meta *optimized_display_config;
-};
-
-struct optimization_phase_params {
-	struct dml2_instance *dml;
-	const struct display_configuation_with_meta *display_config; // Initial display configuration
-	bool (*init_function)(const struct optimization_init_function_params *params); // Init function to set up the phase before iterating
-	bool (*test_function)(const struct optimization_test_function_params *params); // Test function to determine if optimization is complete
-	bool (*optimize_function)(const struct optimization_optimize_function_params *params); // Function which produces a more optimized display configuration
-	struct display_configuation_with_meta *optimized_display_config; // The optimized display configuration
-
-	bool all_or_nothing;
-};
-
-struct dml2_build_mode_programming_locals {
-	struct dml2_core_mode_support_in_out mode_support_params;
-	struct dml2_core_mode_programming_in_out mode_programming_params;
-	struct dml2_core_populate_informative_in_out informative_params;
-	struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params;
-	struct display_configuation_with_meta base_display_config_with_meta;
-	struct display_configuation_with_meta optimized_display_config_with_meta;
-	struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out dppm_map_mode_params;
-	struct dml2_dpmm_map_watermarks_params_in_out dppm_map_watermarks_params;
-	struct dml2_optimization_phase_locals optimization_phase_locals;
-	struct optimization_phase_params min_clock_for_latency_phase;
-	struct optimization_phase_params mcache_phase;
-	struct optimization_phase_params uclk_pstate_phase;
-	struct optimization_phase_params vmin_phase;
-	struct optimization_phase_params stutter_phase;
-};
-
-struct dml2_legacy_core_build_mode_programming_wrapper_locals {
-	struct dml2_core_mode_support_in_out mode_support_params;
-	struct dml2_core_mode_programming_in_out mode_programming_params;
-	struct dml2_core_populate_informative_in_out informative_params;
-	struct top_mcache_calc_mcache_count_and_offsets_in_out calc_mcache_count_params;
-	struct top_mcache_validate_admissability_in_out validate_admissibility_params;
-	struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES];
-	struct top_mcache_assign_global_mcache_ids_in_out assign_global_mcache_ids_params;
-	struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params;
-	struct dml2_display_cfg optimized_display_cfg;
-	struct core_display_cfg_support_info core_support_info;
-};
-
-struct dml2_top_mcache_verify_mcache_size_locals {
-	struct dml2_calculate_mcache_allocation_in_out calc_mcache_params;
-};
-
-struct dml2_top_mcache_validate_admissability_locals {
-	struct {
-		int pipe_vp_startx[DML2_MAX_DCN_PIPES];
-		int pipe_vp_endx[DML2_MAX_DCN_PIPES];
-	} plane0;
-	struct {
-		int pipe_vp_startx[DML2_MAX_DCN_PIPES];
-		int pipe_vp_endx[DML2_MAX_DCN_PIPES];
-	} plane1;
-};
-
-struct
dml2_top_display_cfg_support_info { - const struct dml2_display_cfg *display_config; - struct core_display_cfg_support_info core_info; -}; - -struct dml2_top_funcs { - bool (*check_mode_supported)(struct dml2_check_mode_supported_in_out *in_out); - bool (*build_mode_programming)(struct dml2_build_mode_programming_in_out *in_out); - bool (*build_mcache_programming)(struct dml2_build_mcache_programming_in_out *in_out); -}; - -struct dml2_instance { - enum dml2_project_id project_id; - - struct dml2_core_instance core_instance; - struct dml2_mcg_instance mcg_instance; - struct dml2_dpmm_instance dpmm_instance; - struct dml2_pmo_instance pmo_instance; - - struct dml2_soc_bb soc_bbox; - struct dml2_ip_capabilities ip_caps; - - struct dml2_mcg_min_clock_table min_clk_table; - struct dml2_pmo_options pmo_options; - struct dml2_top_funcs funcs; - - struct { - struct dml2_initialize_instance_locals initialize_instance_locals; - struct dml2_top_mcache_verify_mcache_size_locals mcache_verify_mcache_size_locals; - struct dml2_top_mcache_validate_admissability_locals mcache_validate_admissability_locals; - struct dml2_check_mode_supported_locals check_mode_supported_locals; - struct dml2_build_mode_programming_locals build_mode_programming_locals; - } scratch; - - struct { - struct { - struct dml2_legacy_core_build_mode_programming_wrapper_locals legacy_core_build_mode_programming_wrapper_locals; - } scratch; - } legacy; -}; -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c deleted file mode 100644 index 4cfe64aa8492..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ /dev/null @@ -1,1174 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dml2_mall_phantom.h" - -#include "dml2_dc_types.h" -#include "dml2_internal_types.h" -#include "dml2_utils.h" -#include "dml2_dc_resource_mgmt.h" - -#define MAX_ODM_FACTOR 4 -#define MAX_MPCC_FACTOR 4 - -struct dc_plane_pipe_pool { - int pipes_assigned_to_plane[MAX_ODM_FACTOR][MAX_MPCC_FACTOR]; - bool pipe_used[MAX_ODM_FACTOR][MAX_MPCC_FACTOR]; - int num_pipes_assigned_to_plane_for_mpcc_combine; - int num_pipes_assigned_to_plane_for_odm_combine; -}; - -struct dc_pipe_mapping_scratch { - struct { - unsigned int odm_factor; - unsigned int odm_slice_end_x[MAX_PIPES]; - struct pipe_ctx *next_higher_pipe_for_odm_slice[MAX_PIPES]; - } odm_info; - struct { - unsigned int mpc_factor; - struct pipe_ctx *prev_odm_pipe; - } mpc_info; - - struct dc_plane_pipe_pool pipe_pool; -}; - -static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane, - unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) -{ - int i, j; - bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; - - if (!plane_id) - return false; - - for (i = 0; i < state->stream_count; i++) { - if (state->streams[i]->stream_id == stream_id) { - for (j = 0; j < state->stream_status[i].plane_count; j++) { - if (state->stream_status[i].plane_states[j] == plane && - (!is_plane_duplicate || (j == plane_index))) { - *plane_id = (i << 16) | j; - return true; - } - } - } - } - - return false; -} - -static int find_disp_cfg_idx_by_plane_id(struct dml2_dml_to_dc_pipe_mapping *mapping, unsigned int plane_id) -{ - int i; - - for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { - if (mapping->disp_cfg_to_plane_id_valid[i] && mapping->disp_cfg_to_plane_id[i] == plane_id) - return i; - } - - ASSERT(false); - return __DML2_WRAPPER_MAX_STREAMS_PLANES__; -} - -static int find_disp_cfg_idx_by_stream_id(struct dml2_dml_to_dc_pipe_mapping *mapping, unsigned int stream_id) -{ - int i; - - for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { - if (mapping->disp_cfg_to_stream_id_valid[i] && mapping->disp_cfg_to_stream_id[i] == stream_id) - return i; - } - - ASSERT(false); - return __DML2_WRAPPER_MAX_STREAMS_PLANES__; -} - -// The master pipe of a stream is defined as the top pipe in odm slice 0 -static struct pipe_ctx *find_master_pipe_of_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int stream_id) -{ - int i; - - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - if (state->res_ctx.pipe_ctx[i].stream && state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) { - if (!state->res_ctx.pipe_ctx[i].prev_odm_pipe && !state->res_ctx.pipe_ctx[i].top_pipe) - return &state->res_ctx.pipe_ctx[i]; - } - } - - return NULL; -} - -static struct pipe_ctx *find_master_pipe_of_plane(struct dml2_context *ctx, - struct dc_state *state, unsigned int plane_id) -{ - int i; - unsigned int plane_id_assigned_to_pipe; - - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(ctx, state, state->res_ctx.pipe_ctx[i].plane_state, - state->res_ctx.pipe_ctx[i].stream->stream_id, - ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id_assigned_to_pipe)) { - if (plane_id_assigned_to_pipe == plane_id) - return &state->res_ctx.pipe_ctx[i]; - } - } - - return NULL; -} - -static unsigned int find_pipes_assigned_to_plane(struct dml2_context *ctx, - struct dc_state *state, unsigned int plane_id, unsigned int *pipes) -{ - 
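-	/*
-	 * Editor's note (illustrative, not part of the original file): the plane
-	 * ids produced by get_plane_id() above pack the stream and plane indices
-	 * as (stream_index << 16) | plane_index. Assuming both indices fit in
-	 * 16 bits, a hypothetical decode would be:
-	 *
-	 *	unsigned int stream_index = plane_id >> 16;
-	 *	unsigned int plane_index  = plane_id & 0xffff;
-	 */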
int i; - unsigned int num_found = 0; - unsigned int plane_id_assigned_to_pipe = -1; - - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; - - if (!pipe->plane_state || !pipe->stream) - continue; - - get_plane_id(ctx, state, pipe->plane_state, pipe->stream->stream_id, - ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[pipe->pipe_idx], - &plane_id_assigned_to_pipe); - if (plane_id_assigned_to_pipe == plane_id && !pipe->prev_odm_pipe - && (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) { - while (pipe) { - struct pipe_ctx *mpc_pipe = pipe; - - while (mpc_pipe) { - pipes[num_found++] = mpc_pipe->pipe_idx; - mpc_pipe = mpc_pipe->bottom_pipe; - if (!mpc_pipe) - break; - if (mpc_pipe->plane_state != pipe->plane_state) - mpc_pipe = NULL; - } - pipe = pipe->next_odm_pipe; - } - break; - } - } - - return num_found; -} - -static bool validate_pipe_assignment(const struct dml2_context *ctx, const struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, const struct dml2_dml_to_dc_pipe_mapping *mapping) -{ -// int i, j, k; -// -// unsigned int plane_id; -// -// unsigned int disp_cfg_index; -// -// unsigned int pipes_assigned_to_plane[MAX_PIPES]; -// unsigned int num_pipes_assigned_to_plane; -// -// struct pipe_ctx *top_pipe; -// -// for (i = 0; i < state->stream_count; i++) { -// for (j = 0; j < state->stream_status[i]->plane_count; j++) { -// if (get_plane_id(state, state->stream_status.plane_states[j], &plane_id)) { -// disp_cfg_index = find_disp_cfg_idx_by_plane_id(mapping, plane_id); -// num_pipes_assigned_to_plane = find_pipes_assigned_to_plane(ctx, state, plane_id, pipes_assigned_to_plane); -// -// if (disp_cfg_index >= 0 && num_pipes_assigned_to_plane > 0) { -// // Verify the number of pipes assigned matches -// if (disp_cfg->hw.DPPPerSurface != num_pipes_assigned_to_plane) -// return false; -// -// top_pipe = find_top_pipe_in_tree(state->res_ctx.pipe_ctx[pipes_assigned_to_plane[0]]); -// -// // Verify MPC and ODM combine -// if (disp_cfg->hw.ODMMode == dml_odm_mode_bypass) { -// verify_combine_tree(top_pipe, state->streams[i]->stream_id, plane_id, state, false); -// } else { -// verify_combine_tree(top_pipe, state->streams[i]->stream_id, plane_id, state, true); -// } -// -// // TODO: could also do additional verification that the pipes in tree are the same as -// // pipes_assigned_to_plane -// } else { -// ASSERT(false); -// return false; -// } -// } else { -// ASSERT(false); -// return false; -// } -// } -// } - return true; -} - -static bool is_plane_using_pipe(const struct pipe_ctx *pipe) -{ - if (pipe->plane_state) - return true; - - return false; -} - -static bool is_pipe_free(const struct pipe_ctx *pipe) -{ - if (!pipe->plane_state && !pipe->stream) - return true; - - return false; -} - -static unsigned int find_preferred_pipe_candidates(const struct dc_state *existing_state, - const int pipe_count, - const unsigned int stream_id, - unsigned int *preferred_pipe_candidates) -{ - unsigned int num_preferred_candidates = 0; - int i; - - /* There is only one case which we consider for adding a pipe to the preferred - * pipe candidate array: - * - * 1. If the existing stream id of the pipe is equivalent to the stream id - * of the stream we are trying to achieve MPC/ODM combine for. This allows - * us to minimize the changes in pipe topology during the transition. - * - * However this condition comes with a caveat. 
We need to ignore pipes that will
- * require a change in OPP but still have the same stream id, for example during
- * an MPC to ODM transition.
- *
- * We also avoid selecting the head pipe, by using the dc resource helper
- * resource_get_primary_dpp_pipe() and comparing the pipe index.
- */
-	if (existing_state) {
-		for (i = 0; i < pipe_count; i++) {
-			if (existing_state->res_ctx.pipe_ctx[i].stream && existing_state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) {
-				struct pipe_ctx *head_pipe =
-					resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ?
-							resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) :
-							NULL;
-
-				// we should always respect the head pipe from selection
-				if (head_pipe && head_pipe->pipe_idx == i)
-					continue;
-				if (existing_state->res_ctx.pipe_ctx[i].plane_res.hubp &&
-				    existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i &&
-				    (existing_state->res_ctx.pipe_ctx[i].prev_odm_pipe ||
-				     existing_state->res_ctx.pipe_ctx[i].next_odm_pipe))
-					continue;
-
-				preferred_pipe_candidates[num_preferred_candidates++] = i;
-			}
-		}
-	}
-
-	return num_preferred_candidates;
-}
-
-static unsigned int find_last_resort_pipe_candidates(const struct dc_state *existing_state,
-		const int pipe_count,
-		const unsigned int stream_id,
-		unsigned int *last_resort_pipe_candidates)
-{
-	unsigned int num_last_resort_candidates = 0;
-	int i;
-
-	/* There are two cases where we would like to add a given pipe into the last
-	 * resort candidate array:
-	 *
-	 * 1. If the pipe requires a change in OPP, for example during an MPC
-	 * to ODM transition.
-	 *
-	 * 2. If the pipe already has an enabled OTG.
-	 */
-	if (existing_state) {
-		for (i = 0; i < pipe_count; i++) {
-			struct pipe_ctx *head_pipe =
-				resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ?
-						resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) :
-						NULL;
-
-			// we should always respect the head pipe from selection
-			if (head_pipe && head_pipe->pipe_idx == i)
-				continue;
-			if ((existing_state->res_ctx.pipe_ctx[i].plane_res.hubp &&
-			     existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) ||
-			    existing_state->res_ctx.pipe_ctx[i].stream_res.tg)
-				last_resort_pipe_candidates[num_last_resort_candidates++] = i;
-		}
-	}
-
-	return num_last_resort_candidates;
-}
-
-static bool is_pipe_in_candidate_array(const unsigned int pipe_idx,
-		const unsigned int *candidate_array,
-		const unsigned int candidate_array_size)
-{
-	int i;
-
-	for (i = 0; i < candidate_array_size; i++) {
-		if (candidate_array[i] == pipe_idx)
-			return true;
-	}
-
-	return false;
-}
-
-static bool find_more_pipes_for_stream(struct dml2_context *ctx,
-		struct dc_state *state, // The state we want to find a free mapping in
-		unsigned int stream_id, // The stream we want this pipe to drive
-		int *assigned_pipes,
-		int *assigned_pipe_count,
-		int pipes_needed,
-		const struct dc_state *existing_state) // The state (optional) that we want to minimize remapping relative to
-{
-	struct pipe_ctx *pipe = NULL;
-	unsigned int preferred_pipe_candidates[MAX_PIPES] = {0};
-	unsigned int last_resort_pipe_candidates[MAX_PIPES] = {0};
-	unsigned int num_preferred_candidates = 0;
-	unsigned int num_last_resort_candidates = 0;
-	int i;
-
-	if (existing_state) {
-		num_preferred_candidates =
-			find_preferred_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, preferred_pipe_candidates);
-
-		num_last_resort_candidates =
-			find_last_resort_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, last_resort_pipe_candidates);
-	}
-
-	// First see if any of the preferred are unmapped, and choose those instead
-	for (i = 0; pipes_needed > 0 && i < num_preferred_candidates; i++) {
-		pipe = &state->res_ctx.pipe_ctx[preferred_pipe_candidates[i]];
-		if (!is_plane_using_pipe(pipe)) {
-			pipes_needed--;
-			// TODO: This doesn't make sense really, pipe_idx should always be valid
-			pipe->pipe_idx = preferred_pipe_candidates[i];
-			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
-		}
-	}
-
-	// We like to pair pipes starting from the higher order indices for combining
-	for (i = ctx->config.dcn_pipe_count - 1; pipes_needed > 0 && i >= 0; i--) {
-		// Ignore any pipes that are the preferred or last resort candidate
-		if (is_pipe_in_candidate_array(i, preferred_pipe_candidates, num_preferred_candidates) ||
-		    is_pipe_in_candidate_array(i, last_resort_pipe_candidates, num_last_resort_candidates))
-			continue;
-
-		pipe = &state->res_ctx.pipe_ctx[i];
-		if (!is_plane_using_pipe(pipe)) {
-			pipes_needed--;
-			// TODO: This doesn't make sense really, pipe_idx should always be valid
-			pipe->pipe_idx = i;
-			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
-		}
-	}
-
-	// Only use the last resort pipe candidates as a last resort
-	for (i = 0; pipes_needed > 0 && i < num_last_resort_candidates; i++) {
-		pipe = &state->res_ctx.pipe_ctx[last_resort_pipe_candidates[i]];
-		if (!is_plane_using_pipe(pipe)) {
-			pipes_needed--;
-			// TODO: This doesn't make sense really, pipe_idx should always be valid
-			pipe->pipe_idx = last_resort_pipe_candidates[i];
-			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
-		}
-	}
-
-	ASSERT(pipes_needed <= 0); // Validation should prevent us from building a pipe context that exceeds the number of HW resources available
-
-	return pipes_needed <= 0;
-}
-
-static bool find_more_free_pipes(struct dml2_context *ctx,
-		struct dc_state *state, // The state we want to find a free mapping in
-		unsigned int stream_id, // The stream we want this pipe to drive
-		int *assigned_pipes,
-		int *assigned_pipe_count,
-		int pipes_needed,
-		const struct dc_state *existing_state) // The state (optional) that we want to minimize remapping relative to
-{
-	struct pipe_ctx *pipe = NULL;
-	unsigned int preferred_pipe_candidates[MAX_PIPES] = {0};
-	unsigned int last_resort_pipe_candidates[MAX_PIPES] = {0};
-	unsigned int num_preferred_candidates = 0;
-	unsigned int num_last_resort_candidates = 0;
-	int i;
-
-	if (existing_state) {
-		num_preferred_candidates =
-			find_preferred_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, preferred_pipe_candidates);
-
-		num_last_resort_candidates =
-			find_last_resort_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, last_resort_pipe_candidates);
-	}
-
-	// First see if any of the preferred are unmapped, and choose those instead
-	for (i = 0; pipes_needed > 0 && i < num_preferred_candidates; i++) {
-		pipe = &state->res_ctx.pipe_ctx[preferred_pipe_candidates[i]];
-		if (is_pipe_free(pipe)) {
-			pipes_needed--;
-			// TODO: This doesn't make sense really, pipe_idx should always be valid
-			pipe->pipe_idx = preferred_pipe_candidates[i];
-			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
-		}
-	}
-
-	// We like to pair pipes starting from the higher order indices for combining
-	for (i = ctx->config.dcn_pipe_count - 1; pipes_needed > 0 && i >= 0; i--) {
-		// Ignore any pipes that are the preferred or last resort candidate
-		if (is_pipe_in_candidate_array(i, preferred_pipe_candidates, num_preferred_candidates) ||
-		    is_pipe_in_candidate_array(i, last_resort_pipe_candidates, num_last_resort_candidates))
-			continue;
-
-		pipe = &state->res_ctx.pipe_ctx[i];
-		if (is_pipe_free(pipe)) {
-			pipes_needed--;
-			// TODO: This doesn't make sense really, pipe_idx should always be valid
-			pipe->pipe_idx = i;
-			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
-		}
-	}
-
-	// Only use the last resort pipe candidates as a last resort
-	for (i = 0; pipes_needed > 0 && i < num_last_resort_candidates; i++) {
-		pipe = &state->res_ctx.pipe_ctx[last_resort_pipe_candidates[i]];
-		if (is_pipe_free(pipe)) {
-			pipes_needed--;
-			// TODO: This doesn't make sense really, pipe_idx should always be valid
-			pipe->pipe_idx = last_resort_pipe_candidates[i];
-			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
-		}
-	}
-
-	ASSERT(pipes_needed == 0); // Validation should prevent us from building a pipe context that exceeds the number of HW resources available
-
-	return pipes_needed == 0;
-}
-
-static void sort_pipes_for_splitting(struct dc_plane_pipe_pool *pipes)
-{
-	bool sorted, swapped;
-	unsigned int cur_index;
-	int odm_slice_index;
-
-	for (odm_slice_index = 0; odm_slice_index < pipes->num_pipes_assigned_to_plane_for_odm_combine; odm_slice_index++) {
-		// Sort each MPCC set
-		// Un-optimized bubble sort, but that's okay for array sizes <= 6
-
-		if (pipes->num_pipes_assigned_to_plane_for_mpcc_combine <= 1)
-			sorted = true;
-		else
-			sorted = false;
-
-		cur_index = 0;
-		swapped = false;
-		while (!sorted) {
-			if (pipes->pipes_assigned_to_plane[odm_slice_index][cur_index] > pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1]) {
-				swap(pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1],
-				     pipes->pipes_assigned_to_plane[odm_slice_index][cur_index]);
-
-				swapped = true;
-			}
-
-			cur_index++;
-
-			if (cur_index == pipes->num_pipes_assigned_to_plane_for_mpcc_combine - 1) {
-				cur_index = 0;
-
-				if (swapped)
-					sorted = false;
-				else
-					sorted = true;
-
-				swapped = false;
-			}
-
-		}
-	}
-}
-
-// For example, 3840 x 2160 with ODM 2:1 has a slice array of [1919, 3839], meaning slice0 spans h_pixels 0->1919 and slice1 spans 1920->3839
-static void calculate_odm_slices(const struct dc_stream_state *stream, unsigned int odm_factor, unsigned int *odm_slice_end_x)
-{
-	unsigned int slice_size = 0;
-	int i;
-
-	if (odm_factor < 1 || odm_factor > 4) {
-		ASSERT(false);
-		return;
-	}
-
-	slice_size = stream->src.width / odm_factor;
-
-	for (i = 0; i < odm_factor; i++)
-		odm_slice_end_x[i] = (slice_size * (i + 1)) - 1;
-
-	odm_slice_end_x[odm_factor - 1] = stream->src.width - 1;
-}
-
-static void add_odm_slice_to_odm_tree(struct dml2_context *ctx,
-		struct dc_state *state,
-		struct dc_pipe_mapping_scratch *scratch,
-		unsigned int odm_slice_index)
-{
-	struct pipe_ctx *pipe = NULL;
-	int i;
-
-	// MPCC Combine + ODM Combine is not supported, so there should never be a case where the current plane
-	// has more than 1 pipe mapped to it for a given slice.
-	ASSERT(scratch->pipe_pool.num_pipes_assigned_to_plane_for_mpcc_combine == 1 || scratch->pipe_pool.num_pipes_assigned_to_plane_for_odm_combine == 1);
-
-	for (i = 0; i < scratch->pipe_pool.num_pipes_assigned_to_plane_for_mpcc_combine; i++) {
-		pipe = &state->res_ctx.pipe_ctx[scratch->pipe_pool.pipes_assigned_to_plane[odm_slice_index][i]];
-
-		if (scratch->mpc_info.prev_odm_pipe)
-			scratch->mpc_info.prev_odm_pipe->next_odm_pipe = pipe;
-
-		pipe->prev_odm_pipe = scratch->mpc_info.prev_odm_pipe;
-		pipe->next_odm_pipe = NULL;
-	}
-	scratch->mpc_info.prev_odm_pipe = pipe;
-}
-
-static struct pipe_ctx *add_plane_to_blend_tree(struct dml2_context *ctx,
-		struct dc_state *state,
-		const struct dc_plane_state *plane,
-		struct dc_plane_pipe_pool *pipe_pool,
-		unsigned int odm_slice,
-		struct pipe_ctx *top_pipe)
-{
-	int i;
-
-	for (i = 0; i < pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine; i++) {
-		if (top_pipe)
-			top_pipe->bottom_pipe = &state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]];
-
-		pipe_pool->pipe_used[odm_slice][i] = true;
-
-		state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]].top_pipe = top_pipe;
-		state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]].bottom_pipe = NULL;
-
-		top_pipe = &state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]];
-	}
-
-	// After running the above loop, the top pipe actually ends up pointing to the bottom of this MPCC combine tree, so we are actually
-	// returning the bottom pipe here
-	return top_pipe;
-}
-
-static unsigned int find_pipes_assigned_to_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int stream_id, unsigned int *pipes)
-{
-	int i;
-	unsigned int num_found = 0;
-
-	for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
-		struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
-
-		if (pipe->stream && pipe->stream->stream_id == stream_id && !pipe->top_pipe && !pipe->prev_odm_pipe) {
-			while (pipe) {
-				pipes[num_found++] = pipe->pipe_idx;
-				pipe = pipe->next_odm_pipe;
-			}
-			break;
-		}
-	}
-
-	return num_found;
-}
-
-static struct pipe_ctx *assign_pipes_to_stream(struct dml2_context *ctx, struct dc_state *state,
-		const struct dc_stream_state *stream,
-		int odm_factor,
-		struct dc_plane_pipe_pool *pipe_pool,
-		const struct dc_state *existing_state)
-{
-	struct pipe_ctx *master_pipe;
-	unsigned int pipes_needed;
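-	/*
-	 * Editor's note (illustrative, not part of the original file): for
-	 * calculate_odm_slices() above, a 3840-wide source with odm_factor = 4
-	 * gives slice_size = 960 and odm_slice_end_x = {959, 1919, 2879, 3839};
-	 * the final entry is clamped to src.width - 1, so any remainder from a
-	 * non-divisible width is absorbed by the last slice.
-	 */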
- unsigned int pipes_assigned; - unsigned int pipes[MAX_PIPES] = {0}; - unsigned int next_pipe_to_assign; - int odm_slice; - - pipes_needed = odm_factor; - - master_pipe = find_master_pipe_of_stream(ctx, state, stream->stream_id); - ASSERT(master_pipe); - - pipes_assigned = find_pipes_assigned_to_stream(ctx, state, stream->stream_id, pipes); - - find_more_free_pipes(ctx, state, stream->stream_id, pipes, &pipes_assigned, pipes_needed - pipes_assigned, existing_state); - - ASSERT(pipes_assigned == pipes_needed); - - next_pipe_to_assign = 0; - for (odm_slice = 0; odm_slice < odm_factor; odm_slice++) - pipe_pool->pipes_assigned_to_plane[odm_slice][0] = pipes[next_pipe_to_assign++]; - - pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine = 1; - pipe_pool->num_pipes_assigned_to_plane_for_odm_combine = odm_factor; - - return master_pipe; -} - -static struct pipe_ctx *assign_pipes_to_plane(struct dml2_context *ctx, struct dc_state *state, - const struct dc_stream_state *stream, - const struct dc_plane_state *plane, - int odm_factor, - int mpc_factor, - int plane_index, - struct dc_plane_pipe_pool *pipe_pool, - const struct dc_state *existing_state) -{ - struct pipe_ctx *master_pipe = NULL; - unsigned int plane_id; - unsigned int pipes_needed; - unsigned int pipes_assigned; - unsigned int pipes[MAX_PIPES] = {0}; - unsigned int next_pipe_to_assign; - int odm_slice, mpc_slice; - - if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) { - ASSERT(false); - return master_pipe; - } - - pipes_needed = mpc_factor * odm_factor; - - master_pipe = find_master_pipe_of_plane(ctx, state, plane_id); - ASSERT(master_pipe); - - pipes_assigned = find_pipes_assigned_to_plane(ctx, state, plane_id, pipes); - - find_more_pipes_for_stream(ctx, state, stream->stream_id, pipes, &pipes_assigned, pipes_needed - pipes_assigned, existing_state); - - ASSERT(pipes_assigned >= pipes_needed); - - next_pipe_to_assign = 0; - for (odm_slice = 0; odm_slice < odm_factor; odm_slice++) - for (mpc_slice = 0; mpc_slice < mpc_factor; mpc_slice++) - pipe_pool->pipes_assigned_to_plane[odm_slice][mpc_slice] = pipes[next_pipe_to_assign++]; - - pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine = mpc_factor; - pipe_pool->num_pipes_assigned_to_plane_for_odm_combine = odm_factor; - - return master_pipe; -} - -static bool is_pipe_used(const struct dc_plane_pipe_pool *pool, unsigned int pipe_idx) -{ - int i, j; - - for (i = 0; i < pool->num_pipes_assigned_to_plane_for_odm_combine; i++) { - for (j = 0; j < pool->num_pipes_assigned_to_plane_for_mpcc_combine; j++) { - if (pool->pipes_assigned_to_plane[i][j] == pipe_idx && pool->pipe_used[i][j]) - return true; - } - } - - return false; -} - -static void free_pipe(struct pipe_ctx *pipe) -{ - memset(pipe, 0, sizeof(struct pipe_ctx)); -} - -static void free_unused_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, - const struct dc_plane_state *plane, const struct dc_plane_pipe_pool *pool, unsigned int stream_id, int plane_index) -{ - int i; - bool is_plane_duplicate = ctx->v20.scratch.plane_duplicate_exists; - - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - if (state->res_ctx.pipe_ctx[i].plane_state == plane && - state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id && - (!is_plane_duplicate || - ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx] == plane_index) && - !is_pipe_used(pool, state->res_ctx.pipe_ctx[i].pipe_idx)) { - free_pipe(&state->res_ctx.pipe_ctx[i]); - } - } -} - -static 
void remove_pipes_from_blend_trees(struct dml2_context *ctx, struct dc_state *state, struct dc_plane_pipe_pool *pipe_pool, unsigned int odm_slice)
-{
-	struct pipe_ctx *pipe;
-	int i;
-
-	for (i = 0; i < pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine; i++) {
-		pipe = &state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][0]];
-		if (pipe->top_pipe)
-			pipe->top_pipe->bottom_pipe = pipe->bottom_pipe;
-
-		if (pipe->bottom_pipe)
-			pipe->bottom_pipe = pipe->top_pipe;
-
-		pipe_pool->pipe_used[odm_slice][i] = true;
-	}
-}
-
-static void map_pipes_for_stream(struct dml2_context *ctx, struct dc_state *state, const struct dc_stream_state *stream,
-		struct dc_pipe_mapping_scratch *scratch, const struct dc_state *existing_state)
-{
-	int odm_slice_index;
-	struct pipe_ctx *master_pipe = NULL;
-
-	master_pipe = assign_pipes_to_stream(ctx, state, stream, scratch->odm_info.odm_factor, &scratch->pipe_pool, existing_state);
-	sort_pipes_for_splitting(&scratch->pipe_pool);
-
-	for (odm_slice_index = 0; odm_slice_index < scratch->odm_info.odm_factor; odm_slice_index++) {
-		remove_pipes_from_blend_trees(ctx, state, &scratch->pipe_pool, odm_slice_index);
-
-		add_odm_slice_to_odm_tree(ctx, state, scratch, odm_slice_index);
-
-		ctx->config.callbacks.acquire_secondary_pipe_for_mpc_odm(ctx->config.callbacks.dc, state,
-			master_pipe, &state->res_ctx.pipe_ctx[scratch->pipe_pool.pipes_assigned_to_plane[odm_slice_index][0]], true);
-	}
-}
-
-static void map_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, const struct dc_stream_state *stream, const struct dc_plane_state *plane,
-		int plane_index, struct dc_pipe_mapping_scratch *scratch, const struct dc_state *existing_state)
-{
-	int odm_slice_index;
-	unsigned int plane_id;
-	struct pipe_ctx *master_pipe = NULL;
-	int i;
-
-	if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) {
-		ASSERT(false);
-		return;
-	}
-
-	master_pipe = assign_pipes_to_plane(ctx, state, stream, plane, scratch->odm_info.odm_factor,
-			scratch->mpc_info.mpc_factor, plane_index, &scratch->pipe_pool, existing_state);
-	sort_pipes_for_splitting(&scratch->pipe_pool);
-
-	for (odm_slice_index = 0; odm_slice_index < scratch->odm_info.odm_factor; odm_slice_index++) {
-		// Now we have a list of all pipes to be used for this plane/stream; set up the tree.
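-		/*
-		 * Editor's note (illustrative, not part of the original file): for a
-		 * hypothetical odm_factor = 2 plane (so mpc_factor = 1, since MPCC
-		 * combine and ODM combine are not supported together per the ASSERT
-		 * in add_odm_slice_to_odm_tree()), the calls below assign one pipe
-		 * per ODM slice and link the slice pipes through
-		 * prev_odm_pipe/next_odm_pipe:
-		 *
-		 *	slice 0: pipe A  --next_odm_pipe-->  slice 1: pipe B
-		 *
-		 * With odm_factor = 1 and mpc_factor = 2, the two pipes are instead
-		 * chained through top_pipe/bottom_pipe by add_plane_to_blend_tree().
-		 */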
- scratch->odm_info.next_higher_pipe_for_odm_slice[odm_slice_index] = add_plane_to_blend_tree(ctx, state, - plane, - &scratch->pipe_pool, - odm_slice_index, - scratch->odm_info.next_higher_pipe_for_odm_slice[odm_slice_index]); - - add_odm_slice_to_odm_tree(ctx, state, scratch, odm_slice_index); - - for (i = 0; i < scratch->pipe_pool.num_pipes_assigned_to_plane_for_mpcc_combine; i++) { - - ctx->config.callbacks.acquire_secondary_pipe_for_mpc_odm(ctx->config.callbacks.dc, state, - master_pipe, &state->res_ctx.pipe_ctx[scratch->pipe_pool.pipes_assigned_to_plane[odm_slice_index][i]], true); - } - } - - free_unused_pipes_for_plane(ctx, state, plane, &scratch->pipe_pool, stream->stream_id, plane_index); -} - -static unsigned int get_target_mpc_factor(struct dml2_context *ctx, - struct dc_state *state, - const struct dml_display_cfg_st *disp_cfg, - struct dml2_dml_to_dc_pipe_mapping *mapping, - const struct dc_stream_status *status, - const struct dc_stream_state *stream, - int plane_idx) -{ - unsigned int plane_id; - unsigned int cfg_idx; - unsigned int mpc_factor; - - if (ctx->architecture == dml2_architecture_20) { - get_plane_id(ctx, state, status->plane_states[plane_idx], - stream->stream_id, plane_idx, &plane_id); - cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id); - mpc_factor = (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx]; - } else if (ctx->architecture == dml2_architecture_21) { - if (ctx->config.svp_pstate.callbacks.get_stream_subvp_type(state, stream) == SUBVP_PHANTOM) { - struct dc_stream_state *main_stream; - struct dc_stream_status *main_stream_status; - - /* get stream id of main stream */ - main_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(state, stream); - if (!main_stream) { - ASSERT(false); - return 1; - } - - main_stream_status = ctx->config.callbacks.get_stream_status(state, main_stream); - if (!main_stream_status) { - ASSERT(false); - return 1; - } - - /* get plane id for associated main plane */ - get_plane_id(ctx, state, main_stream_status->plane_states[plane_idx], - main_stream->stream_id, plane_idx, &plane_id); - } else { - get_plane_id(ctx, state, status->plane_states[plane_idx], - stream->stream_id, plane_idx, &plane_id); - } - - cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id); - mpc_factor = ctx->v21.mode_programming.programming->plane_programming[cfg_idx].num_dpps_required; - } else { - mpc_factor = 1; - ASSERT(false); - } - - /* For stereo timings, we need to pipe split */ - if (dml2_is_stereo_timing(stream)) - mpc_factor = 2; - - return mpc_factor; -} - -static unsigned int get_target_odm_factor( - const struct dml2_context *ctx, - struct dc_state *state, - const struct dml_display_cfg_st *disp_cfg, - struct dml2_dml_to_dc_pipe_mapping *mapping, - const struct dc_stream_state *stream) -{ - unsigned int cfg_idx; - - if (ctx->architecture == dml2_architecture_20) { - cfg_idx = find_disp_cfg_idx_by_stream_id( - mapping, stream->stream_id); - switch (disp_cfg->hw.ODMMode[cfg_idx]) { - case dml_odm_mode_bypass: - return 1; - case dml_odm_mode_combine_2to1: - return 2; - case dml_odm_mode_combine_4to1: - return 4; - default: - break; - } - } else if (ctx->architecture == dml2_architecture_21) { - if (ctx->config.svp_pstate.callbacks.get_stream_subvp_type(state, stream) == SUBVP_PHANTOM) { - struct dc_stream_state *main_stream; - - /* get stream id of main stream */ - main_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(state, stream); - if (!main_stream) - goto failed; - - /* get cfg idx for associated main 
stream */ - cfg_idx = find_disp_cfg_idx_by_stream_id( - mapping, main_stream->stream_id); - } else { - cfg_idx = find_disp_cfg_idx_by_stream_id( - mapping, stream->stream_id); - } - - return ctx->v21.mode_programming.programming->stream_programming[cfg_idx].num_odms_required; - } - -failed: - ASSERT(false); - return 1; -} - -static unsigned int get_source_odm_factor(const struct dml2_context *ctx, - struct dc_state *state, - const struct dc_stream_state *stream) -{ - struct pipe_ctx *otg_master = ctx->config.callbacks.get_otg_master_for_stream(&state->res_ctx, stream); - - if (!otg_master) - return 0; - - return ctx->config.callbacks.get_odm_slice_count(otg_master); -} - -static unsigned int get_source_mpc_factor(const struct dml2_context *ctx, - struct dc_state *state, - const struct dc_plane_state *plane) -{ - struct pipe_ctx *dpp_pipes[MAX_PIPES] = {0}; - int dpp_pipe_count = ctx->config.callbacks.get_dpp_pipes_for_plane(plane, - &state->res_ctx, dpp_pipes); - - ASSERT(dpp_pipe_count > 0); - return ctx->config.callbacks.get_mpc_slice_count(dpp_pipes[0]); -} - - -static void populate_mpc_factors_for_stream( - struct dml2_context *ctx, - const struct dml_display_cfg_st *disp_cfg, - struct dml2_dml_to_dc_pipe_mapping *mapping, - struct dc_state *state, - unsigned int stream_idx, - struct dml2_pipe_combine_factor odm_factor, - struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES]) -{ - const struct dc_stream_status *status = &state->stream_status[stream_idx]; - int i; - - for (i = 0; i < status->plane_count; i++) { - mpc_factors[i].source = get_source_mpc_factor(ctx, state, status->plane_states[i]); - mpc_factors[i].target = (odm_factor.target == 1) ? - get_target_mpc_factor(ctx, state, disp_cfg, mapping, status, state->streams[stream_idx], i) : 1; - } -} - -static void populate_odm_factors(const struct dml2_context *ctx, - const struct dml_display_cfg_st *disp_cfg, - struct dml2_dml_to_dc_pipe_mapping *mapping, - struct dc_state *state, - struct dml2_pipe_combine_factor odm_factors[MAX_PIPES]) -{ - int i; - - for (i = 0; i < state->stream_count; i++) { - odm_factors[i].source = get_source_odm_factor(ctx, state, state->streams[i]); - odm_factors[i].target = get_target_odm_factor( - ctx, state, disp_cfg, mapping, state->streams[i]); - } -} - -static bool unmap_dc_pipes_for_stream(struct dml2_context *ctx, - struct dc_state *state, - const struct dc_state *existing_state, - const struct dc_stream_state *stream, - const struct dc_stream_status *status, - struct dml2_pipe_combine_factor odm_factor, - struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES]) -{ - int plane_idx; - bool result = true; - - for (plane_idx = 0; plane_idx < status->plane_count; plane_idx++) - if (mpc_factors[plane_idx].target < mpc_factors[plane_idx].source) - result &= ctx->config.callbacks.update_pipes_for_plane_with_slice_count( - state, - existing_state, - ctx->config.callbacks.dc->res_pool, - status->plane_states[plane_idx], - mpc_factors[plane_idx].target); - if (odm_factor.target < odm_factor.source) - result &= ctx->config.callbacks.update_pipes_for_stream_with_slice_count( - state, - existing_state, - ctx->config.callbacks.dc->res_pool, - stream, - odm_factor.target); - return result; -} - -static bool map_dc_pipes_for_stream(struct dml2_context *ctx, - struct dc_state *state, - const struct dc_state *existing_state, - const struct dc_stream_state *stream, - const struct dc_stream_status *status, - struct dml2_pipe_combine_factor odm_factor, - struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES]) -{ - int 
plane_idx; - bool result = true; - - for (plane_idx = 0; plane_idx < status->plane_count; plane_idx++) - if (mpc_factors[plane_idx].target > mpc_factors[plane_idx].source) - result &= ctx->config.callbacks.update_pipes_for_plane_with_slice_count( - state, - existing_state, - ctx->config.callbacks.dc->res_pool, - status->plane_states[plane_idx], - mpc_factors[plane_idx].target); - if (odm_factor.target > odm_factor.source) - result &= ctx->config.callbacks.update_pipes_for_stream_with_slice_count( - state, - existing_state, - ctx->config.callbacks.dc->res_pool, - stream, - odm_factor.target); - return result; -} - -static bool map_dc_pipes_with_callbacks(struct dml2_context *ctx, - struct dc_state *state, - const struct dml_display_cfg_st *disp_cfg, - struct dml2_dml_to_dc_pipe_mapping *mapping, - const struct dc_state *existing_state) -{ - int i; - bool result = true; - - populate_odm_factors(ctx, disp_cfg, mapping, state, ctx->pipe_combine_scratch.odm_factors); - for (i = 0; i < state->stream_count; i++) - populate_mpc_factors_for_stream(ctx, disp_cfg, mapping, state, - i, ctx->pipe_combine_scratch.odm_factors[i], ctx->pipe_combine_scratch.mpc_factors[i]); - for (i = 0; i < state->stream_count; i++) - result &= unmap_dc_pipes_for_stream(ctx, state, existing_state, state->streams[i], - &state->stream_status[i], ctx->pipe_combine_scratch.odm_factors[i], ctx->pipe_combine_scratch.mpc_factors[i]); - for (i = 0; i < state->stream_count; i++) - result &= map_dc_pipes_for_stream(ctx, state, existing_state, state->streams[i], - &state->stream_status[i], ctx->pipe_combine_scratch.odm_factors[i], ctx->pipe_combine_scratch.mpc_factors[i]); - - return result; -} - -bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, struct dml2_dml_to_dc_pipe_mapping *mapping, const struct dc_state *existing_state) -{ - int stream_index, plane_index, i; - - unsigned int stream_disp_cfg_index; - unsigned int plane_disp_cfg_index; - unsigned int disp_cfg_index_max; - - unsigned int plane_id; - unsigned int stream_id; - - const unsigned int *ODMMode, *DPPPerSurface; - unsigned int odm_mode_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}, dpp_per_surface_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}; - struct dc_pipe_mapping_scratch scratch; - - if (ctx->config.map_dc_pipes_with_callbacks) - return map_dc_pipes_with_callbacks( - ctx, state, disp_cfg, mapping, existing_state); - - if (ctx->architecture == dml2_architecture_21) { - /* - * Extract ODM and DPP outputs from DML2.1 and map them in an array as required for pipe mapping in dml2_map_dc_pipes. - * As data cannot be directly extracted in const pointers, assign these arrays to const pointers before proceeding to - * maximize the reuse of existing code. Const pointers are required because dml2.0 dml_display_cfg_st is const. 
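- *
- * Editor's note (illustrative, not part of the original file): after this
- * remapping, ODMMode[] holds the DML2.1 num_odms_required values (plain
- * integers 1, 2 or 4) rather than the DML2.0 dml_odm_mode_* enum values,
- * which is why the dml2_architecture_21 branch below compares against
- * integer literals.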
- * - */ - for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { - odm_mode_array[i] = ctx->v21.mode_programming.programming->stream_programming[i].num_odms_required; - dpp_per_surface_array[i] = ctx->v21.mode_programming.programming->plane_programming[i].num_dpps_required; - } - - ODMMode = (const unsigned int *)odm_mode_array; - DPPPerSurface = (const unsigned int *)dpp_per_surface_array; - disp_cfg_index_max = __DML2_WRAPPER_MAX_STREAMS_PLANES__; - } else { - ODMMode = (unsigned int *)disp_cfg->hw.ODMMode; - DPPPerSurface = disp_cfg->hw.DPPPerSurface; - disp_cfg_index_max = __DML_NUM_PLANES__; - } - - for (stream_index = 0; stream_index < state->stream_count; stream_index++) { - memset(&scratch, 0, sizeof(struct dc_pipe_mapping_scratch)); - - stream_id = state->streams[stream_index]->stream_id; - stream_disp_cfg_index = find_disp_cfg_idx_by_stream_id(mapping, stream_id); - if (stream_disp_cfg_index >= disp_cfg_index_max) - continue; - - if (ctx->architecture == dml2_architecture_20) { - if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_bypass) { - scratch.odm_info.odm_factor = 1; - } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_2to1) { - scratch.odm_info.odm_factor = 2; - } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_4to1) { - scratch.odm_info.odm_factor = 4; - } else { - ASSERT(false); - scratch.odm_info.odm_factor = 1; - } - } else if (ctx->architecture == dml2_architecture_21) { - /* After the DML2.1 update, the ODM interpretation needs to change and is no longer the same as for DML2.0. - * This is not an issue with the new resource management logic. This block ensures backward compatibility - * with legacy pipe management with the updated DML. - * */ - if (ODMMode[stream_disp_cfg_index] == 1) { - scratch.odm_info.odm_factor = 1; - } else if (ODMMode[stream_disp_cfg_index] == 2) { - scratch.odm_info.odm_factor = 2; - } else if (ODMMode[stream_disp_cfg_index] == 4) { - scratch.odm_info.odm_factor = 4; - } else { - ASSERT(false); - scratch.odm_info.odm_factor = 1; - } - } - calculate_odm_slices(state->streams[stream_index], scratch.odm_info.odm_factor, scratch.odm_info.odm_slice_end_x); - - // If there are no planes, we still want to set up ODM... - if (state->stream_status[stream_index].plane_count == 0) { - map_pipes_for_stream(ctx, state, state->streams[stream_index], &scratch, existing_state); - } - - for (plane_index = 0; plane_index < state->stream_status[stream_index].plane_count; plane_index++) { - // Planes are ordered top to bottom. - if (get_plane_id(ctx, state, state->stream_status[stream_index].plane_states[plane_index], - stream_id, plane_index, &plane_id)) { - plane_disp_cfg_index = find_disp_cfg_idx_by_plane_id(mapping, plane_id); - - // Setup mpc_info for this plane - scratch.mpc_info.prev_odm_pipe = NULL; - if (scratch.odm_info.odm_factor == 1 && plane_disp_cfg_index < disp_cfg_index_max) { - // If ODM combine is not in use, then the number of pipes - // per plane is determined by the MPC combine factor - scratch.mpc_info.mpc_factor = DPPPerSurface[plane_disp_cfg_index]; - - // For stereo timings, we need to pipe split - if (dml2_is_stereo_timing(state->streams[stream_index])) - scratch.mpc_info.mpc_factor = 2; - } else { - // If ODM combine is enabled, then we use at most 1 pipe per - // odm slice per plane, i.e. MPC combine is never used - scratch.mpc_info.mpc_factor = 1; - } - - ASSERT(scratch.odm_info.odm_factor * scratch.mpc_info.mpc_factor > 0); - - // Clear the pool assignment scratch (which is per plane) - memset(&scratch.pipe_pool, 0, sizeof(struct dc_plane_pipe_pool)); - - map_pipes_for_plane(ctx, state, state->streams[stream_index], - state->stream_status[stream_index].plane_states[plane_index], plane_index, &scratch, existing_state); - } else { - // Plane ID cannot be generated, therefore no DML mapping can be performed. - ASSERT(false); - } - } - - } - - if (!validate_pipe_assignment(ctx, state, disp_cfg, mapping)) - ASSERT(false); - - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state) { - if (!ctx->config.callbacks.build_scaling_params(pipe)) { - ASSERT(false); - } - } - - if (ctx->config.callbacks.build_test_pattern_params && - pipe->stream && - pipe->prev_odm_pipe == NULL && - pipe->top_pipe == NULL) - ctx->config.callbacks.build_test_pattern_params(&state->res_ctx, pipe); - } - - return true; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.h deleted file mode 100644 index 1538b708d8be..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DML2_DC_RESOURCE_MGMT_H__ -#define __DML2_DC_RESOURCE_MGMT_H__ -#include "dml2_dc_types.h" - -struct dml2_context; -struct dml2_dml_to_dc_pipe_mapping; -struct dml_display_cfg_st; - -/* - * dml2_map_dc_pipes - Creates a pipe linkage in dc_state based on the current display config. - * @ctx: Input dml2 context - * @state: Current dc_state to be updated. - * @disp_cfg: Current display config. - * @mapping: Pipe mapping logic structure to keep track of pipes to be used. - * - * Based on the ODM and DPPPerSurface outputs calculated by the DML for the current display - * config, create a pipe linkage in dc_state which is then used by DC core. - * Make this function generic to be used by multiple DML versions. - * - * Return: True if pipe mapping and linking is successful, false otherwise.
- */ - -bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, struct dml2_dml_to_dc_pipe_mapping *mapping, const struct dc_state *existing_state); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h deleted file mode 100644 index 7ca7f2a743c2..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - - -/* - * Wrapper header for externally defined types from DC. These types come from - * dc headers when building DML2 as part of DC, but are defined here when building - * DML2 as a standalone library (such as for unit testing). - */ - -#ifndef __DML2_DC_TYPES_H__ -#define __DML2_DC_TYPES_H__ - -#include "resource.h" -#include "core_types.h" -#include "dsc.h" -#include "clk_mgr.h" -#include "dc_state_priv.h" - -#endif //__DML2_DC_TYPES_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h deleted file mode 100644 index 140ec01545db..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h +++ /dev/null @@ -1,157 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DML2_INTERNAL_TYPES_H__ -#define __DML2_INTERNAL_TYPES_H__ - -#include "dml2_dc_types.h" -#include "display_mode_core.h" -#include "dml2_wrapper.h" -#include "dml2_policy.h" - -#include "dml_top.h" -#include "dml21_wrapper.h" - -struct dml2_wrapper_optimize_configuration_params { - struct display_mode_lib_st *dml_core_ctx; - struct dml2_configuration_options *config; - struct ip_params_st *ip_params; - struct dml_display_cfg_st *cur_display_config; - struct dml_display_cfg_st *new_display_config; - const struct dml_mode_support_info_st *cur_mode_support_info; - struct dml_mode_eval_policy_st *cur_policy; - struct dml_mode_eval_policy_st *new_policy; -}; - -struct dml2_calculate_lowest_supported_state_for_temp_read_scratch { - struct dml_mode_support_info_st evaluation_info; - dml_float_t uclk_change_latencies[__DML_MAX_STATE_ARRAY_SIZE__]; - struct dml_display_cfg_st cur_display_config; - struct dml_display_cfg_st new_display_config; - struct dml_mode_eval_policy_st new_policy; - struct dml_mode_eval_policy_st cur_policy; -}; - -struct dml2_create_scratch { - struct dml2_policy_build_synthetic_soc_states_scratch build_synthetic_socbb_scratch; - struct soc_states_st in_states; -}; - -struct dml2_calculate_rq_and_dlg_params_scratch { - struct _vcs_dpi_dml_display_rq_regs_st rq_regs; - struct _vcs_dpi_dml_display_dlg_regs_st disp_dlg_regs; - struct _vcs_dpi_dml_display_ttu_regs_st disp_ttu_regs; -}; - -#define __DML2_WRAPPER_MAX_STREAMS_PLANES__ 6 - -struct dml2_dml_to_dc_pipe_mapping { - unsigned int disp_cfg_to_stream_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; - bool disp_cfg_to_stream_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; - unsigned int disp_cfg_to_plane_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; - bool disp_cfg_to_plane_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; - unsigned int dml_pipe_idx_to_stream_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; - bool dml_pipe_idx_to_stream_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; - unsigned int dml_pipe_idx_to_plane_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; - bool dml_pipe_idx_to_plane_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; - unsigned int dml_pipe_idx_to_plane_index[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; - bool dml_pipe_idx_to_plane_index_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; -}; - -struct dml2_wrapper_scratch { - struct dml_display_cfg_st cur_display_config; - struct dml_display_cfg_st new_display_config; - struct dml_mode_eval_policy_st new_policy; - struct dml_mode_eval_policy_st cur_policy; - struct dml_mode_support_info_st mode_support_info; - struct dml_mode_support_ex_params_st mode_support_params; - - struct dummy_pstate_entry dummy_pstate_table[4]; - - struct dml2_create_scratch create_scratch; - struct dml2_calculate_lowest_supported_state_for_temp_read_scratch dml2_calculate_lowest_supported_state_for_temp_read_scratch; - struct dml2_calculate_rq_and_dlg_params_scratch calculate_rq_and_dlg_params_scratch; - - struct dml2_wrapper_optimize_configuration_params optimize_configuration_params; - struct dml2_policy_build_synthetic_soc_states_params build_synthetic_socbb_params; - - struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; - bool enable_flexible_pipe_mapping; - bool plane_duplicate_exists; - int 
hpo_stream_to_link_encoder_mapping[MAX_HPO_DP2_ENCODERS]; -}; - -struct dml2_helper_det_policy_scratch { - int dpps_per_surface[MAX_PLANES]; -}; - -enum dml2_architecture { - dml2_architecture_20, - dml2_architecture_21 -}; - -struct prepare_mcache_programming_locals { - struct dml2_build_mcache_programming_in_out build_mcache_programming_params; -}; - -struct dml21_wrapper_scratch { - struct prepare_mcache_programming_locals prepare_mcache_locals; - struct pipe_ctx temp_pipe; -}; - -struct dml2_pipe_combine_factor { - unsigned int source; - unsigned int target; -}; - -struct dml2_pipe_combine_scratch { - struct dml2_pipe_combine_factor odm_factors[MAX_PIPES]; - struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES][MAX_PIPES]; -}; - -struct dml2_context { - enum dml2_architecture architecture; - struct dml2_configuration_options config; - struct dml2_helper_det_policy_scratch det_helper_scratch; - struct dml2_pipe_combine_scratch pipe_combine_scratch; - union { - struct { - struct display_mode_lib_st dml_core_ctx; - struct dml2_wrapper_scratch scratch; - struct dcn_watermarks g6_temp_read_watermark_set; - } v20; - struct { - struct dml21_wrapper_scratch scratch; - struct dml2_initialize_instance_in_out dml_init; - struct dml2_display_cfg display_config; - struct dml2_check_mode_supported_in_out mode_support; - struct dml2_build_mode_programming_in_out mode_programming; - struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; - } v21; - }; -}; - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c deleted file mode 100644 index c59f825cfae9..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c +++ /dev/null @@ -1,910 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dml2_dc_types.h" -#include "dml2_internal_types.h" -#include "dml2_utils.h" -#include "dml2_mall_phantom.h" - -unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx, struct dc_state *context) -{ - uint32_t num_ways = 0; - uint32_t bytes_per_pixel = 0; - uint32_t cache_lines_used = 0; - uint32_t lines_per_way = 0; - uint32_t total_cache_lines = 0; - uint32_t bytes_in_mall = 0; - uint32_t num_mblks = 0; - uint32_t cache_lines_per_plane = 0; - uint32_t i = 0; - uint32_t mblk_width = 0; - uint32_t mblk_height = 0; - uint32_t full_vp_width_blk_aligned = 0; - uint32_t mall_alloc_width_blk_aligned = 0; - uint32_t mall_alloc_height_blk_aligned = 0; - - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - // Find the phantom pipes - if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe && - ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { - bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; - mblk_width = ctx->config.mall_cfg.mblk_width_pixels; - mblk_height = bytes_per_pixel == 4 ? ctx->config.mall_cfg.mblk_height_4bpe_pixels : ctx->config.mall_cfg.mblk_height_8bpe_pixels; - - /* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) - - * FLOOR(vp_x_start, blk_width) - */ - full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x + - pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) - - (pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width); - - /* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */ - mall_alloc_width_blk_aligned = full_vp_width_blk_aligned; - - /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */ - mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) / - mblk_height * mblk_height + mblk_height; - - /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c; - * full_mblk_height_ub_l/c = mall_alloc_height_blk_aligned_l/c; - * num_mblk_l/c = (full_mblk_width_ub_l/c / mblk_width_l/c) * (full_mblk_height_ub_l/c / mblk_height_l/c); - * (Should be divisible, but round up if not) - */ - num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) * - ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height); - bytes_in_mall = num_mblks * ctx->config.mall_cfg.mblk_size_bytes; - // cache lines used is total bytes / cache_line size. Add +2 for worst case alignment - // (MALL is 64-byte aligned) - cache_lines_per_plane = bytes_in_mall / ctx->config.mall_cfg.cache_line_size_bytes + 2; - - // For DCC we must cache the meta surface, so double cache lines required - if (pipe->plane_state->dcc.enable) - cache_lines_per_plane *= 2; - cache_lines_used += cache_lines_per_plane; - } - } - - total_cache_lines = ctx->config.mall_cfg.max_cab_allocation_bytes / ctx->config.mall_cfg.cache_line_size_bytes; - lines_per_way = total_cache_lines / ctx->config.mall_cfg.cache_num_ways; - num_ways = cache_lines_used / lines_per_way; - if (cache_lines_used % lines_per_way > 0) - num_ways++; - - return num_ways; -} - -static void merge_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *context) -{ - int i; - - /* merge pipes if necessary */ - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - // For now merge all pipes for SubVP since pipe split case isn't supported yet - - /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ - if (pipe->prev_odm_pipe) { - /* split off odm pipe */ - pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; - if (pipe->next_odm_pipe) - pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; - - pipe->bottom_pipe = NULL; - pipe->next_odm_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - pipe->top_pipe = NULL; - pipe->prev_odm_pipe = NULL; - if (pipe->stream_res.dsc) - ctx->config.svp_pstate.callbacks.release_dsc(&context->res_ctx, ctx->config.svp_pstate.callbacks.dc->res_pool, &pipe->stream_res.dsc); - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { - struct pipe_ctx *top_pipe = pipe->top_pipe; - struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; - - top_pipe->bottom_pipe = bottom_pipe; - if (bottom_pipe) - bottom_pipe->top_pipe = top_pipe; - - pipe->top_pipe = NULL; - pipe->bottom_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - } - } -} - -static bool all_pipes_have_stream_and_plane(struct dml2_context *ctx, const struct dc_state *context) -{ - int i; - - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->stream) - continue; - - if (!pipe->plane_state) - return false; - } - return true; -} - -static bool mpo_in_use(const struct dc_state *context) -{ - int i; - - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count > 1) - return true; - } - return false; -} - -/* - * get_num_free_pipes: Calculate number of free pipes - * - * This function assumes that a "used" pipe is a pipe that has - * both a stream and a plane assigned to it.
- * - * @dc: current dc state - * @context: new dc state - * - * Return: - * Number of free pipes available in the context - */ -static unsigned int get_num_free_pipes(struct dml2_context *ctx, struct dc_state *state) -{ - unsigned int i; - unsigned int free_pipes = 0; - unsigned int num_pipes = 0; - - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; - - if (pipe->stream && !pipe->top_pipe) { - while (pipe) { - num_pipes++; - pipe = pipe->bottom_pipe; - } - } - } - - free_pipes = ctx->config.dcn_pipe_count - num_pipes; - return free_pipes; -} - -/* - * dcn32_assign_subvp_pipe: Function to decide which pipe will use Sub-VP. - * - * We enter this function if we are Sub-VP capable (i.e. enough pipes available) - * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if - * we are forcing SubVP P-State switching on the current config. - * - * The number of pipes used for the chosen surface must be less than or equal to the - * number of free pipes available. - * - * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK). - * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own - * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't - * support MCLK switching naturally [i.e. ACTIVE or VBLANK]). - * - * @param dc: current dc state - * @param context: new dc state - * @param index: [out] dc pipe index for the pipe chosen to have phantom pipes assigned - * - * Return: - * True if a valid pipe assignment was found for Sub-VP. Otherwise false. - */ -static bool assign_subvp_pipe(struct dml2_context *ctx, struct dc_state *context, unsigned int *index) -{ - unsigned int i, pipe_idx; - unsigned int max_frame_time = 0; - bool valid_assignment_found = false; - unsigned int free_pipes = 2; //dcn32_get_num_free_pipes(dc, context); - bool current_assignment_freesync = false; - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - unsigned int num_pipes = 0; - unsigned int refresh_rate = 0; - - if (!pipe->stream) - continue; - - // Round up - refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1) - / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total); - /* SubVP pipe candidate requirements: - * - Refresh rate < 120hz - * - Not able to switch in vactive naturally (switching in active means the - * DET provides enough buffer to hide the P-State switch latency -- trying - * to combine this with SubVP can cause issues with the scheduling). 
- */ - if (pipe->plane_state && !pipe->top_pipe && - ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_NONE && refresh_rate < 120 && - vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) { - while (pipe) { - num_pipes++; - pipe = pipe->bottom_pipe; - } - - pipe = &context->res_ctx.pipe_ctx[i]; - if (num_pipes <= free_pipes) { - struct dc_stream_state *stream = pipe->stream; - unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total / - (double)(stream->timing.pix_clk_100hz * 100)) * 1000000; - if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) { - *index = i; - max_frame_time = frame_us; - valid_assignment_found = true; - current_assignment_freesync = false; - /* For the 2-Freesync display case, still choose the one with the - * longest frame time - */ - } else if (stream->ignore_msa_timing_param && (!valid_assignment_found || - (current_assignment_freesync && frame_us > max_frame_time))) { - *index = i; - valid_assignment_found = true; - current_assignment_freesync = true; - } - } - } - pipe_idx++; - } - return valid_assignment_found; -} - -/* - * enough_pipes_for_subvp: Function to check if there are "enough" pipes for SubVP. - * - * This function returns true if there are enough free pipes - * to create the required phantom pipes for any given stream - * (that does not already have phantom pipe assigned). - * - * e.g. For a 2 stream config where the first stream uses one - * pipe and the second stream uses 2 pipes (i.e. pipe split), - * this function will return true because there is 1 remaining - * pipe which can be used as the phantom pipe for the non pipe - * split pipe. - * - * @dc: current dc state - * @context: new dc state - * - * Return: - * True if there are enough free pipes to assign phantom pipes to at least one - * stream that does not already have phantom pipes assigned. Otherwise false. - */ -static bool enough_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *state) -{ - unsigned int i, split_cnt, free_pipes; - unsigned int min_pipe_split = ctx->config.dcn_pipe_count + 1; // init as max number of pipes + 1 - bool subvp_possible = false; - - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; - - // Find the minimum pipe split count for non SubVP pipes - if (pipe->stream && !pipe->top_pipe && - ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_NONE) { - split_cnt = 0; - while (pipe) { - split_cnt++; - pipe = pipe->bottom_pipe; - } - - if (split_cnt < min_pipe_split) - min_pipe_split = split_cnt; - } - } - - free_pipes = get_num_free_pipes(ctx, state); - - // SubVP only possible if at least one pipe is being used (i.e. free_pipes - // should not equal to the pipe_count) - if (free_pipes >= min_pipe_split && free_pipes < ctx->config.dcn_pipe_count) - subvp_possible = true; - - return subvp_possible; -} - -/* - * subvp_subvp_schedulable: Determine if SubVP + SubVP config is schedulable - * - * High level algorithm: - * 1. Find longest microschedule length (in us) between the two SubVP pipes - * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both - * pipes still allows for the maximum microschedule to fit in the active - * region for both pipes. 
- * - * @dc: current dc state - * @context: new dc state - * - * Return: - * bool - True if the SubVP + SubVP config is schedulable, false otherwise - */ -static bool subvp_subvp_schedulable(struct dml2_context *ctx, struct dc_state *context) -{ - struct pipe_ctx *subvp_pipes[2]; - struct dc_stream_state *phantom = NULL; - uint32_t microschedule_lines = 0; - uint32_t index = 0; - uint32_t i; - uint32_t max_microschedule_us = 0; - int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us; - - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - uint32_t time_us = 0; - - /* Loop to calculate the maximum microschedule time between the two SubVP pipes, - * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. - */ - if (pipe->stream && pipe->plane_state && !pipe->top_pipe && - ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { - phantom = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream); - microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + - phantom->timing.v_addressable; - - // Round up when calculating microschedule time (+ 1 at the end) - time_us = (microschedule_lines * phantom->timing.h_total) / - (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 + - ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us + - ctx->config.svp_pstate.subvp_fw_processing_delay_us + 1; - if (time_us > max_microschedule_us) - max_microschedule_us = time_us; - - subvp_pipes[index] = pipe; - index++; - - // Maximum 2 SubVP pipes - if (index == 2) - break; - } - } - vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) / - (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; - vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) / - (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; - vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) * - subvp_pipes[0]->stream->timing.h_total) / - (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; - vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) * - subvp_pipes[1]->stream->timing.h_total) / - (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; - - if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && - (vactive2_us - vblank1_us) / 2 > max_microschedule_us) - return true; - - return false; -} - -/* - * dml2_svp_drr_schedulable: Determine if SubVP + DRR config is schedulable - * - * High level algorithm: - * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe - * 2. 
Determine the frame time for the DRR display when adding required margin for MCLK switching - * (the margin is equal to the MALL region + DRR margin (500us)) - * 3.If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR frame)) - * then report the configuration as supported - * - * @dc: current dc state - * @context: new dc state - * @drr_pipe: DRR pipe_ctx for the SubVP + DRR config - * - * Return: - * bool - True if the SubVP + DRR config is schedulable, false otherwise - */ -bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context, struct dc_crtc_timing *drr_timing) -{ - bool schedulable = false; - uint32_t i; - struct pipe_ctx *pipe = NULL; - struct dc_crtc_timing *main_timing = NULL; - struct dc_crtc_timing *phantom_timing = NULL; - struct dc_stream_state *phantom_stream; - int16_t prefetch_us = 0; - int16_t mall_region_us = 0; - int16_t drr_frame_us = 0; // nominal frame time - int16_t subvp_active_us = 0; - int16_t stretched_drr_us = 0; - int16_t drr_stretched_vblank_us = 0; - int16_t max_vblank_mallregion = 0; - - // Find SubVP pipe - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - - // We check for master pipe, but it shouldn't matter since we only need - // the pipe for timing info (stream should be same for any pipe splits) - if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) - continue; - - // Find the SubVP pipe - if (ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) - break; - } - - phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream); - main_timing = &pipe->stream->timing; - phantom_timing = &phantom_stream->timing; - prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us; - subvp_active_us = main_timing->v_addressable * main_timing->h_total / - (double)(main_timing->pix_clk_100hz * 100) * 1000000; - drr_frame_us = drr_timing->v_total * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000; - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; - stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); - max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; - - /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the - * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis - * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - * and the max of (VBLANK blanking time, MALL region)). - */ - if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && - subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) - schedulable = true; - - return schedulable; -} - - -/* - * subvp_vblank_schedulable: Determine if SubVP + VBLANK config is schedulable - * - * High level algorithm: - * 1. 
Get timing for SubVP pipe, phantom pipe, and VBLANK pipe - * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time)) - * then report the configuration as supported - * 3. If the VBLANK display is DRR, then take the DRR static schedulability path - * - * @dc: current dc state - * @context: new dc state - * - * Return: - * bool - True if the SubVP + VBLANK/DRR config is schedulable, false otherwise - */ -static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *context) -{ - struct pipe_ctx *pipe = NULL; - struct pipe_ctx *subvp_pipe = NULL; - bool found = false; - bool schedulable = false; - uint32_t i = 0; - uint8_t vblank_index = 0; - uint16_t prefetch_us = 0; - uint16_t mall_region_us = 0; - uint16_t vblank_frame_us = 0; - uint16_t subvp_active_us = 0; - uint16_t vblank_blank_us = 0; - uint16_t max_vblank_mallregion = 0; - struct dc_crtc_timing *main_timing = NULL; - struct dc_crtc_timing *phantom_timing = NULL; - struct dc_crtc_timing *vblank_timing = NULL; - struct dc_stream_state *phantom_stream; - enum mall_stream_type pipe_mall_type; - - /* For SubVP + VBLANK/DRR cases, we assume there can only be - * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK - * is supported, it is either a single VBLANK case or two VBLANK - * displays which are synchronized (in which case they have identical - * timings). - */ - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe); - - // We check for master pipe, but it shouldn't matter since we only need - // the pipe for timing info (stream should be same for any pipe splits) - if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) - continue; - - if (!found && pipe_mall_type == SUBVP_NONE) { - // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). 
- vblank_index = i; - found = true; - } - - if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) - subvp_pipe = pipe; - } - // Use ignore_msa_timing_param flag to identify as DRR - if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) { - // SUBVP + DRR case - schedulable = dml2_svp_drr_schedulable(ctx, context, &context->res_ctx.pipe_ctx[vblank_index].stream->timing); - } else if (found) { - phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, subvp_pipe->stream); - main_timing = &subvp_pipe->stream->timing; - phantom_timing = &phantom_stream->timing; - vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; - // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe - // Also include the prefetch end to mallstart delay time - prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us; - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; - vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total / - (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; - vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total / - (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; - subvp_active_us = main_timing->v_addressable * main_timing->h_total / - (double)(main_timing->pix_clk_100hz * 100) * 1000000; - max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us; - - // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - // and the max of (VBLANK blanking time, MALL region) - // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0) - if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) - schedulable = true; - } - return schedulable; -} - -/* - * subvp_validate_static_schedulability: Check which SubVP case is calculated and handle - * static analysis based on the case. - * - * Three cases: - * 1. SubVP + SubVP - * 2. SubVP + VBLANK (DRR checked internally) - * 3. SubVP + VACTIVE (currently unsupported) - * - * @dc: current dc state - * @context: new dc state - * @vlevel: Voltage level calculated by DML - * - * Return: - * bool - True if statically schedulable, false otherwise - */ -bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc_state *context, enum dml_dram_clock_change_support pstate_change_type) -{ - bool schedulable = true; // true by default for single display case - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - uint32_t i, pipe_idx; - uint8_t subvp_count = 0; - uint8_t vactive_count = 0; - - for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - enum mall_stream_type pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe); - - if (!pipe->stream) - continue; - - if (pipe->plane_state && !pipe->top_pipe && - pipe_mall_type == SUBVP_MAIN) - subvp_count++; - - // Count how many planes that aren't SubVP/phantom are capable of VACTIVE - // switching (SubVP + VACTIVE unsupported). 
In situations where we force - // SubVP for a VACTIVE plane, we don't want to increment the vactive_count. - if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 && - pipe_mall_type == SUBVP_NONE) { - vactive_count++; - } - pipe_idx++; - } - - if (subvp_count == 2) { - // Static schedulability check for SubVP + SubVP case - schedulable = subvp_subvp_schedulable(ctx, context); - } else if (pstate_change_type == dml_dram_clock_change_vblank_w_mall_sub_vp) { - // Static schedulability check for SubVP + VBLANK case. Also handle the case where - // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK) - if (vactive_count > 0) - schedulable = false; - else - schedulable = subvp_vblank_schedulable(ctx, context); - } else if (pstate_change_type == dml_dram_clock_change_vactive_w_mall_sub_vp && - vactive_count > 0) { - // For single display SubVP cases, DML will output dm_dram_clock_change_vactive_w_mall_sub_vp by default. - // We tell the difference between SubVP vs. SubVP + VACTIVE by checking the vactive_count. - // SubVP + VACTIVE currently unsupported - schedulable = false; - } - return schedulable; -} - -static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state *state, - struct pipe_ctx *ref_pipe, - struct dc_stream_state *phantom_stream, - unsigned int dc_pipe_idx, - unsigned int svp_height, - unsigned int svp_vstartup) -{ - unsigned int i; - double line_time, fp_and_sync_width_time; - struct pipe_ctx *pipe; - uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; - static const double cvt_rb_vblank_max = ((double) 460 / (1000 * 1000)); - - // Find DML pipe index (pipe_idx) using dc_pipe_idx - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - pipe = &state->res_ctx.pipe_ctx[i]; - - if (!pipe->stream) - continue; - - if (i == dc_pipe_idx) - break; - } - - // Calculate lines required for pstate allow width and FW processing delays - pstate_width_fw_delay_lines = ((double)(ctx->config.svp_pstate.subvp_fw_processing_delay_us + - ctx->config.svp_pstate.subvp_pstate_allow_width_us) / 1000000) * - (ref_pipe->stream->timing.pix_clk_100hz * 100) / - (double)ref_pipe->stream->timing.h_total; - - // DML calculation for MALL region doesn't take into account FW delay - // and required pstate allow width for multi-display cases - /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned - * to 2 swaths (i.e. 
16 lines) - */ - phantom_vactive = svp_height + pstate_width_fw_delay_lines + ctx->config.svp_pstate.subvp_swath_height_margin_lines; - - phantom_stream->timing.v_front_porch = 1; - - line_time = phantom_stream->timing.h_total / ((double)phantom_stream->timing.pix_clk_100hz * 100); - fp_and_sync_width_time = (phantom_stream->timing.v_front_porch + phantom_stream->timing.v_sync_width) * line_time; - - if ((svp_vstartup * line_time) + fp_and_sync_width_time > cvt_rb_vblank_max) { - svp_vstartup = (cvt_rb_vblank_max - fp_and_sync_width_time) / line_time; - } - - // For backporch of phantom pipe, use vstartup of the main pipe - phantom_bp = svp_vstartup; - - phantom_stream->dst.y = 0; - phantom_stream->dst.height = phantom_vactive; - phantom_stream->src.y = 0; - phantom_stream->src.height = phantom_vactive; - - phantom_stream->timing.v_addressable = phantom_vactive; - - phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + - phantom_stream->timing.v_front_porch + - phantom_stream->timing.v_sync_width + - phantom_bp; - phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing -} - -static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int dc_pipe_idx, unsigned int svp_height, unsigned int vstartup) -{ - struct pipe_ctx *ref_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx]; - struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_phantom_stream( - ctx->config.svp_pstate.callbacks.dc, - state, - ref_pipe->stream); - - /* stream has limited viewport and small timing */ - memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); - memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); - memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); - set_phantom_stream_timing(ctx, state, ref_pipe, phantom_stream, dc_pipe_idx, svp_height, vstartup); - - ctx->config.svp_pstate.callbacks.add_phantom_stream(ctx->config.svp_pstate.callbacks.dc, - state, - phantom_stream, - ref_pipe->stream); - return phantom_stream; -} - -static void enable_phantom_plane(struct dml2_context *ctx, - struct dc_state *state, - struct dc_stream_state *phantom_stream, - unsigned int dc_pipe_idx) -{ - struct dc_plane_state *phantom_plane = NULL; - struct dc_plane_state *prev_phantom_plane = NULL; - struct pipe_ctx *curr_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx]; - - while (curr_pipe) { - if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) { - phantom_plane = prev_phantom_plane; - } else { - phantom_plane = ctx->config.svp_pstate.callbacks.create_phantom_plane( - ctx->config.svp_pstate.callbacks.dc, - state, - curr_pipe->plane_state); - if (!phantom_plane) - return; - } - - memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); - memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, - sizeof(phantom_plane->scaling_quality)); - memcpy(&phantom_plane->src_rect, &curr_pipe->plane_state->src_rect, sizeof(phantom_plane->src_rect)); - memcpy(&phantom_plane->dst_rect, &curr_pipe->plane_state->dst_rect, sizeof(phantom_plane->dst_rect)); - memcpy(&phantom_plane->clip_rect, &curr_pipe->plane_state->clip_rect, sizeof(phantom_plane->clip_rect)); - memcpy(&phantom_plane->plane_size, &curr_pipe->plane_state->plane_size, - sizeof(phantom_plane->plane_size)); - memcpy(&phantom_plane->tiling_info, &curr_pipe->plane_state->tiling_info, - 
sizeof(phantom_plane->tiling_info)); - memcpy(&phantom_plane->dcc, &curr_pipe->plane_state->dcc, sizeof(phantom_plane->dcc)); - //phantom_plane->tiling_info.gfx10compatible.compat_level = curr_pipe->plane_state->tiling_info.gfx10compatible.compat_level; - phantom_plane->format = curr_pipe->plane_state->format; - phantom_plane->rotation = curr_pipe->plane_state->rotation; - phantom_plane->visible = curr_pipe->plane_state->visible; - - /* Shadow pipe has small viewport. */ - phantom_plane->clip_rect.y = 0; - phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable; - - ctx->config.svp_pstate.callbacks.add_phantom_plane(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state); - - curr_pipe = curr_pipe->bottom_pipe; - prev_phantom_plane = phantom_plane; - } -} - -static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_state *state, unsigned int main_pipe_idx, unsigned int svp_height, unsigned int vstartup) -{ - struct dc_stream_state *phantom_stream = NULL; - unsigned int i; - - // The index of the DC pipe passed into this function is guaranteed to - // be a valid candidate for SubVP (i.e. has a plane, stream, doesn't - // already have phantom pipe assigned, etc.) by previous checks. - phantom_stream = enable_phantom_stream(ctx, state, main_pipe_idx, svp_height, vstartup); - enable_phantom_plane(ctx, state, phantom_stream, main_pipe_idx); - - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; - - // Build scaling params for phantom pipes which were newly added. - // We determine which phantom pipes were added by comparing with - // the phantom stream. - if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream && - ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) { - pipe->stream->use_dynamic_meta = false; - pipe->plane_state->flip_immediate = false; - if (!ctx->config.svp_pstate.callbacks.build_scaling_params(pipe)) { - // Log / remove phantom pipes since we failed to build scaling params - } - } - } -} - -static bool remove_all_phantom_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context) -{ - int i, old_plane_count; - struct dc_stream_status *stream_status = NULL; - struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 }; - - for (i = 0; i < context->stream_count; i++) - if (context->streams[i] == stream) { - stream_status = &context->stream_status[i]; - break; - } - - if (stream_status == NULL) { - return false; - } - - old_plane_count = stream_status->plane_count; - - for (i = 0; i < old_plane_count; i++) - del_planes[i] = stream_status->plane_states[i]; - - for (i = 0; i < old_plane_count; i++) { - if (!ctx->config.svp_pstate.callbacks.remove_phantom_plane(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context)) - return false; - ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc, context, del_planes[i]); - } - - return true; -} - -bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state *state) -{ - int i; - bool removed_pipe = false; - struct dc_stream_state *phantom_stream = NULL; - - for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; - // build scaling params for phantom pipes - if (pipe->plane_state && pipe->stream && ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) { - phantom_stream = pipe->stream; - -
remove_all_phantom_planes_for_stream(ctx, phantom_stream, state); - ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream); - ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream); - - removed_pipe = true; - } - - if (pipe->plane_state) { - pipe->plane_state->is_phantom = false; - } - } - return removed_pipe; -} - - -/* Conditions for setting up phantom pipes for SubVP: - * 1. Not force disable SubVP - * 2. Full update (i.e. DC_VALIDATE_MODE_AND_PROGRAMMING) - * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) - * 4. Display configuration passes validation - * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) - */ -bool dml2_svp_add_phantom_pipe_to_dc_state(struct dml2_context *ctx, struct dc_state *state, struct dml_mode_support_info_st *mode_support_info) -{ - unsigned int dc_pipe_idx, dml_pipe_idx; - unsigned int svp_height, vstartup; - - if (ctx->config.svp_pstate.force_disable_subvp) - return false; - - if (!all_pipes_have_stream_and_plane(ctx, state)) - return false; - - if (mpo_in_use(state)) - return false; - - merge_pipes_for_subvp(ctx, state); - // to re-initialize viewport after the pipe merge - for (int i = 0; i < ctx->config.dcn_pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &state->res_ctx.pipe_ctx[i]; - - if (!pipe_ctx->plane_state || !pipe_ctx->stream) - continue; - - ctx->config.svp_pstate.callbacks.build_scaling_params(pipe_ctx); - } - - if (enough_pipes_for_subvp(ctx, state) && assign_subvp_pipe(ctx, state, &dc_pipe_idx)) { - dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(ctx, state->res_ctx.pipe_ctx[dc_pipe_idx].stream->stream_id); - svp_height = mode_support_info->SubViewportLinesNeededInMALL[dml_pipe_idx]; - vstartup = dml_get_vstartup_calculated(&ctx->v20.dml_core_ctx, dml_pipe_idx); - - add_phantom_pipes_for_main_pipe(ctx, state, dc_pipe_idx, svp_height, vstartup); - - return true; - } - - return false; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.h deleted file mode 100644 index 9d64851f54e7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __DML2_MALL_PHANTOM_H__ -#define __DML2_MALL_PHANTOM_H__ - -#include "dml2_dc_types.h" -#include "display_mode_core_structs.h" - -struct dml2_svp_helper_select_best_svp_candidate_params { - const struct dml_display_cfg_st *dml_config; - const struct dml_mode_support_info_st *mode_support_info; - const unsigned int blacklist; - unsigned int *candidate_index; -}; - -struct dml2_context; - -unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx, struct dc_state *context); - -bool dml2_svp_add_phantom_pipe_to_dc_state(struct dml2_context *ctx, struct dc_state *state, struct dml_mode_support_info_st *mode_support_info); - -bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state *state); - -bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc_state *context, enum dml_dram_clock_change_support pstate_change_type); - -bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context, struct dc_crtc_timing *drr_timing); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c deleted file mode 100644 index ef693f608d59..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c +++ /dev/null @@ -1,311 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dml2_policy.h" - -static void get_optimal_ntuple( - const struct soc_bounding_box_st *socbb, - struct soc_state_bounding_box_st *entry) -{ - if (entry->dcfclk_mhz > 0) { - float bw_on_sdp = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); - - entry->fabricclk_mhz = bw_on_sdp / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_sdp / (socbb->num_chans * - socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); - } else if (entry->fabricclk_mhz > 0) { - float bw_on_fabric = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); - - entry->dcfclk_mhz = bw_on_fabric / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_fabric / (socbb->num_chans * - socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); - } else if (entry->dram_speed_mts > 0) { - float bw_on_dram = (float)(entry->dram_speed_mts * socbb->num_chans * - socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); - - entry->fabricclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); - entry->dcfclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); - } -} - -static float calculate_net_bw_in_mbytes_sec(const struct soc_bounding_box_st *socbb, - struct soc_state_bounding_box_st *entry) -{ - float memory_bw_mbytes_sec = (float)(entry->dram_speed_mts * socbb->num_chans * - socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); - - float fabric_bw_mbytes_sec = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); - - float sdp_bw_mbytes_sec = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); - - float limiting_bw_mbytes_sec = memory_bw_mbytes_sec; - - if (fabric_bw_mbytes_sec < limiting_bw_mbytes_sec) - limiting_bw_mbytes_sec = fabric_bw_mbytes_sec; - - if (sdp_bw_mbytes_sec < limiting_bw_mbytes_sec) - limiting_bw_mbytes_sec = sdp_bw_mbytes_sec; - - return limiting_bw_mbytes_sec; -} - -static void insert_entry_into_table_sorted(const struct soc_bounding_box_st *socbb, - struct soc_states_st *table, - struct soc_state_bounding_box_st *entry) -{ - int index = 0; - int i = 0; - float net_bw_of_new_state = 0; - - get_optimal_ntuple(socbb, entry); - - if (table->num_states == 0) { - index = 0; - } else { - net_bw_of_new_state = calculate_net_bw_in_mbytes_sec(socbb, entry); - while (net_bw_of_new_state > calculate_net_bw_in_mbytes_sec(socbb, &table->state_array[index])) { - index++; - if (index >= (int) table->num_states) - break; - } - - for (i = table->num_states; i > index; i--) { - table->state_array[i] = table->state_array[i - 1]; - } - //ASSERT(index < MAX_CLK_TABLE_SIZE); - } - - table->state_array[index] = *entry; - table->state_array[index].dcfclk_mhz = (int)entry->dcfclk_mhz; - table->state_array[index].fabricclk_mhz = (int)entry->fabricclk_mhz; - table->state_array[index].dram_speed_mts = (int)entry->dram_speed_mts; - table->num_states++; -} - -static void remove_entry_from_table_at_index(struct soc_states_st 
*table, - unsigned int index) -{ - int i; - - if (table->num_states == 0) - return; - - for (i = index; i < (int) table->num_states - 1; i++) { - table->state_array[i] = table->state_array[i + 1]; - } - memset(&table->state_array[--table->num_states], 0, sizeof(struct soc_state_bounding_box_st)); -} - -int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s, - struct dml2_policy_build_synthetic_soc_states_params *p) -{ - int i, j; - unsigned int min_fclk_mhz = p->in_states->state_array[0].fabricclk_mhz; - unsigned int min_dcfclk_mhz = p->in_states->state_array[0].dcfclk_mhz; - unsigned int min_socclk_mhz = p->in_states->state_array[0].socclk_mhz; - - int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, - max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, - max_uclk_mhz = 0, max_socclk_mhz = 0; - - int num_uclk_dpms = 0, num_fclk_dpms = 0; - - for (i = 0; i < __DML_MAX_STATE_ARRAY_SIZE__; i++) { - if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = (int) p->in_states->state_array[i].dcfclk_mhz; - if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz) - max_fclk_mhz = (int) p->in_states->state_array[i].fabricclk_mhz; - if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz) - max_socclk_mhz = (int) p->in_states->state_array[i].socclk_mhz; - if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz) - max_uclk_mhz = (int) p->in_states->state_array[i].dram_speed_mts; - if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = (int) p->in_states->state_array[i].dispclk_mhz; - if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = (int) p->in_states->state_array[i].dppclk_mhz; - if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz; - if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz) - max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz; - - if (p->in_states->state_array[i].fabricclk_mhz > 0) - num_fclk_dpms++; - if (p->in_states->state_array[i].dram_speed_mts > 0) - num_uclk_dpms++; - } - - if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dppclk_mhz || !max_phyclk_mhz || !max_dtbclk_mhz) - return -1; - - p->out_states->num_states = 0; - - s->entry = p->in_states->state_array[0]; - - s->entry.dispclk_mhz = max_dispclk_mhz; - s->entry.dppclk_mhz = max_dppclk_mhz; - s->entry.dtbclk_mhz = max_dtbclk_mhz; - s->entry.phyclk_mhz = max_phyclk_mhz; - - s->entry.dscclk_mhz = max_dispclk_mhz / 3; - s->entry.phyclk_mhz = max_phyclk_mhz; - s->entry.dtbclk_mhz = max_dtbclk_mhz; - - // Insert all the DCFCLK STAs first - for (i = 0; i < p->num_dcfclk_stas; i++) { - s->entry.dcfclk_mhz = p->dcfclk_stas_mhz[i]; - s->entry.fabricclk_mhz = 0; - s->entry.dram_speed_mts = 0; - if (i > 0) - s->entry.socclk_mhz = max_socclk_mhz; - - insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); - } - - // Insert the UCLK DPMS - for (i = 0; i < num_uclk_dpms; i++) { - s->entry.dcfclk_mhz = 0; - s->entry.fabricclk_mhz = 0; - s->entry.dram_speed_mts = p->in_states->state_array[i].dram_speed_mts; - if (i == 0) { - s->entry.socclk_mhz = min_socclk_mhz; - } else { - s->entry.socclk_mhz = max_socclk_mhz; - } - - insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); - } - - // Insert FCLK DPMs (if present) - if (num_fclk_dpms > 2) { - for (i = 0; i < num_fclk_dpms; i++) { - s->entry.dcfclk_mhz = 0; - s->entry.fabricclk_mhz = 
p->in_states->state_array[i].fabricclk_mhz; - s->entry.dram_speed_mts = 0; - - insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); - } - } - // Add max FCLK - else { - s->entry.dcfclk_mhz = 0; - s->entry.fabricclk_mhz = p->in_states->state_array[num_fclk_dpms - 1].fabricclk_mhz; - s->entry.dram_speed_mts = 0; - - insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); - } - - // Remove states that require higher clocks than are supported - for (i = p->out_states->num_states - 1; i >= 0; i--) { - if (p->out_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz || - p->out_states->state_array[i].fabricclk_mhz > max_fclk_mhz || - p->out_states->state_array[i].dram_speed_mts > max_uclk_mhz) - remove_entry_from_table_at_index(p->out_states, i); - } - - // At this point, the table contains all "points of interest" based on - // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock - // ratios (by derate, are exact). - - // Round up UCLK to DPMs - for (i = p->out_states->num_states - 1; i >= 0; i--) { - for (j = 0; j < num_uclk_dpms; j++) { - if (p->in_states->state_array[j].dram_speed_mts >= p->out_states->state_array[i].dram_speed_mts) { - p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[j].dram_speed_mts; - break; - } - } - } - - // If FCLK is coarse grained, round up to next DPMs - if (num_fclk_dpms > 2) { - for (i = p->out_states->num_states - 1; i >= 0; i--) { - for (j = 0; j < num_fclk_dpms; j++) { - if (p->in_states->state_array[j].fabricclk_mhz >= p->out_states->state_array[i].fabricclk_mhz) { - p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[j].fabricclk_mhz; - break; - } - } - } - } - - // Clamp to min FCLK/DCFCLK - for (i = p->out_states->num_states - 1; i >= 0; i--) { - if (p->out_states->state_array[i].fabricclk_mhz < min_fclk_mhz) { - p->out_states->state_array[i].fabricclk_mhz = min_fclk_mhz; - } - if (p->out_states->state_array[i].dcfclk_mhz < min_dcfclk_mhz) { - p->out_states->state_array[i].dcfclk_mhz = min_dcfclk_mhz; - } - } - - // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. - i = 0; - while (i < (int) p->out_states->num_states - 1) { - if (p->out_states->state_array[i].dcfclk_mhz == p->out_states->state_array[i + 1].dcfclk_mhz && - p->out_states->state_array[i].fabricclk_mhz == p->out_states->state_array[i + 1].fabricclk_mhz && - p->out_states->state_array[i].dram_speed_mts == p->out_states->state_array[i + 1].dram_speed_mts) - remove_entry_from_table_at_index(p->out_states, i); - else - i++; - } - - return 0; -} - -void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy) -{ - for (int i = 0; i < __DML_NUM_PLANES__; i++) { - policy->MPCCombineUse[i] = dml_mpc_as_needed_for_voltage; // TOREVIEW: Is this still needed? When is MPCC useful for pstate given CRB? - policy->ODMUse[i] = dml_odm_use_policy_combine_as_needed; - policy->ImmediateFlipRequirement[i] = dml_immediate_flip_required; - policy->AllowForPStateChangeOrStutterInVBlank[i] = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; - } - - /* Change the default policy initializations as per spreadsheet. We might need to - * review and change them later on as per Jun's earlier comments. - */ - policy->UseUnboundedRequesting = dml_unbounded_requesting_enable; - policy->UseMinimumRequiredDCFCLK = false; - policy->DRAMClockChangeRequirementFinal = true; // TOREVIEW: What does this mean? 
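As a standalone illustration of the ranking used by insert_entry_into_table_sorted() earlier in this file: a state's net bandwidth is the minimum of its derated DRAM, fabric and SDP bandwidths, mirroring calculate_net_bw_in_mbytes_sec(). The function and parameter names below are illustrative, not the driver's.

static float net_bw_mbytes_sec(float dram_speed_mts, int num_chans,
                               int dram_chan_bytes, float dram_derate_pct,
                               float fclk_mhz, float fclk_derate_pct,
                               float dcfclk_mhz, int bus_width_bytes,
                               float sdp_derate_pct)
{
        /* Same three products as calculate_net_bw_in_mbytes_sec() */
        float dram_bw = dram_speed_mts * num_chans * dram_chan_bytes *
                        dram_derate_pct / 100.0f;
        float fabric_bw = fclk_mhz * bus_width_bytes * fclk_derate_pct / 100.0f;
        float sdp_bw = dcfclk_mhz * bus_width_bytes * sdp_derate_pct / 100.0f;
        float bw = dram_bw;

        if (fabric_bw < bw)
                bw = fabric_bw;
        if (sdp_bw < bw)
                bw = sdp_bw;
        return bw;
}

With the DCN32 top state hardcoded later in this patch (18000 MT/s across 24 two-byte channels at a 20% pixel-only derate, 2500 MHz FCLK and 1564 MHz DCFCLK on a 64-byte return bus at 67% and 100% derate respectively), the three terms come to roughly 172800, 107200 and 100096 MB/s, so SDP is the limiter and determines the state's sort position.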
- policy->FCLKChangeRequirementFinal = true; // TOREVIEW: What does this mean? - policy->USRRetrainingRequiredFinal = true; - policy->EnhancedPrefetchScheduleAccelerationFinal = true; // TOREVIEW: What does this mean? - policy->NomDETInKByteOverrideEnable = false; - policy->NomDETInKByteOverrideValue = 0; - policy->DCCProgrammingAssumesScanDirectionUnknownFinal = true; - policy->SynchronizeTimingsFinal = true; - policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = true; - policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = true; // TOREVIEW: What does this mean? - policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = true; // TOREVIEW: What does this mean? - if (project == dml_project_dcn35 || - project == dml_project_dcn36 || - project == dml_project_dcn351) { - policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false; - policy->EnhancedPrefetchScheduleAccelerationFinal = 0; - policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/ - policy->UseOnlyMaxPrefetchModes = 1; - } -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.h deleted file mode 100644 index e83e05248592..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#ifndef __DML2_POLICY_H__ -#define __DML2_POLICY_H__ - -#include "display_mode_core_structs.h" - -struct dml2_policy_build_synthetic_soc_states_params { - const struct soc_bounding_box_st *in_bbox; - struct soc_states_st *in_states; - struct soc_states_st *out_states; - int *dcfclk_stas_mhz; - int num_dcfclk_stas; -}; - -struct dml2_policy_build_synthetic_soc_states_scratch { - struct soc_state_bounding_box_st entry; -}; - -int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s, - struct dml2_policy_build_synthetic_soc_states_params *p); - -void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c deleted file mode 100644 index 3b866e876bf4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ /dev/null @@ -1,1525 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "display_mode_core.h" -#include "dml2_internal_types.h" -#include "dml2_translation_helper.h" - -#define NUM_DCFCLK_STAS 5 -#define NUM_DCFCLK_STAS_NEW 8 - -void dml2_init_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out) -{ - switch (dml2->v20.dml_core_ctx.project) { - case dml_project_dcn32: - case dml_project_dcn321: - default: - // Hardcoded values for DCN32x - out->vblank_nom_default_us = 600; - out->rob_buffer_size_kbytes = 128; - out->config_return_buffer_size_in_kbytes = 1280; - out->config_return_buffer_segment_size_in_kbytes = 64; - out->compressed_buffer_segment_size_in_kbytes = 64; - out->meta_fifo_size_in_kentries = 22; - out->zero_size_buffer_entries = 512; - out->dpte_buffer_size_in_pte_reqs_luma = 68; - out->dpte_buffer_size_in_pte_reqs_chroma = 36; - out->dcc_meta_buffer_size_bytes = 6272; - out->gpuvm_max_page_table_levels = 4; - out->hostvm_max_page_table_levels = 0; - out->pixel_chunk_size_kbytes = 8; - //out->alpha_pixel_chunk_size_kbytes; - out->min_pixel_chunk_size_bytes = 1024; - out->meta_chunk_size_kbytes = 2; - out->min_meta_chunk_size_bytes = 256; - out->writeback_chunk_size_kbytes = 8; - out->line_buffer_size_bits = 1171920; - out->max_line_buffer_lines = 32; - out->writeback_interface_buffer_size_kbytes = 90; - //Number of pipes after DCN Pipe harvesting - out->max_num_dpp = dml2->config.dcn_pipe_count; - out->max_num_otg = dml2->config.dcn_pipe_count; - out->max_num_wb = 1; - out->max_dchub_pscl_bw_pix_per_clk = 4; - out->max_pscl_lb_bw_pix_per_clk = 2; - out->max_lb_vscl_bw_pix_per_clk = 4; - out->max_vscl_hscl_bw_pix_per_clk = 4; - out->max_hscl_ratio = 6; - out->max_vscl_ratio = 6; - out->max_hscl_taps = 8; - out->max_vscl_taps = 8; - out->dispclk_ramp_margin_percent = 1; - out->dppclk_delay_subtotal = 47; - out->dppclk_delay_scl = 50; - out->dppclk_delay_scl_lb_only = 16; - out->dppclk_delay_cnvc_formatter = 28; - out->dppclk_delay_cnvc_cursor = 6; - out->cursor_buffer_size = 16; - out->cursor_chunk_size = 2; - out->dispclk_delay_subtotal = 125; - out->max_inter_dcn_tile_repeaters = 8; - out->writeback_max_hscl_ratio = 1; - out->writeback_max_vscl_ratio = 1; - out->writeback_min_hscl_ratio = 1; - out->writeback_min_vscl_ratio = 1; - out->writeback_max_hscl_taps = 1; - out->writeback_max_vscl_taps = 1; - out->writeback_line_buffer_buffer_size = 0; - out->num_dsc = 4; - out->maximum_dsc_bits_per_component = 12; - out->maximum_pixels_per_line_per_dsc_unit = 6016; - out->dsc422_native_support = true; - out->dcc_supported = true; - out->ptoi_supported = false; - - out->gpuvm_enable = false; - out->hostvm_enable = false; - out->cursor_64bpp_support = false; - out->dynamic_metadata_vm_enabled = false; - - out->max_num_hdmi_frl_outputs = 1; - out->max_num_dp2p0_outputs = 2; - out->max_num_dp2p0_streams = 4; - break; - - case dml_project_dcn35: - case dml_project_dcn351: - case dml_project_dcn36: - out->rob_buffer_size_kbytes = 64; - out->config_return_buffer_size_in_kbytes = 1792; - out->compressed_buffer_segment_size_in_kbytes = 64; - out->meta_fifo_size_in_kentries = 32; - out->zero_size_buffer_entries = 512; - out->pixel_chunk_size_kbytes = 8; - out->alpha_pixel_chunk_size_kbytes = 4; - out->min_pixel_chunk_size_bytes = 1024; - out->meta_chunk_size_kbytes = 2; - out->min_meta_chunk_size_bytes = 256; - out->writeback_chunk_size_kbytes = 8; - out->dpte_buffer_size_in_pte_reqs_luma = 68; - out->dpte_buffer_size_in_pte_reqs_chroma = 36; - out->dcc_meta_buffer_size_bytes = 6272; - 
out->gpuvm_enable = 1; - out->hostvm_enable = 1; - out->gpuvm_max_page_table_levels = 1; - out->hostvm_max_page_table_levels = 2; - out->num_dsc = 4; - out->maximum_dsc_bits_per_component = 12; - out->maximum_pixels_per_line_per_dsc_unit = 6016; - out->dsc422_native_support = 1; - out->line_buffer_size_bits = 986880; - out->dcc_supported = 1; - out->max_line_buffer_lines = 32; - out->writeback_interface_buffer_size_kbytes = 90; - out->max_num_dpp = 4; - out->max_num_otg = 4; - out->max_num_hdmi_frl_outputs = 1; - out->max_num_dp2p0_outputs = 2; - out->max_num_dp2p0_streams = 4; - out->max_num_wb = 1; - - out->max_dchub_pscl_bw_pix_per_clk = 4; - out->max_pscl_lb_bw_pix_per_clk = 2; - out->max_lb_vscl_bw_pix_per_clk = 4; - out->max_vscl_hscl_bw_pix_per_clk = 4; - out->max_hscl_ratio = 6; - out->max_vscl_ratio = 6; - out->max_hscl_taps = 8; - out->max_vscl_taps = 8; - out->dispclk_ramp_margin_percent = 1.11; - - out->dppclk_delay_subtotal = 47; - out->dppclk_delay_scl = 50; - out->dppclk_delay_scl_lb_only = 16; - out->dppclk_delay_cnvc_formatter = 28; - out->dppclk_delay_cnvc_cursor = 6; - out->dispclk_delay_subtotal = 125; - - out->dynamic_metadata_vm_enabled = false; - out->max_inter_dcn_tile_repeaters = 8; - out->cursor_buffer_size = 16; // kBytes - out->cursor_chunk_size = 2; // kBytes - - out->writeback_line_buffer_buffer_size = 0; - out->writeback_max_hscl_ratio = 1; - out->writeback_max_vscl_ratio = 1; - out->writeback_min_hscl_ratio = 1; - out->writeback_min_vscl_ratio = 1; - out->writeback_max_hscl_taps = 1; - out->writeback_max_vscl_taps = 1; - out->ptoi_supported = 0; - - out->vblank_nom_default_us = 668; /*not in dml, but in programming guide, hard coded in dml2_translate_ip_params*/ - out->config_return_buffer_segment_size_in_kbytes = 64; /*required, but not exist,, hard coded in dml2_translate_ip_params*/ - break; - - case dml_project_dcn401: - // Hardcoded values for DCN4m - out->vblank_nom_default_us = 668; //600; - out->rob_buffer_size_kbytes = 192; //128; - out->config_return_buffer_size_in_kbytes = 1344; //1280; - out->config_return_buffer_segment_size_in_kbytes = 64; - out->compressed_buffer_segment_size_in_kbytes = 64; - out->meta_fifo_size_in_kentries = 22; - out->dpte_buffer_size_in_pte_reqs_luma = 68; - out->dpte_buffer_size_in_pte_reqs_chroma = 36; - out->gpuvm_max_page_table_levels = 4; - out->pixel_chunk_size_kbytes = 8; - out->alpha_pixel_chunk_size_kbytes = 4; - out->min_pixel_chunk_size_bytes = 1024; - out->writeback_chunk_size_kbytes = 8; - out->line_buffer_size_bits = 1171920; - out->max_line_buffer_lines = 32; - out->writeback_interface_buffer_size_kbytes = 90; - //Number of pipes after DCN Pipe harvesting - out->max_num_dpp = dml2->config.dcn_pipe_count; - out->max_num_otg = dml2->config.dcn_pipe_count; - out->max_num_wb = 1; - out->max_dchub_pscl_bw_pix_per_clk = 4; - out->max_pscl_lb_bw_pix_per_clk = 2; - out->max_lb_vscl_bw_pix_per_clk = 4; - out->max_vscl_hscl_bw_pix_per_clk = 4; - out->max_hscl_ratio = 6; - out->max_vscl_ratio = 6; - out->max_hscl_taps = 8; - out->max_vscl_taps = 8; - out->dispclk_ramp_margin_percent = 1; - out->dppclk_delay_subtotal = 47; - out->dppclk_delay_scl = 50; - out->dppclk_delay_scl_lb_only = 16; - out->dppclk_delay_cnvc_formatter = 28; - out->dppclk_delay_cnvc_cursor = 6; - out->dispclk_delay_subtotal = 125; - out->cursor_buffer_size = 24; //16 - out->cursor_chunk_size = 2; - out->max_inter_dcn_tile_repeaters = 8; - out->writeback_max_hscl_ratio = 1; - out->writeback_max_vscl_ratio = 1; - out->writeback_min_hscl_ratio = 1; - 
out->writeback_min_vscl_ratio = 1; - out->writeback_max_hscl_taps = 1; - out->writeback_max_vscl_taps = 1; - out->writeback_line_buffer_buffer_size = 0; - out->num_dsc = 4; - out->maximum_dsc_bits_per_component = 12; - out->maximum_pixels_per_line_per_dsc_unit = 5760; - out->dsc422_native_support = true; - out->dcc_supported = true; - out->ptoi_supported = false; - - out->gpuvm_enable = false; - out->hostvm_enable = false; - out->cursor_64bpp_support = true; //false; - out->dynamic_metadata_vm_enabled = false; - - out->max_num_hdmi_frl_outputs = 1; - out->max_num_dp2p0_outputs = 4; //2; - out->max_num_dp2p0_streams = 4; - break; - } -} - -void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out) -{ - out->dprefclk_mhz = dml2->config.bbox_overrides.dprefclk_mhz; - out->xtalclk_mhz = dml2->config.bbox_overrides.xtalclk_mhz; - out->pcierefclk_mhz = 100; - out->refclk_mhz = dml2->config.bbox_overrides.dchub_refclk_mhz; - - out->max_outstanding_reqs = 512; - out->pct_ideal_sdp_bw_after_urgent = 100; - out->pct_ideal_fabric_bw_after_urgent = 67; - out->pct_ideal_dram_bw_after_urgent_pixel_only = 20; - out->pct_ideal_dram_bw_after_urgent_pixel_and_vm = 60; - out->pct_ideal_dram_bw_after_urgent_vm_only = 30; - out->pct_ideal_dram_bw_after_urgent_strobe = 67; - out->max_avg_sdp_bw_use_normal_percent = 80; - out->max_avg_fabric_bw_use_normal_percent = 60; - out->max_avg_dram_bw_use_normal_percent = 15; - out->max_avg_dram_bw_use_normal_strobe_percent = 50; - - out->urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096; - out->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096; - out->urgent_out_of_order_return_per_channel_vm_only_bytes = 4096; - out->return_bus_width_bytes = 64; - out->dram_channel_width_bytes = 2; - out->fabric_datapath_to_dcn_data_return_bytes = 64; - out->hostvm_min_page_size_kbytes = 0; - out->gpuvm_min_page_size_kbytes = 256; - out->phy_downspread_percent = 0.38; - out->dcn_downspread_percent = 0.5; - out->dispclk_dppclk_vco_speed_mhz = dml2->config.bbox_overrides.disp_pll_vco_speed_mhz; - out->mall_allocated_for_dcn_mbytes = dml2->config.mall_cfg.max_cab_allocation_bytes / 1048576; // 64 or 32 MB; - - out->do_urgent_latency_adjustment = true; - - switch (dml2->v20.dml_core_ctx.project) { - - case dml_project_dcn32: - default: - out->num_chans = 24; - out->round_trip_ping_latency_dcfclk_cycles = 263; - out->smn_latency_us = 2; - break; - - case dml_project_dcn321: - out->num_chans = 8; - out->round_trip_ping_latency_dcfclk_cycles = 207; - out->smn_latency_us = 0; - break; - - case dml_project_dcn35: - case dml_project_dcn351: - case dml_project_dcn36: - out->num_chans = 4; - out->round_trip_ping_latency_dcfclk_cycles = 106; - out->smn_latency_us = 2; - out->dispclk_dppclk_vco_speed_mhz = 3600; - out->pct_ideal_dram_bw_after_urgent_pixel_only = 65.0; - break; - - case dml_project_dcn401: - out->pct_ideal_fabric_bw_after_urgent = 76; //67; - out->max_avg_sdp_bw_use_normal_percent = 75; //80; - out->max_avg_fabric_bw_use_normal_percent = 57; //60; - - out->urgent_out_of_order_return_per_channel_pixel_only_bytes = 0; //4096; - out->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 0; //4096; - out->urgent_out_of_order_return_per_channel_vm_only_bytes = 0; //4096; - - out->num_chans = 16; - out->round_trip_ping_latency_dcfclk_cycles = 1000; //263; - out->smn_latency_us = 0; //2 us - out->mall_allocated_for_dcn_mbytes = dml2->config.mall_cfg.max_cab_allocation_bytes / 1048576; // 64; - break; - } - 
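The "Overrides if available" block that follows, like the per-state override loop in dml2_init_soc_states() further down, applies one idiom throughout: keep the hardcoded per-project default unless the caller supplied a non-zero value. A minimal sketch, with an illustrative helper name:

static unsigned int apply_override(unsigned int hardcoded, unsigned int override)
{
        /* Zero means "no override provided"; keep the project default. */
        return override ? override : hardcoded;
}

so that, for example, out->num_chans = apply_override(out->num_chans, dml2->config.bbox_overrides.dram_num_chan) would behave exactly like the open-coded test below.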
/* ---Overrides if available--- */ - if (dml2->config.bbox_overrides.dram_num_chan) - out->num_chans = dml2->config.bbox_overrides.dram_num_chan; - - if (dml2->config.bbox_overrides.dram_chanel_width_bytes) - out->dram_channel_width_bytes = dml2->config.bbox_overrides.dram_chanel_width_bytes; -} - -void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, - const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out) -{ - struct dml2_policy_build_synthetic_soc_states_scratch *s = &dml2->v20.scratch.create_scratch.build_synthetic_socbb_scratch; - struct dml2_policy_build_synthetic_soc_states_params *p = &dml2->v20.scratch.build_synthetic_socbb_params; - unsigned int dcfclk_stas_mhz[NUM_DCFCLK_STAS] = {0}; - unsigned int dcfclk_stas_mhz_new[NUM_DCFCLK_STAS_NEW] = {0}; - unsigned int dml_project = dml2->v20.dml_core_ctx.project; - - unsigned int i = 0; - unsigned int transactions_per_mem_clock = 16; // project specific, depends on used Memory type - - if (dml_project == dml_project_dcn351) { - p->dcfclk_stas_mhz = dcfclk_stas_mhz_new; - p->num_dcfclk_stas = NUM_DCFCLK_STAS_NEW; - } else { - p->dcfclk_stas_mhz = dcfclk_stas_mhz; - p->num_dcfclk_stas = NUM_DCFCLK_STAS; - } - - p->in_bbox = in_bbox; - p->out_states = out; - p->in_states = &dml2->v20.scratch.create_scratch.in_states; - - - /* Initial hardcoded values */ - switch (dml2->v20.dml_core_ctx.project) { - - case dml_project_dcn32: - default: - p->in_states->num_states = 2; - transactions_per_mem_clock = 16; - p->in_states->state_array[0].socclk_mhz = 620.0; - p->in_states->state_array[0].dscclk_mhz = 716.667; - p->in_states->state_array[0].phyclk_mhz = 810; - p->in_states->state_array[0].phyclk_d18_mhz = 667; - p->in_states->state_array[0].phyclk_d32_mhz = 625; - p->in_states->state_array[0].dtbclk_mhz = 1564.0; - p->in_states->state_array[0].fabricclk_mhz = 450.0; - p->in_states->state_array[0].dcfclk_mhz = 300.0; - p->in_states->state_array[0].dispclk_mhz = 2150.0; - p->in_states->state_array[0].dppclk_mhz = 2150.0; - p->in_states->state_array[0].dram_speed_mts = 100 * transactions_per_mem_clock; - - p->in_states->state_array[0].urgent_latency_pixel_data_only_us = 4; - p->in_states->state_array[0].urgent_latency_pixel_mixed_with_vm_data_us = 0; - p->in_states->state_array[0].urgent_latency_vm_data_only_us = 0; - p->in_states->state_array[0].writeback_latency_us = 12; - p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_component_us = 1; - p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_reference_mhz = 3000; - p->in_states->state_array[0].sr_exit_z8_time_us = 0; - p->in_states->state_array[0].sr_enter_plus_exit_z8_time_us = 0; - p->in_states->state_array[0].dram_clock_change_latency_us = 400; - p->in_states->state_array[0].use_ideal_dram_bw_strobe = true; - p->in_states->state_array[0].sr_exit_time_us = 42.97; - p->in_states->state_array[0].sr_enter_plus_exit_time_us = 49.94; - p->in_states->state_array[0].fclk_change_latency_us = 20; - p->in_states->state_array[0].usr_retraining_latency_us = 2; - - p->in_states->state_array[1].socclk_mhz = 1200.0; - p->in_states->state_array[1].fabricclk_mhz = 2500.0; - p->in_states->state_array[1].dcfclk_mhz = 1564.0; - p->in_states->state_array[1].dram_speed_mts = 1125 * transactions_per_mem_clock; - break; - - case dml_project_dcn321: - p->in_states->num_states = 2; - transactions_per_mem_clock = 16; - p->in_states->state_array[0].socclk_mhz = 582.0; - p->in_states->state_array[0].dscclk_mhz = 573.333; - 
p->in_states->state_array[0].phyclk_mhz = 810; - p->in_states->state_array[0].phyclk_d18_mhz = 667; - p->in_states->state_array[0].phyclk_d32_mhz = 313; - p->in_states->state_array[0].dtbclk_mhz = 1564.0; - p->in_states->state_array[0].fabricclk_mhz = 450.0; - p->in_states->state_array[0].dcfclk_mhz = 300.0; - p->in_states->state_array[0].dispclk_mhz = 1720.0; - p->in_states->state_array[0].dppclk_mhz = 1720.0; - p->in_states->state_array[0].dram_speed_mts = 100 * transactions_per_mem_clock; - - p->in_states->state_array[0].urgent_latency_pixel_data_only_us = 4; - p->in_states->state_array[0].urgent_latency_pixel_mixed_with_vm_data_us = 0; - p->in_states->state_array[0].urgent_latency_vm_data_only_us = 0; - p->in_states->state_array[0].writeback_latency_us = 12; - p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_component_us = 1; - p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_reference_mhz = 3000; - p->in_states->state_array[0].sr_exit_z8_time_us = 0; - p->in_states->state_array[0].sr_enter_plus_exit_z8_time_us = 0; - p->in_states->state_array[0].dram_clock_change_latency_us = 400; - p->in_states->state_array[0].use_ideal_dram_bw_strobe = true; - p->in_states->state_array[0].sr_exit_time_us = 19.95; - p->in_states->state_array[0].sr_enter_plus_exit_time_us = 24.36; - p->in_states->state_array[0].fclk_change_latency_us = 7; - p->in_states->state_array[0].usr_retraining_latency_us = 0; - - p->in_states->state_array[1].socclk_mhz = 1200.0; - p->in_states->state_array[1].fabricclk_mhz = 2250.0; - p->in_states->state_array[1].dcfclk_mhz = 1434.0; - p->in_states->state_array[1].dram_speed_mts = 1000 * transactions_per_mem_clock; - break; - case dml_project_dcn401: - p->in_states->num_states = 2; - transactions_per_mem_clock = 16; - p->in_states->state_array[0].socclk_mhz = 300; //620.0; - p->in_states->state_array[0].dscclk_mhz = 666.667; //716.667; - p->in_states->state_array[0].phyclk_mhz = 810; - p->in_states->state_array[0].phyclk_d18_mhz = 667; - p->in_states->state_array[0].phyclk_d32_mhz = 625; - p->in_states->state_array[0].dtbclk_mhz = 2000; //1564.0; - p->in_states->state_array[0].fabricclk_mhz = 300; //450.0; - p->in_states->state_array[0].dcfclk_mhz = 200; //300.0; - p->in_states->state_array[0].dispclk_mhz = 2000; //2150.0; - p->in_states->state_array[0].dppclk_mhz = 2000; //2150.0; - p->in_states->state_array[0].dram_speed_mts = 97 * transactions_per_mem_clock; //100 * - - p->in_states->state_array[0].urgent_latency_pixel_data_only_us = 4; - p->in_states->state_array[0].urgent_latency_pixel_mixed_with_vm_data_us = 0; - p->in_states->state_array[0].urgent_latency_vm_data_only_us = 0; - p->in_states->state_array[0].writeback_latency_us = 12; - p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_component_us = 1; - p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_reference_mhz = 1000; //3000; - p->in_states->state_array[0].sr_exit_z8_time_us = 0; - p->in_states->state_array[0].sr_enter_plus_exit_z8_time_us = 0; - p->in_states->state_array[0].dram_clock_change_latency_us = 400; - p->in_states->state_array[0].use_ideal_dram_bw_strobe = true; - p->in_states->state_array[0].sr_exit_time_us = 15.70; //42.97; - p->in_states->state_array[0].sr_enter_plus_exit_time_us = 20.20; //49.94; - p->in_states->state_array[0].fclk_change_latency_us = 0; //20; - p->in_states->state_array[0].usr_retraining_latency_us = 0; //2; - - p->in_states->state_array[1].socclk_mhz = 1600; //1200.0; - 
p->in_states->state_array[1].fabricclk_mhz = 2500; //2500.0; - p->in_states->state_array[1].dcfclk_mhz = 1800; //1564.0; - p->in_states->state_array[1].dram_speed_mts = 1125 * transactions_per_mem_clock; - break; - } - - /* Override from passed values, if available */ - for (i = 0; i < p->in_states->num_states; i++) { - if (dml2->config.bbox_overrides.sr_exit_latency_us) { - p->in_states->state_array[i].sr_exit_time_us = - dml2->config.bbox_overrides.sr_exit_latency_us; - } - - if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) { - p->in_states->state_array[i].sr_enter_plus_exit_time_us = - dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us; - } - - if (dml2->config.bbox_overrides.sr_exit_z8_time_us) { - p->in_states->state_array[i].sr_exit_z8_time_us = - dml2->config.bbox_overrides.sr_exit_z8_time_us; - } - - if (dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us) { - p->in_states->state_array[i].sr_enter_plus_exit_z8_time_us = - dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us; - } - - if (dml2->config.bbox_overrides.urgent_latency_us) { - p->in_states->state_array[i].urgent_latency_pixel_data_only_us = - dml2->config.bbox_overrides.urgent_latency_us; - } - - if (dml2->config.bbox_overrides.dram_clock_change_latency_us) { - p->in_states->state_array[i].dram_clock_change_latency_us = - dml2->config.bbox_overrides.dram_clock_change_latency_us; - } - - if (dml2->config.bbox_overrides.fclk_change_latency_us) { - p->in_states->state_array[i].fclk_change_latency_us = - dml2->config.bbox_overrides.fclk_change_latency_us; - } - } - - /* DCFCLK stas values are project specific */ - if ((dml2->v20.dml_core_ctx.project == dml_project_dcn32) || - (dml2->v20.dml_core_ctx.project == dml_project_dcn321)) { - p->dcfclk_stas_mhz[0] = p->in_states->state_array[0].dcfclk_mhz; - p->dcfclk_stas_mhz[1] = 615; - p->dcfclk_stas_mhz[2] = 906; - p->dcfclk_stas_mhz[3] = 1324; - p->dcfclk_stas_mhz[4] = p->in_states->state_array[1].dcfclk_mhz; - } else if (dml2->v20.dml_core_ctx.project != dml_project_dcn35 && - dml2->v20.dml_core_ctx.project != dml_project_dcn36 && - dml2->v20.dml_core_ctx.project != dml_project_dcn351) { - p->dcfclk_stas_mhz[0] = 300; - p->dcfclk_stas_mhz[1] = 615; - p->dcfclk_stas_mhz[2] = 906; - p->dcfclk_stas_mhz[3] = 1324; - p->dcfclk_stas_mhz[4] = 1500; - } - /* Copy clocks tables entries, if available */ - if (dml2->config.bbox_overrides.clks_table.num_states) { - p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; - for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) { - p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz; - } - - p->dcfclk_stas_mhz[0] = dml2->config.bbox_overrides.clks_table.clk_entries[0].dcfclk_mhz; - if (i > 1) - p->dcfclk_stas_mhz[4] = dml2->config.bbox_overrides.clks_table.clk_entries[i-1].dcfclk_mhz; - - for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels; i++) { - p->in_states->state_array[i].fabricclk_mhz = - dml2->config.bbox_overrides.clks_table.clk_entries[i].fclk_mhz; - } - - for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels; i++) { - p->in_states->state_array[i].dram_speed_mts = - dml2->config.bbox_overrides.clks_table.clk_entries[i].memclk_mhz * transactions_per_mem_clock; - } - - for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels; i++) { - 
p->in_states->state_array[i].socclk_mhz = - dml2->config.bbox_overrides.clks_table.clk_entries[i].socclk_mhz; - } - - for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels; i++) { - if (dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz > 0) - p->in_states->state_array[i].dtbclk_mhz = - dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz; - } - - for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels; i++) { - p->in_states->state_array[i].dispclk_mhz = - dml2->config.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz; - p->in_states->state_array[i].dppclk_mhz = - dml2->config.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz; - } - } - - if (dml2->v20.dml_core_ctx.project == dml_project_dcn35 || - dml2->v20.dml_core_ctx.project == dml_project_dcn36 || - dml2->v20.dml_core_ctx.project == dml_project_dcn351) { - int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0, - max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0, max_socclk_mhz = 0; - - for (i = 0; i < p->in_states->num_states; i++) { - if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = (int)p->in_states->state_array[i].dcfclk_mhz; - if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz) - max_fclk_mhz = (int)p->in_states->state_array[i].fabricclk_mhz; - if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz) - max_socclk_mhz = (int)p->in_states->state_array[i].socclk_mhz; - if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz) - max_uclk_mhz = (int)p->in_states->state_array[i].dram_speed_mts; - if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = (int)p->in_states->state_array[i].dispclk_mhz; - if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = (int)p->in_states->state_array[i].dppclk_mhz; - if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz; - if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz) - max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz; - } - - for (i = 0; i < p->in_states->num_states; i++) { - /* Independent states - including base (unlisted) parameters from state 0. */ - p->out_states->state_array[i] = p->in_states->state_array[0]; - - p->out_states->state_array[i].dispclk_mhz = max_dispclk_mhz; - p->out_states->state_array[i].dppclk_mhz = max_dppclk_mhz; - p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; - p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; - - p->out_states->state_array[i].dscclk_mhz = max_dispclk_mhz / 3.0; - p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; - p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; - - /* Dependent states. 
*/ - p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[i].dram_speed_mts; - p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz; - p->out_states->state_array[i].socclk_mhz = p->in_states->state_array[i].socclk_mhz; - p->out_states->state_array[i].dcfclk_mhz = p->in_states->state_array[i].dcfclk_mhz; - } - - p->out_states->num_states = p->in_states->num_states; - } else { - dml2_policy_build_synthetic_soc_states(s, p); - } -} - -void dml2_translate_ip_params(const struct dc *in, struct ip_params_st *out) -{ - const struct _vcs_dpi_ip_params_st *in_ip_params = &in->dml.ip; - /* Copy over the IP params tp dml2_ctx */ - out->compressed_buffer_segment_size_in_kbytes = in_ip_params->compressed_buffer_segment_size_in_kbytes; - out->config_return_buffer_size_in_kbytes = in_ip_params->config_return_buffer_size_in_kbytes; - out->cursor_buffer_size = in_ip_params->cursor_buffer_size; - out->cursor_chunk_size = in_ip_params->cursor_chunk_size; - out->dcc_meta_buffer_size_bytes = in_ip_params->dcc_meta_buffer_size_bytes; - out->dcc_supported = in_ip_params->dcc_supported; - out->dispclk_delay_subtotal = in_ip_params->dispclk_delay_subtotal; - out->dispclk_ramp_margin_percent = in_ip_params->dispclk_ramp_margin_percent; - out->dppclk_delay_cnvc_cursor = in_ip_params->dppclk_delay_cnvc_cursor; - out->dppclk_delay_cnvc_formatter = in_ip_params->dppclk_delay_cnvc_formatter; - out->dppclk_delay_scl = in_ip_params->dppclk_delay_scl; - out->dppclk_delay_scl_lb_only = in_ip_params->dppclk_delay_scl_lb_only; - out->dppclk_delay_subtotal = in_ip_params->dppclk_delay_subtotal; - out->dpte_buffer_size_in_pte_reqs_chroma = in_ip_params->dpte_buffer_size_in_pte_reqs_chroma; - out->dpte_buffer_size_in_pte_reqs_luma = in_ip_params->dpte_buffer_size_in_pte_reqs_luma; - out->dsc422_native_support = in_ip_params->dsc422_native_support; - out->dynamic_metadata_vm_enabled = in_ip_params->dynamic_metadata_vm_enabled; - out->gpuvm_enable = in_ip_params->gpuvm_enable; - out->gpuvm_max_page_table_levels = in_ip_params->gpuvm_max_page_table_levels; - out->hostvm_enable = in_ip_params->hostvm_enable; - out->hostvm_max_page_table_levels = in_ip_params->hostvm_max_page_table_levels; - out->line_buffer_size_bits = in_ip_params->line_buffer_size_bits; - out->maximum_dsc_bits_per_component = in_ip_params->maximum_dsc_bits_per_component; - out->maximum_pixels_per_line_per_dsc_unit = in_ip_params->maximum_pixels_per_line_per_dsc_unit; - out->max_dchub_pscl_bw_pix_per_clk = in_ip_params->max_dchub_pscl_bw_pix_per_clk; - out->max_hscl_ratio = in_ip_params->max_hscl_ratio; - out->max_hscl_taps = in_ip_params->max_hscl_taps; - out->max_inter_dcn_tile_repeaters = in_ip_params->max_inter_dcn_tile_repeaters; - out->max_lb_vscl_bw_pix_per_clk = in_ip_params->max_lb_vscl_bw_pix_per_clk; - out->max_line_buffer_lines = in_ip_params->max_line_buffer_lines; - out->max_num_dp2p0_outputs = in_ip_params->max_num_dp2p0_outputs; - out->max_num_dp2p0_streams = in_ip_params->max_num_dp2p0_streams; - out->max_num_dpp = in_ip_params->max_num_dpp; - out->max_num_hdmi_frl_outputs = in_ip_params->max_num_hdmi_frl_outputs; - out->max_num_otg = in_ip_params->max_num_otg; - out->max_num_wb = in_ip_params->max_num_wb; - out->max_pscl_lb_bw_pix_per_clk = in_ip_params->max_pscl_lb_bw_pix_per_clk; - out->max_vscl_hscl_bw_pix_per_clk = in_ip_params->max_vscl_hscl_bw_pix_per_clk; - out->max_vscl_ratio = in_ip_params->max_vscl_ratio; - out->max_vscl_taps = in_ip_params->max_vscl_taps; - 
out->meta_chunk_size_kbytes = in_ip_params->meta_chunk_size_kbytes; - out->meta_fifo_size_in_kentries = in_ip_params->meta_fifo_size_in_kentries; - out->min_meta_chunk_size_bytes = in_ip_params->min_meta_chunk_size_bytes; - out->min_pixel_chunk_size_bytes = in_ip_params->min_pixel_chunk_size_bytes; - out->num_dsc = in_ip_params->num_dsc; - out->pixel_chunk_size_kbytes = in_ip_params->pixel_chunk_size_kbytes; - out->ptoi_supported = in_ip_params->ptoi_supported; - out->rob_buffer_size_kbytes = in_ip_params->rob_buffer_size_kbytes; - out->writeback_chunk_size_kbytes = in_ip_params->writeback_chunk_size_kbytes; - out->writeback_interface_buffer_size_kbytes = in_ip_params->writeback_interface_buffer_size_kbytes; - out->writeback_line_buffer_buffer_size = in_ip_params->writeback_line_buffer_buffer_size; - out->writeback_max_hscl_ratio = in_ip_params->writeback_max_hscl_ratio; - out->writeback_max_hscl_taps = in_ip_params->writeback_max_hscl_taps; - out->writeback_max_vscl_ratio = in_ip_params->writeback_max_vscl_ratio; - out->writeback_max_vscl_taps = in_ip_params->writeback_max_vscl_taps; - out->writeback_min_hscl_ratio = in_ip_params->writeback_min_hscl_ratio; - out->writeback_min_vscl_ratio = in_ip_params->writeback_min_vscl_ratio; - out->zero_size_buffer_entries = in_ip_params->zero_size_buffer_entries; - - /* As per hardcoded reference / discussions */ - out->config_return_buffer_segment_size_in_kbytes = 64; - //out->vblank_nom_default_us = 600; - out->vblank_nom_default_us = in_ip_params->VBlankNomDefaultUS; -} - -void dml2_translate_socbb_params(const struct dc *in, struct soc_bounding_box_st *out) -{ - const struct _vcs_dpi_soc_bounding_box_st *in_soc_params = &in->dml.soc; - /* Copy over the SOCBB params to dml2_ctx */ - out->dispclk_dppclk_vco_speed_mhz = in_soc_params->dispclk_dppclk_vco_speed_mhz; - out->do_urgent_latency_adjustment = in_soc_params->do_urgent_latency_adjustment; - out->dram_channel_width_bytes = (dml_uint_t)in_soc_params->dram_channel_width_bytes; - out->fabric_datapath_to_dcn_data_return_bytes = (dml_uint_t)in_soc_params->fabric_datapath_to_dcn_data_return_bytes; - out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes / 1024; - out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes / 1024; - out->mall_allocated_for_dcn_mbytes = (dml_uint_t)in_soc_params->mall_allocated_for_dcn_mbytes; - out->max_avg_dram_bw_use_normal_percent = in_soc_params->max_avg_dram_bw_use_normal_percent; - out->max_avg_fabric_bw_use_normal_percent = in_soc_params->max_avg_fabric_bw_use_normal_percent; - out->max_avg_dram_bw_use_normal_strobe_percent = in_soc_params->max_avg_dram_bw_use_normal_strobe_percent; - out->max_avg_sdp_bw_use_normal_percent = in_soc_params->max_avg_sdp_bw_use_normal_percent; - out->max_outstanding_reqs = in_soc_params->max_request_size_bytes; - out->num_chans = in_soc_params->num_chans; - out->pct_ideal_dram_bw_after_urgent_strobe = in_soc_params->pct_ideal_dram_bw_after_urgent_strobe; - out->pct_ideal_dram_bw_after_urgent_vm_only = in_soc_params->pct_ideal_dram_sdp_bw_after_urgent_vm_only; - out->pct_ideal_fabric_bw_after_urgent = in_soc_params->pct_ideal_fabric_bw_after_urgent; - out->pct_ideal_sdp_bw_after_urgent = in_soc_params->pct_ideal_sdp_bw_after_urgent; - out->phy_downspread_percent = in_soc_params->downspread_percent; - out->refclk_mhz = 50; // As per hardcoded reference. 
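Note the unit conventions in this translation: DC stores sizes in bytes, while DML2 expects VM page sizes in kilobytes and the MALL allocation in megabytes, hence the /1024 divisions above and the /1048576 used for mall_allocated_for_dcn_mbytes earlier in this file. A trivial sketch of the two conversions (illustrative names):

static dml_uint_t bytes_to_kbytes(unsigned int bytes)
{
        return bytes / 1024;      /* 262144-byte GPUVM min page -> 256 KB */
}

static dml_uint_t bytes_to_mbytes(unsigned int bytes)
{
        return bytes / 1048576;   /* a 64 MB CAB allocation -> 64 */
}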
- out->return_bus_width_bytes = in_soc_params->return_bus_width_bytes; - out->round_trip_ping_latency_dcfclk_cycles = in_soc_params->round_trip_ping_latency_dcfclk_cycles; - out->smn_latency_us = in_soc_params->smn_latency_us; - out->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = in_soc_params->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes; - out->urgent_out_of_order_return_per_channel_pixel_only_bytes = in_soc_params->urgent_out_of_order_return_per_channel_pixel_only_bytes; - out->urgent_out_of_order_return_per_channel_vm_only_bytes = in_soc_params->urgent_out_of_order_return_per_channel_vm_only_bytes; - out->pct_ideal_dram_bw_after_urgent_pixel_and_vm = in_soc_params->pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm; - out->pct_ideal_dram_bw_after_urgent_pixel_only = in_soc_params->pct_ideal_dram_sdp_bw_after_urgent_pixel_only; - out->dcn_downspread_percent = in_soc_params->dcn_downspread_percent; -} - -void dml2_translate_soc_states(const struct dc *dc, struct soc_states_st *out, int num_states) -{ - unsigned int i = 0; - out->num_states = num_states; - - for (i = 0; i < out->num_states; i++) { - out->state_array[i].dcfclk_mhz = dc->dml.soc.clock_limits[i].dcfclk_mhz; - out->state_array[i].dispclk_mhz = dc->dml.soc.clock_limits[i].dispclk_mhz; - out->state_array[i].dppclk_mhz = dc->dml.soc.clock_limits[i].dppclk_mhz; - out->state_array[i].dram_speed_mts = dc->dml.soc.clock_limits[i].dram_speed_mts; - out->state_array[i].dtbclk_mhz = dc->dml.soc.clock_limits[i].dtbclk_mhz; - out->state_array[i].socclk_mhz = dc->dml.soc.clock_limits[i].socclk_mhz; - out->state_array[i].fabricclk_mhz = dc->dml.soc.clock_limits[i].fabricclk_mhz; - out->state_array[i].dscclk_mhz = dc->dml.soc.clock_limits[i].dscclk_mhz; - out->state_array[i].phyclk_d18_mhz = dc->dml.soc.clock_limits[i].phyclk_d18_mhz; - out->state_array[i].phyclk_d32_mhz = dc->dml.soc.clock_limits[i].phyclk_d32_mhz; - out->state_array[i].phyclk_mhz = dc->dml.soc.clock_limits[i].phyclk_mhz; - out->state_array[i].sr_enter_plus_exit_time_us = dc->dml.soc.sr_enter_plus_exit_time_us; - out->state_array[i].sr_exit_time_us = dc->dml.soc.sr_exit_time_us; - out->state_array[i].fclk_change_latency_us = dc->dml.soc.fclk_change_latency_us; - out->state_array[i].dram_clock_change_latency_us = dc->dml.soc.dram_clock_change_latency_us; - out->state_array[i].usr_retraining_latency_us = dc->dml.soc.usr_retraining_latency_us; - out->state_array[i].writeback_latency_us = dc->dml.soc.writeback_latency_us; - /* Driver initialized values for these are different than the spreadsheet. Use the - * spreadsheet ones for now. We need to decided which ones to use. 
- */ - out->state_array[i].sr_exit_z8_time_us = dc->dml.soc.sr_exit_z8_time_us; - out->state_array[i].sr_enter_plus_exit_z8_time_us = dc->dml.soc.sr_enter_plus_exit_z8_time_us; - //out->state_array[i].sr_exit_z8_time_us = 5.20; - //out->state_array[i].sr_enter_plus_exit_z8_time_us = 9.60; - out->state_array[i].use_ideal_dram_bw_strobe = true; - out->state_array[i].urgent_latency_pixel_data_only_us = dc->dml.soc.urgent_latency_pixel_data_only_us; - out->state_array[i].urgent_latency_pixel_mixed_with_vm_data_us = dc->dml.soc.urgent_latency_pixel_mixed_with_vm_data_us; - out->state_array[i].urgent_latency_vm_data_only_us = dc->dml.soc.urgent_latency_vm_data_only_us; - out->state_array[i].urgent_latency_adjustment_fabric_clock_component_us = dc->dml.soc.urgent_latency_adjustment_fabric_clock_component_us; - out->state_array[i].urgent_latency_adjustment_fabric_clock_reference_mhz = dc->dml.soc.urgent_latency_adjustment_fabric_clock_reference_mhz; - } -} - -static void populate_dml_timing_cfg_from_stream_state(struct dml_timing_cfg_st *out, unsigned int location, const struct dc_stream_state *in) -{ - dml_uint_t hblank_start, vblank_start; - - out->HActive[location] = in->timing.h_addressable + in->timing.h_border_left + in->timing.h_border_right; - out->VActive[location] = in->timing.v_addressable + in->timing.v_border_bottom + in->timing.v_border_top; - out->RefreshRate[location] = ((in->timing.pix_clk_100hz * 100) / in->timing.h_total) / in->timing.v_total; - out->VFrontPorch[location] = in->timing.v_front_porch; - out->PixelClock[location] = in->timing.pix_clk_100hz / 10000.00; - if (in->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - out->PixelClock[location] *= 2; - out->HTotal[location] = in->timing.h_total; - out->VTotal[location] = in->timing.v_total; - out->Interlace[location] = in->timing.flags.INTERLACE; - hblank_start = in->timing.h_total - in->timing.h_front_porch; - out->HBlankEnd[location] = hblank_start - - in->timing.h_addressable - - in->timing.h_border_left - - in->timing.h_border_right; - vblank_start = in->timing.v_total - in->timing.v_front_porch; - out->VBlankEnd[location] = vblank_start - - in->timing.v_addressable - - in->timing.v_border_top - - in->timing.v_border_bottom; - out->DRRDisplay[location] = false; -} - -static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *out, unsigned int location, - const struct dc_stream_state *in, const struct pipe_ctx *pipe, struct dml2_context *dml2) -{ - unsigned int output_bpc; - - out->DSCEnable[location] = (enum dml_dsc_enable)in->timing.flags.DSC; - out->OutputLinkDPLanes[location] = 4; // As per code in dcn20_resource.c - out->DSCInputBitPerComponent[location] = 12; // As per code in dcn20_resource.c - out->DSCSlices[location] = in->timing.dsc_cfg.num_slices_h; - - switch (in->signal) { - case SIGNAL_TYPE_DISPLAY_PORT_MST: - case SIGNAL_TYPE_DISPLAY_PORT: - out->OutputEncoder[location] = dml_dp; - if (location < MAX_HPO_DP2_ENCODERS && dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) - out->OutputEncoder[dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location]] = dml_dp2p0; - break; - case SIGNAL_TYPE_EDP: - out->OutputEncoder[location] = dml_edp; - break; - case SIGNAL_TYPE_HDMI_TYPE_A: - case SIGNAL_TYPE_DVI_SINGLE_LINK: - case SIGNAL_TYPE_DVI_DUAL_LINK: - out->OutputEncoder[location] = dml_hdmi; - break; - default: - out->OutputEncoder[location] = dml_dp; - } - - switch (in->timing.display_color_depth) { - case COLOR_DEPTH_666: - output_bpc = 6; - break; - 
case COLOR_DEPTH_888: - output_bpc = 8; - break; - case COLOR_DEPTH_101010: - output_bpc = 10; - break; - case COLOR_DEPTH_121212: - output_bpc = 12; - break; - case COLOR_DEPTH_141414: - output_bpc = 14; - break; - case COLOR_DEPTH_161616: - output_bpc = 16; - break; - case COLOR_DEPTH_999: - output_bpc = 9; - break; - case COLOR_DEPTH_111111: - output_bpc = 11; - break; - default: - output_bpc = 8; - break; - } - - switch (in->timing.pixel_encoding) { - case PIXEL_ENCODING_RGB: - case PIXEL_ENCODING_YCBCR444: - out->OutputFormat[location] = dml_444; - out->OutputBpp[location] = (dml_float_t)output_bpc * 3; - break; - case PIXEL_ENCODING_YCBCR420: - out->OutputFormat[location] = dml_420; - out->OutputBpp[location] = (output_bpc * 3.0) / 2; - break; - case PIXEL_ENCODING_YCBCR422: - if (in->timing.flags.DSC && !in->timing.dsc_cfg.ycbcr422_simple) - out->OutputFormat[location] = dml_n422; - else - out->OutputFormat[location] = dml_s422; - out->OutputBpp[location] = (dml_float_t)output_bpc * 2; - break; - default: - out->OutputFormat[location] = dml_444; - out->OutputBpp[location] = (dml_float_t)output_bpc * 3; - break; - } - - if (in->timing.flags.DSC) { - out->OutputBpp[location] = in->timing.dsc_cfg.bits_per_pixel / 16.0; - } - - // This has been false throughout DCN32x development. If needed we can change this later on. - out->OutputMultistreamEn[location] = false; - - switch (in->signal) { - case SIGNAL_TYPE_NONE: - case SIGNAL_TYPE_DVI_SINGLE_LINK: - case SIGNAL_TYPE_DVI_DUAL_LINK: - case SIGNAL_TYPE_HDMI_TYPE_A: - case SIGNAL_TYPE_LVDS: - case SIGNAL_TYPE_RGB: - case SIGNAL_TYPE_DISPLAY_PORT: - case SIGNAL_TYPE_DISPLAY_PORT_MST: - case SIGNAL_TYPE_EDP: - case SIGNAL_TYPE_VIRTUAL: - default: - out->OutputLinkDPRate[location] = dml_dp_rate_na; - break; - } - - out->PixelClockBackEnd[location] = in->timing.pix_clk_100hz / 10000.00; - - out->AudioSampleLayout[location] = in->audio_info.modes->sample_size; - out->AudioSampleRate[location] = in->audio_info.modes->max_bit_rate; - - out->OutputDisabled[location] = true; -} - -static void populate_dummy_dml_surface_cfg(struct dml_surface_cfg_st *out, unsigned int location, const struct dc_stream_state *in) -{ - out->SurfaceWidthY[location] = in->timing.h_addressable; - out->SurfaceHeightY[location] = in->timing.v_addressable; - out->SurfaceWidthC[location] = in->timing.h_addressable; - out->SurfaceHeightC[location] = in->timing.v_addressable; - out->PitchY[location] = ((out->SurfaceWidthY[location] + 127) / 128) * 128; - out->PitchC[location] = 1; - out->DCCEnable[location] = false; - out->DCCMetaPitchY[location] = 0; - out->DCCMetaPitchC[location] = 0; - out->DCCRateLuma[location] = 1.0; - out->DCCRateChroma[location] = 1.0; - out->DCCFractionOfZeroSizeRequestsLuma[location] = 0; - out->DCCFractionOfZeroSizeRequestsChroma[location] = 0; - out->SurfaceTiling[location] = dml_sw_64kb_r_x; - out->SourcePixelFormat[location] = dml_444_32; -} - -static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_project, struct dml_surface_cfg_st *out, unsigned int location, const struct dc_plane_state *in) -{ - out->PitchY[location] = in->plane_size.surface_pitch; - out->SurfaceHeightY[location] = in->plane_size.surface_size.height; - out->SurfaceWidthY[location] = in->plane_size.surface_size.width; - out->SurfaceHeightC[location] = in->plane_size.chroma_size.height; - out->SurfaceWidthC[location] = in->plane_size.chroma_size.width; - out->PitchC[location] = in->plane_size.chroma_pitch; - out->DCCEnable[location] = in->dcc.enable; - 
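Two derivations in populate_dml_timing_cfg_from_stream_state() and populate_dml_output_cfg_from_stream_state() above are easy to sanity-check in isolation (names below are illustrative): the refresh rate comes from the 100 Hz-unit pixel clock divided by the horizontal and vertical totals, and the output bpp follows from bpc and chroma subsampling, with DSC bpp kept in 1/16-bpp units (hence the /16.0).

static unsigned int refresh_rate_hz(unsigned int pix_clk_100hz,
                                    unsigned int h_total, unsigned int v_total)
{
        /* pix_clk_100hz is in units of 100 Hz, as in dc_crtc_timing */
        return ((pix_clk_100hz * 100) / h_total) / v_total;
}

/*
 * CEA 1080p60: refresh_rate_hz(1485000, 2200, 1125) == 60.
 *
 * Output bpp, as in the pixel-encoding switch above:
 *   4:4:4 -> bpc * 3      (8 bpc  -> 24 bpp)
 *   4:2:0 -> bpc * 3 / 2  (10 bpc -> 15 bpp)
 *   4:2:2 -> bpc * 2      (10 bpc -> 20 bpp)
 * and with DSC enabled, bits_per_pixel / 16.0 (e.g. 192 -> 12.0 bpp).
 */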
out->DCCMetaPitchY[location] = in->dcc.meta_pitch; - out->DCCMetaPitchC[location] = in->dcc.meta_pitch_c; - out->DCCRateLuma[location] = 1.0; - out->DCCRateChroma[location] = 1.0; - out->DCCFractionOfZeroSizeRequestsLuma[location] = in->dcc.independent_64b_blks; - out->DCCFractionOfZeroSizeRequestsChroma[location] = in->dcc.independent_64b_blks_c; - - switch (dml2_project) { - default: - out->SurfaceTiling[location] = (enum dml_swizzle_mode)in->tiling_info.gfx9.swizzle; - break; - case dml_project_dcn401: - // Temporary use gfx11 swizzle in dml, until proper dml for DCN4x is integrated/implemented - switch (in->tiling_info.gfx_addr3.swizzle) { - case DC_ADDR3_SW_4KB_2D: - case DC_ADDR3_SW_64KB_2D: - case DC_ADDR3_SW_256KB_2D: - default: - out->SurfaceTiling[location] = dml_sw_64kb_r_x; - break; - case DC_ADDR3_SW_LINEAR: - out->SurfaceTiling[location] = dml_sw_linear; - break; - } - } - - switch (in->format) { - case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: - case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: - out->SourcePixelFormat[location] = dml_420_8; - break; - case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: - case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: - out->SourcePixelFormat[location] = dml_420_10; - break; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: - out->SourcePixelFormat[location] = dml_444_64; - break; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: - case SURFACE_PIXEL_FORMAT_GRPH_RGB565: - out->SourcePixelFormat[location] = dml_444_16; - break; - case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: - out->SourcePixelFormat[location] = dml_444_8; - break; - case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: - out->SourcePixelFormat[location] = dml_rgbe_alpha; - break; - default: - out->SourcePixelFormat[location] = dml_444_32; - break; - } -} - -static struct scaler_data *get_scaler_data_for_plane( - const struct dc_plane_state *in, - struct dc_state *context) -{ - int i; - struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; - - memset(temp_pipe, 0, sizeof(struct pipe_ctx)); - - for (i = 0; i < MAX_PIPES; i++) { - const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state == in && !pipe->prev_odm_pipe) { - temp_pipe->stream = pipe->stream; - temp_pipe->plane_state = pipe->plane_state; - temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; - temp_pipe->stream_res = pipe->stream_res; - resource_build_scaling_params(temp_pipe); - break; - } - } - - ASSERT(i < MAX_PIPES); - return &temp_pipe->plane_res.scl_data; -} - -static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, - const struct dc_stream_state *in, - const struct soc_bounding_box_st *soc) -{ - dml_uint_t width, height; - - if (in->timing.h_addressable > 3840) - width = 3840; - else - width = in->timing.h_addressable; // 4K max - - if (in->timing.v_addressable > 2160) - height = 2160; - else - height = in->timing.v_addressable; // 4K max - - out->CursorBPP[location] = dml_cur_32bit; - out->CursorWidth[location] = 256; - - out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes; - - out->ViewportWidth[location] = width; - out->ViewportHeight[location] = height; - out->ViewportStationary[location] = false; - out->ViewportWidthChroma[location] = 0; - out->ViewportHeightChroma[location] = 0; - out->ViewportXStart[location] = 0; - out->ViewportXStartC[location] = 0; - out->ViewportYStart[location] = 
-	out->ViewportYStartC[location] = 0;
-
-	out->ScalerEnabled[location] = false;
-	out->HRatio[location] = 1.0;
-	out->VRatio[location] = 1.0;
-	out->HRatioChroma[location] = 0;
-	out->VRatioChroma[location] = 0;
-	out->HTaps[location] = 1;
-	out->VTaps[location] = 1;
-	out->HTapsChroma[location] = 0;
-	out->VTapsChroma[location] = 0;
-	out->SourceScan[location] = dml_rotation_0;
-	out->ScalerRecoutWidth[location] = width;
-
-	out->LBBitPerPixel[location] = 57;
-
-	out->DynamicMetadataEnable[location] = false;
-
-	out->NumberOfCursors[location] = 1;
-	out->UseMALLForStaticScreen[location] = dml_use_mall_static_screen_disable;
-	out->UseMALLForPStateChange[location] = dml_use_mall_pstate_change_disable;
-
-	out->DETSizeOverride[location] = 256;
-
-	out->ScalerEnabled[location] = false;
-}
-
-static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location,
-						    const struct dc_plane_state *in, struct dc_state *context,
-						    const struct soc_bounding_box_st *soc)
-{
-	struct scaler_data *scaler_data = get_scaler_data_for_plane(in, context);
-
-	out->CursorBPP[location] = dml_cur_32bit;
-	out->CursorWidth[location] = 256;
-
-	out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes;
-
-	out->ViewportWidth[location] = scaler_data->viewport.width;
-	out->ViewportHeight[location] = scaler_data->viewport.height;
-	out->ViewportWidthChroma[location] = scaler_data->viewport_c.width;
-	out->ViewportHeightChroma[location] = scaler_data->viewport_c.height;
-	out->ViewportXStart[location] = scaler_data->viewport.x;
-	out->ViewportYStart[location] = scaler_data->viewport.y;
-	out->ViewportXStartC[location] = scaler_data->viewport_c.x;
-	out->ViewportYStartC[location] = scaler_data->viewport_c.y;
-	out->ViewportStationary[location] = false;
-
-	out->ScalerEnabled[location] = scaler_data->ratios.horz.value != dc_fixpt_one.value ||
-		scaler_data->ratios.horz_c.value != dc_fixpt_one.value ||
-		scaler_data->ratios.vert.value != dc_fixpt_one.value ||
-		scaler_data->ratios.vert_c.value != dc_fixpt_one.value;
-
-	/* Current driver code base uses LBBitPerPixel as 57. There is a discrepancy
-	 * from the HW/DML teams about this value. Initialize LBBitPerPixel with the
-	 * value currently used in Navi3x.
-	 */
-
-	out->LBBitPerPixel[location] = 57;
-
-	if (out->ScalerEnabled[location] == false) {
-		out->HRatio[location] = 1;
-		out->HRatioChroma[location] = 1;
-		out->VRatio[location] = 1;
-		out->VRatioChroma[location] = 1;
-	} else {
-		/* Follow the original dml_wrapper.c code direction to fix scaling issues */
-		out->HRatio[location] = (dml_float_t)scaler_data->ratios.horz.value / (1ULL << 32);
-		out->HRatioChroma[location] = (dml_float_t)scaler_data->ratios.horz_c.value / (1ULL << 32);
-		out->VRatio[location] = (dml_float_t)scaler_data->ratios.vert.value / (1ULL << 32);
-		out->VRatioChroma[location] = (dml_float_t)scaler_data->ratios.vert_c.value / (1ULL << 32);
-	}
-
-	if (!scaler_data->taps.h_taps) {
-		out->HTaps[location] = 1;
-		out->HTapsChroma[location] = 1;
-	} else {
-		out->HTaps[location] = scaler_data->taps.h_taps;
-		out->HTapsChroma[location] = scaler_data->taps.h_taps_c;
-	}
-	if (!scaler_data->taps.v_taps) {
-		out->VTaps[location] = 1;
-		out->VTapsChroma[location] = 1;
-	} else {
-		out->VTaps[location] = scaler_data->taps.v_taps;
-		out->VTapsChroma[location] = scaler_data->taps.v_taps_c;
-	}
-
-	out->SourceScan[location] = (enum dml_rotation_angle)in->rotation;
-	out->ScalerRecoutWidth[location] = in->dst_rect.width;
-
-	out->DynamicMetadataEnable[location] = false;
-	out->DynamicMetadataLinesBeforeActiveRequired[location] = 0;
-	out->DynamicMetadataTransmittedBytes[location] = 0;
-
-	out->NumberOfCursors[location] = 1;
-}
-
-static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml2,
-		const struct dc_stream_state *stream, const struct dml_display_cfg_st *dml_dispcfg)
-{
-	int i = 0;
-	int location = -1;
-
-	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
-		if (dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] && dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i] == stream->stream_id) {
-			location = i;
-			break;
-		}
-	}
-
-	return location;
-}
-
-static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *context, const struct dc_plane_state *plane,
-		unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id)
-{
-	int i, j;
-	bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists;
-
-	if (!plane_id)
-		return false;
-
-	for (i = 0; i < context->stream_count; i++) {
-		if (context->streams[i]->stream_id == stream_id) {
-			for (j = 0; j < context->stream_status[i].plane_count; j++) {
-				if (context->stream_status[i].plane_states[j] == plane &&
-					(!is_plane_duplicate || (j == plane_index))) {
-					*plane_id = (i << 16) | j;
-					return true;
-				}
-			}
-		}
-	}
-
-	return false;
-}
-
-static unsigned int map_plane_to_dml_display_cfg(const struct dml2_context *dml2, const struct dc_plane_state *plane,
-		const struct dc_state *context, const struct dml_display_cfg_st *dml_dispcfg, unsigned int stream_id, int plane_index)
-{
-	unsigned int plane_id;
-	int i = 0;
-	int location = -1;
-
-	if (!get_plane_id(context->bw_ctx.dml2, context, plane, stream_id, plane_index, &plane_id)) {
-		ASSERT(false);
-		return -1;
-	}
-
-	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
-		if (dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[i] && dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i] == plane_id) {
-			location = i;
-			break;
-		}
-	}
-
-	return location;
-}
-
-static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, struct dc_state *state)
-{
-	unsigned int i;
-	unsigned int pipe_index = 0;
-	unsigned int plane_index = 0;
-	struct dml2_dml_to_dc_pipe_mapping *dml_to_dc_pipe_mapping = &dml2->v20.scratch.dml_to_dc_pipe_mapping;
-
-	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
-		dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[i] = false;
-		dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index[i] = 0;
-	}
-
-	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
-		struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
-
-		if (!pipe || !pipe->stream || !pipe->plane_state)
-			continue;
-
-		while (pipe) {
-			pipe_index = pipe->pipe_idx;
-
-			if (pipe->stream && dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[pipe_index] == false) {
-				dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index[pipe_index] = plane_index;
-				plane_index++;
-				dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[pipe_index] = true;
-			}
-
-			pipe = pipe->bottom_pipe;
-		}
-
-		plane_index = 0;
-	}
-}
-
-static void populate_dml_writeback_cfg_from_stream_state(struct dml_writeback_cfg_st *out,
-		unsigned int location, const struct dc_stream_state *in)
-{
-	if (in->num_wb_info > 0) {
-		for (int i = 0; i < __DML_NUM_DMB__; i++) {
-			const struct dc_writeback_info *wb_info = &in->writeback_info[i];
-			/* current dml supports 1 dwb per stream, a limitation */
-			if (wb_info->wb_enabled) {
-				out->WritebackEnable[location] = wb_info->wb_enabled;
-				out->ActiveWritebacksPerSurface[location] = wb_info->dwb_params.cnv_params.src_width;
-				out->WritebackDestinationWidth[location] = wb_info->dwb_params.dest_width;
-				out->WritebackDestinationHeight[location] = wb_info->dwb_params.dest_height;
-
-				out->WritebackSourceWidth[location] = wb_info->dwb_params.cnv_params.crop_en ?
-					wb_info->dwb_params.cnv_params.crop_width :
-					wb_info->dwb_params.cnv_params.src_width;
-
-				out->WritebackSourceHeight[location] = wb_info->dwb_params.cnv_params.crop_en ?
-					wb_info->dwb_params.cnv_params.crop_height :
-					wb_info->dwb_params.cnv_params.src_height;
-				/* current design does not have chroma scaling, need to follow up */
-				out->WritebackHTaps[location] = wb_info->dwb_params.scaler_taps.h_taps > 0 ?
-					wb_info->dwb_params.scaler_taps.h_taps : 1;
-				out->WritebackVTaps[location] = wb_info->dwb_params.scaler_taps.v_taps > 0 ?
-					wb_info->dwb_params.scaler_taps.v_taps : 1;
-
-				out->WritebackHRatio[location] = wb_info->dwb_params.cnv_params.crop_en ?
-					(double)wb_info->dwb_params.cnv_params.crop_width /
-					(double)wb_info->dwb_params.dest_width :
-					(double)wb_info->dwb_params.cnv_params.src_width /
-					(double)wb_info->dwb_params.dest_width;
-				out->WritebackVRatio[location] = wb_info->dwb_params.cnv_params.crop_en ?
-					(double)wb_info->dwb_params.cnv_params.crop_height /
-					(double)wb_info->dwb_params.dest_height :
-					(double)wb_info->dwb_params.cnv_params.src_height /
-					(double)wb_info->dwb_params.dest_height;
-			}
-		}
-	}
-}
-
-static void dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index(struct dml2_context *dml2, struct dc_state *context)
-{
-	int i;
-	struct pipe_ctx *current_pipe_context;
-
-	/* Scratch gets reset to zero in dml, but link encoder instance can be zero, so reset to -1 */
-	for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) {
-		dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[i] = -1;
-	}
-
-	/* If an HPO stream encoder is allocated to a pipe, get the instance of its allocated HPO Link encoder */
-	for (i = 0; i < MAX_PIPES; i++) {
-		current_pipe_context = &context->res_ctx.pipe_ctx[i];
-		if (current_pipe_context->stream &&
-			current_pipe_context->stream_res.hpo_dp_stream_enc &&
-			current_pipe_context->link_res.hpo_dp_link_enc &&
-			dc_is_dp_signal(current_pipe_context->stream->signal)) {
-			dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[current_pipe_context->stream_res.hpo_dp_stream_enc->inst] =
-				current_pipe_context->link_res.hpo_dp_link_enc->inst;
-		}
-	}
-}
-
-void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg)
-{
-	int i = 0, j = 0, k = 0;
-	int disp_cfg_stream_location, disp_cfg_plane_location;
-	enum mall_stream_type stream_mall_type;
-	struct pipe_ctx *current_pipe_context;
-
-	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
-		dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] = false;
-		dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[i] = false;
-		dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] = false;
-		dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] = false;
-	}
-
-	//Generally these are set by referencing our latest BB/IP params in dcn32_resource.c file
-	dml_dispcfg->plane.GPUVMEnable = dml2->v20.dml_core_ctx.ip.gpuvm_enable;
-	dml_dispcfg->plane.GPUVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.gpuvm_max_page_table_levels;
-	dml_dispcfg->plane.HostVMEnable = dml2->v20.dml_core_ctx.ip.hostvm_enable;
-	dml_dispcfg->plane.HostVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.hostvm_max_page_table_levels;
-	if (dml2->v20.dml_core_ctx.ip.hostvm_enable)
-		dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter;
-
-	dml2_populate_pipe_to_plane_index_mapping(dml2, context);
-	dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index(dml2, context);
-
-	for (i = 0; i < context->stream_count; i++) {
-		current_pipe_context = NULL;
-		for (k = 0; k < MAX_PIPES; k++) {
-			/* find one pipe allocated to this stream for the purpose of getting
-			info about the link later */
-			if (context->streams[i] == context->res_ctx.pipe_ctx[k].stream) {
-				current_pipe_context = &context->res_ctx.pipe_ctx[k];
-				break;
-			}
-		}
-		disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg);
-		stream_mall_type = dc_state_get_stream_subvp_type(context, context->streams[i]);
-
-		if (disp_cfg_stream_location < 0)
-			disp_cfg_stream_location = dml_dispcfg->num_timings++;
-
-		ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
-
-		populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]);
-		populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2);
-		/* Call site for populate_dml_writeback_cfg_from_stream_state */
-		populate_dml_writeback_cfg_from_stream_state(&dml_dispcfg->writeback,
-			disp_cfg_stream_location, context->streams[i]);
-
-		switch (context->streams[i]->debug.force_odm_combine_segments) {
-		case 2:
-			dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_2to1;
-			break;
-		case 4:
-			dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_4to1;
-			break;
-		default:
-			break;
-		}
-
-		dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_stream_location] = context->streams[i]->stream_id;
-		dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_stream_location] = true;
-
-		if (context->stream_status[i].plane_count == 0) {
-			disp_cfg_plane_location = dml_dispcfg->num_surfaces++;
-
-			populate_dummy_dml_surface_cfg(&dml_dispcfg->surface, disp_cfg_plane_location, context->streams[i]);
-			populate_dummy_dml_plane_cfg(&dml_dispcfg->plane, disp_cfg_plane_location,
-				context->streams[i], &dml2->v20.dml_core_ctx.soc);
-
-			dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location;
-
-			dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true;
-		} else {
-			for (j = 0; j < context->stream_status[i].plane_count; j++) {
-				disp_cfg_plane_location = map_plane_to_dml_display_cfg(dml2,
-					context->stream_status[i].plane_states[j], context, dml_dispcfg, context->streams[i]->stream_id, j);
-
-				if (disp_cfg_plane_location < 0)
-					disp_cfg_plane_location = dml_dispcfg->num_surfaces++;
-
-				ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
-
-				populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]);
-				populate_dml_plane_cfg_from_plane_state(
-					&dml_dispcfg->plane, disp_cfg_plane_location,
-					context->stream_status[i].plane_states[j], context,
-					&dml2->v20.dml_core_ctx.soc);
-
-				if (stream_mall_type == SUBVP_MAIN) {
-					dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport;
-					dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize;
-				} else if (stream_mall_type == SUBVP_PHANTOM) {
-					dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe;
-					dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_disable;
-					dml2->v20.dml_core_ctx.policy.ImmediateFlipRequirement[disp_cfg_plane_location] = dml_immediate_flip_not_required;
-				} else {
-					dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_disable;
-					dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize;
-				}
-
-				dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location;
-
-				if (get_plane_id(dml2, context, context->stream_status[i].plane_states[j], context->streams[i]->stream_id, j,
-					&dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location]))
-					dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true;
-
-				if (j >= 1) {
-					populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]);
-					populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context, dml2);
-					switch (context->streams[i]->debug.force_odm_combine_segments) {
-					case 2:
-						dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1;
-						break;
-					case 4:
-						dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_4to1;
-						break;
-					default:
-						break;
-					}
-
-					if (stream_mall_type == SUBVP_MAIN)
-						dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport;
-					else if (stream_mall_type == SUBVP_PHANTOM)
-						dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe;
-
-					dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_plane_location] = context->streams[i]->stream_id;
-					dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_plane_location] = true;
-
-					dml_dispcfg->num_timings++;
-				}
-			}
-		}
-	}
-}
-
-void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs,
-				     struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs,
-				     struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs,
-				     struct pipe_ctx *out)
-{
-	memset(&out->rq_regs, 0, sizeof(out->rq_regs));
-	out->rq_regs.rq_regs_l.chunk_size = rq_regs->rq_regs_l.chunk_size;
-	out->rq_regs.rq_regs_l.min_chunk_size = rq_regs->rq_regs_l.min_chunk_size;
-	out->rq_regs.rq_regs_l.meta_chunk_size = rq_regs->rq_regs_l.meta_chunk_size;
-	out->rq_regs.rq_regs_l.min_meta_chunk_size = rq_regs->rq_regs_l.min_meta_chunk_size;
-	out->rq_regs.rq_regs_l.dpte_group_size = rq_regs->rq_regs_l.dpte_group_size;
-	out->rq_regs.rq_regs_l.mpte_group_size = rq_regs->rq_regs_l.mpte_group_size;
-	out->rq_regs.rq_regs_l.swath_height = rq_regs->rq_regs_l.swath_height;
-	out->rq_regs.rq_regs_l.pte_row_height_linear = rq_regs->rq_regs_l.pte_row_height_linear;
-
-	out->rq_regs.rq_regs_c.chunk_size = rq_regs->rq_regs_c.chunk_size;
-	out->rq_regs.rq_regs_c.min_chunk_size = rq_regs->rq_regs_c.min_chunk_size;
-	out->rq_regs.rq_regs_c.meta_chunk_size = rq_regs->rq_regs_c.meta_chunk_size;
-	out->rq_regs.rq_regs_c.min_meta_chunk_size = rq_regs->rq_regs_c.min_meta_chunk_size;
-	out->rq_regs.rq_regs_c.dpte_group_size = rq_regs->rq_regs_c.dpte_group_size;
-	out->rq_regs.rq_regs_c.mpte_group_size = rq_regs->rq_regs_c.mpte_group_size;
-	out->rq_regs.rq_regs_c.swath_height = rq_regs->rq_regs_c.swath_height;
-	out->rq_regs.rq_regs_c.pte_row_height_linear = rq_regs->rq_regs_c.pte_row_height_linear;
-
-	out->rq_regs.drq_expansion_mode = rq_regs->drq_expansion_mode;
-	out->rq_regs.prq_expansion_mode = rq_regs->prq_expansion_mode;
-	out->rq_regs.mrq_expansion_mode = rq_regs->mrq_expansion_mode;
-	out->rq_regs.crq_expansion_mode = rq_regs->crq_expansion_mode;
-	out->rq_regs.plane1_base_address = rq_regs->plane1_base_address;
-
-	memset(&out->dlg_regs, 0, sizeof(out->dlg_regs));
-	out->dlg_regs.refcyc_h_blank_end = disp_dlg_regs->refcyc_h_blank_end;
-	out->dlg_regs.dlg_vblank_end = disp_dlg_regs->dlg_vblank_end;
-	out->dlg_regs.min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start;
-	out->dlg_regs.refcyc_per_htotal = disp_dlg_regs->refcyc_per_htotal;
-	out->dlg_regs.refcyc_x_after_scaler = disp_dlg_regs->refcyc_x_after_scaler;
-	out->dlg_regs.dst_y_after_scaler = disp_dlg_regs->dst_y_after_scaler;
-	out->dlg_regs.dst_y_prefetch = disp_dlg_regs->dst_y_prefetch;
-	out->dlg_regs.dst_y_per_vm_vblank = disp_dlg_regs->dst_y_per_vm_vblank;
-	out->dlg_regs.dst_y_per_row_vblank = disp_dlg_regs->dst_y_per_row_vblank;
-	out->dlg_regs.dst_y_per_vm_flip = disp_dlg_regs->dst_y_per_vm_flip;
-	out->dlg_regs.dst_y_per_row_flip = disp_dlg_regs->dst_y_per_row_flip;
-	out->dlg_regs.ref_freq_to_pix_freq = disp_dlg_regs->ref_freq_to_pix_freq;
-	out->dlg_regs.vratio_prefetch = disp_dlg_regs->vratio_prefetch;
-	out->dlg_regs.vratio_prefetch_c = disp_dlg_regs->vratio_prefetch_c;
-	out->dlg_regs.refcyc_per_pte_group_vblank_l = disp_dlg_regs->refcyc_per_pte_group_vblank_l;
-	out->dlg_regs.refcyc_per_pte_group_vblank_c = disp_dlg_regs->refcyc_per_pte_group_vblank_c;
-	out->dlg_regs.refcyc_per_meta_chunk_vblank_l = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l;
-	out->dlg_regs.refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_c;
-	out->dlg_regs.refcyc_per_pte_group_flip_l = disp_dlg_regs->refcyc_per_pte_group_flip_l;
-	out->dlg_regs.refcyc_per_pte_group_flip_c = disp_dlg_regs->refcyc_per_pte_group_flip_c;
-	out->dlg_regs.refcyc_per_meta_chunk_flip_l = disp_dlg_regs->refcyc_per_meta_chunk_flip_l;
-	out->dlg_regs.refcyc_per_meta_chunk_flip_c = disp_dlg_regs->refcyc_per_meta_chunk_flip_c;
-	out->dlg_regs.dst_y_per_pte_row_nom_l = disp_dlg_regs->dst_y_per_pte_row_nom_l;
-	out->dlg_regs.dst_y_per_pte_row_nom_c = disp_dlg_regs->dst_y_per_pte_row_nom_c;
-	out->dlg_regs.refcyc_per_pte_group_nom_l = disp_dlg_regs->refcyc_per_pte_group_nom_l;
-	out->dlg_regs.refcyc_per_pte_group_nom_c = disp_dlg_regs->refcyc_per_pte_group_nom_c;
-	out->dlg_regs.dst_y_per_meta_row_nom_l = disp_dlg_regs->dst_y_per_meta_row_nom_l;
-	out->dlg_regs.dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_c;
-	out->dlg_regs.refcyc_per_meta_chunk_nom_l = disp_dlg_regs->refcyc_per_meta_chunk_nom_l;
-	out->dlg_regs.refcyc_per_meta_chunk_nom_c = disp_dlg_regs->refcyc_per_meta_chunk_nom_c;
-	out->dlg_regs.refcyc_per_line_delivery_pre_l = disp_dlg_regs->refcyc_per_line_delivery_pre_l;
-	out->dlg_regs.refcyc_per_line_delivery_pre_c = disp_dlg_regs->refcyc_per_line_delivery_pre_c;
-	out->dlg_regs.refcyc_per_line_delivery_l = disp_dlg_regs->refcyc_per_line_delivery_l;
-	out->dlg_regs.refcyc_per_line_delivery_c = disp_dlg_regs->refcyc_per_line_delivery_c;
-	out->dlg_regs.refcyc_per_vm_group_vblank = disp_dlg_regs->refcyc_per_vm_group_vblank;
-	out->dlg_regs.refcyc_per_vm_group_flip = disp_dlg_regs->refcyc_per_vm_group_flip;
-	out->dlg_regs.refcyc_per_vm_req_vblank = disp_dlg_regs->refcyc_per_vm_req_vblank;
-	out->dlg_regs.refcyc_per_vm_req_flip = disp_dlg_regs->refcyc_per_vm_req_flip;
-	out->dlg_regs.dst_y_offset_cur0 = disp_dlg_regs->dst_y_offset_cur0;
-	out->dlg_regs.chunk_hdl_adjust_cur0 = disp_dlg_regs->chunk_hdl_adjust_cur0;
-	out->dlg_regs.dst_y_offset_cur1 = disp_dlg_regs->dst_y_offset_cur1;
-	out->dlg_regs.chunk_hdl_adjust_cur1 = disp_dlg_regs->chunk_hdl_adjust_cur1;
-	out->dlg_regs.vready_after_vcount0 = disp_dlg_regs->vready_after_vcount0;
-	out->dlg_regs.dst_y_delta_drq_limit = disp_dlg_regs->dst_y_delta_drq_limit;
-	out->dlg_regs.refcyc_per_vm_dmdata = disp_dlg_regs->refcyc_per_vm_dmdata;
-	out->dlg_regs.dmdata_dl_delta = disp_dlg_regs->dmdata_dl_delta;
-
-	memset(&out->ttu_regs, 0, sizeof(out->ttu_regs));
-	out->ttu_regs.qos_level_low_wm = disp_ttu_regs->qos_level_low_wm;
-	out->ttu_regs.qos_level_high_wm = disp_ttu_regs->qos_level_high_wm;
-	out->ttu_regs.min_ttu_vblank = disp_ttu_regs->min_ttu_vblank;
-	out->ttu_regs.qos_level_flip = disp_ttu_regs->qos_level_flip;
-	out->ttu_regs.refcyc_per_req_delivery_l = disp_ttu_regs->refcyc_per_req_delivery_l;
-	out->ttu_regs.refcyc_per_req_delivery_c = disp_ttu_regs->refcyc_per_req_delivery_c;
-	out->ttu_regs.refcyc_per_req_delivery_cur0 = disp_ttu_regs->refcyc_per_req_delivery_cur0;
-	out->ttu_regs.refcyc_per_req_delivery_cur1 = disp_ttu_regs->refcyc_per_req_delivery_cur1;
-	out->ttu_regs.refcyc_per_req_delivery_pre_l = disp_ttu_regs->refcyc_per_req_delivery_pre_l;
-	out->ttu_regs.refcyc_per_req_delivery_pre_c = disp_ttu_regs->refcyc_per_req_delivery_pre_c;
-	out->ttu_regs.refcyc_per_req_delivery_pre_cur0 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur0;
-	out->ttu_regs.refcyc_per_req_delivery_pre_cur1 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur1;
-	out->ttu_regs.qos_level_fixed_l = disp_ttu_regs->qos_level_fixed_l;
-	out->ttu_regs.qos_level_fixed_c = disp_ttu_regs->qos_level_fixed_c;
-	out->ttu_regs.qos_level_fixed_cur0 = disp_ttu_regs->qos_level_fixed_cur0;
-	out->ttu_regs.qos_level_fixed_cur1 = disp_ttu_regs->qos_level_fixed_cur1;
-	out->ttu_regs.qos_ramp_disable_l = disp_ttu_regs->qos_ramp_disable_l;
-	out->ttu_regs.qos_ramp_disable_c = disp_ttu_regs->qos_ramp_disable_c;
-	out->ttu_regs.qos_ramp_disable_cur0 = disp_ttu_regs->qos_ramp_disable_cur0;
-	out->ttu_regs.qos_ramp_disable_cur1 = disp_ttu_regs->qos_ramp_disable_cur1;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h
deleted file mode 100644
index d764773938f4..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright 2023 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef __DML2_TRANSLATION_HELPER_H__
-#define __DML2_TRANSLATION_HELPER_H__
-
-void dml2_init_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out);
-void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out);
-void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
-		const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out);
-void dml2_translate_ip_params(const struct dc *in_dc, struct ip_params_st *out);
-void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box_st *out);
-void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states);
-void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg);
-void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out);
-bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe);
-
-#endif //__DML2_TRANSLATION_HELPER_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
deleted file mode 100644
index 9a33158b63bf..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ /dev/null
@@ -1,560 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright 2023 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-//#include "dml2_utils.h"
-#include "display_mode_core.h"
-#include "dml_display_rq_dlg_calc.h"
-#include "dml2_internal_types.h"
-#include "dml2_translation_helper.h"
-#include "dml2_utils.h"
-
-void dml2_util_copy_dml_timing(struct dml_timing_cfg_st *dml_timing_array, unsigned int dst_index, unsigned int src_index)
-{
-	dml_timing_array->HTotal[dst_index] = dml_timing_array->HTotal[src_index];
-	dml_timing_array->VTotal[dst_index] = dml_timing_array->VTotal[src_index];
-	dml_timing_array->HBlankEnd[dst_index] = dml_timing_array->HBlankEnd[src_index];
-	dml_timing_array->VBlankEnd[dst_index] = dml_timing_array->VBlankEnd[src_index];
-	dml_timing_array->RefreshRate[dst_index] = dml_timing_array->RefreshRate[src_index];
-	dml_timing_array->VFrontPorch[dst_index] = dml_timing_array->VFrontPorch[src_index];
-	dml_timing_array->PixelClock[dst_index] = dml_timing_array->PixelClock[src_index];
-	dml_timing_array->HActive[dst_index] = dml_timing_array->HActive[src_index];
-	dml_timing_array->VActive[dst_index] = dml_timing_array->VActive[src_index];
-	dml_timing_array->Interlace[dst_index] = dml_timing_array->Interlace[src_index];
-	dml_timing_array->DRRDisplay[dst_index] = dml_timing_array->DRRDisplay[src_index];
-	dml_timing_array->VBlankNom[dst_index] = dml_timing_array->VBlankNom[src_index];
-}
-
-void dml2_util_copy_dml_plane(struct dml_plane_cfg_st *dml_plane_array, unsigned int dst_index, unsigned int src_index)
-{
-	dml_plane_array->GPUVMMinPageSizeKBytes[dst_index] = dml_plane_array->GPUVMMinPageSizeKBytes[src_index];
-	dml_plane_array->ForceOneRowForFrame[dst_index] = dml_plane_array->ForceOneRowForFrame[src_index];
-	dml_plane_array->PTEBufferModeOverrideEn[dst_index] = dml_plane_array->PTEBufferModeOverrideEn[src_index];
-	dml_plane_array->PTEBufferMode[dst_index] = dml_plane_array->PTEBufferMode[src_index];
-	dml_plane_array->ViewportWidth[dst_index] = dml_plane_array->ViewportWidth[src_index];
-	dml_plane_array->ViewportHeight[dst_index] = dml_plane_array->ViewportHeight[src_index];
-	dml_plane_array->ViewportWidthChroma[dst_index] = dml_plane_array->ViewportWidthChroma[src_index];
-	dml_plane_array->ViewportHeightChroma[dst_index] = dml_plane_array->ViewportHeightChroma[src_index];
-	dml_plane_array->ViewportXStart[dst_index] = dml_plane_array->ViewportXStart[src_index];
-	dml_plane_array->ViewportXStartC[dst_index] = dml_plane_array->ViewportXStartC[src_index];
-	dml_plane_array->ViewportYStart[dst_index] = dml_plane_array->ViewportYStart[src_index];
-	dml_plane_array->ViewportYStartC[dst_index] = dml_plane_array->ViewportYStartC[src_index];
-	dml_plane_array->ViewportStationary[dst_index] = dml_plane_array->ViewportStationary[src_index];
-
-	dml_plane_array->ScalerEnabled[dst_index] = dml_plane_array->ScalerEnabled[src_index];
-	dml_plane_array->HRatio[dst_index] = dml_plane_array->HRatio[src_index];
-	dml_plane_array->VRatio[dst_index] = dml_plane_array->VRatio[src_index];
-	dml_plane_array->HRatioChroma[dst_index] = dml_plane_array->HRatioChroma[src_index];
-	dml_plane_array->VRatioChroma[dst_index] = dml_plane_array->VRatioChroma[src_index];
-	dml_plane_array->HTaps[dst_index] = dml_plane_array->HTaps[src_index];
-	dml_plane_array->VTaps[dst_index] = dml_plane_array->VTaps[src_index];
-	dml_plane_array->HTapsChroma[dst_index] = dml_plane_array->HTapsChroma[src_index];
-	dml_plane_array->VTapsChroma[dst_index] = dml_plane_array->VTapsChroma[src_index];
-	dml_plane_array->LBBitPerPixel[dst_index] = dml_plane_array->LBBitPerPixel[src_index];
-
-	dml_plane_array->SourceScan[dst_index] = dml_plane_array->SourceScan[src_index];
-	dml_plane_array->ScalerRecoutWidth[dst_index] = dml_plane_array->ScalerRecoutWidth[src_index];
-
-	dml_plane_array->DynamicMetadataEnable[dst_index] = dml_plane_array->DynamicMetadataEnable[src_index];
-	dml_plane_array->DynamicMetadataLinesBeforeActiveRequired[dst_index] = dml_plane_array->DynamicMetadataLinesBeforeActiveRequired[src_index];
-	dml_plane_array->DynamicMetadataTransmittedBytes[dst_index] = dml_plane_array->DynamicMetadataTransmittedBytes[src_index];
-	dml_plane_array->DETSizeOverride[dst_index] = dml_plane_array->DETSizeOverride[src_index];
-
-	dml_plane_array->NumberOfCursors[dst_index] = dml_plane_array->NumberOfCursors[src_index];
-	dml_plane_array->CursorWidth[dst_index] = dml_plane_array->CursorWidth[src_index];
-	dml_plane_array->CursorBPP[dst_index] = dml_plane_array->CursorBPP[src_index];
-
-	dml_plane_array->UseMALLForStaticScreen[dst_index] = dml_plane_array->UseMALLForStaticScreen[src_index];
-	dml_plane_array->UseMALLForPStateChange[dst_index] = dml_plane_array->UseMALLForPStateChange[src_index];
-
-	dml_plane_array->BlendingAndTiming[dst_index] = dml_plane_array->BlendingAndTiming[src_index];
-}
-
-void dml2_util_copy_dml_surface(struct dml_surface_cfg_st *dml_surface_array, unsigned int dst_index, unsigned int src_index)
-{
-	dml_surface_array->SurfaceTiling[dst_index] = dml_surface_array->SurfaceTiling[src_index];
-	dml_surface_array->SourcePixelFormat[dst_index] = dml_surface_array->SourcePixelFormat[src_index];
-	dml_surface_array->PitchY[dst_index] = dml_surface_array->PitchY[src_index];
-	dml_surface_array->SurfaceWidthY[dst_index] = dml_surface_array->SurfaceWidthY[src_index];
-	dml_surface_array->SurfaceHeightY[dst_index] = dml_surface_array->SurfaceHeightY[src_index];
-	dml_surface_array->PitchC[dst_index] = dml_surface_array->PitchC[src_index];
-	dml_surface_array->SurfaceWidthC[dst_index] = dml_surface_array->SurfaceWidthC[src_index];
-	dml_surface_array->SurfaceHeightC[dst_index] = dml_surface_array->SurfaceHeightC[src_index];
-
-	dml_surface_array->DCCEnable[dst_index] = dml_surface_array->DCCEnable[src_index];
-	dml_surface_array->DCCMetaPitchY[dst_index] = dml_surface_array->DCCMetaPitchY[src_index];
-	dml_surface_array->DCCMetaPitchC[dst_index] = dml_surface_array->DCCMetaPitchC[src_index];
-
-	dml_surface_array->DCCRateLuma[dst_index] = dml_surface_array->DCCRateLuma[src_index];
-	dml_surface_array->DCCRateChroma[dst_index] = dml_surface_array->DCCRateChroma[src_index];
-	dml_surface_array->DCCFractionOfZeroSizeRequestsLuma[dst_index] = dml_surface_array->DCCFractionOfZeroSizeRequestsLuma[src_index];
-	dml_surface_array->DCCFractionOfZeroSizeRequestsChroma[dst_index] = dml_surface_array->DCCFractionOfZeroSizeRequestsChroma[src_index];
-}
-
-void dml2_util_copy_dml_output(struct dml_output_cfg_st *dml_output_array, unsigned int dst_index, unsigned int src_index)
-{
-	dml_output_array->DSCInputBitPerComponent[dst_index] = dml_output_array->DSCInputBitPerComponent[src_index];
-	dml_output_array->OutputFormat[dst_index] = dml_output_array->OutputFormat[src_index];
-	dml_output_array->OutputEncoder[dst_index] = dml_output_array->OutputEncoder[src_index];
-	dml_output_array->OutputMultistreamId[dst_index] = dml_output_array->OutputMultistreamId[src_index];
-	dml_output_array->OutputMultistreamEn[dst_index] = dml_output_array->OutputMultistreamEn[src_index];
-	dml_output_array->OutputBpp[dst_index] = dml_output_array->OutputBpp[src_index];
-	dml_output_array->PixelClockBackEnd[dst_index] = dml_output_array->PixelClockBackEnd[src_index];
-	dml_output_array->DSCEnable[dst_index] = dml_output_array->DSCEnable[src_index];
-	dml_output_array->OutputLinkDPLanes[dst_index] = dml_output_array->OutputLinkDPLanes[src_index];
-	dml_output_array->OutputLinkDPRate[dst_index] = dml_output_array->OutputLinkDPRate[src_index];
-	dml_output_array->ForcedOutputLinkBPP[dst_index] = dml_output_array->ForcedOutputLinkBPP[src_index];
-	dml_output_array->AudioSampleRate[dst_index] = dml_output_array->AudioSampleRate[src_index];
-	dml_output_array->AudioSampleLayout[dst_index] = dml_output_array->AudioSampleLayout[src_index];
-}
-
-unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, enum dml_output_encoder_class encoder, bool dsc_enabled)
-{
-	switch (encoder) {
-	case dml_dp:
-	case dml_edp:
-		return 2;
-	case dml_dp2p0:
-		if (dsc_enabled || force_odm_4to1)
-			return 4;
-		else
-			return 2;
-	case dml_hdmi:
-		return 1;
-	case dml_hdmifrl:
-		if (force_odm_4to1)
-			return 4;
-		else
-			return 2;
-	default:
-		return 1;
-	}
-}
-
-bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx)
-{
-	if (pipe_ctx == NULL || pipe_ctx->stream == NULL)
-		return false;
-
-	/* If this assert is hit then we have a link encoder dynamic management issue */
-	ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true);
-
-	return (pipe_ctx->stream_res.hpo_dp_stream_enc &&
-		pipe_ctx->link_res.hpo_dp_link_enc &&
-		dc_is_dp_signal(pipe_ctx->stream->signal));
-}
-
-bool is_dtbclk_required(const struct dc *dc, struct dc_state *context)
-{
-	int i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		if (!context->res_ctx.pipe_ctx[i].stream)
-			continue;
-		if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i]))
-			return true;
-	}
-	return false;
-}
-
-void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_state *context)
-{
-	context->bw_ctx.bw.dcn.clk.dispclk_khz = out_clks->dispclk_khz;
-	context->bw_ctx.bw.dcn.clk.dcfclk_khz = out_clks->dcfclk_khz;
-	context->bw_ctx.bw.dcn.clk.dramclk_khz = out_clks->uclk_mts / 16;
-	context->bw_ctx.bw.dcn.clk.fclk_khz = out_clks->fclk_khz;
-	context->bw_ctx.bw.dcn.clk.phyclk_khz = out_clks->phyclk_khz;
-	context->bw_ctx.bw.dcn.clk.socclk_khz = out_clks->socclk_khz;
-	context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = out_clks->ref_dtbclk_khz;
-	context->bw_ctx.bw.dcn.clk.p_state_change_support = out_clks->p_state_supported;
-}
-
-int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id)
-{
-	int i;
-	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
-		if (ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] && ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[i] == stream_id)
-			return i;
-	}
-
-	return -1;
-}
-
-static int find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id)
-{
-	int i;
-	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
-		if (ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] && ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[i] == plane_id)
-			return i;
-	}
-
-	return -1;
-}
-
-static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane,
-		unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id)
-{
-	unsigned int i, j;
-	bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists;
-
-	if (!plane_id)
-		return false;
-
-	for (i = 0; i < state->stream_count; i++) {
-		if (state->streams[i]->stream_id == stream_id) {
-			for (j = 0; j < state->stream_status[i].plane_count; j++) {
-				if (state->stream_status[i].plane_states[j] == plane &&
-					(!is_plane_duplicate || (j == plane_index))) {
-					*plane_id = (i << 16) | j;
-					return true;
-				}
-			}
-		}
-	}
-
-	return false;
-}
-
-static void populate_pipe_ctx_dlg_params_from_dml(struct pipe_ctx *pipe_ctx, struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx)
-{
-	unsigned int hactive, vactive, hblank_start, vblank_start, hblank_end, vblank_end;
-	struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
-
-	hactive = timing->h_addressable + timing->h_border_left + timing->h_border_right;
-	vactive = timing->v_addressable + timing->v_border_bottom + timing->v_border_top;
-	hblank_start = pipe_ctx->stream->timing.h_total - pipe_ctx->stream->timing.h_front_porch;
-	vblank_start = pipe_ctx->stream->timing.v_total - pipe_ctx->stream->timing.v_front_porch;
-
-	hblank_end = hblank_start - timing->h_addressable - timing->h_border_left - timing->h_border_right;
-	vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom;
-
-	pipe_ctx->pipe_dlg_param.vstartup_start = dml_get_vstartup_calculated(mode_lib, pipe_idx);
-	pipe_ctx->pipe_dlg_param.vupdate_offset = dml_get_vupdate_offset(mode_lib, pipe_idx);
-	pipe_ctx->pipe_dlg_param.vupdate_width = dml_get_vupdate_width(mode_lib, pipe_idx);
-	pipe_ctx->pipe_dlg_param.vready_offset = dml_get_vready_offset(mode_lib, pipe_idx);
-
-	pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst;
-
-	pipe_ctx->pipe_dlg_param.hactive = hactive;
-	pipe_ctx->pipe_dlg_param.vactive = vactive;
-	pipe_ctx->pipe_dlg_param.htotal = pipe_ctx->stream->timing.h_total;
-	pipe_ctx->pipe_dlg_param.vtotal = pipe_ctx->stream->timing.v_total;
-	pipe_ctx->pipe_dlg_param.hblank_end = hblank_end;
-	pipe_ctx->pipe_dlg_param.vblank_end = vblank_end;
-	pipe_ctx->pipe_dlg_param.hblank_start = hblank_start;
-	pipe_ctx->pipe_dlg_param.vblank_start = vblank_start;
-	pipe_ctx->pipe_dlg_param.vfront_porch = pipe_ctx->stream->timing.v_front_porch;
-	pipe_ctx->pipe_dlg_param.pixel_rate_mhz = pipe_ctx->stream->timing.pix_clk_100hz / 10000.00;
-	pipe_ctx->pipe_dlg_param.refresh_rate = ((timing->pix_clk_100hz * 100) / timing->h_total) / timing->v_total;
-	pipe_ctx->pipe_dlg_param.vtotal_max = pipe_ctx->stream->adjust.v_total_max;
-	pipe_ctx->pipe_dlg_param.vtotal_min = pipe_ctx->stream->adjust.v_total_min;
-	pipe_ctx->pipe_dlg_param.recout_height = pipe_ctx->plane_res.scl_data.recout.height;
-	pipe_ctx->pipe_dlg_param.recout_width = pipe_ctx->plane_res.scl_data.recout.width;
-	pipe_ctx->pipe_dlg_param.full_recout_height = pipe_ctx->plane_res.scl_data.recout.height;
-	pipe_ctx->pipe_dlg_param.full_recout_width = pipe_ctx->plane_res.scl_data.recout.width;
-}
-
-void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt)
-{
-	unsigned int dc_pipe_ctx_index, dml_pipe_idx, plane_id;
-	enum mall_stream_type pipe_mall_type;
-	struct dml2_calculate_rq_and_dlg_params_scratch *s = &in_ctx->v20.scratch.calculate_rq_and_dlg_params_scratch;
-
-	context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (unsigned int)in_ctx->v20.dml_core_ctx.mp.DCFCLKDeepSleep * 1000;
-	context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
-
-	if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)
-		context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false;
-	else
-		context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true;
-
-	if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
-		context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz;
-
-	context->bw_ctx.bw.dcn.compbuf_size_kb = in_ctx->v20.dml_core_ctx.ip.config_return_buffer_size_in_kbytes;
-
-	for (dc_pipe_ctx_index = 0; dc_pipe_ctx_index < pipe_cnt; dc_pipe_ctx_index++) {
-		if (!context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream)
-			continue;
-		/* The DML2 and the DC logic of determining pipe indices are different from each other so
-		 * there is a need to know which DML pipe index maps to which DC pipe. The code below
-		 * finds a dml_pipe_index from the plane id if a plane is valid. If a plane is not valid then
-		 * it finds a dml_pipe_index from the stream id. */
-		if (get_plane_id(in_ctx, context, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state,
-			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id,
-			in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[context->res_ctx.pipe_ctx[dc_pipe_ctx_index].pipe_idx], &plane_id)) {
-			dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id);
-		} else {
-			dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id);
-		}
-
-		if (dml_pipe_idx == 0xFFFFFFFF)
-			continue;
-		ASSERT(in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[dml_pipe_idx]);
-		ASSERT(in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_pipe_idx] == context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id);
-
-		/* Use the dml_pipe_index here for the getters to fetch the correct values and dc_pipe_index in the pipe_ctx to populate them
-		 * at the right locations.
-		 */
-		populate_pipe_ctx_dlg_params_from_dml(&context->res_ctx.pipe_ctx[dc_pipe_ctx_index], &context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx);
-
-		pipe_mall_type = dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[dc_pipe_ctx_index]);
-		if (pipe_mall_type == SUBVP_PHANTOM) {
-			// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
-			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = 0;
-			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = false;
-		} else {
-			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = dml_get_det_buffer_size_kbytes(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx);
-			// Unbounded requesting should not ever be used when more than 1 pipe is enabled.
-			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = in_ctx->v20.dml_core_ctx.ms.UnboundedRequestEnabledThisState;
-		}
-
-		context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb;
-		context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_res.bw.dppclk_khz = dml_get_dppclk_calculated(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx) * 1000;
-		if (context->bw_ctx.bw.dcn.clk.dppclk_khz < context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_res.bw.dppclk_khz)
-			context->bw_ctx.bw.dcn.clk.dppclk_khz = context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_res.bw.dppclk_khz;
-
-		dml_rq_dlg_get_rq_reg(&s->rq_regs, &in_ctx->v20.dml_core_ctx, dml_pipe_idx);
-		dml_rq_dlg_get_dlg_reg(&s->disp_dlg_regs, &s->disp_ttu_regs, &in_ctx->v20.dml_core_ctx, dml_pipe_idx);
-		dml2_update_pipe_ctx_dchub_regs(&s->rq_regs, &s->disp_dlg_regs, &s->disp_ttu_regs, &out_new_hw_state->pipe_ctx[dc_pipe_ctx_index]);
-
-		context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes = dml_get_surface_size_for_mall(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx);
-
-		/* Reuse MALL Allocation Sizes logic from dcn32_fpu.c */
-		/* Count from active, top pipes per plane only. Only add mall_ss_size_bytes for each unique plane. */
-		if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream && context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state &&
-			(context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe == NULL ||
-			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state != context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe->plane_state) &&
-			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].prev_odm_pipe == NULL) {
-			/* SS: all active surfaces stored in MALL */
-			if (pipe_mall_type != SUBVP_PHANTOM) {
-				context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes;
-			} else {
-				/* SUBVP: phantom surfaces only stored in MALL */
-				context->bw_ctx.bw.dcn.mall_subvp_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes;
-			}
-		}
-	}
-
-	context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
-	context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
-
-	context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = in_ctx->v20.dml_core_ctx.states.state_array[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx].dppclk_mhz
-		* 1000;
-	context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = in_ctx->v20.dml_core_ctx.states.state_array[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx].dispclk_mhz
-		* 1000;
-
-	if (dc->config.forced_clocks || dc->debug.max_disp_clk) {
-		context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz;
-		context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz;
-	}
-}
-
-void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx)
-{
-	watermark->urgent_ns = dml_get_wm_urgent(dml_core_ctx) * 1000;
-	watermark->cstate_pstate.cstate_enter_plus_exit_ns = dml_get_wm_stutter_enter_exit(dml_core_ctx) * 1000;
-	watermark->cstate_pstate.cstate_exit_ns = dml_get_wm_stutter_exit(dml_core_ctx) * 1000;
-	watermark->cstate_pstate.pstate_change_ns = dml_get_wm_dram_clock_change(dml_core_ctx) * 1000;
-	watermark->pte_meta_urgent_ns = dml_get_wm_memory_trip(dml_core_ctx) * 1000;
-	watermark->frac_urg_bw_nom = dml_get_fraction_of_urgent_bandwidth(dml_core_ctx) * 1000;
-	watermark->frac_urg_bw_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(dml_core_ctx) * 1000;
-	watermark->urgent_latency_ns = dml_get_urgent_latency(dml_core_ctx) * 1000;
-	watermark->cstate_pstate.fclk_pstate_change_ns = dml_get_wm_fclk_change(dml_core_ctx) * 1000;
-	watermark->usr_retraining_ns = dml_get_wm_usr_retraining(dml_core_ctx) * 1000;
-	watermark->cstate_pstate.cstate_enter_plus_exit_z8_ns = dml_get_wm_z8_stutter_enter_exit(dml_core_ctx) * 1000;
-	watermark->cstate_pstate.cstate_exit_z8_ns = dml_get_wm_z8_stutter(dml_core_ctx) * 1000;
-}
-
-unsigned int dml2_calc_max_scaled_time(
-		unsigned int time_per_pixel,
-		enum mmhubbub_wbif_mode mode,
-		unsigned int urgent_watermark)
-{
-	unsigned int time_per_byte = 0;
-	unsigned int total_free_entry = 0xb40;
-	unsigned int buf_lh_capability;
-	unsigned int max_scaled_time;
-
-	if (mode == PACKED_444) /* packed mode 32 bpp */
-		time_per_byte = time_per_pixel/4;
-	else if (mode == PACKED_444_FP16) /* packed mode 64 bpp */
-		time_per_byte = time_per_pixel/8;
-
-	if (time_per_byte == 0)
-		time_per_byte = 1;
-
-	buf_lh_capability = (total_free_entry*time_per_byte*32) >> 6; /* time_per_byte is in u6.6*/
-	max_scaled_time = buf_lh_capability - urgent_watermark;
-	return max_scaled_time;
-}
-
-void dml2_extract_writeback_wm(struct dc_state *context, struct display_mode_lib_st *dml_core_ctx)
-{
-	int i, j = 0;
-	struct mcif_arb_params *wb_arb_params = NULL;
-	struct dcn_bw_writeback *bw_writeback = NULL;
-	enum mmhubbub_wbif_mode wbif_mode = PACKED_444_FP16; /*for now*/
-
-	if (context->stream_count != 0) {
-		for (i = 0; i < context->stream_count; i++) {
-			if (context->streams[i]->num_wb_info != 0)
-				j++;
-		}
-	}
-	if (j == 0) /*no dwb */
-		return;
-	for (i = 0; i < __DML_NUM_DMB__; i++) {
-		bw_writeback = &context->bw_ctx.bw.dcn.bw_writeback;
-		wb_arb_params = &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[i];
-
-		for (j = 0; j < 4; j++) {
-			/* current dml only has one set of watermarks, need to follow up */
-			bw_writeback->mcif_wb_arb[i].cli_watermark[j] =
-				dml_get_wm_writeback_urgent(dml_core_ctx) * 1000;
-			bw_writeback->mcif_wb_arb[i].pstate_watermark[j] =
-				dml_get_wm_writeback_dram_clock_change(dml_core_ctx) * 1000;
-		}
-		if (context->res_ctx.pipe_ctx[i].stream->phy_pix_clk != 0) {
-			/* time_per_pixel should be in u6.6 format */
-			bw_writeback->mcif_wb_arb[i].time_per_pixel =
-				(1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk;
-		}
-		bw_writeback->mcif_wb_arb[i].slice_lines = 32;
-		bw_writeback->mcif_wb_arb[i].arbitration_slice = 2;
-		bw_writeback->mcif_wb_arb[i].max_scaled_time =
-			dml2_calc_max_scaled_time(wb_arb_params->time_per_pixel,
-				wbif_mode, wb_arb_params->cli_watermark[0]);
-		/*not required any more*/
-		bw_writeback->mcif_wb_arb[i].dram_speed_change_duration =
-			dml_get_wm_writeback_dram_clock_change(dml_core_ctx) * 1000;
-
-	}
-}
-void dml2_initialize_det_scratch(struct dml2_context *in_ctx)
-{
-	int i;
-
-	for (i = 0; i < MAX_PLANES; i++) {
-		in_ctx->det_helper_scratch.dpps_per_surface[i] = 1;
-	}
-}
-
-static unsigned int find_planes_per_stream_and_stream_count(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg, int *num_of_planes_per_stream)
-{
-	unsigned int plane_index, stream_index = 0, num_of_streams;
-
-	for (plane_index = 0; plane_index < dml_dispcfg->num_surfaces; plane_index++) {
-		/* Number of planes per stream */
-		num_of_planes_per_stream[stream_index] += 1;
-
-		if (plane_index + 1 < dml_dispcfg->num_surfaces && dml_dispcfg->plane.BlendingAndTiming[plane_index] != dml_dispcfg->plane.BlendingAndTiming[plane_index + 1])
-			stream_index++;
-	}
-
-	num_of_streams = stream_index + 1;
-
-	return num_of_streams;
-}
-
-void dml2_apply_det_buffer_allocation_policy(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg)
-{
-	unsigned int num_of_streams = 0, plane_index = 0, max_det_size, stream_index = 0;
-	int num_of_planes_per_stream[__DML_NUM_PLANES__] = { 0 };
-
-	max_det_size = in_ctx->config.det_segment_size * in_ctx->config.max_segments_per_hubp;
-
-	num_of_streams = find_planes_per_stream_and_stream_count(in_ctx, dml_dispcfg, num_of_planes_per_stream);
-
-	for (plane_index = 0; plane_index < dml_dispcfg->num_surfaces; plane_index++) {
-
-		if (in_ctx->config.override_det_buffer_size_kbytes)
-			dml_dispcfg->plane.DETSizeOverride[plane_index] = max_det_size / in_ctx->config.dcn_pipe_count;
-		else {
-			dml_dispcfg->plane.DETSizeOverride[plane_index] = ((max_det_size / num_of_streams) / num_of_planes_per_stream[stream_index] / in_ctx->det_helper_scratch.dpps_per_surface[plane_index]);
-
-			/* If the override size is not divisible by det_segment_size then round off to nearest number divisible by det_segment_size as
-			 * this is a requirement.
-			 */
-			if (dml_dispcfg->plane.DETSizeOverride[plane_index] % in_ctx->config.det_segment_size != 0) {
-				dml_dispcfg->plane.DETSizeOverride[plane_index] = dml_dispcfg->plane.DETSizeOverride[plane_index] & ~0x3F;
-			}
-
-			if (plane_index + 1 < dml_dispcfg->num_surfaces && dml_dispcfg->plane.BlendingAndTiming[plane_index] != dml_dispcfg->plane.BlendingAndTiming[plane_index + 1])
-				stream_index++;
-		}
-	}
-}
-
-bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc_state *display_state, struct dml2_helper_det_policy_scratch *det_scratch)
-{
-	unsigned int i = 0, dml_pipe_idx = 0, plane_id = 0;
-	unsigned int max_det_size, total_det_allocated = 0;
-	bool need_recalculation = false;
-
-	max_det_size = in_ctx->config.det_segment_size * in_ctx->config.max_segments_per_hubp;
-
-	for (i = 0; i < MAX_PIPES; i++) {
-		if (!display_state->res_ctx.pipe_ctx[i].stream)
-			continue;
-		if (get_plane_id(in_ctx, display_state, display_state->res_ctx.pipe_ctx[i].plane_state,
-			display_state->res_ctx.pipe_ctx[i].stream->stream_id,
-			in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[display_state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id))
-			dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id);
-		else
-			dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, display_state->res_ctx.pipe_ctx[i].stream->stream_id);
-
-		if (dml_pipe_idx == 0xFFFFFFFF)
-			continue;
-		total_det_allocated += dml_get_det_buffer_size_kbytes(&in_ctx->v20.dml_core_ctx, dml_pipe_idx);
-		if (total_det_allocated > max_det_size) {
-			need_recalculation = true;
-		}
-	}
-
-	/* Store the DPPPerSurface for correctly determining the number of planes in the next call. */
-	for (i = 0; i < MAX_PLANES; i++) {
-		det_scratch->dpps_per_surface[i] = in_ctx->v20.scratch.cur_display_config.hw.DPPPerSurface[i];
-	}
-
-	return need_recalculation;
-}
-
-bool dml2_is_stereo_timing(const struct dc_stream_state *stream)
-{
-	bool is_stereo = false;
-
-	if ((stream->view_format ==
-			VIEW_3D_FORMAT_SIDE_BY_SIDE ||
-		stream->view_format ==
-			VIEW_3D_FORMAT_TOP_AND_BOTTOM) &&
-		(stream->timing.timing_3d_format ==
-			TIMING_3D_FORMAT_TOP_AND_BOTTOM ||
-		stream->timing.timing_3d_format ==
-			TIMING_3D_FORMAT_SIDE_BY_SIDE))
-		is_stereo = true;
-
-	return is_stereo;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
deleted file mode 100644
index 04fcfe637119..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright 2023 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- * - */ - -#ifndef _DML2_UTILS_H_ -#define _DML2_UTILS_H_ - -#include "os_types.h" -#include "dml2_dc_types.h" - -struct dc; -struct dml_timing_cfg_st; -struct dml2_dcn_clocks; -struct dc_state; - -void dml2_util_copy_dml_timing(struct dml_timing_cfg_st *dml_timing_array, unsigned int dst_index, unsigned int src_index); -void dml2_util_copy_dml_plane(struct dml_plane_cfg_st *dml_plane_array, unsigned int dst_index, unsigned int src_index); -void dml2_util_copy_dml_surface(struct dml_surface_cfg_st *dml_surface_array, unsigned int dst_index, unsigned int src_index); -void dml2_util_copy_dml_output(struct dml_output_cfg_st *dml_output_array, unsigned int dst_index, unsigned int src_index); -unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, enum dml_output_encoder_class encoder, bool dsc_enabled); -void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_state *context); -void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx); -void dml2_extract_writeback_wm(struct dc_state *context, struct display_mode_lib_st *dml_core_ctx); -int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id); -bool is_dtbclk_required(const struct dc *dc, struct dc_state *context); -bool dml2_is_stereo_timing(const struct dc_stream_state *stream); -unsigned int dml2_calc_max_scaled_time( - unsigned int time_per_pixel, - enum mmhubbub_wbif_mode mode, - unsigned int urgent_watermark); - -/* - * dml2_dc_construct_pipes - This function will determine if we need additional pipes based - * on the DML calculated outputs for MPC, ODM and allocate them as necessary. This function - * could be called after in dml_validate_build_resource after dml_mode_pragramming like : - * { - * ... - * map_hw_resources(&s->cur_display_config, &s->mode_support_info); - * result = dml_mode_programming(&in_ctx->dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true); - * dml2_dc_construct_pipes(in_display_state, s->mode_support_info, out_hw_context); - * ... - * } - * - * @context: To obtain res_ctx and read other information like stream ID etc. - * @dml_mode_support_st : To get the ODM, MPC outputs as determined by the DML. - * @out_hw_context : Handle to the new hardware context. - * - * - * Return: None. - */ -void dml2_dc_construct_pipes(struct dc_state *context, struct dml_mode_support_info_st *dml_mode_support_st, - struct resource_context *out_hw_context); - -/* - * dml2_predict_pipe_split - This function is the dml2 version of predict split pipe. It predicts a - * if pipe split is required or not and returns the output as a bool. - * @context : dc_state. - * @pipe : old_index is the index of the pipe as derived from pipe_idx. - * @index : index of the pipe - * - * - * Return: Returns the result in boolean. - */ -bool dml2_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index); - -/* - * dml2_build_mapped_resource - This function is the dml2 version of build_mapped_resource. - * In case of ODM, we need to build pipe hardware params again as done in dcn20_build_mapped_resource. - * @dc : struct dc - * @context : struct dc_state. - * @stream : stream whoose corresponding pipe params need to be modified. - * - * - * Return: Returns DC_OK if successful. 
-
-/*
- * dml2_extract_rq_regs - This function will extract the information needed for struct _vcs_dpi_display_rq_regs_st
- * and populate it.
- * @context: To obtain and populate the res_ctx->pipe_ctx->rq_regs with DML outputs.
- * @support : This structure has the DML intermediate outputs required to populate rq_regs.
- *
- *
- * Return: None.
- */
-
- /*
- * dml2_calculate_rq_and_dlg_params - This function will call into the DML2 functions needed
- * to populate the rq, ttu and dlg param structures.
- * @dc : struct dc
- * @context : dc_state provides a handle to selectively populate pipe_ctx
- * @out_new_hw_state: To obtain and populate the rq, dlg and ttu regs in
- * out_new_hw_state->pipe_ctx with DML outputs.
- * @in_ctx : This structure has the pointer to display_mode_lib_st.
- * @pipe_cnt : DML functions to obtain RQ, TTU and DLG params need a pipe_index.
- * This helps provide pipe_index in the pipe_cnt loop.
- *
- *
- * Return: None.
- */
-void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt);
-
-/*
- * dml2_apply_det_buffer_allocation_policy - This function will determine the DET Buffer size
- * and return the number of streams.
- * @dml2 : Handle for dml2 context
- * @dml_dispcfg : dml_dispcfg is the DML2 struct representing the current display config
- * Return : None.
- */
-void dml2_apply_det_buffer_allocation_policy(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg);
-
-/*
- * dml2_verify_det_buffer_configuration - This function will verify if the allocated DET buffer exceeds
- * the total available DET size and outputs a boolean to indicate if recalculation is needed.
- * @dml2 : Handle for dml2 context
- * @dml_dispcfg : dml_dispcfg is the DML2 struct representing the current display config
- * @struct dml2_helper_det_policy_scratch : Pointer to DET helper scratch
- * Return : returns true if recalculation is required, false otherwise.
- */
-bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc_state *display_state, struct dml2_helper_det_policy_scratch *det_scratch);
-
-/*
- * dml2_initialize_det_scratch - This function will initialize the DET scratch space as per requirements.
- * @dml2 : Handle for dml2 context
- * Return : None
- */
-void dml2_initialize_det_scratch(struct dml2_context *in_ctx);
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
deleted file mode 100644
index 9deb03a18ccc..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
+++ /dev/null
@@ -1,704 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright 2023 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "display_mode_core.h" -#include "dml2_internal_types.h" -#include "dml2_utils.h" -#include "dml2_policy.h" -#include "dml2_translation_helper.h" -#include "dml2_mall_phantom.h" -#include "dml2_dc_resource_mgmt.h" -#include "dml21_wrapper.h" - -static void initialize_dml2_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out) -{ - if (dml2->config.use_native_soc_bb_construction) - dml2_init_ip_params(dml2, in_dc, out); - else - dml2_translate_ip_params(in_dc, out); -} - -static void initialize_dml2_soc_bbox(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out) -{ - if (dml2->config.use_native_soc_bb_construction) - dml2_init_socbb_params(dml2, in_dc, out); - else - dml2_translate_socbb_params(in_dc, out); -} - -static void initialize_dml2_soc_states(struct dml2_context *dml2, - const struct dc *in_dc, const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out) -{ - if (dml2->config.use_native_soc_bb_construction) - dml2_init_soc_states(dml2, in_dc, in_bbox, out); - else - dml2_translate_soc_states(in_dc, out, in_dc->dml.soc.num_states); -} - -static void map_hw_resources(struct dml2_context *dml2, - struct dml_display_cfg_st *in_out_display_cfg, struct dml_mode_support_info_st *mode_support_info) -{ - unsigned int num_pipes = 0; - int i, j; - - for (i = 0; i < __DML_NUM_PLANES__; i++) { - in_out_display_cfg->hw.ODMMode[i] = mode_support_info->ODMMode[i]; - in_out_display_cfg->hw.DPPPerSurface[i] = mode_support_info->DPPPerSurface[i]; - in_out_display_cfg->hw.DSCEnabled[i] = mode_support_info->DSCEnabled[i]; - in_out_display_cfg->hw.NumberOfDSCSlices[i] = mode_support_info->NumberOfDSCSlices[i]; - in_out_display_cfg->hw.DLGRefClkFreqMHz = 24; - if (dml2->v20.dml_core_ctx.project != dml_project_dcn35 && - dml2->v20.dml_core_ctx.project != dml_project_dcn36 && - dml2->v20.dml_core_ctx.project != dml_project_dcn351) { - /*dGPU default as 50Mhz*/ - in_out_display_cfg->hw.DLGRefClkFreqMHz = 50; - } - for (j = 0; j < mode_support_info->DPPPerSurface[i]; j++) { - if (i >= __DML2_WRAPPER_MAX_STREAMS_PLANES__) { - dml_print("DML::%s: Index out of bounds: i=%d, __DML2_WRAPPER_MAX_STREAMS_PLANES__=%d\n", - __func__, i, __DML2_WRAPPER_MAX_STREAMS_PLANES__); - break; - } - dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i]; - dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[num_pipes] = true; - dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i]; - dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[num_pipes] = true; - num_pipes++; - } - } -} - -static unsigned int pack_and_call_dml_mode_support_ex(struct dml2_context *dml2, - const struct dml_display_cfg_st *display_cfg, - struct dml_mode_support_info_st *evaluation_info, - enum dc_validate_mode validate_mode) -{ - struct 
dml2_wrapper_scratch *s = &dml2->v20.scratch; - - s->mode_support_params.mode_lib = &dml2->v20.dml_core_ctx; - s->mode_support_params.in_display_cfg = display_cfg; - if (validate_mode == DC_VALIDATE_MODE_ONLY) - s->mode_support_params.in_start_state_idx = dml2->v20.dml_core_ctx.states.num_states - 1; - else - s->mode_support_params.in_start_state_idx = 0; - s->mode_support_params.out_evaluation_info = evaluation_info; - - memset(evaluation_info, 0, sizeof(struct dml_mode_support_info_st)); - s->mode_support_params.out_lowest_state_idx = 0; - - return dml_mode_support_ex(&s->mode_support_params); -} - -static bool optimize_configuration(struct dml2_context *dml2, struct dml2_wrapper_optimize_configuration_params *p) -{ - int unused_dpps = p->ip_params->max_num_dpp; - int i; - int odms_needed; - int largest_blend_and_timing = 0; - bool optimization_done = false; - - for (i = 0; i < (int) p->cur_display_config->num_timings; i++) { - if (p->cur_display_config->plane.BlendingAndTiming[i] > largest_blend_and_timing) - largest_blend_and_timing = p->cur_display_config->plane.BlendingAndTiming[i]; - } - - if (p->new_policy != p->cur_policy) - *p->new_policy = *p->cur_policy; - - if (p->new_display_config != p->cur_display_config) - *p->new_display_config = *p->cur_display_config; - - - // Optimize Clocks - if (!optimization_done) { - if (largest_blend_and_timing == 0 && p->cur_policy->ODMUse[0] == dml_odm_use_policy_combine_as_needed && dml2->config.minimize_dispclk_using_odm) { - odms_needed = dml2_util_get_maximum_odm_combine_for_output(dml2->config.optimize_odm_4to1, - p->cur_display_config->output.OutputEncoder[0], p->cur_mode_support_info->DSCEnabled[0]) - 1; - - if (odms_needed <= unused_dpps) { - if (odms_needed == 1) { - p->new_policy->ODMUse[0] = dml_odm_use_policy_combine_2to1; - optimization_done = true; - } else if (odms_needed == 3) { - p->new_policy->ODMUse[0] = dml_odm_use_policy_combine_4to1; - optimization_done = true; - } else - optimization_done = false; - } - } - } - - return optimization_done; -} - -static int calculate_lowest_supported_state_for_temp_read(struct dml2_context *dml2, struct dc_state *display_state, - enum dc_validate_mode validate_mode) -{ - struct dml2_calculate_lowest_supported_state_for_temp_read_scratch *s = &dml2->v20.scratch.dml2_calculate_lowest_supported_state_for_temp_read_scratch; - struct dml2_wrapper_scratch *s_global = &dml2->v20.scratch; - - unsigned int dml_result = 0; - int result = -1, i, j; - - build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); - - /* Zero out before each call before proceeding */ - memset(s, 0, sizeof(struct dml2_calculate_lowest_supported_state_for_temp_read_scratch)); - memset(&s_global->mode_support_params, 0, sizeof(struct dml_mode_support_ex_params_st)); - memset(&s_global->dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); - - for (i = 0; i < dml2->config.dcn_pipe_count; i++) { - /* Calling resource_build_scaling_params will populate the pipe params - * with the necessary information needed for correct DML calculations - * This is also done in DML1 driver code path and hence display_state - * cannot be const. 
- */ - struct pipe_ctx *pipe = &display_state->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state) { - if (!dml2->config.callbacks.build_scaling_params(pipe)) { - ASSERT(false); - return false; - } - } - } - - map_dc_state_into_dml_display_cfg(dml2, display_state, &s->cur_display_config); - - for (i = 0; i < dml2->v20.dml_core_ctx.states.num_states; i++) { - s->uclk_change_latencies[i] = dml2->v20.dml_core_ctx.states.state_array[i].dram_clock_change_latency_us; - } - - for (i = 0; i < 4; i++) { - for (j = 0; j < dml2->v20.dml_core_ctx.states.num_states; j++) { - dml2->v20.dml_core_ctx.states.state_array[j].dram_clock_change_latency_us = s_global->dummy_pstate_table[i].dummy_pstate_latency_us; - } - - dml_result = pack_and_call_dml_mode_support_ex(dml2, &s->cur_display_config, &s->evaluation_info, - validate_mode); - - if (dml_result && s->evaluation_info.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive) { - map_hw_resources(dml2, &s->cur_display_config, &s->evaluation_info); - dml_result = dml_mode_programming(&dml2->v20.dml_core_ctx, s_global->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true); - - ASSERT(dml_result); - - dml2_extract_watermark_set(&dml2->v20.g6_temp_read_watermark_set, &dml2->v20.dml_core_ctx); - dml2->v20.g6_temp_read_watermark_set.cstate_pstate.fclk_pstate_change_ns = dml2->v20.g6_temp_read_watermark_set.cstate_pstate.pstate_change_ns; - - result = s_global->mode_support_params.out_lowest_state_idx; - - while (dml2->v20.dml_core_ctx.states.state_array[result].dram_speed_mts < s_global->dummy_pstate_table[i].dram_speed_mts) - result++; - - break; - } - } - - for (i = 0; i < dml2->v20.dml_core_ctx.states.num_states; i++) { - dml2->v20.dml_core_ctx.states.state_array[i].dram_clock_change_latency_us = s->uclk_change_latencies[i]; - } - - return result; -} - -static void copy_dummy_pstate_table(struct dummy_pstate_entry *dest, struct dummy_pstate_entry *src, unsigned int num_entries) -{ - for (int i = 0; i < num_entries; i++) { - dest[i] = src[i]; - } -} - -static bool are_timings_requiring_odm_doing_blending(const struct dml_display_cfg_st *display_cfg, - const struct dml_mode_support_info_st *evaluation_info) -{ - unsigned int planes_per_timing[__DML_NUM_PLANES__] = {0}; - int i; - - for (i = 0; i < display_cfg->num_surfaces; i++) - planes_per_timing[display_cfg->plane.BlendingAndTiming[i]]++; - - for (i = 0; i < __DML_NUM_PLANES__; i++) { - if (planes_per_timing[i] > 1 && evaluation_info->ODMMode[i] != dml_odm_mode_bypass) - return true; - } - - return false; -} - -static bool does_configuration_meet_sw_policies(struct dml2_context *ctx, const struct dml_display_cfg_st *display_cfg, - const struct dml_mode_support_info_st *evaluation_info) -{ - bool pass = true; - - if (!ctx->config.enable_windowed_mpo_odm) { - if (are_timings_requiring_odm_doing_blending(display_cfg, evaluation_info)) - pass = false; - } - - return pass; -} - -static bool dml_mode_support_wrapper(struct dml2_context *dml2, - struct dc_state *display_state, - enum dc_validate_mode validate_mode) -{ - struct dml2_wrapper_scratch *s = &dml2->v20.scratch; - unsigned int result = 0, i; - unsigned int optimized_result = true; - - build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); - - /* Zero out before each call before proceeding */ - memset(&s->cur_display_config, 0, sizeof(struct dml_display_cfg_st)); - memset(&s->mode_support_params, 0, sizeof(struct dml_mode_support_ex_params_st)); - memset(&s->dml_to_dc_pipe_mapping, 0, 
sizeof(struct dml2_dml_to_dc_pipe_mapping));
-	memset(&s->optimize_configuration_params, 0, sizeof(struct dml2_wrapper_optimize_configuration_params));
-
-	for (i = 0; i < dml2->config.dcn_pipe_count; i++) {
-		/* Calling resource_build_scaling_params will populate the pipe params
-		 * with the necessary information needed for correct DML calculations
-		 * This is also done in DML1 driver code path and hence display_state
-		 * cannot be const.
-		 */
-		struct pipe_ctx *pipe = &display_state->res_ctx.pipe_ctx[i];
-
-		if (pipe->plane_state) {
-			if (!dml2->config.callbacks.build_scaling_params(pipe)) {
-				ASSERT(false);
-				return false;
-			}
-		}
-	}
-
-	map_dc_state_into_dml_display_cfg(dml2, display_state, &s->cur_display_config);
-	if (!dml2->config.skip_hw_state_mapping)
-		dml2_apply_det_buffer_allocation_policy(dml2, &s->cur_display_config);
-
-	result = pack_and_call_dml_mode_support_ex(dml2,
-						   &s->cur_display_config,
-						   &s->mode_support_info,
-						   validate_mode);
-
-	if (result)
-		result = does_configuration_meet_sw_policies(dml2, &s->cur_display_config, &s->mode_support_info);
-
-	// Try to optimize
-	if (result) {
-		s->cur_policy = dml2->v20.dml_core_ctx.policy;
-		s->optimize_configuration_params.dml_core_ctx = &dml2->v20.dml_core_ctx;
-		s->optimize_configuration_params.config = &dml2->config;
-		s->optimize_configuration_params.ip_params = &dml2->v20.dml_core_ctx.ip;
-		s->optimize_configuration_params.cur_display_config = &s->cur_display_config;
-		s->optimize_configuration_params.cur_mode_support_info = &s->mode_support_info;
-		s->optimize_configuration_params.cur_policy = &s->cur_policy;
-		s->optimize_configuration_params.new_display_config = &s->new_display_config;
-		s->optimize_configuration_params.new_policy = &s->new_policy;
-
-		while (optimized_result && optimize_configuration(dml2, &s->optimize_configuration_params)) {
-			dml2->v20.dml_core_ctx.policy = s->new_policy;
-			optimized_result = pack_and_call_dml_mode_support_ex(dml2,
-									     &s->new_display_config,
-									     &s->mode_support_info,
-									     validate_mode);
-
-			if (optimized_result)
-				optimized_result = does_configuration_meet_sw_policies(dml2, &s->new_display_config, &s->mode_support_info);
-
-			// If the new optimized state is supported, then set current = new
-			if (optimized_result) {
-				s->cur_display_config = s->new_display_config;
-				s->cur_policy = s->new_policy;
-			} else {
-				// Else, restore policy to current
-				dml2->v20.dml_core_ctx.policy = s->cur_policy;
-			}
-		}
-
-		// Optimize ended with a failed config, so we need to restore DML state to last passing
-		if (!optimized_result) {
-			result = pack_and_call_dml_mode_support_ex(dml2,
-								   &s->cur_display_config,
-								   &s->mode_support_info,
-								   validate_mode);
-		}
-	}
-
-	if (result)
-		map_hw_resources(dml2, &s->cur_display_config, &s->mode_support_info);
-
-	return result;
-}
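dml_mode_support_wrapper() above follows a try/keep-last-good pattern: each policy proposed by optimize_configuration() is applied and re-validated, a passing configuration becomes the new baseline, and after the loop the last passing configuration is re-evaluated so the DML core state matches it. A minimal sketch of that pattern, with stand-in names and a toy validator in place of pack_and_call_dml_mode_support_ex():

#include <stdbool.h>

/* Hypothetical stand-ins; not DML2 types or functions. */
struct cfg_example { int policy; };

static bool validate_example(const struct cfg_example *c)
{
	return c->policy <= 2; /* toy stand-in for the real mode-support call */
}

static bool optimize_with_rollback(struct cfg_example *cur)
{
	struct cfg_example next = *cur;
	bool ok = true;

	while (ok) {
		next.policy++;               /* propose a more aggressive policy */
		ok = validate_example(&next);
		if (ok)
			*cur = next;         /* supported: promote to baseline */
	}
	/*
	 * The loop always exits on a failed candidate, so re-validate the
	 * baseline; this mirrors the final mode-support call in the wrapper
	 * that restores solver state to the last passing configuration.
	 */
	return validate_example(cur);
}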
-
-static bool call_dml_mode_support_and_programming(struct dc_state *context, enum dc_validate_mode validate_mode)
-{
-	unsigned int result = 0;
-	unsigned int min_state = 0;
-	int min_state_for_g6_temp_read = 0;
-
-
-	if (!context)
-		return false;
-
-	struct dml2_context *dml2 = context->bw_ctx.dml2;
-	struct dml2_wrapper_scratch *s = &dml2->v20.scratch;
-
-	if (!context->streams[0]->sink->link->dc->caps.is_apu) {
-		min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context,
-											    validate_mode);
-
-		ASSERT(min_state_for_g6_temp_read >= 0);
-	}
-
-	result = dml_mode_support_wrapper(dml2, context, validate_mode);
-
-	/* Upon trying to set certain frequencies in FRL, min_state_for_g6_temp_read is reported as -1.
-	 * This leads to an invalid value of min_state causing crashes later on.
-	 * Use the default logic for min_state only when min_state_for_g6_temp_read is a valid value. In other cases, use the value calculated by the DML directly.
-	 */
-	if (!context->streams[0]->sink->link->dc->caps.is_apu) {
-		if (min_state_for_g6_temp_read >= 0)
-			min_state = min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx ? min_state_for_g6_temp_read : s->mode_support_params.out_lowest_state_idx;
-		else
-			min_state = s->mode_support_params.out_lowest_state_idx;
-	}
-
-	if (result) {
-		if (!context->streams[0]->sink->link->dc->caps.is_apu) {
-			result = dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true);
-		} else {
-			result = dml_mode_programming(&dml2->v20.dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true);
-		}
-	}
-	return result;
-}
-
-static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_state *context,
-	enum dc_validate_mode validate_mode)
-{
-	struct dml2_context *dml2 = context->bw_ctx.dml2;
-	struct dml2_wrapper_scratch *s = &dml2->v20.scratch;
-	struct dml2_dcn_clocks out_clks;
-	unsigned int result = 0;
-	bool need_recalculation = false;
-	uint32_t cstate_enter_plus_exit_z8_ns;
-
-	if (context->stream_count == 0) {
-		unsigned int lowest_state_idx = 0;
-
-		out_clks.p_state_supported = true;
-		out_clks.dispclk_khz = 0; /* No requirement, and lowest index will generally be maximum dispclk. */
-		out_clks.dcfclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dcfclk_mhz * 1000;
-		out_clks.fclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].fabricclk_mhz * 1000;
-		out_clks.uclk_mts = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dram_speed_mts;
-		out_clks.phyclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].phyclk_mhz * 1000;
-		out_clks.socclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].socclk_mhz * 1000;
-		out_clks.ref_dtbclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dtbclk_mhz * 1000;
-		context->bw_ctx.bw.dcn.clk.dtbclk_en = false;
-		dml2_copy_clocks_to_dc_state(&out_clks, context);
-		return true;
-	}
-
-	/* Zero out before each call before proceeding */
-	memset(&dml2->v20.scratch, 0, sizeof(struct dml2_wrapper_scratch));
-	memset(&dml2->v20.dml_core_ctx.policy, 0, sizeof(struct dml_mode_eval_policy_st));
-	memset(&dml2->v20.dml_core_ctx.ms, 0, sizeof(struct mode_support_st));
-	memset(&dml2->v20.dml_core_ctx.mp, 0, sizeof(struct mode_program_st));
-
-	/* Initialize DET scratch */
-	dml2_initialize_det_scratch(dml2);
-
-	copy_dummy_pstate_table(s->dummy_pstate_table, in_dc->clk_mgr->bw_params->dummy_pstate_table, 4);
-
-	result = call_dml_mode_support_and_programming(context, validate_mode);
-	/* Call map dc pipes to map the pipes based on the DML output. For correctly determining if recalculation
-	 * is required or not, the resource context needs to correctly reflect the number of active pipes. We would
-	 * only know the correct number of active pipes after dml2_map_dc_pipes is called.
-	 */
-	if (result && !dml2->config.skip_hw_state_mapping)
-		dml2_map_dc_pipes(dml2, context, &s->cur_display_config, &s->dml_to_dc_pipe_mapping, in_dc->current_state);
-
-	/* Verify and update DET Buffer configuration if needed.
dml2_verify_det_buffer_configuration will check if DET Buffer - * size needs to be updated. If yes it will update the DETOverride variable and set need_recalculation flag to true. - * Based on that flag, run mode support again. Verification needs to be run after dml_mode_programming because the getters - * return correct det buffer values only after dml_mode_programming is called. - */ - if (result && !dml2->config.skip_hw_state_mapping) { - need_recalculation = dml2_verify_det_buffer_configuration(dml2, context, &dml2->det_helper_scratch); - if (need_recalculation) { - /* Engage the DML again if recalculation is required. */ - call_dml_mode_support_and_programming(context, validate_mode); - if (!dml2->config.skip_hw_state_mapping) { - dml2_map_dc_pipes(dml2, context, &s->cur_display_config, &s->dml_to_dc_pipe_mapping, in_dc->current_state); - } - need_recalculation = dml2_verify_det_buffer_configuration(dml2, context, &dml2->det_helper_scratch); - ASSERT(need_recalculation == false); - } - } - - if (result) { - unsigned int lowest_state_idx = s->mode_support_params.out_lowest_state_idx; - out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.mp.Dispclk_calculated * 1000; - out_clks.p_state_supported = s->mode_support_info.DRAMClockChangeSupport[0] != dml_dram_clock_change_unsupported; - if (in_dc->config.use_default_clock_table && - (lowest_state_idx < dml2->v20.dml_core_ctx.states.num_states - 1)) { - lowest_state_idx = dml2->v20.dml_core_ctx.states.num_states - 1; - out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dispclk_mhz * 1000; - } - - out_clks.dcfclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dcfclk_mhz * 1000; - out_clks.fclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].fabricclk_mhz * 1000; - out_clks.uclk_mts = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dram_speed_mts; - out_clks.phyclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].phyclk_mhz * 1000; - out_clks.socclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].socclk_mhz * 1000; - out_clks.ref_dtbclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dtbclk_mhz * 1000; - context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(in_dc, context); - - if (!dml2->config.skip_hw_state_mapping) { - /* Call dml2_calculate_rq_and_dlg_params */ - dml2_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml2, in_dc->res_pool->pipe_count); - } - - dml2_copy_clocks_to_dc_state(&out_clks, context); - dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.a, &dml2->v20.dml_core_ctx); - dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.b, &dml2->v20.dml_core_ctx); - if (context->streams[0]->sink->link->dc->caps.is_apu) - dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.dml_core_ctx); - else - memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c)); - dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.d, &dml2->v20.dml_core_ctx); - dml2_extract_writeback_wm(context, &dml2->v20.dml_core_ctx); - //copy for deciding zstate use - context->bw_ctx.dml.vba.StutterPeriod = context->bw_ctx.dml2->v20.dml_core_ctx.mp.StutterPeriod; - - cstate_enter_plus_exit_z8_ns = context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns; - - if 
(context->bw_ctx.dml.vba.StutterPeriod < in_dc->debug.minimum_z8_residency_time && - cstate_enter_plus_exit_z8_ns < in_dc->debug.minimum_z8_residency_time * 1000) - cstate_enter_plus_exit_z8_ns = in_dc->debug.minimum_z8_residency_time * 1000; - - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = cstate_enter_plus_exit_z8_ns; - } - - return result; -} - -static bool dml2_validate_only(struct dc_state *context, enum dc_validate_mode validate_mode) -{ - struct dml2_context *dml2; - unsigned int result = 0; - - if (!context || context->stream_count == 0) - return true; - - dml2 = context->bw_ctx.dml2; - - /* Zero out before each call before proceeding */ - memset(&dml2->v20.scratch, 0, sizeof(struct dml2_wrapper_scratch)); - memset(&dml2->v20.dml_core_ctx.policy, 0, sizeof(struct dml_mode_eval_policy_st)); - memset(&dml2->v20.dml_core_ctx.ms, 0, sizeof(struct mode_support_st)); - memset(&dml2->v20.dml_core_ctx.mp, 0, sizeof(struct mode_program_st)); - - build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); - - map_dc_state_into_dml_display_cfg(dml2, context, &dml2->v20.scratch.cur_display_config); - if (!dml2->config.skip_hw_state_mapping) - dml2_apply_det_buffer_allocation_policy(dml2, &dml2->v20.scratch.cur_display_config); - - result = pack_and_call_dml_mode_support_ex(dml2, - &dml2->v20.scratch.cur_display_config, - &dml2->v20.scratch.mode_support_info, - validate_mode); - - if (result) - result = does_configuration_meet_sw_policies(dml2, &dml2->v20.scratch.cur_display_config, &dml2->v20.scratch.mode_support_info); - - return result == 1; -} - -static void dml2_apply_debug_options(const struct dc *dc, struct dml2_context *dml2) -{ - if (dc->debug.override_odm_optimization) { - dml2->config.minimize_dispclk_using_odm = dc->debug.minimize_dispclk_using_odm; - } -} - -bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2, - enum dc_validate_mode validate_mode) -{ - bool out = false; - - if (!dml2) - return false; - dml2_apply_debug_options(in_dc, dml2); - - /* DML2.1 validation path */ - if (dml2->architecture == dml2_architecture_21) { - out = dml21_validate(in_dc, context, dml2, validate_mode); - return out; - } - - DC_FP_START(); - - /* Use dml_validate_only for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX path */ - if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) - out = dml2_validate_only(context, validate_mode); - else - out = dml2_validate_and_build_resource(in_dc, context, validate_mode); - - DC_FP_END(); - - return out; -} - -static inline struct dml2_context *dml2_allocate_memory(void) -{ - return (struct dml2_context *) vzalloc(sizeof(struct dml2_context)); -} - -static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) -{ - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) { - dml21_reinit(in_dc, *dml2, config); - return; - } - - // Store config options - (*dml2)->config = *config; - - switch (in_dc->ctx->dce_version) { - case DCN_VERSION_3_5: - (*dml2)->v20.dml_core_ctx.project = dml_project_dcn35; - break; - case DCN_VERSION_3_51: - (*dml2)->v20.dml_core_ctx.project = dml_project_dcn351; - break; - case DCN_VERSION_3_6: - (*dml2)->v20.dml_core_ctx.project = dml_project_dcn36; - break; - case DCN_VERSION_3_2: - (*dml2)->v20.dml_core_ctx.project = dml_project_dcn32; - break; - case DCN_VERSION_3_21: - (*dml2)->v20.dml_core_ctx.project = 
dml_project_dcn321; - break; - case DCN_VERSION_4_01: - (*dml2)->v20.dml_core_ctx.project = dml_project_dcn401; - break; - default: - (*dml2)->v20.dml_core_ctx.project = dml_project_default; - break; - } - - DC_FP_START(); - - initialize_dml2_ip_params(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.ip); - - initialize_dml2_soc_bbox(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc); - - initialize_dml2_soc_states(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc, &(*dml2)->v20.dml_core_ctx.states); - - DC_FP_END(); -} - -bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) -{ - // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete. - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) - return dml21_create(in_dc, dml2, config); - - // Allocate Mode Lib Ctx - *dml2 = dml2_allocate_memory(); - - if (!(*dml2)) - return false; - - dml2_init(in_dc, config, dml2); - - return true; -} - -void dml2_destroy(struct dml2_context *dml2) -{ - if (!dml2) - return; - - if (dml2->architecture == dml2_architecture_21) - dml21_destroy(dml2); - vfree(dml2); -} - -void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, - unsigned int *fclk_change_support, unsigned int *dram_clk_change_support) -{ - *fclk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0]; - *dram_clk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.DRAMClockChangeSupport[0]; -} - -void dml2_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2) -{ - if (dml2->architecture == dml2_architecture_21) - dml21_prepare_mcache_programming(in_dc, context, dml2); -} - -void dml2_copy(struct dml2_context *dst_dml2, - struct dml2_context *src_dml2) -{ - if (src_dml2->architecture == dml2_architecture_21) { - dml21_copy(dst_dml2, src_dml2); - return; - } - /* copy Mode Lib Ctx */ - memcpy(dst_dml2, src_dml2, sizeof(struct dml2_context)); -} - -bool dml2_create_copy(struct dml2_context **dst_dml2, - struct dml2_context *src_dml2) -{ - if (src_dml2->architecture == dml2_architecture_21) - return dml21_create_copy(dst_dml2, src_dml2); - /* Allocate Mode Lib Ctx */ - *dst_dml2 = dml2_allocate_memory(); - - if (!(*dst_dml2)) - return false; - - /* copy Mode Lib Ctx */ - dml2_copy(*dst_dml2, src_dml2); - - return true; -} - -void dml2_reinit(const struct dc *in_dc, - const struct dml2_configuration_options *config, - struct dml2_context **dml2) -{ - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) { - dml21_reinit(in_dc, *dml2, config); - return; - } - - dml2_init(in_dc, config, dml2); -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h deleted file mode 100644 index c384e141cebc..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ /dev/null @@ -1,309 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef _DML2_WRAPPER_H_ -#define _DML2_WRAPPER_H_ - -#include "os_types.h" - -#define DML2_MAX_NUM_DPM_LVL 30 - -struct dml2_context; -struct display_mode_lib_st; -struct dc; -struct pipe_ctx; -struct dc_plane_state; -struct dc_sink; -struct dc_stream_state; -struct resource_context; -struct display_stream_compressor; -struct dc_mcache_params; - -// Configuration of the MALL on the SoC -struct dml2_soc_mall_info { - // Cache line size of 0 means MALL is not enabled/present - unsigned int cache_line_size_bytes; - unsigned int cache_num_ways; - unsigned int max_cab_allocation_bytes; - - unsigned int mblk_width_pixels; - unsigned int mblk_size_bytes; - unsigned int mblk_height_4bpe_pixels; - unsigned int mblk_height_8bpe_pixels; -}; - -// Output of DML2 for clock requirements -struct dml2_dcn_clocks { - unsigned int dispclk_khz; - unsigned int dcfclk_khz; - unsigned int fclk_khz; - unsigned int uclk_mts; - unsigned int phyclk_khz; - unsigned int socclk_khz; - unsigned int ref_dtbclk_khz; - bool p_state_supported; - unsigned int cab_num_ways_required; - unsigned int dcfclk_khz_ds; -}; - -struct dml2_dc_callbacks { - struct dc *dc; - bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx); - void (*build_test_pattern_params)(struct resource_context *res_ctx, struct pipe_ctx *otg_master); - bool (*can_support_mclk_switch_using_fw_based_vblank_stretch)(struct dc *dc, struct dc_state *context); - bool (*acquire_secondary_pipe_for_mpc_odm)(const struct dc *dc, struct dc_state *state, struct pipe_ctx *pri_pipe, struct pipe_ctx *sec_pipe, bool odm); - bool (*update_pipes_for_stream_with_slice_count)( - struct dc_state *new_ctx, - const struct dc_state *cur_ctx, - const struct resource_pool *pool, - const struct dc_stream_state *stream, - int new_slice_count); - bool (*update_pipes_for_plane_with_slice_count)( - struct dc_state *new_ctx, - const struct dc_state *cur_ctx, - const struct resource_pool *pool, - const struct dc_plane_state *plane, - int slice_count); - int (*get_odm_slice_index)(const struct pipe_ctx *opp_head); - int (*get_odm_slice_count)(const struct pipe_ctx *opp_head); - int (*get_mpc_slice_index)(const struct pipe_ctx *dpp_pipe); - int (*get_mpc_slice_count)(const struct pipe_ctx *dpp_pipe); - struct pipe_ctx *(*get_opp_head)(const struct pipe_ctx *pipe_ctx); - struct pipe_ctx *(*get_otg_master_for_stream)( - struct resource_context *res_ctx, - const struct dc_stream_state 
*stream); - int (*get_opp_heads_for_otg_master)(const struct pipe_ctx *otg_master, - struct resource_context *res_ctx, - struct pipe_ctx *opp_heads[MAX_PIPES]); - int (*get_dpp_pipes_for_plane)(const struct dc_plane_state *plane, - struct resource_context *res_ctx, - struct pipe_ctx *dpp_pipes[MAX_PIPES]); - struct dc_stream_status *(*get_stream_status)( - struct dc_state *state, - const struct dc_stream_state *stream); - struct dc_stream_state *(*get_stream_from_id)(const struct dc_state *state, unsigned int id); - unsigned int (*get_max_flickerless_instant_vtotal_increase)( - struct dc_stream_state *stream, - bool is_gaming); - bool (*allocate_mcache)(struct dc_state *context, const struct dc_mcache_params *mcache_params); -}; - -struct dml2_dc_svp_callbacks { - struct dc *dc; - bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx); - struct dc_stream_state* (*create_phantom_stream)(const struct dc *dc, - struct dc_state *state, - struct dc_stream_state *main_stream); - struct dc_plane_state* (*create_phantom_plane)(const struct dc *dc, - struct dc_state *state, - struct dc_plane_state *main_plane); - enum dc_status (*add_phantom_stream)(const struct dc *dc, - struct dc_state *state, - struct dc_stream_state *phantom_stream, - struct dc_stream_state *main_stream); - bool (*add_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context); - bool (*remove_phantom_plane)(const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state *plane_state, - struct dc_state *context); - enum dc_status (*remove_phantom_stream)(const struct dc *dc, - struct dc_state *state, - struct dc_stream_state *stream); - void (*release_phantom_plane)(const struct dc *dc, - struct dc_state *state, - struct dc_plane_state *plane); - void (*release_phantom_stream)(const struct dc *dc, - struct dc_state *state, - struct dc_stream_state *stream); - void (*release_dsc)(struct resource_context *res_ctx, const struct resource_pool *pool, struct display_stream_compressor **dsc); - enum mall_stream_type (*get_pipe_subvp_type)(const struct dc_state *state, const struct pipe_ctx *pipe_ctx); - enum mall_stream_type (*get_stream_subvp_type)(const struct dc_state *state, const struct dc_stream_state *stream); - struct dc_stream_state *(*get_paired_subvp_stream)(const struct dc_state *state, const struct dc_stream_state *stream); - bool (*remove_phantom_streams_and_planes)( - const struct dc *dc, - struct dc_state *state); - void (*release_phantom_streams_and_planes)( - const struct dc *dc, - struct dc_state *state); - unsigned int (*calculate_mall_ways_from_bytes)( - const struct dc *dc, - unsigned int total_size_in_mall_bytes); -}; - -struct dml2_clks_table_entry { - unsigned int dcfclk_mhz; - unsigned int fclk_mhz; - unsigned int memclk_mhz; - unsigned int socclk_mhz; - unsigned int dtbclk_mhz; - unsigned int dispclk_mhz; - unsigned int dppclk_mhz; - unsigned int dram_speed_mts; /*which is based on wck_ratio*/ -}; - -struct dml2_clks_num_entries { - unsigned int num_dcfclk_levels; - unsigned int num_fclk_levels; - unsigned int num_memclk_levels; - unsigned int num_socclk_levels; - unsigned int num_dtbclk_levels; - unsigned int num_dispclk_levels; - unsigned int num_dppclk_levels; -}; - -struct dml2_clks_limit_table { - struct dml2_clks_table_entry clk_entries[DML2_MAX_NUM_DPM_LVL]; - struct dml2_clks_num_entries num_entries_per_clk; - unsigned int num_states; -}; - -// Various overrides, per ASIC or per SKU specific, or for debugging 
purpose when/if available -struct dml2_soc_bbox_overrides { - double xtalclk_mhz; - double dchub_refclk_mhz; - double dprefclk_mhz; - double disp_pll_vco_speed_mhz; - double urgent_latency_us; - double sr_exit_latency_us; - double sr_enter_plus_exit_latency_us; - double sr_exit_z8_time_us; - double sr_enter_plus_exit_z8_time_us; - double dram_clock_change_latency_us; - double fclk_change_latency_us; - unsigned int dram_num_chan; - unsigned int dram_chanel_width_bytes; - struct dml2_clks_limit_table clks_table; -}; - -enum dml2_force_pstate_methods { - dml2_force_pstate_method_auto = 0, - dml2_force_pstate_method_vactive, - dml2_force_pstate_method_vblank, - dml2_force_pstate_method_drr, - dml2_force_pstate_method_subvp, -}; - -struct dml2_configuration_options { - int dcn_pipe_count; - bool use_native_pstate_optimization; - bool enable_windowed_mpo_odm; - bool use_native_soc_bb_construction; - bool skip_hw_state_mapping; - bool optimize_odm_4to1; - bool minimize_dispclk_using_odm; - bool override_det_buffer_size_kbytes; - struct dml2_dc_callbacks callbacks; - struct { - bool force_disable_subvp; - bool force_enable_subvp; - unsigned int subvp_fw_processing_delay_us; - unsigned int subvp_pstate_allow_width_us; - unsigned int subvp_prefetch_end_to_mall_start_us; - unsigned int subvp_swath_height_margin_lines; - struct dml2_dc_svp_callbacks callbacks; - } svp_pstate; - struct dml2_soc_mall_info mall_cfg; - struct dml2_soc_bbox_overrides bbox_overrides; - unsigned int max_segments_per_hubp; - unsigned int det_segment_size; - /* Only for debugging purposes when initializing SOCBB params via tool for DML21. */ - struct socbb_ip_params_external *external_socbb_ip_params; - struct { - bool force_pstate_method_enable; - enum dml2_force_pstate_methods force_pstate_method_values[MAX_PIPES]; - } pmo; - bool map_dc_pipes_with_callbacks; - - bool use_clock_dc_limits; - bool gpuvm_enable; - bool force_tdlut_enable; - void *bb_from_dmub; -}; - -/* - * dml2_create - Creates dml2_context. - * @in_dc: dc. - * @config: dml2 configuration options. - * @dml2: Created dml2 context. - * - * Create and destroy of DML2 is done as part of dc_state creation - * and dc_state_free. DML2 IP, SOC and STATES are initialized at - * creation time. - * - * Return: True if dml2 is successfully created, false otherwise. - */ -bool dml2_create(const struct dc *in_dc, - const struct dml2_configuration_options *config, - struct dml2_context **dml2); - -void dml2_destroy(struct dml2_context *dml2); -void dml2_copy(struct dml2_context *dst_dml2, - struct dml2_context *src_dml2); -bool dml2_create_copy(struct dml2_context **dst_dml2, - struct dml2_context *src_dml2); -void dml2_reinit(const struct dc *in_dc, - const struct dml2_configuration_options *config, - struct dml2_context **dml2); - -/* - * dml2_validate - Determines if a display configuration is supported or not. - * @in_dc: dc. - * @context: dc_state to be validated. - * @validate_mode: DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX will not populate context.res_ctx. - * - * DML1.0 compatible interface for validation. - * - * Based on fast_validate option internally would call: - * - * -dml2_validate_and_build_resource - for non fast_validate option - * Calculates if dc_state can be supported on the SOC, and attempts to - * optimize the power management feature supports versus minimum clocks. - * If supported, also builds out_new_hw_state to represent the hw programming - * for the new dc state. 
- * - * -dml2_validate_only - for fast_validate option - * Calculates if dc_state can be supported on the SOC (i.e. at maximum - * clocks) with all mandatory power features enabled. - - * Context: Two threads may not invoke this function concurrently unless they reference - * separate dc_states for validation. - * Return: True if mode is supported, false otherwise. - */ -bool dml2_validate(const struct dc *in_dc, - struct dc_state *context, - struct dml2_context *dml2, - enum dc_validate_mode validate_mode); - -/* - * dml2_extract_dram_and_fclk_change_support - Extracts the FCLK and UCLK change support info. - * @dml2: input dml2 context pointer. - * @fclk_change_support: output pointer holding the fclk change support info (vactive, vblank, unsupported). - * @dram_clk_change_support: output pointer holding the uclk change support info (vactive, vblank, unsupported). - */ -void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, - unsigned int *fclk_change_support, unsigned int *dram_clk_change_support); -void dml2_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2); -#endif //_DML2_WRAPPER_H_ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml_assert.h b/drivers/gpu/drm/amd/display/dc/dml2/dml_assert.h deleted file mode 100644 index 17f0972b1af7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml_assert.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DML_ASSERT_H__ -#define __DML_ASSERT_H__ - -#include "os_types.h" - -#endif //__DML_ASSERT_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml_depedencies.h b/drivers/gpu/drm/amd/display/dc/dml2/dml_depedencies.h deleted file mode 100644 index f7d30b47beff..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml_depedencies.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. 
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-/* This header intentionally does not include an #ifdef guard as it only contains includes for other headers */
-
-/*
- * Standard Types
- */
-#include "os_types.h"
-#include "cmntypes.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c
deleted file mode 100644
index 00d22e542469..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c
+++ /dev/null
@@ -1,573 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright 2023 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- * - */ - -#include "dml_display_rq_dlg_calc.h" -#include "display_mode_core.h" -#include "display_mode_util.h" - -static dml_bool_t is_dual_plane(enum dml_source_format_class source_format) -{ - dml_bool_t ret_val = 0; - - if ((source_format == dml_420_12) || (source_format == dml_420_8) || (source_format == dml_420_10) || (source_format == dml_rgbe_alpha)) - ret_val = 1; - - return ret_val; -} - -void dml_rq_dlg_get_rq_reg(dml_display_rq_regs_st *rq_regs, - struct display_mode_lib_st *mode_lib, - const dml_uint_t pipe_idx) -{ - dml_uint_t plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); - enum dml_source_format_class source_format = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[plane_idx]; - enum dml_swizzle_mode sw_mode = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[plane_idx]; - dml_bool_t dual_plane = is_dual_plane((enum dml_source_format_class)(source_format)); - - uint32 pixel_chunk_bytes = 0; - uint32 min_pixel_chunk_bytes = 0; - uint32 meta_chunk_bytes = 0; - uint32 min_meta_chunk_bytes = 0; - uint32 dpte_group_bytes = 0; - uint32 mpte_group_bytes = 0; - - uint32 p1_pixel_chunk_bytes = 0; - uint32 p1_min_pixel_chunk_bytes = 0; - uint32 p1_meta_chunk_bytes = 0; - uint32 p1_min_meta_chunk_bytes = 0; - uint32 p1_dpte_group_bytes = 0; - uint32 p1_mpte_group_bytes = 0; - - dml_uint_t detile_buf_size_in_bytes; - dml_uint_t detile_buf_plane1_addr = 0; - - dml_float_t stored_swath_l_bytes; - dml_float_t stored_swath_c_bytes; - dml_bool_t is_phantom_pipe; - - dml_uint_t pte_row_height_linear; - - dml_print("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); - - memset(rq_regs, 0, sizeof(*rq_regs)); - - pixel_chunk_bytes = (dml_uint_t)(dml_get_pixel_chunk_size_in_kbyte(mode_lib) * 1024); - min_pixel_chunk_bytes = (dml_uint_t)(dml_get_min_pixel_chunk_size_in_byte(mode_lib)); - - if (pixel_chunk_bytes == 64 * 1024) - min_pixel_chunk_bytes = 0; - - meta_chunk_bytes = (dml_uint_t)(dml_get_meta_chunk_size_in_kbyte(mode_lib) * 1024); - min_meta_chunk_bytes = (dml_uint_t)(dml_get_min_meta_chunk_size_in_byte(mode_lib)); - - dpte_group_bytes = (dml_uint_t)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx)); - mpte_group_bytes = (dml_uint_t)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx)); - - p1_pixel_chunk_bytes = pixel_chunk_bytes; - p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes; - p1_meta_chunk_bytes = meta_chunk_bytes; - p1_min_meta_chunk_bytes = min_meta_chunk_bytes; - p1_dpte_group_bytes = dpte_group_bytes; - p1_mpte_group_bytes = mpte_group_bytes; - - if (source_format == dml_rgbe_alpha) - p1_pixel_chunk_bytes = (dml_uint_t)(dml_get_alpha_pixel_chunk_size_in_kbyte(mode_lib) * 1024); - - rq_regs->rq_regs_l.chunk_size = (dml_uint_t)(dml_log2((dml_float_t) pixel_chunk_bytes) - 10); - rq_regs->rq_regs_c.chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_pixel_chunk_bytes) - 10); - - if (min_pixel_chunk_bytes == 0) - rq_regs->rq_regs_l.min_chunk_size = 0; - else - rq_regs->rq_regs_l.min_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) min_pixel_chunk_bytes) - 8 + 1); - - if (p1_min_pixel_chunk_bytes == 0) - rq_regs->rq_regs_c.min_chunk_size = 0; - else - rq_regs->rq_regs_c.min_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_min_pixel_chunk_bytes) - 8 + 1); - - rq_regs->rq_regs_l.meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) meta_chunk_bytes) - 10); - rq_regs->rq_regs_c.meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_meta_chunk_bytes) - 10); - - if (min_meta_chunk_bytes == 0) - rq_regs->rq_regs_l.min_meta_chunk_size = 0; - else 
- rq_regs->rq_regs_l.min_meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) min_meta_chunk_bytes) - 6 + 1); - - if (min_meta_chunk_bytes == 0) - rq_regs->rq_regs_c.min_meta_chunk_size = 0; - else - rq_regs->rq_regs_c.min_meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_min_meta_chunk_bytes) - 6 + 1); - - rq_regs->rq_regs_l.dpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) dpte_group_bytes) - 6); - rq_regs->rq_regs_l.mpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) mpte_group_bytes) - 6); - rq_regs->rq_regs_c.dpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) p1_dpte_group_bytes) - 6); - rq_regs->rq_regs_c.mpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) p1_mpte_group_bytes) - 6); - - detile_buf_size_in_bytes = (dml_uint_t)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024); - - pte_row_height_linear = (dml_uint_t)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx)); - - if (sw_mode == dml_sw_linear) - ASSERT(pte_row_height_linear >= 8); - - rq_regs->rq_regs_l.pte_row_height_linear = (dml_uint_t)(dml_floor(dml_log2((dml_float_t) pte_row_height_linear), 1) - 3); - - if (dual_plane) { - dml_uint_t p1_pte_row_height_linear = (dml_uint_t)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx)); - if (sw_mode == dml_sw_linear) - ASSERT(p1_pte_row_height_linear >= 8); - - rq_regs->rq_regs_c.pte_row_height_linear = (dml_uint_t)(dml_floor(dml_log2((dml_float_t) p1_pte_row_height_linear), 1) - 3); - } - - rq_regs->rq_regs_l.swath_height = (dml_uint_t)(dml_log2((dml_float_t) dml_get_swath_height_l(mode_lib, pipe_idx))); - rq_regs->rq_regs_c.swath_height = (dml_uint_t)(dml_log2((dml_float_t) dml_get_swath_height_c(mode_lib, pipe_idx))); - - if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb - rq_regs->drq_expansion_mode = 0; - } else { - rq_regs->drq_expansion_mode = 2; - } - rq_regs->prq_expansion_mode = 1; - rq_regs->mrq_expansion_mode = 1; - rq_regs->crq_expansion_mode = 1; - - stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx); - stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx); - is_phantom_pipe = dml_get_is_phantom_pipe(mode_lib, pipe_idx); - - // Note: detile_buf_plane1_addr is in unit of 1KB - if (dual_plane) { - if (is_phantom_pipe) { - detile_buf_plane1_addr = (dml_uint_t)((1024.0*1024.0) / 2.0 / 1024.0); // half to chroma - } else { - if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { - detile_buf_plane1_addr = (dml_uint_t)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma -#ifdef __DML_VBA_DEBUG__ - dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); -#endif - } else { - detile_buf_plane1_addr = (dml_uint_t)(dml_round_to_multiple((dml_uint_t)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma -#ifdef __DML_VBA_DEBUG__ - dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); -#endif - } - } - } - rq_regs->plane1_base_address = detile_buf_plane1_addr; - -#ifdef __DML_VBA_DEBUG__ - dml_print("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); - dml_print("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); - dml_print("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); - dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); - dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, 
detile_buf_plane1_addr);
-	dml_print("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
-#endif
-	dml_print_rq_regs_st(rq_regs);
-	dml_print("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
-}
-
-// Note: currently taken in as-is.
-// It would be nice to decouple the code from the hw register implementation and extract the code that is repeated for luma and chroma.
-
-
-void dml_rq_dlg_get_dlg_reg(dml_display_dlg_regs_st *disp_dlg_regs,
-	dml_display_ttu_regs_st *disp_ttu_regs,
-	struct display_mode_lib_st *mode_lib,
-	const dml_uint_t pipe_idx)
-{
-	dml_uint_t plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
-	enum dml_source_format_class source_format = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[plane_idx];
-	struct dml_timing_cfg_st *timing = &mode_lib->ms.cache_display_cfg.timing;
-	struct dml_plane_cfg_st *plane = &mode_lib->ms.cache_display_cfg.plane;
-	struct dml_hw_resource_st *hw = &mode_lib->ms.cache_display_cfg.hw;
-	dml_bool_t dual_plane = is_dual_plane(source_format);
-	dml_uint_t num_cursors = plane->NumberOfCursors[plane_idx];
-	enum dml_odm_mode odm_mode = hw->ODMMode[plane_idx];
-
-	dml_uint_t htotal = timing->HTotal[plane_idx];
-	dml_uint_t hactive = timing->HActive[plane_idx];
-	dml_uint_t hblank_end = timing->HBlankEnd[plane_idx];
-	dml_uint_t vblank_end = timing->VBlankEnd[plane_idx];
-	dml_bool_t interlaced = timing->Interlace[plane_idx];
-	dml_float_t pclk_freq_in_mhz = (dml_float_t) timing->PixelClock[plane_idx];
-	dml_float_t refclk_freq_in_mhz = (hw->DLGRefClkFreqMHz > 0) ? (dml_float_t) hw->DLGRefClkFreqMHz : mode_lib->soc.refclk_mhz;
-	dml_float_t ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
-
-	dml_uint_t vready_after_vcount0;
-
-	dml_uint_t dst_x_after_scaler;
-	dml_uint_t dst_y_after_scaler;
-
-	dml_float_t dst_y_prefetch;
-	dml_float_t dst_y_per_vm_vblank;
-	dml_float_t dst_y_per_row_vblank;
-	dml_float_t dst_y_per_vm_flip;
-	dml_float_t dst_y_per_row_flip;
-
-	dml_float_t max_dst_y_per_vm_vblank = 32.0; //U5.2
-	dml_float_t max_dst_y_per_row_vblank = 16.0; //U4.2
-
-	dml_float_t vratio_pre_l;
-	dml_float_t vratio_pre_c;
-
-	dml_float_t refcyc_per_line_delivery_pre_l;
-	dml_float_t refcyc_per_line_delivery_l;
-	dml_float_t refcyc_per_line_delivery_pre_c = 0.;
-	dml_float_t refcyc_per_line_delivery_c = 0.;
-	dml_float_t refcyc_per_req_delivery_pre_l;
-	dml_float_t refcyc_per_req_delivery_l;
-	dml_float_t refcyc_per_req_delivery_pre_c = 0.;
-	dml_float_t refcyc_per_req_delivery_c = 0.;
-	dml_float_t refcyc_per_req_delivery_pre_cur0 = 0.;
-	dml_float_t refcyc_per_req_delivery_cur0 = 0.;
-
-	dml_float_t dst_y_per_pte_row_nom_l;
-	dml_float_t dst_y_per_pte_row_nom_c;
-	dml_float_t dst_y_per_meta_row_nom_l;
-	dml_float_t dst_y_per_meta_row_nom_c;
-	dml_float_t refcyc_per_pte_group_nom_l;
-	dml_float_t refcyc_per_pte_group_nom_c;
-	dml_float_t refcyc_per_pte_group_vblank_l;
-	dml_float_t refcyc_per_pte_group_vblank_c;
-	dml_float_t refcyc_per_pte_group_flip_l;
-	dml_float_t refcyc_per_pte_group_flip_c;
-	dml_float_t refcyc_per_meta_chunk_nom_l;
-	dml_float_t refcyc_per_meta_chunk_nom_c;
-	dml_float_t refcyc_per_meta_chunk_vblank_l;
-	dml_float_t refcyc_per_meta_chunk_vblank_c;
-	dml_float_t refcyc_per_meta_chunk_flip_l;
-	dml_float_t refcyc_per_meta_chunk_flip_c;
-
-	dml_float_t temp;
-	dml_float_t min_ttu_vblank;
-	dml_uint_t min_dst_y_next_start;
-
-	dml_print("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx);
-	dml_print("DML_DLG::%s: plane_idx = %d\n", __func__, plane_idx);
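The plane1_base_address programming at the end of dml_rq_dlg_get_rq_reg() above splits the detile buffer between luma and chroma for dual-plane formats and stores the chroma base in 1KB units. A minimal sketch of that split rule, simplified from the code (the helper name is a hypothetical stand-in; the round-to-1KB step collapses into the final integer division):

#include <stdbool.h>

/* Returns the plane1 (chroma) base address in 1KB units. Assumes
 * swath_c_bytes is nonzero, which holds for the dual-plane case. */
static unsigned int plane1_base_kb(unsigned int detile_buf_bytes,
				   double swath_l_bytes, double swath_c_bytes,
				   bool phantom_pipe)
{
	if (phantom_pipe)
		return (1024u * 1024u) / 2u / 1024u;  /* half of 1MB, in KB */
	if (swath_l_bytes / swath_c_bytes <= 1.5)
		return detile_buf_bytes / 2u / 1024u; /* half to chroma */
	/* luma-heavy case: 2/3 to luma, floored to a 1KB multiple */
	return ((2u * detile_buf_bytes) / 3u) / 1024u;
}

For example, a 192KB DET with a luma-heavy swath ratio yields a chroma base of 128, i.e. luma keeps the first 128KB and chroma starts at the 128KB mark.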
dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); - dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); - dml_print("DML_DLG: %s: hw->DLGRefClkFreqMHz = %3.2f\n", __func__, hw->DLGRefClkFreqMHz); - dml_print("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.refclk_mhz); - dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); - dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq); - dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); - - memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); - memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); - - ASSERT(refclk_freq_in_mhz != 0); - ASSERT(pclk_freq_in_mhz != 0); - ASSERT(ref_freq_to_pix_freq < 4.0); - - // Need to figure out which side of odm combine we're in - // Assume the pipe instance under the same plane is in order - - if (odm_mode == dml_odm_mode_bypass) { - disp_dlg_regs->refcyc_h_blank_end = (dml_uint_t)((dml_float_t) hblank_end * ref_freq_to_pix_freq); - } else if (odm_mode == dml_odm_mode_combine_2to1 || odm_mode == dml_odm_mode_combine_4to1) { - // find out how many pipe are in this plane - dml_uint_t num_active_pipes = dml_get_num_active_pipes(&mode_lib->ms.cache_display_cfg); - dml_uint_t first_pipe_idx_in_plane = __DML_NUM_PLANES__; - dml_uint_t pipe_idx_in_combine = 0; // pipe index within the plane - dml_uint_t odm_combine_factor = (odm_mode == dml_odm_mode_combine_2to1 ? 2 : 4); - - for (dml_uint_t i = 0; i < num_active_pipes; i++) { - if (dml_get_plane_idx(mode_lib, i) == plane_idx) { - if (i < first_pipe_idx_in_plane) { - first_pipe_idx_in_plane = i; - } - } - } - pipe_idx_in_combine = pipe_idx - first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.) - - disp_dlg_regs->refcyc_h_blank_end = (dml_uint_t)(((dml_float_t) hblank_end + (dml_float_t) pipe_idx_in_combine * (dml_float_t) hactive / (dml_float_t) odm_combine_factor) * ref_freq_to_pix_freq); - dml_print("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); - dml_print("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, first_pipe_idx_in_plane); - dml_print("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, pipe_idx_in_combine); - dml_print("DML_DLG: %s: odm_combine_factor = %d\n", __func__, odm_combine_factor); - } - dml_print("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); - - ASSERT(disp_dlg_regs->refcyc_h_blank_end < (dml_uint_t)dml_pow(2, 13)); - - disp_dlg_regs->ref_freq_to_pix_freq = (dml_uint_t)(ref_freq_to_pix_freq * dml_pow(2, 19)); - temp = dml_pow(2, 8); - disp_dlg_regs->refcyc_per_htotal = (dml_uint_t)(ref_freq_to_pix_freq * (dml_float_t)htotal * temp); - disp_dlg_regs->dlg_vblank_end = interlaced ? 
(vblank_end / 2) : vblank_end; // 15 bits - - min_ttu_vblank = dml_get_min_ttu_vblank_in_us(mode_lib, pipe_idx); - min_dst_y_next_start = (dml_uint_t)(dml_get_min_dst_y_next_start(mode_lib, pipe_idx)); - - dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank); - dml_print("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, min_dst_y_next_start); - dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq); - - vready_after_vcount0 = (dml_uint_t)(dml_get_vready_at_or_after_vsync(mode_lib, pipe_idx)); - disp_dlg_regs->vready_after_vcount0 = vready_after_vcount0; - - dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); - - dst_x_after_scaler = (dml_uint_t)(dml_get_dst_x_after_scaler(mode_lib, pipe_idx)); - dst_y_after_scaler = (dml_uint_t)(dml_get_dst_y_after_scaler(mode_lib, pipe_idx)); - - dml_print("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler); - dml_print("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler); - - dst_y_prefetch = dml_get_dst_y_prefetch(mode_lib, pipe_idx); - dst_y_per_vm_vblank = dml_get_dst_y_per_vm_vblank(mode_lib, pipe_idx); - dst_y_per_row_vblank = dml_get_dst_y_per_row_vblank(mode_lib, pipe_idx); - dst_y_per_vm_flip = dml_get_dst_y_per_vm_flip(mode_lib, pipe_idx); - dst_y_per_row_flip = dml_get_dst_y_per_row_flip(mode_lib, pipe_idx); - - // magic! - if (htotal <= 75) { - max_dst_y_per_vm_vblank = 100.0; - max_dst_y_per_row_vblank = 100.0; - } - - dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); - dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip); - dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip); - dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); - dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); - - ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank); - ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank); - ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); - - vratio_pre_l = dml_get_vratio_prefetch_l(mode_lib, pipe_idx); - vratio_pre_c = dml_get_vratio_prefetch_c(mode_lib, pipe_idx); - - dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l); - dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c); - - // Active - refcyc_per_line_delivery_pre_l = dml_get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_line_delivery_l = dml_get_refcyc_per_line_delivery_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - - dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l); - dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l); - - if (dual_plane) { - refcyc_per_line_delivery_pre_c = dml_get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_line_delivery_c = dml_get_refcyc_per_line_delivery_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - - dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, refcyc_per_line_delivery_pre_c); - dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, refcyc_per_line_delivery_c); - } - - disp_dlg_regs->refcyc_per_vm_dmdata = (dml_uint_t)(dml_get_refcyc_per_vm_dmdata_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); - 
disp_dlg_regs->dmdata_dl_delta = (dml_uint_t)(dml_get_dmdata_dl_delta_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); - - refcyc_per_req_delivery_pre_l = dml_get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_req_delivery_l = dml_get_refcyc_per_req_delivery_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - - dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l); - dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l); - - if (dual_plane) { - refcyc_per_req_delivery_pre_c = dml_get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_req_delivery_c = dml_get_refcyc_per_req_delivery_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - - dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, refcyc_per_req_delivery_pre_c); - dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c); - } - - // TTU - Cursor - ASSERT(num_cursors <= 1); - if (num_cursors > 0) { - refcyc_per_req_delivery_pre_cur0 = dml_get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_req_delivery_cur0 = dml_get_refcyc_per_cursor_req_delivery_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - - dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_pre_cur0); - dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_cur0); - } - - // Assign to register structures - disp_dlg_regs->min_dst_y_next_start = (dml_uint_t)((dml_float_t) min_dst_y_next_start * dml_pow(2, 2)); - ASSERT(disp_dlg_regs->min_dst_y_next_start < (dml_uint_t)dml_pow(2, 18)); - - disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line - disp_dlg_regs->refcyc_x_after_scaler = (dml_uint_t)((dml_float_t) dst_x_after_scaler * ref_freq_to_pix_freq); // in terms of refclk - disp_dlg_regs->dst_y_prefetch = (dml_uint_t)(dst_y_prefetch * dml_pow(2, 2)); - disp_dlg_regs->dst_y_per_vm_vblank = (dml_uint_t)(dst_y_per_vm_vblank * dml_pow(2, 2)); - disp_dlg_regs->dst_y_per_row_vblank = (dml_uint_t)(dst_y_per_row_vblank * dml_pow(2, 2)); - disp_dlg_regs->dst_y_per_vm_flip = (dml_uint_t)(dst_y_per_vm_flip * dml_pow(2, 2)); - disp_dlg_regs->dst_y_per_row_flip = (dml_uint_t)(dst_y_per_row_flip * dml_pow(2, 2)); - - disp_dlg_regs->vratio_prefetch = (dml_uint_t)(vratio_pre_l * dml_pow(2, 19)); - disp_dlg_regs->vratio_prefetch_c = (dml_uint_t)(vratio_pre_c * dml_pow(2, 19)); - - dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); - dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); - dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); - dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); - - disp_dlg_regs->refcyc_per_vm_group_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_group_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); - disp_dlg_regs->refcyc_per_vm_group_flip = (dml_uint_t)(dml_get_refcyc_per_vm_group_flip_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); - disp_dlg_regs->refcyc_per_vm_req_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_req_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10)); - 
disp_dlg_regs->refcyc_per_vm_req_flip = (dml_uint_t)(dml_get_refcyc_per_vm_req_flip_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10)); - - dst_y_per_pte_row_nom_l = dml_get_dst_y_per_pte_row_nom_l(mode_lib, pipe_idx); - dst_y_per_pte_row_nom_c = dml_get_dst_y_per_pte_row_nom_c(mode_lib, pipe_idx); - dst_y_per_meta_row_nom_l = dml_get_dst_y_per_meta_row_nom_l(mode_lib, pipe_idx); - dst_y_per_meta_row_nom_c = dml_get_dst_y_per_meta_row_nom_c(mode_lib, pipe_idx); - - refcyc_per_pte_group_nom_l = dml_get_refcyc_per_pte_group_nom_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_pte_group_nom_c = dml_get_refcyc_per_pte_group_nom_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_pte_group_vblank_l = dml_get_refcyc_per_pte_group_vblank_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_pte_group_vblank_c = dml_get_refcyc_per_pte_group_vblank_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_pte_group_flip_l = dml_get_refcyc_per_pte_group_flip_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_pte_group_flip_c = dml_get_refcyc_per_pte_group_flip_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - - refcyc_per_meta_chunk_nom_l = dml_get_refcyc_per_meta_chunk_nom_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_meta_chunk_nom_c = dml_get_refcyc_per_meta_chunk_nom_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_meta_chunk_vblank_l = dml_get_refcyc_per_meta_chunk_vblank_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_meta_chunk_vblank_c = dml_get_refcyc_per_meta_chunk_vblank_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_meta_chunk_flip_l = dml_get_refcyc_per_meta_chunk_flip_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - refcyc_per_meta_chunk_flip_c = dml_get_refcyc_per_meta_chunk_flip_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; - - disp_dlg_regs->dst_y_per_pte_row_nom_l = (dml_uint_t)(dst_y_per_pte_row_nom_l * dml_pow(2, 2)); - disp_dlg_regs->dst_y_per_pte_row_nom_c = (dml_uint_t)(dst_y_per_pte_row_nom_c * dml_pow(2, 2)); - disp_dlg_regs->dst_y_per_meta_row_nom_l = (dml_uint_t)(dst_y_per_meta_row_nom_l * dml_pow(2, 2)); - disp_dlg_regs->dst_y_per_meta_row_nom_c = (dml_uint_t)(dst_y_per_meta_row_nom_c * dml_pow(2, 2)); - disp_dlg_regs->refcyc_per_pte_group_nom_l = (dml_uint_t)(refcyc_per_pte_group_nom_l); - disp_dlg_regs->refcyc_per_pte_group_nom_c = (dml_uint_t)(refcyc_per_pte_group_nom_c); - disp_dlg_regs->refcyc_per_pte_group_vblank_l = (dml_uint_t)(refcyc_per_pte_group_vblank_l); - disp_dlg_regs->refcyc_per_pte_group_vblank_c = (dml_uint_t)(refcyc_per_pte_group_vblank_c); - disp_dlg_regs->refcyc_per_pte_group_flip_l = (dml_uint_t)(refcyc_per_pte_group_flip_l); - disp_dlg_regs->refcyc_per_pte_group_flip_c = (dml_uint_t)(refcyc_per_pte_group_flip_c); - disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (dml_uint_t)(refcyc_per_meta_chunk_nom_l); - disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (dml_uint_t)(refcyc_per_meta_chunk_nom_c); - disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (dml_uint_t)(refcyc_per_meta_chunk_vblank_l); - disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (dml_uint_t)(refcyc_per_meta_chunk_vblank_c); - disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (dml_uint_t)(refcyc_per_meta_chunk_flip_l); - disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (dml_uint_t)(refcyc_per_meta_chunk_flip_c); - disp_dlg_regs->refcyc_per_line_delivery_pre_l = (dml_uint_t)dml_floor(refcyc_per_line_delivery_pre_l, 1); - disp_dlg_regs->refcyc_per_line_delivery_l = 
(dml_uint_t)dml_floor(refcyc_per_line_delivery_l, 1);
- disp_dlg_regs->refcyc_per_line_delivery_pre_c = (dml_uint_t)dml_floor(refcyc_per_line_delivery_pre_c, 1);
- disp_dlg_regs->refcyc_per_line_delivery_c = (dml_uint_t)dml_floor(refcyc_per_line_delivery_c, 1);
-
- disp_dlg_regs->chunk_hdl_adjust_cur0 = 3;
- disp_dlg_regs->dst_y_offset_cur0 = 0;
- disp_dlg_regs->chunk_hdl_adjust_cur1 = 3;
- disp_dlg_regs->dst_y_offset_cur1 = 0;
-
- disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
-
- disp_ttu_regs->refcyc_per_req_delivery_pre_l = (dml_uint_t)(refcyc_per_req_delivery_pre_l * dml_pow(2, 10));
- disp_ttu_regs->refcyc_per_req_delivery_l = (dml_uint_t)(refcyc_per_req_delivery_l * dml_pow(2, 10));
- disp_ttu_regs->refcyc_per_req_delivery_pre_c = (dml_uint_t)(refcyc_per_req_delivery_pre_c * dml_pow(2, 10));
- disp_ttu_regs->refcyc_per_req_delivery_c = (dml_uint_t)(refcyc_per_req_delivery_c * dml_pow(2, 10));
- disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = (dml_uint_t)(refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
- disp_ttu_regs->refcyc_per_req_delivery_cur0 = (dml_uint_t)(refcyc_per_req_delivery_cur0 * dml_pow(2, 10));
- disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = 0;
- disp_ttu_regs->refcyc_per_req_delivery_cur1 = 0;
- disp_ttu_regs->qos_level_low_wm = 0;
-
- disp_ttu_regs->qos_level_high_wm = (dml_uint_t)(4.0 * (dml_float_t)htotal * ref_freq_to_pix_freq);
-
- disp_ttu_regs->qos_level_flip = 14;
- disp_ttu_regs->qos_level_fixed_l = 8;
- disp_ttu_regs->qos_level_fixed_c = 8;
- disp_ttu_regs->qos_level_fixed_cur0 = 8;
- disp_ttu_regs->qos_ramp_disable_l = 0;
- disp_ttu_regs->qos_ramp_disable_c = 0;
- disp_ttu_regs->qos_ramp_disable_cur0 = 0;
- disp_ttu_regs->min_ttu_vblank = (dml_uint_t)(min_ttu_vblank * refclk_freq_in_mhz);
-
- // CHECK for HW registers' range, assert or clamp
- ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13));
- ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
- ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13));
- ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
- if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (dml_uint_t)dml_pow(2, 23))
- disp_dlg_regs->refcyc_per_vm_group_vblank = (dml_uint_t)(dml_pow(2, 23) - 1);
-
- if (disp_dlg_regs->refcyc_per_vm_group_flip >= (dml_uint_t)dml_pow(2, 23))
- disp_dlg_regs->refcyc_per_vm_group_flip = (dml_uint_t)(dml_pow(2, 23) - 1);
-
- if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (dml_uint_t)dml_pow(2, 23))
- disp_dlg_regs->refcyc_per_vm_req_vblank = (dml_uint_t)(dml_pow(2, 23) - 1);
-
- if (disp_dlg_regs->refcyc_per_vm_req_flip >= (dml_uint_t)dml_pow(2, 23))
- disp_dlg_regs->refcyc_per_vm_req_flip = (dml_uint_t)(dml_pow(2, 23) - 1);
-
-
- ASSERT(disp_dlg_regs->dst_y_after_scaler < (dml_uint_t)8);
- ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (dml_uint_t)dml_pow(2, 13));
- ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (dml_uint_t)dml_pow(2, 17));
- if (dual_plane) {
- if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (dml_uint_t)dml_pow(2, 17)) { // FIXME what's so special about chroma, can we just assert?
- dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u > register max U15.2 %u\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (dml_uint_t)dml_pow(2, 17) - 1);
- }
- }
- ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (dml_uint_t)dml_pow(2, 17));
- ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_c < (dml_uint_t)dml_pow(2, 17));
-
- if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (dml_uint_t)dml_pow(2, 23))
- disp_dlg_regs->refcyc_per_pte_group_nom_l = (dml_uint_t)(dml_pow(2, 23) - 1);
- if (dual_plane) {
- if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (dml_uint_t)dml_pow(2, 23))
- disp_dlg_regs->refcyc_per_pte_group_nom_c = (dml_uint_t)(dml_pow(2, 23) - 1);
- }
- ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (dml_uint_t)dml_pow(2, 13));
- if (dual_plane) {
- ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (dml_uint_t)dml_pow(2, 13));
- }
-
- if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (dml_uint_t)dml_pow(2, 23))
- disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (dml_uint_t)(dml_pow(2, 23) - 1);
- if (dual_plane) {
- if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (dml_uint_t)dml_pow(2, 23))
- disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (dml_uint_t)(dml_pow(2, 23) - 1);
- }
- ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (dml_uint_t)dml_pow(2, 13));
- ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_c < (dml_uint_t)dml_pow(2, 13));
- ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (dml_uint_t)dml_pow(2, 13));
- ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (dml_uint_t)dml_pow(2, 13));
- ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (dml_uint_t)dml_pow(2, 13));
- ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (dml_uint_t)dml_pow(2, 13));
- ASSERT(disp_ttu_regs->qos_level_low_wm < (dml_uint_t) dml_pow(2, 14));
- ASSERT(disp_ttu_regs->qos_level_high_wm < (dml_uint_t) dml_pow(2, 14));
- ASSERT(disp_ttu_regs->min_ttu_vblank < (dml_uint_t) dml_pow(2, 24));
-
- dml_print_ttu_regs_st(disp_ttu_regs);
- dml_print_dlg_regs_st(disp_dlg_regs);
- dml_print("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
-}
-
-void dml_rq_dlg_get_arb_params(struct display_mode_lib_st *mode_lib, dml_display_arb_params_st *arb_param)
-{
- memset(arb_param, 0, sizeof(*arb_param));
- arb_param->max_req_outstanding = 256;
- arb_param->min_req_outstanding = 256; // turn off the sat level feature if this is set to max
- arb_param->sat_level_us = 60;
- arb_param->hvm_max_qos_commit_threshold = 0xf;
- arb_param->hvm_min_req_outstand_commit_threshold = 0xa;
- arb_param->compbuf_reserved_space_kbytes = 2 * 8; // assume max data chunk size of 8K
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.h b/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.h
deleted file mode 100644
index bf491cf0582d..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright 2023 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __DML_DISPLAY_RQ_DLG_CALC_H__
-#define __DML_DISPLAY_RQ_DLG_CALC_H__
-
-#include "display_mode_core_structs.h"
-#include "display_mode_lib_defines.h"
-
-struct display_mode_lib_st;
-
-// Function: dml_rq_dlg_get_rq_reg
-// Main entry point for test to get the register values out of this DML class.
-// This function calls other functions to calculate
-// and then populate the rq_regs struct
-// Input:
-// Assume mode_program is already called
-// Output:
-// rq_regs - struct that holds all the RQ register field values.
-// See also:
-
-void dml_rq_dlg_get_rq_reg(dml_display_rq_regs_st *rq_regs,
- struct display_mode_lib_st *mode_lib,
- const dml_uint_t pipe_idx);
-
-// Function: dml_rq_dlg_get_dlg_reg
-// Calculate and return DLG and TTU register struct given the system setting
-// Output:
-// dlg_regs - output DLG register struct
-// ttu_regs - output DLG TTU register struct
-// Input:
-// Assume mode_program is already called
-// pipe_idx - index that identifies the e2e_pipe_param that corresponds to this dlg
-void dml_rq_dlg_get_dlg_reg(dml_display_dlg_regs_st *dlg_regs,
- dml_display_ttu_regs_st *ttu_regs,
- struct display_mode_lib_st *mode_lib,
- const dml_uint_t pipe_idx);
-
-// Function: dml_rq_dlg_get_arb_params
-void dml_rq_dlg_get_arb_params(struct display_mode_lib_st *mode_lib, dml_display_arb_params_st *arb_param);
-
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml_logging.h b/drivers/gpu/drm/amd/display/dc/dml2/dml_logging.h
deleted file mode 100644
index 2a2f84e07ca8..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml_logging.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright 2023 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-#ifndef __DML_LOGGING_H__
-#define __DML_LOGGING_H__
-
-#define dml_print(...) ((void)0)
-
-#endif //__DML_LOGGING_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile b/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile
new file mode 100644
index 000000000000..97e068b6bf6b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/Makefile
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: MIT
+#
+# Copyright 2023 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Authors: AMD
+#
+# Makefile for dml2.
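+#
+# Editor's note, illustrative and not part of the original patch: the
+# frame_warn block below only widens the stack-frame warning threshold for
+# the FPU-heavy DML objects. Tracing one configuration through it: with
+# CONFIG_KASAN=y (which inflates stack frames), a clang build with
+# CONFIG_COMPILE_TEST=y picks frame_warn_limit := 4096, and since a typical
+# CONFIG_FRAME_WARN=2048 is less than that limit, those objects are built
+# with an effective -Wframe-larger-than=4096 instead of the global 2048.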
+ +dml2_ccflags := $(CC_FLAGS_FPU) +dml2_rcflags := $(CC_FLAGS_NO_FPU) + +ifneq ($(CONFIG_FRAME_WARN),0) + ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) + ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) + frame_warn_limit := 4096 + else + frame_warn_limit := 3072 + endif + else + frame_warn_limit := 2056 + endif + + ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y) + frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit) + endif +endif + +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0 +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_core +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_mcg/ +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_dpmm/ +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_pmo/ +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/dml2_standalone_libraries/ +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/src/inc +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/inc +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2_0/dml21/ + +CFLAGS_$(AMDDALPATH)/dc/dml2_0/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/display_mode_util.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_wrapper.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_utils.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_policy.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_translation_helper.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_mall_phantom.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml_display_rq_dlg_calc.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml2_dc_resource_mgmt.o := $(dml2_ccflags) + +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/display_mode_core.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/display_mode_util.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_wrapper.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_utils.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_policy.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_translation_helper.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_mall_phantom.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml_display_rq_dlg_calc.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml2_dc_resource_mgmt.o := $(dml2_rcflags) + +DML2 = display_mode_core.o display_mode_util.o dml2_wrapper.o \ + dml2_utils.o dml2_policy.o dml2_translation_helper.o dml2_dc_resource_mgmt.o dml2_mall_phantom.o \ + dml_display_rq_dlg_calc.o + +AMD_DAL_DML2 = $(addprefix $(AMDDALPATH)/dc/dml2_0/,$(DML2)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DML2) + +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.o := $(dml2_ccflags) $(frame_warn_flag) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := 
$(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml21_wrapper.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/dml21_translation_helper.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2_0/dml21/dml21_utils.o := $(dml2_ccflags) + +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/src/dml21_wrapper.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/dml21_translation_helper.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2_0/dml21/dml21_utils.o := $(dml2_rcflags) + +DML21 := src/dml2_top/dml2_top_interfaces.o +DML21 += src/dml2_top/dml2_top_soc15.o +DML21 += src/dml2_core/dml2_core_dcn4.o +DML21 += src/dml2_core/dml2_core_utils.o +DML21 += src/dml2_core/dml2_core_factory.o +DML21 += src/dml2_core/dml2_core_dcn4_calcs.o +DML21 += src/dml2_dpmm/dml2_dpmm_dcn4.o +DML21 += src/dml2_dpmm/dml2_dpmm_factory.o +DML21 += src/dml2_mcg/dml2_mcg_dcn4.o +DML21 += src/dml2_mcg/dml2_mcg_factory.o +DML21 += src/dml2_pmo/dml2_pmo_dcn3.o +DML21 += src/dml2_pmo/dml2_pmo_factory.o +DML21 += src/dml2_pmo/dml2_pmo_dcn4_fams2.o +DML21 += src/dml2_standalone_libraries/lib_float_math.o +DML21 += dml21_translation_helper.o +DML21 += dml21_wrapper.o +DML21 += dml21_utils.o + +AMD_DAL_DML21 = $(addprefix $(AMDDALPATH)/dc/dml2_0/dml21/,$(DML21)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DML21) diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h b/drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h new file mode 100644 index 000000000000..b954c9648fbe --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro 
Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __CMNTYPES_H__
+#define __CMNTYPES_H__
+
+#ifdef __GNUC__
+#if __GNUC__ == 4 && __GNUC_MINOR__ > 7
+typedef unsigned int uint;
+#endif
+#endif
+
+typedef signed char int8, *pint8;
+typedef signed short int16, *pint16;
+typedef signed int int32, *pint32;
+typedef signed long long int64, *pint64;
+
+typedef unsigned char uint8, *puint8;
+typedef unsigned short uint16, *puint16;
+typedef unsigned int uint32, *puint32;
+typedef unsigned long long uint64, *puint64;
+
+typedef unsigned long int ulong;
+typedef unsigned char uchar;
+typedef unsigned int uint;
+
+typedef void *pvoid;
+typedef char *pchar;
+typedef const void *const_pvoid;
+typedef const char *const_pchar;
+
+typedef struct rgba_struct {
+ uint8 a;
+ uint8 r;
+ uint8 g;
+ uint8 b;
+} rgba_t;
+
+typedef struct {
+ uint8 blue;
+ uint8 green;
+ uint8 red;
+ uint8 alpha;
+} gen_color_t;
+
+typedef union {
+ uint32 val;
+ gen_color_t f;
+} gen_color_u;
+
+//
+// Types to make it easy to get or set the bits of a float/double.
+// Avoids automatic casting from int to float and back.
+//
+#if 0
+typedef union {
+ uint32 i;
+ float f;
+} uintfloat32;
+
+typedef union {
+ uint64 i;
+ double f;
+} uintfloat64;
+
+#ifndef UNREFERENCED_PARAMETER
+#define UNREFERENCED_PARAMETER(x) (x = x)
+#endif
+#endif
+
+#endif //__CMNTYPES_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c
new file mode 100644
index 000000000000..c468f492b876
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c
@@ -0,0 +1,10337 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "display_mode_core.h" +#include "display_mode_util.h" +#include "display_mode_lib_defines.h" + +#include "dml_assert.h" + +#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 +#define TB_BORROWED_MAX 400 +#define DML_MAX_VSTARTUP_START 1023 + +// --------------------------- +// Declaration Begins +// --------------------------- +static void CalculateBytePerPixelAndBlockSizes( + enum dml_source_format_class SourcePixelFormat, + enum dml_swizzle_mode SurfaceTiling, + // Output + dml_uint_t *BytePerPixelY, + dml_uint_t *BytePerPixelC, + dml_float_t *BytePerPixelDETY, + dml_float_t *BytePerPixelDETC, + dml_uint_t *BlockHeight256BytesY, + dml_uint_t *BlockHeight256BytesC, + dml_uint_t *BlockWidth256BytesY, + dml_uint_t *BlockWidth256BytesC, + dml_uint_t *MacroTileHeightY, + dml_uint_t *MacroTileHeightC, + dml_uint_t *MacroTileWidthY, + dml_uint_t *MacroTileWidthC); + +static dml_float_t CalculateWriteBackDISPCLK( + enum dml_source_format_class WritebackPixelFormat, + dml_float_t PixelClock, + dml_float_t WritebackHRatio, + dml_float_t WritebackVRatio, + dml_uint_t WritebackHTaps, + dml_uint_t WritebackVTaps, + dml_uint_t WritebackSourceWidth, + dml_uint_t WritebackDestinationWidth, + dml_uint_t HTotal, + dml_uint_t WritebackLineBufferSize, + dml_float_t DISPCLKDPPCLKVCOSpeed); + +static void CalculateVMRowAndSwath( + struct display_mode_lib_scratch_st *s, + struct CalculateVMRowAndSwath_params_st *p); + +static void CalculateOutputLink( + dml_float_t PHYCLKPerState, + dml_float_t PHYCLKD18PerState, + dml_float_t PHYCLKD32PerState, + dml_float_t Downspreading, + dml_bool_t IsMainSurfaceUsingTheIndicatedTiming, + enum dml_output_encoder_class Output, + enum dml_output_format_class OutputFormat, + dml_uint_t HTotal, + dml_uint_t HActive, + dml_float_t PixelClockBackEnd, + dml_float_t ForcedOutputLinkBPP, + dml_uint_t DSCInputBitPerComponent, + dml_uint_t NumberOfDSCSlices, + dml_float_t AudioSampleRate, + dml_uint_t AudioSampleLayout, + enum dml_odm_mode ODMModeNoDSC, + enum dml_odm_mode ODMModeDSC, + enum dml_dsc_enable DSCEnable, + dml_uint_t OutputLinkDPLanes, + enum dml_output_link_dp_rate OutputLinkDPRate, + + // Output + dml_bool_t *RequiresDSC, + dml_bool_t *RequiresFEC, + dml_float_t *OutBpp, + enum dml_output_type_and_rate__type *OutputType, + enum dml_output_type_and_rate__rate *OutputRate, + dml_uint_t *RequiredSlots); + +static void CalculateODMMode( + dml_uint_t MaximumPixelsPerLinePerDSCUnit, + dml_uint_t HActive, + enum dml_output_encoder_class Output, + enum dml_output_format_class OutputFormat, + enum dml_odm_use_policy ODMUse, + dml_float_t StateDispclk, + dml_float_t MaxDispclk, + dml_bool_t DSCEnable, + dml_uint_t TotalNumberOfActiveDPP, + dml_uint_t MaxNumDPP, + dml_float_t PixelClock, + dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, + dml_float_t DISPCLKRampingMargin, + dml_float_t DISPCLKDPPCLKVCOSpeed, + dml_uint_t NumberOfDSCSlices, + + // Output + dml_bool_t *TotalAvailablePipesSupport, + dml_uint_t *NumberOfDPP, + enum dml_odm_mode *ODMMode, + 
dml_float_t *RequiredDISPCLKPerSurface); + +static dml_float_t CalculateRequiredDispclk( + enum dml_odm_mode ODMMode, + dml_float_t PixelClock, + dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, + dml_float_t DISPCLKRampingMargin, + dml_float_t DISPCLKDPPCLKVCOSpeed, + dml_float_t MaxDispclkSingle); + +static void CalculateSinglePipeDPPCLKAndSCLThroughput( + dml_float_t HRatio, + dml_float_t HRatioChroma, + dml_float_t VRatio, + dml_float_t VRatioChroma, + dml_float_t MaxDCHUBToPSCLThroughput, + dml_float_t MaxPSCLToLBThroughput, + dml_float_t PixelClock, + enum dml_source_format_class SourcePixelFormat, + dml_uint_t HTaps, + dml_uint_t HTapsChroma, + dml_uint_t VTaps, + dml_uint_t VTapsChroma, + + // Output + dml_float_t *PSCL_THROUGHPUT, + dml_float_t *PSCL_THROUGHPUT_CHROMA, + dml_float_t *DPPCLKUsingSingleDPP); + +static void CalculateDPPCLK( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, + dml_float_t DISPCLKDPPCLKVCOSpeed, + dml_float_t DPPCLKUsingSingleDPP[], + dml_uint_t DPPPerSurface[], + + // Output + dml_float_t *GlobalDPPCLK, + dml_float_t Dppclk[]); + +static void CalculateMALLUseForStaticScreen( + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t MALLAllocatedForDCNFinal, + enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen, + dml_uint_t SurfaceSizeInMALL[], + dml_bool_t one_row_per_frame_fits_in_buffer[], + + // Output + dml_bool_t UsesMALLForStaticScreen[]); + +static dml_uint_t dscceComputeDelay( + dml_uint_t bpc, + dml_float_t BPP, + dml_uint_t sliceWidth, + dml_uint_t numSlices, + enum dml_output_format_class pixelFormat, + enum dml_output_encoder_class Output); + +static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, + enum dml_output_encoder_class Output); + +static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch, + struct CalculatePrefetchSchedule_params_st *p); + +static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed); + +static void CalculateDCCConfiguration( + dml_bool_t DCCEnabled, + dml_bool_t DCCProgrammingAssumesScanDirectionUnknown, + enum dml_source_format_class SourcePixelFormat, + dml_uint_t SurfaceWidthLuma, + dml_uint_t SurfaceWidthChroma, + dml_uint_t SurfaceHeightLuma, + dml_uint_t SurfaceHeightChroma, + dml_uint_t nomDETInKByte, + dml_uint_t RequestHeight256ByteLuma, + dml_uint_t RequestHeight256ByteChroma, + enum dml_swizzle_mode TilingFormat, + dml_uint_t BytePerPixelY, + dml_uint_t BytePerPixelC, + dml_float_t BytePerPixelDETY, + dml_float_t BytePerPixelDETC, + enum dml_rotation_angle SourceScan, + // Output + dml_uint_t *MaxUncompressedBlockLuma, + dml_uint_t *MaxUncompressedBlockChroma, + dml_uint_t *MaxCompressedBlockLuma, + dml_uint_t *MaxCompressedBlockChroma, + dml_uint_t *IndependentBlockLuma, + dml_uint_t *IndependentBlockChroma); + +static dml_uint_t CalculatePrefetchSourceLines( + dml_float_t VRatio, + dml_uint_t VTaps, + dml_bool_t Interlace, + dml_bool_t ProgressiveToInterlaceUnitInOPP, + dml_uint_t SwathHeight, + enum dml_rotation_angle SourceScan, + dml_bool_t ViewportStationary, + dml_uint_t SwathWidth, + dml_uint_t ViewportHeight, + dml_uint_t ViewportXStart, + dml_uint_t ViewportYStart, + + // Output + dml_uint_t *VInitPreFill, + dml_uint_t *MaxNumSwath); + +static dml_uint_t CalculateVMAndRowBytes( + dml_bool_t ViewportStationary, + dml_bool_t DCCEnable, + dml_uint_t NumberOfDPPs, + dml_uint_t BlockHeight256Bytes, + dml_uint_t BlockWidth256Bytes, + enum dml_source_format_class 
SourcePixelFormat, + dml_uint_t SurfaceTiling, + dml_uint_t BytePerPixel, + enum dml_rotation_angle SourceScan, + dml_uint_t SwathWidth, + dml_uint_t ViewportHeight, + dml_uint_t ViewportXStart, + dml_uint_t ViewportYStart, + dml_bool_t GPUVMEnable, + dml_uint_t GPUVMMaxPageTableLevels, + dml_uint_t GPUVMMinPageSizeKBytes, + dml_uint_t PTEBufferSizeInRequests, + dml_uint_t Pitch, + dml_uint_t DCCMetaPitch, + dml_uint_t MacroTileWidth, + dml_uint_t MacroTileHeight, + + // Output + dml_uint_t *MetaRowByte, + dml_uint_t *PixelPTEBytesPerRow, + dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check + dml_uint_t *dpte_row_width_ub, + dml_uint_t *dpte_row_height, + dml_uint_t *dpte_row_height_linear, + dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame, + dml_uint_t *dpte_row_width_ub_one_row_per_frame, + dml_uint_t *dpte_row_height_one_row_per_frame, + dml_uint_t *MetaRequestWidth, + dml_uint_t *MetaRequestHeight, + dml_uint_t *meta_row_width, + dml_uint_t *meta_row_height, + dml_uint_t *PixelPTEReqWidth, + dml_uint_t *PixelPTEReqHeight, + dml_uint_t *PTERequestSize, + dml_uint_t *DPDE0BytesFrame, + dml_uint_t *MetaPTEBytesFrame); + +static dml_float_t CalculateTWait( + dml_uint_t PrefetchMode, + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, + dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + dml_bool_t DRRDisplay, + dml_float_t DRAMClockChangeLatency, + dml_float_t FCLKChangeLatency, + dml_float_t UrgentLatency, + dml_float_t SREnterPlusExitTime); + +static void CalculatePrefetchMode( + enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank, + dml_uint_t *MinPrefetchMode, + dml_uint_t *MaxPrefetchMode); + +static void CalculateRowBandwidth( + dml_bool_t GPUVMEnable, + enum dml_source_format_class SourcePixelFormat, + dml_float_t VRatio, + dml_float_t VRatioChroma, + dml_bool_t DCCEnable, + dml_float_t LineTime, + dml_uint_t MetaRowByteLuma, + dml_uint_t MetaRowByteChroma, + dml_uint_t meta_row_height_luma, + dml_uint_t meta_row_height_chroma, + dml_uint_t PixelPTEBytesPerRowLuma, + dml_uint_t PixelPTEBytesPerRowChroma, + dml_uint_t dpte_row_height_luma, + dml_uint_t dpte_row_height_chroma, + // Output + dml_float_t *meta_row_bw, + dml_float_t *dpte_row_bw); + +static void CalculateFlipSchedule( + dml_float_t HostVMInefficiencyFactor, + dml_float_t UrgentExtraLatency, + dml_float_t UrgentLatency, + dml_uint_t GPUVMMaxPageTableLevels, + dml_bool_t HostVMEnable, + dml_uint_t HostVMMaxNonCachedPageTableLevels, + dml_bool_t GPUVMEnable, + dml_uint_t HostVMMinPageSize, + dml_float_t PDEAndMetaPTEBytesPerFrame, + dml_float_t MetaRowBytes, + dml_float_t DPTEBytesPerRow, + dml_float_t BandwidthAvailableForImmediateFlip, + dml_uint_t TotImmediateFlipBytes, + enum dml_source_format_class SourcePixelFormat, + dml_float_t LineTime, + dml_float_t VRatio, + dml_float_t VRatioChroma, + dml_float_t Tno_bw, + dml_bool_t DCCEnable, + dml_uint_t dpte_row_height, + dml_uint_t meta_row_height, + dml_uint_t dpte_row_height_chroma, + dml_uint_t meta_row_height_chroma, + dml_bool_t use_one_row_for_frame_flip, + + // Output + dml_float_t *DestinationLinesToRequestVMInImmediateFlip, + dml_float_t *DestinationLinesToRequestRowInImmediateFlip, + dml_float_t *final_flip_bw, + dml_bool_t *ImmediateFlipSupportedForPipe); + +static dml_float_t CalculateWriteBackDelay( + enum dml_source_format_class WritebackPixelFormat, + dml_float_t WritebackHRatio, + dml_float_t WritebackVRatio, + dml_uint_t WritebackVTaps, + dml_uint_t WritebackDestinationWidth, + dml_uint_t 
WritebackDestinationHeight, + dml_uint_t WritebackSourceHeight, + dml_uint_t HTotal); + +static void CalculateVUpdateAndDynamicMetadataParameters( + dml_uint_t MaxInterDCNTileRepeaters, + dml_float_t Dppclk, + dml_float_t DISPCLK, + dml_float_t DCFClkDeepSleep, + dml_float_t PixelClock, + dml_uint_t HTotal, + dml_uint_t VBlank, + dml_uint_t DynamicMetadataTransmittedBytes, + dml_uint_t DynamicMetadataLinesBeforeActiveRequired, + dml_uint_t InterlaceEnable, + dml_bool_t ProgressiveToInterlaceUnitInOPP, + dml_float_t *TSetup, + dml_float_t *Tdmbf, + dml_float_t *Tdmec, + dml_float_t *Tdmsks, + dml_uint_t *VUpdateOffsetPix, + dml_uint_t *VUpdateWidthPix, + dml_uint_t *VReadyOffsetPix); + +static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported); + +static dml_float_t TruncToValidBPP( + dml_float_t LinkBitRate, + dml_uint_t Lanes, + dml_uint_t HTotal, + dml_uint_t HActive, + dml_float_t PixelClock, + dml_float_t DesiredBPP, + dml_bool_t DSCEnable, + enum dml_output_encoder_class Output, + enum dml_output_format_class Format, + dml_uint_t DSCInputBitPerComponent, + dml_uint_t DSCSlices, + dml_uint_t AudioRate, + dml_uint_t AudioLayout, + enum dml_odm_mode ODMModeNoDSC, + enum dml_odm_mode ODMModeDSC, + // Output + dml_uint_t *RequiredSlotsSingle); + +static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct display_mode_lib_scratch_st *s, + struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p); + +static void CalculateDCFCLKDeepSleep( + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t BytePerPixelY[], + dml_uint_t BytePerPixelC[], + dml_float_t VRatio[], + dml_float_t VRatioChroma[], + dml_uint_t SwathWidthY[], + dml_uint_t SwathWidthC[], + dml_uint_t DPPPerSurface[], + dml_float_t HRatio[], + dml_float_t HRatioChroma[], + dml_float_t PixelClock[], + dml_float_t PSCL_THROUGHPUT[], + dml_float_t PSCL_THROUGHPUT_CHROMA[], + dml_float_t Dppclk[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_uint_t ReturnBusWidth, + + // Output + dml_float_t *DCFCLKDeepSleep); + +static void CalculateUrgentBurstFactor( + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, + dml_uint_t swath_width_luma_ub, + dml_uint_t swath_width_chroma_ub, + dml_uint_t SwathHeightY, + dml_uint_t SwathHeightC, + dml_float_t LineTime, + dml_float_t UrgentLatency, + dml_float_t CursorBufferSize, + dml_uint_t CursorWidth, + dml_uint_t CursorBPP, + dml_float_t VRatio, + dml_float_t VRatioC, + dml_float_t BytePerPixelInDETY, + dml_float_t BytePerPixelInDETC, + dml_uint_t DETBufferSizeY, + dml_uint_t DETBufferSizeC, + // Output + dml_float_t *UrgentBurstFactorCursor, + dml_float_t *UrgentBurstFactorLuma, + dml_float_t *UrgentBurstFactorChroma, + dml_bool_t *NotEnoughUrgentLatencyHiding); + +static dml_float_t RequiredDTBCLK( + dml_bool_t DSCEnable, + dml_float_t PixelClock, + enum dml_output_format_class OutputFormat, + dml_float_t OutputBpp, + dml_uint_t DSCSlices, + dml_uint_t HTotal, + dml_uint_t HActive, + dml_uint_t AudioRate, + dml_uint_t AudioLayoutSingle); + +static void UseMinimumDCFCLK( + struct display_mode_lib_scratch_st *scratch, + struct UseMinimumDCFCLK_params_st *p); + +static void CalculatePixelDeliveryTimes( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t VRatio[], + dml_float_t VRatioChroma[], + dml_float_t VRatioPrefetchY[], + dml_float_t VRatioPrefetchC[], + dml_uint_t swath_width_luma_ub[], + dml_uint_t swath_width_chroma_ub[], + dml_uint_t DPPPerSurface[], + 
dml_float_t HRatio[], + dml_float_t HRatioChroma[], + dml_float_t PixelClock[], + dml_float_t PSCL_THROUGHPUT[], + dml_float_t PSCL_THROUGHPUT_CHROMA[], + dml_float_t Dppclk[], + dml_uint_t BytePerPixelC[], + enum dml_rotation_angle SourceScan[], + dml_uint_t NumberOfCursors[], + dml_uint_t CursorWidth[], + dml_uint_t CursorBPP[], + dml_uint_t BlockWidth256BytesY[], + dml_uint_t BlockHeight256BytesY[], + dml_uint_t BlockWidth256BytesC[], + dml_uint_t BlockHeight256BytesC[], + + // Output + dml_float_t DisplayPipeLineDeliveryTimeLuma[], + dml_float_t DisplayPipeLineDeliveryTimeChroma[], + dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[], + dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[], + dml_float_t DisplayPipeRequestDeliveryTimeLuma[], + dml_float_t DisplayPipeRequestDeliveryTimeChroma[], + dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[], + dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[], + dml_float_t CursorRequestDeliveryTime[], + dml_float_t CursorRequestDeliveryTimePrefetch[]); + +static void CalculateMetaAndPTETimes( + dml_bool_t use_one_row_for_frame[], + dml_uint_t NumberOfActiveSurfaces, + dml_bool_t GPUVMEnable, + dml_uint_t MetaChunkSize, + dml_uint_t MinMetaChunkSizeBytes, + dml_uint_t HTotal[], + dml_float_t VRatio[], + dml_float_t VRatioChroma[], + dml_float_t DestinationLinesToRequestRowInVBlank[], + dml_float_t DestinationLinesToRequestRowInImmediateFlip[], + dml_bool_t DCCEnable[], + dml_float_t PixelClock[], + dml_uint_t BytePerPixelY[], + dml_uint_t BytePerPixelC[], + enum dml_rotation_angle SourceScan[], + dml_uint_t dpte_row_height[], + dml_uint_t dpte_row_height_chroma[], + dml_uint_t meta_row_width[], + dml_uint_t meta_row_width_chroma[], + dml_uint_t meta_row_height[], + dml_uint_t meta_row_height_chroma[], + dml_uint_t meta_req_width[], + dml_uint_t meta_req_width_chroma[], + dml_uint_t meta_req_height[], + dml_uint_t meta_req_height_chroma[], + dml_uint_t dpte_group_bytes[], + dml_uint_t PTERequestSizeY[], + dml_uint_t PTERequestSizeC[], + dml_uint_t PixelPTEReqWidthY[], + dml_uint_t PixelPTEReqHeightY[], + dml_uint_t PixelPTEReqWidthC[], + dml_uint_t PixelPTEReqHeightC[], + dml_uint_t dpte_row_width_luma_ub[], + dml_uint_t dpte_row_width_chroma_ub[], + + // Output + dml_float_t DST_Y_PER_PTE_ROW_NOM_L[], + dml_float_t DST_Y_PER_PTE_ROW_NOM_C[], + dml_float_t DST_Y_PER_META_ROW_NOM_L[], + dml_float_t DST_Y_PER_META_ROW_NOM_C[], + dml_float_t TimePerMetaChunkNominal[], + dml_float_t TimePerChromaMetaChunkNominal[], + dml_float_t TimePerMetaChunkVBlank[], + dml_float_t TimePerChromaMetaChunkVBlank[], + dml_float_t TimePerMetaChunkFlip[], + dml_float_t TimePerChromaMetaChunkFlip[], + dml_float_t time_per_pte_group_nom_luma[], + dml_float_t time_per_pte_group_vblank_luma[], + dml_float_t time_per_pte_group_flip_luma[], + dml_float_t time_per_pte_group_nom_chroma[], + dml_float_t time_per_pte_group_vblank_chroma[], + dml_float_t time_per_pte_group_flip_chroma[]); + +static void CalculateVMGroupAndRequestTimes( + dml_uint_t NumberOfActiveSurfaces, + dml_bool_t GPUVMEnable, + dml_uint_t GPUVMMaxPageTableLevels, + dml_uint_t HTotal[], + dml_uint_t BytePerPixelC[], + dml_float_t DestinationLinesToRequestVMInVBlank[], + dml_float_t DestinationLinesToRequestVMInImmediateFlip[], + dml_bool_t DCCEnable[], + dml_float_t PixelClock[], + dml_uint_t dpte_row_width_luma_ub[], + dml_uint_t dpte_row_width_chroma_ub[], + dml_uint_t vm_group_bytes[], + dml_uint_t dpde0_bytes_per_frame_ub_l[], + dml_uint_t dpde0_bytes_per_frame_ub_c[], + dml_uint_t 
meta_pte_bytes_per_frame_ub_l[], + dml_uint_t meta_pte_bytes_per_frame_ub_c[], + + // Output + dml_float_t TimePerVMGroupVBlank[], + dml_float_t TimePerVMGroupFlip[], + dml_float_t TimePerVMRequestVBlank[], + dml_float_t TimePerVMRequestFlip[]); + +static void CalculateStutterEfficiency( + struct display_mode_lib_scratch_st *scratch, + struct CalculateStutterEfficiency_params_st *p); + +static void CalculateSwathAndDETConfiguration( + struct display_mode_lib_scratch_st *scratch, + struct CalculateSwathAndDETConfiguration_params_st *p); + +static void CalculateSwathWidth( + dml_bool_t ForceSingleDPP, + dml_uint_t NumberOfActiveSurfaces, + enum dml_source_format_class SourcePixelFormat[], + enum dml_rotation_angle SourceScan[], + dml_bool_t ViewportStationary[], + dml_uint_t ViewportWidth[], + dml_uint_t ViewportHeight[], + dml_uint_t ViewportXStart[], + dml_uint_t ViewportYStart[], + dml_uint_t ViewportXStartC[], + dml_uint_t ViewportYStartC[], + dml_uint_t SurfaceWidthY[], + dml_uint_t SurfaceWidthC[], + dml_uint_t SurfaceHeightY[], + dml_uint_t SurfaceHeightC[], + enum dml_odm_mode ODMMode[], + dml_uint_t BytePerPixY[], + dml_uint_t BytePerPixC[], + dml_uint_t Read256BytesBlockHeightY[], + dml_uint_t Read256BytesBlockHeightC[], + dml_uint_t Read256BytesBlockWidthY[], + dml_uint_t Read256BytesBlockWidthC[], + dml_uint_t BlendingAndTiming[], + dml_uint_t HActive[], + dml_float_t HRatio[], + dml_uint_t DPPPerSurface[], + + // Output + dml_uint_t SwathWidthSingleDPPY[], + dml_uint_t SwathWidthSingleDPPC[], + dml_uint_t SwathWidthY[], + dml_uint_t SwathWidthC[], + dml_uint_t MaximumSwathHeightY[], + dml_uint_t MaximumSwathHeightC[], + dml_uint_t swath_width_luma_ub[], + dml_uint_t swath_width_chroma_ub[]); + +static dml_float_t CalculateExtraLatency( + dml_uint_t RoundTripPingLatencyCycles, + dml_uint_t ReorderingBytes, + dml_float_t DCFCLK, + dml_uint_t TotalNumberOfActiveDPP, + dml_uint_t PixelChunkSizeInKByte, + dml_uint_t TotalNumberOfDCCActiveDPP, + dml_uint_t MetaChunkSize, + dml_float_t ReturnBW, + dml_bool_t GPUVMEnable, + dml_bool_t HostVMEnable, + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t NumberOfDPP[], + dml_uint_t dpte_group_bytes[], + dml_float_t HostVMInefficiencyFactor, + dml_uint_t HostVMMinPageSize, + dml_uint_t HostVMMaxNonCachedPageTableLevels); + +static dml_uint_t CalculateExtraLatencyBytes( + dml_uint_t ReorderingBytes, + dml_uint_t TotalNumberOfActiveDPP, + dml_uint_t PixelChunkSizeInKByte, + dml_uint_t TotalNumberOfDCCActiveDPP, + dml_uint_t MetaChunkSize, + dml_bool_t GPUVMEnable, + dml_bool_t HostVMEnable, + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t NumberOfDPP[], + dml_uint_t dpte_group_bytes[], + dml_float_t HostVMInefficiencyFactor, + dml_uint_t HostVMMinPageSize, + dml_uint_t HostVMMaxNonCachedPageTableLevels); + +static dml_float_t CalculateUrgentLatency( + dml_float_t UrgentLatencyPixelDataOnly, + dml_float_t UrgentLatencyPixelMixedWithVMData, + dml_float_t UrgentLatencyVMDataOnly, + dml_bool_t DoUrgentLatencyAdjustment, + dml_float_t UrgentLatencyAdjustmentFabricClockComponent, + dml_float_t UrgentLatencyAdjustmentFabricClockReference, + dml_float_t FabricClockSingle); + +static dml_bool_t UnboundedRequest( + enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal, + dml_uint_t TotalNumberOfActiveDPP, + dml_bool_t NoChromaOrLinear, + enum dml_output_encoder_class Output); + +static void CalculateSurfaceSizeInMall( + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t MALLAllocatedForDCN, + enum dml_use_mall_for_static_screen_mode 
UseMALLForStaticScreen[],
+ dml_bool_t DCCEnable[],
+ dml_bool_t ViewportStationary[],
+ dml_uint_t ViewportXStartY[],
+ dml_uint_t ViewportYStartY[],
+ dml_uint_t ViewportXStartC[],
+ dml_uint_t ViewportYStartC[],
+ dml_uint_t ViewportWidthY[],
+ dml_uint_t ViewportHeightY[],
+ dml_uint_t BytesPerPixelY[],
+ dml_uint_t ViewportWidthC[],
+ dml_uint_t ViewportHeightC[],
+ dml_uint_t BytesPerPixelC[],
+ dml_uint_t SurfaceWidthY[],
+ dml_uint_t SurfaceWidthC[],
+ dml_uint_t SurfaceHeightY[],
+ dml_uint_t SurfaceHeightC[],
+ dml_uint_t Read256BytesBlockWidthY[],
+ dml_uint_t Read256BytesBlockWidthC[],
+ dml_uint_t Read256BytesBlockHeightY[],
+ dml_uint_t Read256BytesBlockHeightC[],
+ dml_uint_t ReadBlockWidthY[],
+ dml_uint_t ReadBlockWidthC[],
+ dml_uint_t ReadBlockHeightY[],
+ dml_uint_t ReadBlockHeightC[],
+
+ // Output
+ dml_uint_t SurfaceSizeInMALL[],
+ dml_bool_t *ExceededMALLSize);
+
+static void CalculateDETBufferSize(
+ dml_uint_t DETSizeOverride[],
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ dml_bool_t ForceSingleDPP,
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_bool_t UnboundedRequestEnabled,
+ dml_uint_t nomDETInKByte,
+ dml_uint_t MaxTotalDETInKByte,
+ dml_uint_t ConfigReturnBufferSizeInKByte,
+ dml_uint_t MinCompressedBufferSizeInKByte,
+ dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
+ dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
+ enum dml_source_format_class SourcePixelFormat[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_uint_t RoundedUpMaxSwathSizeBytesY[],
+ dml_uint_t RoundedUpMaxSwathSizeBytesC[],
+ dml_uint_t DPPPerSurface[],
+ // Output
+ dml_uint_t DETBufferSizeInKByte[],
+ dml_uint_t *CompressedBufferSizeInkByte);
+
+static void CalculateMaxDETAndMinCompressedBufferSize(
+ dml_uint_t ConfigReturnBufferSizeInKByte,
+ dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
+ dml_uint_t ROBBufferSizeInKByte,
+ dml_uint_t MaxNumDPP,
+ dml_bool_t nomDETInKByteOverrideEnable,
+ dml_uint_t nomDETInKByteOverrideValue,
+
+ // Output
+ dml_uint_t *MaxTotalDETInKByte,
+ dml_uint_t *nomDETInKByte,
+ dml_uint_t *MinCompressedBufferSizeInKByte);
+
+static dml_uint_t DSCDelayRequirement(
+ dml_bool_t DSCEnabled,
+ enum dml_odm_mode ODMMode,
+ dml_uint_t DSCInputBitPerComponent,
+ dml_float_t OutputBpp,
+ dml_uint_t HActive,
+ dml_uint_t HTotal,
+ dml_uint_t NumberOfDSCSlices,
+ enum dml_output_format_class OutputFormat,
+ enum dml_output_encoder_class Output,
+ dml_float_t PixelClock,
+ dml_float_t PixelClockBackEnd);
+
+static dml_bool_t CalculateVActiveBandwithSupport(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t ReturnBW,
+ dml_bool_t NotUrgentLatencyHiding[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_float_t cursor_bw[],
+ dml_float_t meta_row_bandwidth[],
+ dml_float_t dpte_row_bandwidth[],
+ dml_uint_t NumberOfDPP[],
+ dml_float_t UrgentBurstFactorLuma[],
+ dml_float_t UrgentBurstFactorChroma[],
+ dml_float_t UrgentBurstFactorCursor[]);
+
+static void CalculatePrefetchBandwithSupport(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t ReturnBW,
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ dml_bool_t NotUrgentLatencyHiding[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_float_t PrefetchBandwidthLuma[],
+ dml_float_t PrefetchBandwidthChroma[],
+ dml_float_t cursor_bw[],
+ dml_float_t meta_row_bandwidth[],
+ dml_float_t dpte_row_bandwidth[],
+ dml_float_t cursor_bw_pre[],
+ dml_float_t prefetch_vmrow_bw[],
+ dml_uint_t NumberOfDPP[],
+ dml_float_t UrgentBurstFactorLuma[],
+ dml_float_t UrgentBurstFactorChroma[],
+ dml_float_t UrgentBurstFactorCursor[],
+ dml_float_t UrgentBurstFactorLumaPre[],
+ dml_float_t UrgentBurstFactorChromaPre[],
+ dml_float_t UrgentBurstFactorCursorPre[],
+
+ // Output
+ dml_float_t *PrefetchBandwidth,
+ dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
+ dml_float_t *FractionOfUrgentBandwidth,
+ dml_bool_t *PrefetchBandwidthSupport);
+
+static dml_float_t CalculateBandwidthAvailableForImmediateFlip(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t ReturnBW,
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_float_t PrefetchBandwidthLuma[],
+ dml_float_t PrefetchBandwidthChroma[],
+ dml_float_t cursor_bw[],
+ dml_float_t cursor_bw_pre[],
+ dml_uint_t NumberOfDPP[],
+ dml_float_t UrgentBurstFactorLuma[],
+ dml_float_t UrgentBurstFactorChroma[],
+ dml_float_t UrgentBurstFactorCursor[],
+ dml_float_t UrgentBurstFactorLumaPre[],
+ dml_float_t UrgentBurstFactorChromaPre[],
+ dml_float_t UrgentBurstFactorCursorPre[]);
+
+static void CalculateImmediateFlipBandwithSupport(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t ReturnBW,
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
+ dml_float_t final_flip_bw[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_float_t PrefetchBandwidthLuma[],
+ dml_float_t PrefetchBandwidthChroma[],
+ dml_float_t cursor_bw[],
+ dml_float_t meta_row_bandwidth[],
+ dml_float_t dpte_row_bandwidth[],
+ dml_float_t cursor_bw_pre[],
+ dml_float_t prefetch_vmrow_bw[],
+ dml_uint_t NumberOfDPP[],
+ dml_float_t UrgentBurstFactorLuma[],
+ dml_float_t UrgentBurstFactorChroma[],
+ dml_float_t UrgentBurstFactorCursor[],
+ dml_float_t UrgentBurstFactorLumaPre[],
+ dml_float_t UrgentBurstFactorChromaPre[],
+ dml_float_t UrgentBurstFactorCursorPre[],
+
+ // Output
+ dml_float_t *TotalBandwidth,
+ dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
+ dml_float_t *FractionOfUrgentBandwidth,
+ dml_bool_t *ImmediateFlipBandwidthSupport);
+
+// ---------------------------
+// Declaration Ends
+// ---------------------------
+
+static dml_uint_t dscceComputeDelay(
+ dml_uint_t bpc,
+ dml_float_t BPP,
+ dml_uint_t sliceWidth,
+ dml_uint_t numSlices,
+ enum dml_output_format_class pixelFormat,
+ enum dml_output_encoder_class Output)
+{
+ // valid bpc = source bits per component in the set of {8, 10, 12}
+ // valid bpp = increments of 1/16 of a bit
+ // min = 6/7/8 in N420/N422/444, respectively
+ // max = such that compression is 1:1
+ //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
+ //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
+ //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
+
+ // fixed value
+ dml_uint_t rcModelSize = 8192;
+
+ // N422/N420 operate at 2 pixels per clock
+ dml_uint_t pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
+ Delay, pixels;
+
+ if (pixelFormat == dml_420)
+ pixelsPerClock = 2;
+ // #all other modes operate at 1 pixel per clock
+ else if (pixelFormat == dml_444)
+ pixelsPerClock = 1;
+ else if (pixelFormat == dml_n422)
+ pixelsPerClock = 2;
+ else
+ pixelsPerClock = 1;
+
+ //initial transmit delay as per PPS
+ initalXmitDelay = (dml_uint_t)(dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock, 1));
+
+ //compute
ssm delay + if (bpc == 8) + D = 81; + else if (bpc == 10) + D = 89; + else + D = 113; + + //divide by pixel per cycle to compute slice width as seen by DSC + w = sliceWidth / pixelsPerClock; + + //422 mode has an additional cycle of delay + if (pixelFormat == dml_420 || pixelFormat == dml_444 || pixelFormat == dml_n422) + s = 0; + else + s = 1; + + //main calculation for the dscce + ix = initalXmitDelay + 45; + wx = (w + 2) / 3; + p = 3 * wx - w; + l0 = ix / w; + a = ix + p * l0; + ax = (a + 2) / 3 + D + 6 + 1; + L = (ax + wx - 1) / wx; + if ((ix % w) == 0 && p != 0) + lstall = 1; + else + lstall = 0; + Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; + + //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels + pixels = Delay * 3 * pixelsPerClock; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: bpc: %u\n", __func__, bpc); + dml_print("DML::%s: BPP: %f\n", __func__, BPP); + dml_print("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); + dml_print("DML::%s: numSlices: %u\n", __func__, numSlices); + dml_print("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); + dml_print("DML::%s: Output: %u\n", __func__, Output); + dml_print("DML::%s: pixels: %u\n", __func__, pixels); +#endif + return pixels; +} + +static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, enum dml_output_encoder_class Output) +{ + dml_uint_t Delay = 0; + + if (pixelFormat == dml_420) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc gets pixels every other cycle + Delay = Delay + 2; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc gets pixels every other cycle + Delay = Delay + 13; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc gets pixels every other cycle + Delay = Delay + 3; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else if (pixelFormat == dml_n422) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 1; + // dscc - input deserializer + Delay = Delay + 5; + // dscc - input cdc fifo + Delay = Delay + 25; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 10; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc - output serializer + Delay = Delay + 1; + // dscc - cdc uncertainty + Delay = Delay + 2; + // sft + Delay = Delay + 1; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: pixelFormat = %u\n", __func__, pixelFormat); + dml_print("DML::%s: Delay = %u\n", __func__, Delay); +#endif + + return Delay; +} + +static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch, + struct CalculatePrefetchSchedule_params_st *p) +{ + struct CalculatePrefetchSchedule_locals_st *s = &scratch->CalculatePrefetchSchedule_locals; + + s->MyError = false; + s->DPPCycles = 0; + s->DISPCLKCycles = 0; + s->DSTTotalPixelsAfterScaler = 0.0; + s->LineTime = 0.0; + s->dst_y_prefetch_equ = 0.0; + s->prefetch_bw_oto = 0.0; + s->Tvm_oto = 0.0; + s->Tr0_oto = 0.0; + s->Tvm_oto_lines = 0.0; + s->Tr0_oto_lines = 0.0; + 
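+ /*
+  * Reviewer's gloss (a reader's sketch, not part of the change itself): the
+  * scratch fields being cleared here follow the naming scheme used throughout
+  * this function -- Tvm_* is the time to fetch the VM page tables, Tr0_* the
+  * time to fetch the first row of data PTEs and meta, Tsw the time to fetch
+  * enough pixel/cursor data to feed the scaler, and Tpre the sum of all of
+  * them.  The *_trips variants are derived from trip_to_mem round trips,
+  * while *_oto (presumably "one-to-one") and *_equ are two candidate
+  * schedules; the code below keeps whichever yields the smaller
+  * dst_y_prefetch.
+  */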
s->dst_y_prefetch_oto = 0.0; + s->TimeForFetchingMetaPTE = 0.0; + s->TimeForFetchingRowInVBlank = 0.0; + s->LinesToRequestPrefetchPixelData = 0.0; + s->HostVMDynamicLevelsTrips = 0; + s->trip_to_mem = 0.0; + s->Tvm_trips = 0.0; + s->Tr0_trips = 0.0; + s->Tvm_trips_rounded = 0.0; + s->Tr0_trips_rounded = 0.0; + s->max_Tsw = 0.0; + s->Lsw_oto = 0.0; + s->Tpre_rounded = 0.0; + s->prefetch_bw_equ = 0.0; + s->Tvm_equ = 0.0; + s->Tr0_equ = 0.0; + s->Tdmbf = 0.0; + s->Tdmec = 0.0; + s->Tdmsks = 0.0; + s->prefetch_sw_bytes = 0.0; + s->prefetch_bw_pr = 0.0; + s->bytes_pp = 0.0; + s->dep_bytes = 0.0; + s->min_Lsw_oto = 0.0; + s->Tsw_est1 = 0.0; + s->Tsw_est3 = 0.0; + + if (p->GPUVMEnable == true && p->HostVMEnable == true) { + s->HostVMDynamicLevelsTrips = p->HostVMMaxNonCachedPageTableLevels; + } else { + s->HostVMDynamicLevelsTrips = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + dml_print("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->GPUVMPageTableLevels); + dml_print("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); + dml_print("DML::%s: VStartup = %u\n", __func__, p->VStartup); + dml_print("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup); + dml_print("DML::%s: HostVMEnable = %u\n", __func__, p->HostVMEnable); + dml_print("DML::%s: HostVMInefficiencyFactor= %f\n", __func__, p->HostVMInefficiencyFactor); + dml_print("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); +#endif + CalculateVUpdateAndDynamicMetadataParameters( + p->MaxInterDCNTileRepeaters, + p->myPipe->Dppclk, + p->myPipe->Dispclk, + p->myPipe->DCFClkDeepSleep, + p->myPipe->PixelClock, + p->myPipe->HTotal, + p->myPipe->VBlank, + p->DynamicMetadataTransmittedBytes, + p->DynamicMetadataLinesBeforeActiveRequired, + p->myPipe->InterlaceEnable, + p->myPipe->ProgressiveToInterlaceUnitInOPP, + p->TSetup, + + // Output + &s->Tdmbf, + &s->Tdmec, + &s->Tdmsks, + p->VUpdateOffsetPix, + p->VUpdateWidthPix, + p->VReadyOffsetPix); + + s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; + s->trip_to_mem = p->UrgentLatency; + s->Tvm_trips = p->UrgentExtraLatency + s->trip_to_mem * (p->GPUVMPageTableLevels * (s->HostVMDynamicLevelsTrips + 1) - 1); + + if (p->DynamicMetadataVMEnabled == true) { + *p->Tdmdl = p->TWait + s->Tvm_trips + s->trip_to_mem; + } else { + *p->Tdmdl = p->TWait + p->UrgentExtraLatency; + } + +#ifdef __DML_VBA_ALLOW_DELTA__ + if (DynamicMetadataEnable == false) { + *Tdmdl = 0.0; + } +#endif + + if (p->DynamicMetadataEnable == true) { + if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { + *p->NotEnoughTimeForDynamicMetadata = true; + dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + } else { + *p->NotEnoughTimeForDynamicMetadata = false; + } + } else { + *p->NotEnoughTimeForDynamicMetadata = false; + } + + *p->Tdmdl_vm = (p->DynamicMetadataEnable == true && p->DynamicMetadataVMEnabled == true && p->GPUVMEnable == true ? 
p->TWait + s->Tvm_trips : 0); + + if (p->myPipe->ScalerEnabled) + s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL); + else + s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly); + + s->DPPCycles = (dml_uint_t)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor); + + s->DISPCLKCycles = (dml_uint_t)p->DISPCLKDelaySubtotal; + + if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0) + return true; + + *p->DSTXAfterScaler = (dml_uint_t) dml_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay, 1.0); + *p->DSTXAfterScaler = (dml_uint_t) dml_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH + + ((p->myPipe->ODMMode == dml_odm_mode_split_1to2 || p->myPipe->ODMMode == dml_odm_mode_mso_1to2) ? (dml_float_t)p->myPipe->HActive / 2.0 : 0) + + ((p->myPipe->ODMMode == dml_odm_mode_mso_1to4) ? (dml_float_t)p->myPipe->HActive * 3.0 / 4.0 : 0), 1.0); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); + dml_print("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); + dml_print("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); + dml_print("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); + dml_print("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); + dml_print("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); + dml_print("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); + dml_print("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); + dml_print("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); +#endif + + if (p->OutputFormat == dml_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) + *p->DSTYAfterScaler = 1; + else + *p->DSTYAfterScaler = 0; + + s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler; + *p->DSTYAfterScaler = (dml_uint_t)(dml_floor(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1)); + *p->DSTXAfterScaler = (dml_uint_t)(s->DSTTotalPixelsAfterScaler - ((dml_float_t) (*p->DSTYAfterScaler * p->myPipe->HTotal))); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); + dml_print("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); +#endif + + s->MyError = false; + + s->Tr0_trips = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); + + if (p->GPUVMEnable == true) { + s->Tvm_trips_rounded = dml_ceil(4.0 * s->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; + s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; + if (p->GPUVMPageTableLevels >= 3) { + *p->Tno_bw = p->UrgentExtraLatency + s->trip_to_mem * (dml_float_t) ((p->GPUVMPageTableLevels - 2) * (s->HostVMDynamicLevelsTrips + 1) - 1); + } else if (p->GPUVMPageTableLevels == 1 && p->myPipe->DCCEnable != true) { + s->Tr0_trips_rounded = dml_ceil(4.0 * p->UrgentExtraLatency / s->LineTime, 1.0) / 4.0 * s->LineTime; + *p->Tno_bw = p->UrgentExtraLatency; + } else { + *p->Tno_bw = 0; + } + } else if (p->myPipe->DCCEnable == true) { + s->Tvm_trips_rounded = s->LineTime / 4.0; + s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; + *p->Tno_bw = 0; + } else { + s->Tvm_trips_rounded = s->LineTime / 4.0; + s->Tr0_trips_rounded = 
s->LineTime / 2.0; + *p->Tno_bw = 0; + } + s->Tvm_trips_rounded = dml_max(s->Tvm_trips_rounded, s->LineTime / 4.0); + s->Tr0_trips_rounded = dml_max(s->Tr0_trips_rounded, s->LineTime / 4.0); + + if (p->myPipe->SourcePixelFormat == dml_420_8 || p->myPipe->SourcePixelFormat == dml_420_10 || p->myPipe->SourcePixelFormat == dml_420_12) { + s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4; + } else { + s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC; + } + + s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (dml_float_t)p->myPipe->DPPPerSurface; + if (p->myPipe->VRatio < 1.0) + s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr; + + s->max_Tsw = (dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); + + s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; + s->prefetch_bw_oto = dml_max(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); + + s->min_Lsw_oto = dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML_MAX_VRATIO_PRE_OTO__; + s->min_Lsw_oto = dml_max(s->min_Lsw_oto, 1.0); + s->Lsw_oto = dml_ceil(4.0 * dml_max(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0; + + if (p->GPUVMEnable == true) { + s->Tvm_oto = dml_max3( + s->Tvm_trips, + *p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_oto, + s->LineTime / 4.0); + } else + s->Tvm_oto = s->LineTime / 4.0; + + if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) { + s->Tr0_oto = dml_max4( + s->Tr0_trips, + (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto, + (s->LineTime - s->Tvm_oto)/2.0, + s->LineTime / 4.0); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, s->Tr0_trips); + dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime - s->Tvm_oto); + dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, s->LineTime / 4); +#endif + } else + s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 2.0; + + s->Tvm_oto_lines = dml_ceil(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0; + s->Tr0_oto_lines = dml_ceil(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0; + s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; + + s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal); + s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); + dml_print("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml_print("DML::%s: *Tno_bw = %f\n", __func__, *p->Tno_bw); + dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, p->UrgentExtraLatency); + dml_print("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + dml_print("DML::%s: BytePerPixelC = %u\n", __func__, 
p->myPipe->BytePerPixelC); + dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + dml_print("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); + dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes); + dml_print("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte); + dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + dml_print("DML::%s: Tvm_trips = %f\n", __func__, s->Tvm_trips); + dml_print("DML::%s: Tr0_trips = %f\n", __func__, s->Tr0_trips); + dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); + dml_print("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); + dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); + dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); + dml_print("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); + dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); +#endif + + s->dst_y_prefetch_equ = dml_floor(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; + s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; + + dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); + + dml_print("DML::%s: LineTime: %f\n", __func__, s->LineTime); + dml_print("DML::%s: VStartup: %u\n", __func__, p->VStartup); + dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); + dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); + dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); + dml_print("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm); + dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + dml_print("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); + dml_print("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); + + s->dep_bytes = dml_max(p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor, p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor); + + if (s->prefetch_sw_bytes < s->dep_bytes) { + s->prefetch_sw_bytes = 2 * s->dep_bytes; + } + + *p->DestinationLinesToRequestVMInVBlank = 0; + *p->DestinationLinesToRequestRowInVBlank = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixDataBWLuma = 0; + if 
(s->dst_y_prefetch_equ > 1) { + + if (s->Tpre_rounded - *p->Tno_bw > 0) { + s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte + + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + + s->prefetch_sw_bytes) + / (s->Tpre_rounded - *p->Tno_bw); + s->Tsw_est1 = s->prefetch_sw_bytes / s->PrefetchBandwidth1; + } else + s->PrefetchBandwidth1 = 0; + + if (p->VStartup == p->MaxVStartup && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0) { + s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / + (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); + } + + if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0) + s->PrefetchBandwidth2 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) / + (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded); + else + s->PrefetchBandwidth2 = 0; + + if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) { + s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) / + (s->Tpre_rounded - s->Tvm_trips_rounded); + s->Tsw_est3 = s->prefetch_sw_bytes / s->PrefetchBandwidth3; + } + else + s->PrefetchBandwidth3 = 0; + + + if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded > 0) { + s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); + } + + if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0) + s->PrefetchBandwidth4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded); + else + s->PrefetchBandwidth4 = 0; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Tpre_rounded: %f\n", __func__, s->Tpre_rounded); + dml_print("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, s->Tvm_trips_rounded); + dml_print("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); + dml_print("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); + dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, s->PrefetchBandwidth1); + dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, s->PrefetchBandwidth2); + dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, s->PrefetchBandwidth3); + dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, s->PrefetchBandwidth4); +#endif + { + dml_bool_t Case1OK; + dml_bool_t Case2OK; + dml_bool_t Case3OK; + + if (s->PrefetchBandwidth1 > 0) { + if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth1 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth1 >= s->Tr0_trips_rounded) { + Case1OK = true; + } else { + Case1OK = false; + } + } else { + Case1OK = false; + } + + if (s->PrefetchBandwidth2 > 0) { + if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth2 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth2 < s->Tr0_trips_rounded) { + Case2OK = true; + } else { + Case2OK = false; + 
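+ /*
+  * Reviewer's gloss (sketch, derived from the checks above and below):
+  * CaseNOK asks whether PrefetchBandwidthN is consistent with the rounded
+  * trip times -- Case1 needs both the VM fetch and the row fetch at that
+  * bandwidth to take at least Tvm_trips_rounded and Tr0_trips_rounded
+  * respectively, Case2 only the VM fetch, Case3 only the row fetch; if none
+  * of the three holds, the code falls back to PrefetchBandwidth4.
+  */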
} + } else { + Case2OK = false; + } + + if (s->PrefetchBandwidth3 > 0) { + if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth3 < s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth3 >= s->Tr0_trips_rounded) { + Case3OK = true; + } else { + Case3OK = false; + } + } else { + Case3OK = false; + } + + if (Case1OK) { + s->prefetch_bw_equ = s->PrefetchBandwidth1; + } else if (Case2OK) { + s->prefetch_bw_equ = s->PrefetchBandwidth2; + } else if (Case3OK) { + s->prefetch_bw_equ = s->PrefetchBandwidth3; + } else { + s->prefetch_bw_equ = s->PrefetchBandwidth4; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Case1OK: %u\n", __func__, Case1OK); + dml_print("DML::%s: Case2OK: %u\n", __func__, Case2OK); + dml_print("DML::%s: Case3OK: %u\n", __func__, Case3OK); + dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ); +#endif + + if (s->prefetch_bw_equ > 0) { + if (p->GPUVMEnable == true) { + s->Tvm_equ = dml_max3(*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, s->Tvm_trips, s->LineTime / 4); + } else { + s->Tvm_equ = s->LineTime / 4; + } + + if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) { + s->Tr0_equ = dml_max4((p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_equ, s->Tr0_trips, (s->LineTime - s->Tvm_equ) / 2, s->LineTime / 4); + } else { + s->Tr0_equ = (s->LineTime - s->Tvm_equ) / 2; + } + } else { + s->Tvm_equ = 0; + s->Tr0_equ = 0; + dml_print("DML::%s: prefetch_bw_equ equals 0!\n", __func__); + } + } + + + if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { + *p->DestinationLinesForPrefetch = s->dst_y_prefetch_oto; + s->TimeForFetchingMetaPTE = s->Tvm_oto; + s->TimeForFetchingRowInVBlank = s->Tr0_oto; + + *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0; + *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + } else { + *p->DestinationLinesForPrefetch = s->dst_y_prefetch_equ; + s->TimeForFetchingMetaPTE = s->Tvm_equ; + s->TimeForFetchingRowInVBlank = s->Tr0_equ; + + if (p->VStartup == p->MaxVStartup && p->EnhancedPrefetchScheduleAccelerationFinal != 0) { + *p->DestinationLinesToRequestVMInVBlank = dml_floor(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0; + *p->DestinationLinesToRequestRowInVBlank = dml_floor(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + } else { + *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0; + *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + } + } + + s->LinesToRequestPrefetchPixelData = *p->DestinationLinesForPrefetch - *p->DestinationLinesToRequestVMInVBlank - 2 * *p->DestinationLinesToRequestRowInVBlank; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *p->DestinationLinesForPrefetch); + dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank); + dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime); + dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank); + dml_print("DML::%s: 
PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); +#endif + + if (s->LinesToRequestPrefetchPixelData >= 1 && s->prefetch_bw_equ > 0) { + *p->VRatioPrefetchY = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData; + *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY, 1.0); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + dml_print("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); + dml_print("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); +#endif + if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) { + if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) { + *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY, + (dml_float_t)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); + } else { + s->MyError = true; + dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + *p->VRatioPrefetchY = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + dml_print("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); +#endif + } + + *p->VRatioPrefetchC = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData; + *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, 1.0); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + dml_print("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); + dml_print("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); +#endif + if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) { + if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) { + *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, (dml_float_t)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); + } else { + s->MyError = true; + dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + *p->VRatioPrefetchC = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + dml_print("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); +#endif + } + + *p->RequiredPrefetchPixDataBWLuma = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData + * p->myPipe->BytePerPixelY + * p->swath_width_luma_ub / s->LineTime; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime); + dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixDataBWLuma); +#endif + *p->RequiredPrefetchPixDataBWChroma = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData + *p->myPipe->BytePerPixelC + *p->swath_width_chroma_ub / s->LineTime; + } else { + s->MyError = true; + dml_print("DML:%s: MyErr set. 
LinesToRequestPrefetchPixelData: %f, should be > 0\n", __func__, s->LinesToRequestPrefetchPixelData); + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixDataBWLuma = 0; + *p->RequiredPrefetchPixDataBWChroma = 0; + } + + dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingMetaPTE); + dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", s->TimeForFetchingMetaPTE); + dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", s->TimeForFetchingRowInVBlank); + dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime); + dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime); + dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingMetaPTE - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); + dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); + + } else { + s->MyError = true; + dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + s->TimeForFetchingMetaPTE = 0; + s->TimeForFetchingRowInVBlank = 0; + *p->DestinationLinesToRequestVMInVBlank = 0; + *p->DestinationLinesToRequestRowInVBlank = 0; + s->LinesToRequestPrefetchPixelData = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixDataBWLuma = 0; + *p->RequiredPrefetchPixDataBWChroma = 0; + } + + { + dml_float_t prefetch_vm_bw; + dml_float_t prefetch_row_bw; + + if (p->PDEAndMetaPTEBytesFrame == 0) { + prefetch_vm_bw = 0; + } else if (*p->DestinationLinesToRequestVMInVBlank > 0) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime); +#endif + prefetch_vm_bw = p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / (*p->DestinationLinesToRequestVMInVBlank * s->LineTime); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); +#endif + } else { + prefetch_vm_bw = 0; + s->MyError = true; + dml_print("DML::%s: MyErr set. 
DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestVMInVBlank); + } + + if (p->MetaRowByte + p->PixelPTEBytesPerRow == 0) { + prefetch_row_bw = 0; + } else if (*p->DestinationLinesToRequestRowInVBlank > 0) { + prefetch_row_bw = (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (*p->DestinationLinesToRequestRowInVBlank * s->LineTime); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte); + dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank); + dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); +#endif + } else { + prefetch_row_bw = 0; + s->MyError = true; + dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestRowInVBlank); + } + + *p->prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); + } + + if (s->MyError) { + s->TimeForFetchingMetaPTE = 0; + s->TimeForFetchingRowInVBlank = 0; + *p->DestinationLinesToRequestVMInVBlank = 0; + *p->DestinationLinesToRequestRowInVBlank = 0; + *p->DestinationLinesForPrefetch = 0; + s->LinesToRequestPrefetchPixelData = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixDataBWLuma = 0; + *p->RequiredPrefetchPixDataBWChroma = 0; + } + + return s->MyError; +} // CalculatePrefetchSchedule + +static void CalculateBytePerPixelAndBlockSizes( + enum dml_source_format_class SourcePixelFormat, + enum dml_swizzle_mode SurfaceTiling, + + // Output + dml_uint_t *BytePerPixelY, + dml_uint_t *BytePerPixelC, + dml_float_t *BytePerPixelDETY, + dml_float_t *BytePerPixelDETC, + dml_uint_t *BlockHeight256BytesY, + dml_uint_t *BlockHeight256BytesC, + dml_uint_t *BlockWidth256BytesY, + dml_uint_t *BlockWidth256BytesC, + dml_uint_t *MacroTileHeightY, + dml_uint_t *MacroTileHeightC, + dml_uint_t *MacroTileWidthY, + dml_uint_t *MacroTileWidthC) +{ + if (SourcePixelFormat == dml_444_64) { + *BytePerPixelDETY = 8; + *BytePerPixelDETC = 0; + *BytePerPixelY = 8; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml_444_32 || SourcePixelFormat == dml_rgbe) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 0; + *BytePerPixelY = 4; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml_444_16 || SourcePixelFormat == dml_mono_16) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 0; + *BytePerPixelY = 2; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml_444_8 || SourcePixelFormat == dml_mono_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 0; + *BytePerPixelY = 1; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml_rgbe_alpha) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 1; + *BytePerPixelY = 4; + *BytePerPixelC = 1; + } else if (SourcePixelFormat == dml_420_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 2; + *BytePerPixelY = 1; + *BytePerPixelC = 2; + } else if (SourcePixelFormat == dml_420_12) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 4; + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } else { + *BytePerPixelDETY = (dml_float_t) (4.0 / 3); + *BytePerPixelDETC = (dml_float_t) (8.0 / 3); + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); + dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); + dml_print("DML::%s: 
BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); + dml_print("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); + dml_print("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); +#endif + if ((SourcePixelFormat == dml_444_64 || SourcePixelFormat == dml_444_32 + || SourcePixelFormat == dml_444_16 + || SourcePixelFormat == dml_444_8 + || SourcePixelFormat == dml_mono_16 + || SourcePixelFormat == dml_mono_8 + || SourcePixelFormat == dml_rgbe)) { + if (SurfaceTiling == dml_sw_linear) { + *BlockHeight256BytesY = 1; + } else if (SourcePixelFormat == dml_444_64) { + *BlockHeight256BytesY = 4; + } else if (SourcePixelFormat == dml_444_8) { + *BlockHeight256BytesY = 16; + } else { + *BlockHeight256BytesY = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockHeight256BytesC = 0; + *BlockWidth256BytesC = 0; + } else { + if (SurfaceTiling == dml_sw_linear) { + *BlockHeight256BytesY = 1; + *BlockHeight256BytesC = 1; + } else if (SourcePixelFormat == dml_rgbe_alpha) { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 16; + } else if (SourcePixelFormat == dml_420_8) { + *BlockHeight256BytesY = 16; + *BlockHeight256BytesC = 8; + } else { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); + dml_print("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); + dml_print("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); + dml_print("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); +#endif + + if (SurfaceTiling == dml_sw_linear) { + *MacroTileHeightY = *BlockHeight256BytesY; + *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; + } + } else if (SurfaceTiling == dml_sw_64kb_d || SurfaceTiling == dml_sw_64kb_d_t || SurfaceTiling == dml_sw_64kb_d_x || SurfaceTiling == dml_sw_64kb_r_x) { + *MacroTileHeightY = 16 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 16 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; + } + } else { + *MacroTileHeightY = 32 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 32 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); + dml_print("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); + dml_print("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); + dml_print("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); +#endif +} // CalculateBytePerPixelAndBlockSizes + +static noinline_for_stack dml_float_t CalculateTWait( + dml_uint_t PrefetchMode, + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, + dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + dml_bool_t DRRDisplay, + dml_float_t DRAMClockChangeLatency, 
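+ /*
+  * Reviewer's note (sketch, assuming the usual DML conventions): the
+  * latencies passed in here appear to be in microseconds, matching the
+  * "%fus" debug prints elsewhere in this file.  The body below resolves
+  * TWait to the worst case of whatever still has to be hidden at the given
+  * PrefetchMode: DRAM p-state change at mode 0, FCLK change at mode <= 1,
+  * stutter enter/exit at mode <= 2, and bare urgent latency otherwise.
+  */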
+ dml_float_t FCLKChangeLatency, + dml_float_t UrgentLatency, + dml_float_t SREnterPlusExitTime) +{ + dml_float_t TWait = 0.0; + + if (PrefetchMode == 0 && + !(UseMALLForPStateChange == dml_use_mall_pstate_change_full_frame) && !(UseMALLForPStateChange == dml_use_mall_pstate_change_sub_viewport) && + !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe) && !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) { + TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); + } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe)) { + TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); + } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe)) { + TWait = dml_max(SREnterPlusExitTime, UrgentLatency); + } else { + TWait = UrgentLatency; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PrefetchMode = %u\n", __func__, PrefetchMode); + dml_print("DML::%s: TWait = %f\n", __func__, TWait); +#endif + return TWait; +} // CalculateTWait + + +/// @brief Calculate the "starting point" for prefetch calculation. +/// If AllowForPStateChangeOrStutterInVBlank is set as a particular requirement, the mode evaluation +/// will only be done at the given mode. If there is no specific requirement (i.e. *_if_possible), it will +/// simply try all the prefetch modes in decreasing order of "difficulty" (starting from 0, which means all +/// power saving features are considered). +static void CalculatePrefetchMode( + enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank, + dml_uint_t *MinPrefetchMode, + dml_uint_t *MaxPrefetchMode) +{ + if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_uclk_fclk_and_stutter_if_possible) { + *MinPrefetchMode = 0; // consider all pwr saving features + *MaxPrefetchMode = 3; // consider just urgent latency + } else { + if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_none) { + *MinPrefetchMode = 3; + } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_stutter) { + *MinPrefetchMode = 2; + } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_fclk_and_stutter) { + *MinPrefetchMode = 1; + } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_uclk_fclk_and_stutter) { + *MinPrefetchMode = 0; + } else { + dml_print("ERROR: Invalid AllowForPStateChangeOrStutterInVBlank setting! 
val=%u\n", AllowForPStateChangeOrStutterInVBlank); + ASSERT(0); + } + *MaxPrefetchMode = *MinPrefetchMode; + } +} // CalculatePrefetchMode + +static dml_float_t CalculateWriteBackDISPCLK( + enum dml_source_format_class WritebackPixelFormat, + dml_float_t PixelClock, + dml_float_t WritebackHRatio, + dml_float_t WritebackVRatio, + dml_uint_t WritebackHTaps, + dml_uint_t WritebackVTaps, + dml_uint_t WritebackSourceWidth, + dml_uint_t WritebackDestinationWidth, + dml_uint_t HTotal, + dml_uint_t WritebackLineBufferSize, + dml_float_t DISPCLKDPPCLKVCOSpeed) +{ + dml_float_t DISPCLK_H, DISPCLK_V, DISPCLK_HB; + + DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; + DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / (dml_float_t) HTotal; + DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (dml_float_t) WritebackSourceWidth; + return RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed); +} + +static dml_float_t CalculateWriteBackDelay( + enum dml_source_format_class WritebackPixelFormat, + dml_float_t WritebackHRatio, + dml_float_t WritebackVRatio, + dml_uint_t WritebackVTaps, + dml_uint_t WritebackDestinationWidth, + dml_uint_t WritebackDestinationHeight, + dml_uint_t WritebackSourceHeight, + dml_uint_t HTotal) +{ + dml_float_t CalculateWriteBackDelay; + dml_float_t Line_length; + dml_float_t Output_lines_last_notclamped; + dml_float_t WritebackVInit; + + WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; + Line_length = dml_max((dml_float_t) WritebackDestinationWidth, dml_ceil((dml_float_t)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); + Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil(((dml_float_t)WritebackSourceHeight - (dml_float_t) WritebackVInit) / (dml_float_t)WritebackVRatio, 1.0); + if (Output_lines_last_notclamped < 0) { + CalculateWriteBackDelay = 0; + } else { + CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; + } + return CalculateWriteBackDelay; +} + +static void CalculateVUpdateAndDynamicMetadataParameters( + dml_uint_t MaxInterDCNTileRepeaters, + dml_float_t Dppclk, + dml_float_t Dispclk, + dml_float_t DCFClkDeepSleep, + dml_float_t PixelClock, + dml_uint_t HTotal, + dml_uint_t VBlank, + dml_uint_t DynamicMetadataTransmittedBytes, + dml_uint_t DynamicMetadataLinesBeforeActiveRequired, + dml_uint_t InterlaceEnable, + dml_bool_t ProgressiveToInterlaceUnitInOPP, + + // Output + dml_float_t *TSetup, + dml_float_t *Tdmbf, + dml_float_t *Tdmec, + dml_float_t *Tdmsks, + dml_uint_t *VUpdateOffsetPix, + dml_uint_t *VUpdateWidthPix, + dml_uint_t *VReadyOffsetPix) +{ + dml_float_t TotalRepeaterDelayTime; + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); + *VUpdateWidthPix = (dml_uint_t)(dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0)); + *VReadyOffsetPix = (dml_uint_t)(dml_ceil(dml_max(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0)); + *VUpdateOffsetPix = (dml_uint_t)(dml_ceil(HTotal / 4.0, 1.0)); + *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; + *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; + *Tdmec = HTotal / PixelClock; + + if (DynamicMetadataLinesBeforeActiveRequired == 0) { + *Tdmsks = VBlank * HTotal / PixelClock / 
2.0; + } else { + *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; + } + if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { + *Tdmsks = *Tdmsks / 2; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); + dml_print("DML::%s: VBlank = %u\n", __func__, VBlank); + dml_print("DML::%s: HTotal = %u\n", __func__, HTotal); + dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock); + dml_print("DML::%s: Dppclk = %f\n", __func__, Dppclk); + dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); + dml_print("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); + dml_print("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); + + dml_print("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); + dml_print("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); + dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); + + dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); +#endif +} + +static void CalculateRowBandwidth( + dml_bool_t GPUVMEnable, + enum dml_source_format_class SourcePixelFormat, + dml_float_t VRatio, + dml_float_t VRatioChroma, + dml_bool_t DCCEnable, + dml_float_t LineTime, + dml_uint_t MetaRowByteLuma, + dml_uint_t MetaRowByteChroma, + dml_uint_t meta_row_height_luma, + dml_uint_t meta_row_height_chroma, + dml_uint_t PixelPTEBytesPerRowLuma, + dml_uint_t PixelPTEBytesPerRowChroma, + dml_uint_t dpte_row_height_luma, + dml_uint_t dpte_row_height_chroma, + // Output + dml_float_t *meta_row_bw, + dml_float_t *dpte_row_bw) +{ + if (DCCEnable != true) { + *meta_row_bw = 0; + } else if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha) { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + + VRatioChroma * MetaRowByteChroma + / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatioChroma * PixelPTEBytesPerRowChroma + / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } +} + +/// @brief Determine immediate flip schedule given bw remaining after considering the prefetch schedule +/// @param BandwidthAvailableForImmediateFlip Bandwidth available for iflip for all planes +static void CalculateFlipSchedule( + dml_float_t HostVMInefficiencyFactor, + dml_float_t UrgentExtraLatency, + dml_float_t UrgentLatency, + dml_uint_t GPUVMMaxPageTableLevels, + dml_bool_t HostVMEnable, + dml_uint_t HostVMMaxNonCachedPageTableLevels, + dml_bool_t GPUVMEnable, + dml_uint_t HostVMMinPageSize, + dml_float_t PDEAndMetaPTEBytesPerFrame, + dml_float_t MetaRowBytes, + dml_float_t DPTEBytesPerRow, + dml_float_t BandwidthAvailableForImmediateFlip, + dml_uint_t TotImmediateFlipBytes, + enum dml_source_format_class SourcePixelFormat, + dml_float_t LineTime, + dml_float_t VRatio, + dml_float_t VRatioChroma, + dml_float_t Tno_bw, + dml_bool_t DCCEnable, + dml_uint_t dpte_row_height, + 
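+ /*
+  * Reviewer's note (sketch): the dpte/meta row heights in this parameter
+  * list are in lines.  Further down they bound min_row_time, the per-row
+  * deadline an immediate flip has to beat -- the flip is reported
+  * unsupported if the VM fetch plus two row fetches exceeds min_row_time,
+  * or if either fetch needs too many destination lines (>= 32 for the VM
+  * fetch, >= 16 for the row fetch).
+  */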
dml_uint_t meta_row_height, + dml_uint_t dpte_row_height_chroma, + dml_uint_t meta_row_height_chroma, + dml_bool_t use_one_row_for_frame_flip, + + // Output + dml_float_t *DestinationLinesToRequestVMInImmediateFlip, + dml_float_t *DestinationLinesToRequestRowInImmediateFlip, + dml_float_t *final_flip_bw, + dml_bool_t *ImmediateFlipSupportedForPipe) +{ + dml_float_t min_row_time = 0.0; + dml_uint_t HostVMDynamicLevelsTrips = 0; + dml_float_t TimeForFetchingMetaPTEImmediateFlip = 0; + dml_float_t TimeForFetchingRowInVBlankImmediateFlip = 0; + dml_float_t ImmediateFlipBW = 0; // @brief The immediate flip bandwidth for this pipe + + if (GPUVMEnable == true && HostVMEnable == true) { + HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + } else { + HostVMDynamicLevelsTrips = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); + dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); +#endif + + if (TotImmediateFlipBytes > 0) { + if (use_one_row_for_frame_flip) { + ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2.0 * DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes; + } else { + ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes; + } + if (GPUVMEnable == true) { + TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, + UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), + LineTime / 4.0); + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + } + if ((GPUVMEnable == true || DCCEnable == true)) { + TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0); + } else { + TimeForFetchingRowInVBlankImmediateFlip = 0; + } + + *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0; + *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0; + + if (GPUVMEnable == true) { + *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), + (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); + } else if ((GPUVMEnable == true || DCCEnable == true)) { + *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime); + } else { + *final_flip_bw = 0; + } + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + TimeForFetchingRowInVBlankImmediateFlip = 0; + *DestinationLinesToRequestVMInImmediateFlip = 0; + *DestinationLinesToRequestRowInImmediateFlip = 0; + *final_flip_bw = 0; + } + + if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_rgbe_alpha) { + if (GPUVMEnable == true && DCCEnable != true) { + min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / 
VRatioChroma); + } else if (GPUVMEnable != true && DCCEnable == true) { + min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); + } else { + min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma); + } + } else { + if (GPUVMEnable == true && DCCEnable != true) { + min_row_time = dpte_row_height * LineTime / VRatio; + } else if (GPUVMEnable != true && DCCEnable == true) { + min_row_time = meta_row_height * LineTime / VRatio; + } else { + min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); + } + } + + if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { + *ImmediateFlipSupportedForPipe = false; + } else { + *ImmediateFlipSupportedForPipe = true; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable); + + dml_print("DML::%s: MetaRowBytes = %f\n", __func__, MetaRowBytes); + dml_print("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); + dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); + dml_print("DML::%s: ImmediateFlipBW = %f\n", __func__, ImmediateFlipBW); + dml_print("DML::%s: PDEAndMetaPTEBytesPerFrame = %f\n", __func__, PDEAndMetaPTEBytesPerFrame); + dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); + + dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip); + dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip); + dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); + dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); + dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); + dml_print("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); +#endif +} // CalculateFlipSchedule + +static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed) +{ + if (Clock <= 0.0) + return 0.0; + else { + if (round_up) + return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0); + else + return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0); + } +} + +static void CalculateDCCConfiguration( + dml_bool_t DCCEnabled, + dml_bool_t DCCProgrammingAssumesScanDirectionUnknown, + enum dml_source_format_class SourcePixelFormat, + dml_uint_t SurfaceWidthLuma, + dml_uint_t SurfaceWidthChroma, + dml_uint_t SurfaceHeightLuma, + dml_uint_t SurfaceHeightChroma, + dml_uint_t nomDETInKByte, + dml_uint_t RequestHeight256ByteLuma, + dml_uint_t RequestHeight256ByteChroma, + enum dml_swizzle_mode TilingFormat, + dml_uint_t BytePerPixelY, + dml_uint_t BytePerPixelC, + dml_float_t BytePerPixelDETY, + dml_float_t BytePerPixelDETC, 
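+ /*
+  * Reviewer's note (sketch): unlike the integer BytePerPixel values, the
+  * DET variants can be fractional -- CalculateBytePerPixelAndBlockSizes
+  * above assigns 4.0/3 and 8.0/3 in its fallthrough case (e.g. for
+  * dml_420_10) -- which is presumably why the detile buffer viewport
+  * limits below are computed in dml_float_t.
+  */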
+ enum dml_rotation_angle SourceScan, + // Output + dml_uint_t *MaxUncompressedBlockLuma, + dml_uint_t *MaxUncompressedBlockChroma, + dml_uint_t *MaxCompressedBlockLuma, + dml_uint_t *MaxCompressedBlockChroma, + dml_uint_t *IndependentBlockLuma, + dml_uint_t *IndependentBlockChroma) +{ + dml_uint_t DETBufferSizeForDCC = nomDETInKByte * 1024; + + dml_uint_t yuv420; + dml_uint_t horz_div_l; + dml_uint_t horz_div_c; + dml_uint_t vert_div_l; + dml_uint_t vert_div_c; + + dml_uint_t swath_buf_size; + dml_float_t detile_buf_vp_horz_limit; + dml_float_t detile_buf_vp_vert_limit; + + dml_uint_t MAS_vp_horz_limit; + dml_uint_t MAS_vp_vert_limit; + dml_uint_t max_vp_horz_width; + dml_uint_t max_vp_vert_height; + dml_uint_t eff_surf_width_l; + dml_uint_t eff_surf_width_c; + dml_uint_t eff_surf_height_l; + dml_uint_t eff_surf_height_c; + + dml_uint_t full_swath_bytes_horz_wc_l; + dml_uint_t full_swath_bytes_horz_wc_c; + dml_uint_t full_swath_bytes_vert_wc_l; + dml_uint_t full_swath_bytes_vert_wc_c; + + dml_uint_t req128_horz_wc_l; + dml_uint_t req128_horz_wc_c; + dml_uint_t req128_vert_wc_l; + dml_uint_t req128_vert_wc_c; + + dml_uint_t segment_order_horz_contiguous_luma; + dml_uint_t segment_order_horz_contiguous_chroma; + dml_uint_t segment_order_vert_contiguous_luma; + dml_uint_t segment_order_vert_contiguous_chroma; + + typedef enum{ + REQ_256Bytes, + REQ_128BytesNonContiguous, + REQ_128BytesContiguous, + REQ_NA + } RequestType; + + RequestType RequestLuma; + RequestType RequestChroma; + + yuv420 = ((SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12) ? 1 : 0); + horz_div_l = 1; + horz_div_c = 1; + vert_div_l = 1; + vert_div_c = 1; + + if (BytePerPixelY == 1) + vert_div_l = 0; + if (BytePerPixelC == 1) + vert_div_c = 0; + + if (BytePerPixelC == 0) { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; + detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); + detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); + } else { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; + detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (dml_float_t) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); + detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); + } + + if (SourcePixelFormat == dml_420_10) { + detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; + detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; + } + + detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); + detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); + + MAS_vp_horz_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : 6144; + MAS_vp_vert_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); + max_vp_horz_width = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_horz_limit, detile_buf_vp_horz_limit)); + max_vp_vert_height = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_vert_limit, detile_buf_vp_vert_limit)); + eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); + eff_surf_width_c = eff_surf_width_l / (1 + yuv420); + eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? 
max_vp_vert_height : SurfaceHeightLuma); + eff_surf_height_c = eff_surf_height_l / (1 + yuv420); + + full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; + full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; + if (BytePerPixelC > 0) { + full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; + full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; + } else { + full_swath_bytes_horz_wc_c = 0; + full_swath_bytes_vert_wc_c = 0; + } + + if (SourcePixelFormat == dml_420_10) { + full_swath_bytes_horz_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0)); + full_swath_bytes_horz_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0)); + full_swath_bytes_vert_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0)); + full_swath_bytes_vert_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0)); + } + + if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 0; + } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 1; + } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 1; + req128_horz_wc_c = 0; + } else { + req128_horz_wc_l = 1; + req128_horz_wc_c = 1; + } + + if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 0; + } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 1; + } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 1; + req128_vert_wc_c = 0; + } else { + req128_vert_wc_l = 1; + req128_vert_wc_c = 1; + } + + if (BytePerPixelY == 2) { + segment_order_horz_contiguous_luma = 0; + segment_order_vert_contiguous_luma = 1; + } else { + segment_order_horz_contiguous_luma = 1; + segment_order_vert_contiguous_luma = 0; + } + + if (BytePerPixelC == 2) { + segment_order_horz_contiguous_chroma = 0; + segment_order_vert_contiguous_chroma = 1; + } else { + segment_order_horz_contiguous_chroma = 1; + segment_order_vert_contiguous_chroma = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); + dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + dml_print("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); + dml_print("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); + dml_print("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); + dml_print("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); + dml_print("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); + dml_print("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); + dml_print("DML::%s: segment_order_horz_contiguous_chroma = %u\n", 
__func__, segment_order_horz_contiguous_chroma); +#endif + + if (DCCProgrammingAssumesScanDirectionUnknown == true) { + if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { + RequestLuma = REQ_256Bytes; + } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { + RequestLuma = REQ_128BytesNonContiguous; + } else { + RequestLuma = REQ_128BytesContiguous; + } + if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { + RequestChroma = REQ_256Bytes; + } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { + RequestChroma = REQ_128BytesNonContiguous; + } else { + RequestChroma = REQ_128BytesContiguous; + } + } else if (!dml_is_vertical_rotation(SourceScan)) { + if (req128_horz_wc_l == 0) { + RequestLuma = REQ_256Bytes; + } else if (segment_order_horz_contiguous_luma == 0) { + RequestLuma = REQ_128BytesNonContiguous; + } else { + RequestLuma = REQ_128BytesContiguous; + } + if (req128_horz_wc_c == 0) { + RequestChroma = REQ_256Bytes; + } else if (segment_order_horz_contiguous_chroma == 0) { + RequestChroma = REQ_128BytesNonContiguous; + } else { + RequestChroma = REQ_128BytesContiguous; + } + } else { + if (req128_vert_wc_l == 0) { + RequestLuma = REQ_256Bytes; + } else if (segment_order_vert_contiguous_luma == 0) { + RequestLuma = REQ_128BytesNonContiguous; + } else { + RequestLuma = REQ_128BytesContiguous; + } + if (req128_vert_wc_c == 0) { + RequestChroma = REQ_256Bytes; + } else if (segment_order_vert_contiguous_chroma == 0) { + RequestChroma = REQ_128BytesNonContiguous; + } else { + RequestChroma = REQ_128BytesContiguous; + } + } + + if (RequestLuma == REQ_256Bytes) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 256; + *IndependentBlockLuma = 0; + } else if (RequestLuma == REQ_128BytesContiguous) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 128; + *IndependentBlockLuma = 128; + } else { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 64; + *IndependentBlockLuma = 64; + } + + if (RequestChroma == REQ_256Bytes) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 256; + *IndependentBlockChroma = 0; + } else if (RequestChroma == REQ_128BytesContiguous) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 128; + *IndependentBlockChroma = 128; + } else { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 64; + *IndependentBlockChroma = 64; + } + + if (DCCEnabled != true || BytePerPixelC == 0) { + *MaxUncompressedBlockChroma = 0; + *MaxCompressedBlockChroma = 0; + *IndependentBlockChroma = 0; + } + + if (DCCEnabled != true) { + *MaxUncompressedBlockLuma = 0; + *MaxCompressedBlockLuma = 0; + *IndependentBlockLuma = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); + dml_print("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); + dml_print("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); + dml_print("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma); + dml_print("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); + dml_print("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); +#endif + +} // CalculateDCCConfiguration + +static dml_uint_t CalculatePrefetchSourceLines( + dml_float_t VRatio, + 
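+ /* CalculatePrefetchSourceLines: number of source lines to prefetch, i.e. MaxNumSwath full swaths plus one partial swath, derived from the prefill start line (VInitPreFill) and the swath height. */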
dml_uint_t VTaps, + dml_bool_t Interlace, + dml_bool_t ProgressiveToInterlaceUnitInOPP, + dml_uint_t SwathHeight, + enum dml_rotation_angle SourceScan, + dml_bool_t ViewportStationary, + dml_uint_t SwathWidth, + dml_uint_t ViewportHeight, + dml_uint_t ViewportXStart, + dml_uint_t ViewportYStart, + + // Output + dml_uint_t *VInitPreFill, + dml_uint_t *MaxNumSwath) +{ + + dml_uint_t vp_start_rot = 0; + dml_uint_t sw0_tmp = 0; + dml_uint_t MaxPartialSwath = 0; + dml_float_t numLines = 0; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); + dml_print("DML::%s: VTaps = %u\n", __func__, VTaps); + dml_print("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); + dml_print("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); + dml_print("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); + dml_print("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); +#endif + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = (dml_uint_t)(dml_floor((VRatio + (dml_float_t) VTaps + 1) / 2.0, 1)); + else + *VInitPreFill = (dml_uint_t)(dml_floor((VRatio + (dml_float_t) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1)); + + if (ViewportStationary) { + if (SourceScan == dml_rotation_180 || SourceScan == dml_rotation_180m) { + vp_start_rot = SwathHeight - (((dml_uint_t) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); + } else if (SourceScan == dml_rotation_270 || SourceScan == dml_rotation_90m) { + vp_start_rot = ViewportXStart; + } else if (SourceScan == dml_rotation_90 || SourceScan == dml_rotation_270m) { + vp_start_rot = SwathHeight - (((dml_uint_t)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); + } else { + vp_start_rot = ViewportYStart; + } + sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); + if (sw0_tmp < *VInitPreFill) { + *MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - sw0_tmp) / (dml_float_t) SwathHeight, 1) + 1); + } else { + *MaxNumSwath = 1; + } + MaxPartialSwath = (dml_uint_t)(dml_max(1, (dml_uint_t) (vp_start_rot + *VInitPreFill - 1) % SwathHeight)); + } else { + *MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - 1.0) / (dml_float_t) SwathHeight, 1) + 1); + if (*VInitPreFill > 1) { + MaxPartialSwath = (dml_uint_t)(dml_max(1, (dml_uint_t) (*VInitPreFill - 2) % SwathHeight)); + } else { + MaxPartialSwath = (dml_uint_t)(dml_max(1, (dml_uint_t) (*VInitPreFill + SwathHeight - 2) % SwathHeight)); + } + } + numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); + dml_print("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); + dml_print("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); + dml_print("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); + dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); +#endif + return (dml_uint_t)(numLines); + +} // CalculatePrefetchSourceLines + +static dml_uint_t CalculateVMAndRowBytes( + dml_bool_t ViewportStationary, + dml_bool_t DCCEnable, + dml_uint_t NumberOfDPPs, + dml_uint_t BlockHeight256Bytes, + dml_uint_t BlockWidth256Bytes, + enum dml_source_format_class SourcePixelFormat, + dml_uint_t SurfaceTiling, + dml_uint_t BytePerPixel, + enum dml_rotation_angle SourceScan, + dml_uint_t SwathWidth, + dml_uint_t ViewportHeight, + dml_uint_t ViewportXStart, + dml_uint_t ViewportYStart, + dml_bool_t GPUVMEnable, + dml_uint_t GPUVMMaxPageTableLevels, + dml_uint_t GPUVMMinPageSizeKBytes, + dml_uint_t PTEBufferSizeInRequests, + dml_uint_t 
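+ /* CalculateVMAndRowBytes returns the per-frame PDE/meta-PTE byte count and fills in the per-row PTE and meta-row sizes used for prefetch scheduling and the PTE-buffer size check. */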
Pitch, + dml_uint_t DCCMetaPitch, + dml_uint_t MacroTileWidth, + dml_uint_t MacroTileHeight, + + // Output + dml_uint_t *MetaRowByte, + dml_uint_t *PixelPTEBytesPerRow, // for bandwidth calculation + dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check + dml_uint_t *dpte_row_width_ub, + dml_uint_t *dpte_row_height, + dml_uint_t *dpte_row_height_linear, + dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame, + dml_uint_t *dpte_row_width_ub_one_row_per_frame, + dml_uint_t *dpte_row_height_one_row_per_frame, + dml_uint_t *MetaRequestWidth, + dml_uint_t *MetaRequestHeight, + dml_uint_t *meta_row_width, + dml_uint_t *meta_row_height, + dml_uint_t *PixelPTEReqWidth, + dml_uint_t *PixelPTEReqHeight, + dml_uint_t *PTERequestSize, + dml_uint_t *DPDE0BytesFrame, + dml_uint_t *MetaPTEBytesFrame) +{ + dml_uint_t MPDEBytesFrame; + dml_uint_t DCCMetaSurfaceBytes; + dml_uint_t ExtraDPDEBytesFrame; + dml_uint_t PDEAndMetaPTEBytesFrame; + dml_uint_t MacroTileSizeBytes; + dml_uint_t vp_height_meta_ub; + dml_uint_t vp_height_dpte_ub; + + dml_uint_t PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this + + *MetaRequestHeight = 8 * BlockHeight256Bytes; + *MetaRequestWidth = 8 * BlockWidth256Bytes; + if (SurfaceTiling == dml_sw_linear) { + *meta_row_height = 32; + *meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth)); + } else if (!dml_is_vertical_rotation(SourceScan)) { + *meta_row_height = *MetaRequestHeight; + if (ViewportStationary && NumberOfDPPs == 1) { + *meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth)); + } else { + *meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth); + } + *MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0); + } else { + *meta_row_height = *MetaRequestWidth; + if (ViewportStationary && NumberOfDPPs == 1) { + *meta_row_width = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight)); + } else { + *meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight); + } + *MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0); + } + + if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) { + vp_height_meta_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes)); + } else if (!dml_is_vertical_rotation(SourceScan)) { + vp_height_meta_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes); + } else { + vp_height_meta_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes); + } + + DCCMetaSurfaceBytes = (dml_uint_t)(DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0); + + if (GPUVMEnable == true) { + *MetaPTEBytesFrame = (dml_uint_t)((dml_ceil((dml_float_t) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64); + MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); + } else { + *MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + } + + if (DCCEnable != true) { + *MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + *MetaRowByte = 0; + } + + MacroTileSizeBytes = 
MacroTileWidth * BytePerPixel * MacroTileHeight; + + if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) { + vp_height_dpte_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + MacroTileHeight - 1, MacroTileHeight) - dml_floor(ViewportYStart, MacroTileHeight)); + } else if (!dml_is_vertical_rotation(SourceScan)) { + vp_height_dpte_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight); + } else { + vp_height_dpte_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight); + } + + if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { + *DPDE0BytesFrame = (dml_uint_t)(64 * (dml_ceil((dml_float_t) (Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / (dml_float_t) (8 * 2097152), 1) + 1)); + ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); + } else { + *DPDE0BytesFrame = 0; + ExtraDPDEBytesFrame = 0; + } + + PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable); + dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + dml_print("DML::%s: SwModeLinear = %u\n", __func__, SurfaceTiling == dml_sw_linear); + dml_print("DML::%s: BytePerPixel = %u\n", __func__, BytePerPixel); + dml_print("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, GPUVMMaxPageTableLevels); + dml_print("DML::%s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); + dml_print("DML::%s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); + dml_print("DML::%s: MacroTileHeight = %u\n", __func__, MacroTileHeight); + dml_print("DML::%s: MacroTileWidth = %u\n", __func__, MacroTileWidth); + dml_print("DML::%s: MetaPTEBytesFrame = %u\n", __func__, *MetaPTEBytesFrame); + dml_print("DML::%s: MPDEBytesFrame = %u\n", __func__, MPDEBytesFrame); + dml_print("DML::%s: DPDE0BytesFrame = %u\n", __func__, *DPDE0BytesFrame); + dml_print("DML::%s: ExtraDPDEBytesFrame= %u\n", __func__, ExtraDPDEBytesFrame); + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: ViewportHeight = %u\n", __func__, ViewportHeight); + dml_print("DML::%s: SwathWidth = %u\n", __func__, SwathWidth); + dml_print("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub); +#endif + + if (SurfaceTiling == dml_sw_linear) { + *PixelPTEReqHeight = 1; + *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; + PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; + *PTERequestSize = 64; + } else if (GPUVMMinPageSizeKBytes == 4) { + *PixelPTEReqHeight = 16 * BlockHeight256Bytes; + *PixelPTEReqWidth = 16 * BlockWidth256Bytes; + *PTERequestSize = 128; + } else { + *PixelPTEReqHeight = MacroTileHeight; + *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); + *PTERequestSize = 64; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); + dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); + dml_print("DML::%s: PixelPTEReqHeight = %u\n", __func__, *PixelPTEReqHeight); + dml_print("DML::%s: PixelPTEReqWidth = %u\n", __func__, *PixelPTEReqWidth); + dml_print("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); + dml_print("DML::%s: PTERequestSize = %u\n", __func__, *PTERequestSize); + dml_print("DML::%s: Pitch = %u\n", 
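+ /* PTE request geometry above: linear surfaces use one-line-high requests sized by the GPUVM page, 4KB pages use 16x the 256B block dimensions, and larger pages fall back to macro-tile-height requests. */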
__func__, Pitch); +#endif + + *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; + *dpte_row_width_ub_one_row_per_frame = (dml_uint_t)((dml_ceil(((dml_float_t)Pitch * (dml_float_t) *dpte_row_height_one_row_per_frame / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * (dml_float_t) *PixelPTEReqWidth); + *PixelPTEBytesPerRow_one_row_per_frame = (dml_uint_t)((dml_float_t) *dpte_row_width_ub_one_row_per_frame / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize); + + if (SurfaceTiling == dml_sw_linear) { + *dpte_row_height = (dml_uint_t)(dml_min(128, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1))); + dml_print("DML::%s: dpte_row_height term 1 = %u\n", __func__, PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); + dml_print("DML::%s: dpte_row_height term 2 = %f\n", __func__, dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); + dml_print("DML::%s: dpte_row_height term 3 = %f\n", __func__, dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); + dml_print("DML::%s: dpte_row_height term 4 = %u\n", __func__, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); + dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height); + + *dpte_row_width_ub = (dml_uint_t)(dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height - 1), (dml_float_t) *PixelPTEReqWidth) + *PixelPTEReqWidth); + *PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize); + + // VBA_DELTA, VBA doesn't have programming value for pte row height linear. + *dpte_row_height_linear = 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * PixelPTEReqWidth_linear / Pitch), 1); + if (*dpte_row_height_linear > 128) + *dpte_row_height_linear = 128; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *dpte_row_width_ub); +#endif + + } else if (!dml_is_vertical_rotation(SourceScan)) { + *dpte_row_height = *PixelPTEReqHeight; + + if (GPUVMMinPageSizeKBytes > 64) { + *dpte_row_width_ub = (dml_uint_t)((dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth); + } else if (ViewportStationary && (NumberOfDPPs == 1)) { + *dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - dml_floor(ViewportXStart, *PixelPTEReqWidth)); + } else { + *dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t)*PixelPTEReqWidth, 1) + 1.0) * *PixelPTEReqWidth); + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *dpte_row_width_ub); +#endif + + ASSERT(*PixelPTEReqWidth); + if (*PixelPTEReqWidth != 0) + *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; + } else { + *dpte_row_height = (dml_uint_t)(dml_min(*PixelPTEReqWidth, MacroTileWidth)); + + if (ViewportStationary && (NumberOfDPPs == 1)) { + *dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight)); + } else { + *dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t) *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight); + } + + *PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / 
(dml_float_t) *PixelPTEReqHeight * *PTERequestSize); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *dpte_row_width_ub); +#endif + } + + if (GPUVMEnable != true) + *PixelPTEBytesPerRow = 0; + + *PixelPTEBytesPerRowStorage = *PixelPTEBytesPerRow; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); + dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height); + dml_print("DML::%s: dpte_row_height_linear = %u\n", __func__, *dpte_row_height_linear); + dml_print("DML::%s: dpte_row_width_ub = %u\n", __func__, *dpte_row_width_ub); + dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *PixelPTEBytesPerRow); + dml_print("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *PixelPTEBytesPerRowStorage); + dml_print("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); + dml_print("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *dpte_row_height_one_row_per_frame); + dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *dpte_row_width_ub_one_row_per_frame); + dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *PixelPTEBytesPerRow_one_row_per_frame); +#endif + + dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); + + return PDEAndMetaPTEBytesFrame; +} // CalculateVMAndRowBytes + +static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported) +{ + dml_uint_t num_active_planes = dml_get_num_active_planes(display_cfg); + + //Progressive To Interlace Unit Effect + for (dml_uint_t k = 0; k < num_active_planes; ++k) { + display_cfg->output.PixelClockBackEnd[k] = display_cfg->timing.PixelClock[k]; + if (display_cfg->timing.Interlace[k] == 1 && ptoi_supported == true) { + display_cfg->timing.PixelClock[k] = 2 * display_cfg->timing.PixelClock[k]; + } + } +} + +static dml_float_t TruncToValidBPP( + dml_float_t LinkBitRate, + dml_uint_t Lanes, + dml_uint_t HTotal, + dml_uint_t HActive, + dml_float_t PixelClock, + dml_float_t DesiredBPP, + dml_bool_t DSCEnable, + enum dml_output_encoder_class Output, + enum dml_output_format_class Format, + dml_uint_t DSCInputBitPerComponent, + dml_uint_t DSCSlices, + dml_uint_t AudioRate, + dml_uint_t AudioLayout, + enum dml_odm_mode ODMModeNoDSC, + enum dml_odm_mode ODMModeDSC, + + // Output + dml_uint_t *RequiredSlots) +{ + dml_float_t MaxLinkBPP; + dml_uint_t MinDSCBPP; + dml_float_t MaxDSCBPP; + dml_uint_t NonDSCBPP0; + dml_uint_t NonDSCBPP1; + dml_uint_t NonDSCBPP2; + + if (Format == dml_420) { + NonDSCBPP0 = 12; + NonDSCBPP1 = 15; + NonDSCBPP2 = 18; + MinDSCBPP = 6; + MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16; + } else if (Format == dml_444) { + NonDSCBPP0 = 24; + NonDSCBPP1 = 30; + NonDSCBPP2 = 36; + MinDSCBPP = 8; + MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; + } else { + if (Output == dml_hdmi) { + NonDSCBPP0 = 24; + NonDSCBPP1 = 24; + NonDSCBPP2 = 24; + } else { + NonDSCBPP0 = 16; + NonDSCBPP1 = 20; + NonDSCBPP2 = 24; + } + if (Format == dml_n422) { + MinDSCBPP = 7; + MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; + } else { + MinDSCBPP = 8; + MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; + } + } + + if (Output == dml_dp2p0) { + MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 
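+ /* DP 2.x efficiency factors: 128/132 is the 128b/132b channel-coding ratio; 383/384 and 65536/65540 presumably model small additional link-layer overheads (interpretation assumed from the constants, not spelled out here). */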
65540.0; + } else if (DSCEnable && Output == dml_dp) { + MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100); + } else { + MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock; + } + + if (DSCEnable) { + if (ODMModeDSC == dml_odm_mode_combine_4to1) { + MaxLinkBPP = dml_min(MaxLinkBPP, 16); + } else if (ODMModeDSC == dml_odm_mode_combine_2to1) { + MaxLinkBPP = dml_min(MaxLinkBPP, 32); + } else if (ODMModeDSC == dml_odm_mode_split_1to2) { + MaxLinkBPP = 2 * MaxLinkBPP; + } + } else { + if (ODMModeNoDSC == dml_odm_mode_combine_4to1) { + MaxLinkBPP = dml_min(MaxLinkBPP, 16); + } else if (ODMModeNoDSC == dml_odm_mode_combine_2to1) { + MaxLinkBPP = dml_min(MaxLinkBPP, 32); + } else if (ODMModeNoDSC == dml_odm_mode_split_1to2) { + MaxLinkBPP = 2 * MaxLinkBPP; + } + } + + *RequiredSlots = (dml_uint_t)(dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1)); + + if (DesiredBPP == 0) { + if (DSCEnable) { + if (MaxLinkBPP < MinDSCBPP) { + return __DML_DPP_INVALID__; + } else if (MaxLinkBPP >= MaxDSCBPP) { + return MaxDSCBPP; + } else { + return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; + } + } else { + if (MaxLinkBPP >= NonDSCBPP2) { + return NonDSCBPP2; + } else if (MaxLinkBPP >= NonDSCBPP1) { + return NonDSCBPP1; + } else if (MaxLinkBPP >= NonDSCBPP0) { + return NonDSCBPP0; + } else { + return __DML_DPP_INVALID__; + } + } + } else { + if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) || + (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { + return __DML_DPP_INVALID__; + } else { + return DesiredBPP; + } + } +} // TruncToValidBPP + +static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct display_mode_lib_scratch_st *scratch, + struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p) +{ + struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; + + s->TotalActiveWriteback = 0; + p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency; + p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency; + p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark; + p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark; + p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep; + p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep; + p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep; + p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); + dml_print("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); + dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); + dml_print("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); + dml_print("DML::%s: USRRetrainingWatermark = %f\n", 
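+ /* Each watermark above stacks a latency on the urgent path, e.g. DRAMClockChangeWatermark = DRAMClockChangeLatency + UrgentWatermark; the stutter watermarks add a 10/DCFClkDeepSleep term. */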
__func__, p->Watermark->USRRetrainingWatermark); + dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); + dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); + dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); + dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); + dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); + dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); +#endif + + s->TotalActiveWriteback = 0; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->WritebackEnable[k] == true) { + s->TotalActiveWriteback = s->TotalActiveWriteback + 1; + } + } + + if (s->TotalActiveWriteback <= 1) { + p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency; + } else { + p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; + } + if (p->USRRetrainingRequiredFinal) + p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency; + + if (s->TotalActiveWriteback <= 1) { + p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency; + p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency; + } else { + p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; + p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK; + } + + if (p->USRRetrainingRequiredFinal) + p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; + + if (p->USRRetrainingRequiredFinal) + p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); + dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); + dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); + dml_print("DML::%s: USRRetrainingRequiredFinal = %u\n", __func__, p->USRRetrainingRequiredFinal); + dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); +#endif + + s->TotalPixelBW = 0.0; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k] + * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k] + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * p->VRatioChroma[k]) / (p->HTotal[k] / p->PixelClock[k]); + } + + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + + s->LBLatencyHidingSourceLinesY[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthY[k] / dml_max(p->HRatio[k], 
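+ /* LBLatencyHidingSourceLines: source lines the line buffer can hide, capped at MaxLineBufferLines, minus the (VTaps - 1) lines the vertical filter keeps in flight. */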
1.0)), 1)) - (p->VTaps[k] - 1)); + s->LBLatencyHidingSourceLinesC[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthC[k] / dml_max(p->HRatioChroma[k], 1.0)), 1)) - (p->VTapsChroma[k] - 1)); + + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); + dml_print("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); + dml_print("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, p->LBBitPerPixel[k]); + dml_print("DML::%s: k=%u, HRatio = %f\n", __func__, k, p->HRatio[k]); + dml_print("DML::%s: k=%u, VTaps = %u\n", __func__, k, p->VTaps[k]); +#endif + + s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / p->VRatio[k] * (p->HTotal[k] / p->PixelClock[k]); + s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / p->VRatioChroma[k] * (p->HTotal[k] / p->PixelClock[k]); + + s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k]; + if (p->UnboundedRequestEnabled) { + s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k]) / (p->HTotal[k] / p->PixelClock[k]) / s->TotalPixelBW; + } + + s->LinesInDETY[k] = (dml_float_t)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; + s->LinesInDETYRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETY[k], p->SwathHeightY[k])); + s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k]; + + s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k]; + + if (p->NumberOfActiveSurfaces > 1) { + s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightY[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatio[k]; + } + + if (p->BytePerPixelDETC[k] > 0) { + s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k]; + s->LinesInDETCRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETC[k], p->SwathHeightC[k])); + s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatioChroma[k]; + s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k]; + if (p->NumberOfActiveSurfaces > 1) { + s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightC[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatioChroma[k]; + } + s->ActiveClockChangeLatencyHiding = dml_min(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC); + } else { + s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY; + } + + s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->DRAMClockChangeWatermark; + s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->FCLKChangeWatermark; + 
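+ /* A positive margin means the DET plus line-buffer hiding covers the watermark; surfaces with negative margin cannot absorb the clock-change latency and drive the support decisions below. */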
s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
+
+ if (p->WritebackEnable[k]) {
+ s->WritebackLatencyHiding = (dml_float_t)p->WritebackInterfaceBufferSize * 1024.0 / ((dml_float_t)p->WritebackDestinationWidth[k] * (dml_float_t)p->WritebackDestinationHeight[k] / ((dml_float_t)p->WritebackSourceHeight[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k]) * 4.0);
+ if (p->WritebackPixelFormat[k] == dml_444_64) {
+ s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
+ }
+ s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
+
+ s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
+
+ s->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
+ s->ActiveFCLKChangeLatencyMargin[k] = dml_min(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
+ }
+ p->MaxActiveDRAMClockChangeLatencySupported[k] = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
+ p->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
+ }
+
+ *p->USRRetrainingSupport = true;
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->USRRetrainingLatencyMargin[k] < 0)) {
+ *p->USRRetrainingSupport = false;
+ }
+ }
+
+ s->FoundCriticalSurface = false;
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && ((!s->FoundCriticalSurface)
+ || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
+ s->FoundCriticalSurface = true;
+ *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
+ }
+ }
+
+ for (dml_uint_t i = 0; i < p->NumberOfActiveSurfaces; ++i) {
+ for (dml_uint_t j = 0; j < p->NumberOfActiveSurfaces; ++j) {
+ if (i == j ||
+ (p->BlendingAndTiming[i] == i && p->BlendingAndTiming[j] == i) ||
+ (p->BlendingAndTiming[j] == j && p->BlendingAndTiming[i] == j) ||
+ (p->BlendingAndTiming[i] == p->BlendingAndTiming[j] && p->BlendingAndTiming[i] != i) ||
+ (p->SynchronizeTimingsFinal && p->PixelClock[i] == p->PixelClock[j] && p->HTotal[i] == p->HTotal[j] && p->VTotal[i] == p->VTotal[j] && p->VActive[i] == p->VActive[j]) ||
+ (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (p->DRRDisplay[i] || p->DRRDisplay[j]))) {
+ s->SynchronizedSurfaces[i][j] = true;
+ } else {
+ s->SynchronizedSurfaces[i][j] = false;
+ }
+ }
+ }
+
+ s->FCLKChangeSupportNumber = 0;
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->ActiveFCLKChangeLatencyMargin[k] < 0)) {
+ if (!(p->PrefetchMode[k] <= 1)) {
+ s->FCLKChangeSupportNumber = 3;
+ } else if (s->FCLKChangeSupportNumber == 0) {
+ s->FCLKChangeSupportNumber = ((p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ?
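+ /* Support-number encoding used below: 0 = all surfaces have margin (vactive), 1 = at least one surface relies on vblank, 2 = same but for a synchronized DRR display, 3 = unsupported. */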
2 : 1);
+ s->LastSurfaceWithoutMargin = k;
+ } else if (((s->FCLKChangeSupportNumber == 1) && (p->DRRDisplay[k] || (!s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k]))) || (s->FCLKChangeSupportNumber == 2))
+ s->FCLKChangeSupportNumber = 3;
+ }
+ }
+
+ if (s->FCLKChangeSupportNumber == 0) {
+ *p->FCLKChangeSupport = dml_fclock_change_vactive;
+ } else if ((s->FCLKChangeSupportNumber == 1) || (s->FCLKChangeSupportNumber == 2)) {
+ *p->FCLKChangeSupport = dml_fclock_change_vblank;
+ } else {
+ *p->FCLKChangeSupport = dml_fclock_change_unsupported;
+ }
+
+ s->DRAMClockChangeMethod = 0;
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)
+ s->DRAMClockChangeMethod = 1;
+ else if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport)
+ s->DRAMClockChangeMethod = 2;
+ }
+
+ s->DRAMClockChangeSupportNumber = 0;
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (((s->DRAMClockChangeMethod == 0) && (s->ActiveDRAMClockChangeLatencyMargin[k] < 0)) ||
+ ((s->DRAMClockChangeMethod == 1) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_full_frame)) ||
+ ((s->DRAMClockChangeMethod == 2) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_sub_viewport) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe))) {
+ if (p->PrefetchMode[k] != 0) { // PrefetchMode 0 requires DRAM clock change support; any other mode does not need it
+ s->DRAMClockChangeSupportNumber = 3;
+ } else if (s->DRAMClockChangeSupportNumber == 0) {
+ s->DRAMClockChangeSupportNumber = (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1;
+ s->LastSurfaceWithoutMargin = k;
+ } else if (((s->DRAMClockChangeSupportNumber == 1) && (p->DRRDisplay[k] || !s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k])) || (s->DRAMClockChangeSupportNumber == 2)) {
+ s->DRAMClockChangeSupportNumber = 3;
+ }
+ }
+ }
+
+ if (s->DRAMClockChangeMethod == 0) { // No MALL usage
+ if (s->DRAMClockChangeSupportNumber == 0) {
+ *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive;
+ } else if (s->DRAMClockChangeSupportNumber == 1) {
+ *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank;
+ } else if (s->DRAMClockChangeSupportNumber == 2) {
+ *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr;
+ } else {
+ *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
+ }
+ } else if (s->DRAMClockChangeMethod == 1) { // Any pipe using MALL full frame
+ if (s->DRAMClockChangeSupportNumber == 0) {
+ *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_full_frame;
+ } else if (s->DRAMClockChangeSupportNumber == 1) {
+ *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_full_frame;
+ } else if (s->DRAMClockChangeSupportNumber == 2) {
+ *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_full_frame;
+ } else {
+ *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
+ }
+ } else { // Any pipe using MALL subviewport
+ if (s->DRAMClockChangeSupportNumber == 0) {
+ *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_sub_vp;
+ } else if (s->DRAMClockChangeSupportNumber == 1) {
+ *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_sub_vp;
+ } else if (s->DRAMClockChangeSupportNumber == 2) {
+ *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_sub_vp;
+ } else {
+ *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
+ }
+ }
+
+ for
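+ /* Sub-viewport MALL sizing below: lines needed = lines scanned out during the p-state (src_y_pstate) + lines that can be fetched ahead into the DET (src_y_ahead) + one meta row. */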
(dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + s->dst_y_pstate = (dml_uint_t)(dml_ceil((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (p->HTotal[k] / p->PixelClock[k]), 1)); + s->src_y_pstate_l = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatio[k], p->SwathHeightY[k])); + s->src_y_ahead_l = (dml_uint_t)(dml_floor(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]); + s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height[k]; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + dml_print("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + dml_print("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); + dml_print("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); + dml_print("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l); + dml_print("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l); + dml_print("DML::%s: k=%u, meta_row_height = %u\n", __func__, k, p->meta_row_height[k]); + dml_print("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l); +#endif + p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l; + + if (p->BytePerPixelDETC[k] > 0) { + s->src_y_pstate_c = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatioChroma[k], p->SwathHeightC[k])); + s->src_y_ahead_c = (dml_uint_t)(dml_floor(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]); + s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_chroma[k]; + p->SubViewportLinesNeededInMALL[k] = (dml_uint_t)(dml_max(s->sub_vp_lines_l, s->sub_vp_lines_c)); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); + dml_print("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); + dml_print("DML::%s: k=%u, meta_row_height_chroma = %u\n", __func__, k, p->meta_row_height_chroma[k]); + dml_print("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); +#endif + } + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->DRAMClockChangeSupport); + dml_print("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->FCLKChangeSupport); + dml_print("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported); + dml_print("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport); +#endif +} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport + +static void CalculateDCFCLKDeepSleep( + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t BytePerPixelY[], + dml_uint_t BytePerPixelC[], + dml_float_t VRatio[], + dml_float_t VRatioChroma[], + dml_uint_t SwathWidthY[], + dml_uint_t SwathWidthC[], + dml_uint_t DPPPerSurface[], + dml_float_t HRatio[], + dml_float_t HRatioChroma[], + dml_float_t PixelClock[], + dml_float_t PSCL_THROUGHPUT[], + dml_float_t PSCL_THROUGHPUT_CHROMA[], + dml_float_t Dppclk[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_uint_t ReturnBusWidth, + + // Output + dml_float_t *DCFClkDeepSleep) +{ + dml_float_t 
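+ /* Deep-sleep DCFCLK: the per-surface minimum clock that still sustains swath delivery, never below PixelClock/16, maxed across surfaces and against a read-bandwidth/bus-width floor of 8 (MHz assumed). */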
DisplayPipeLineDeliveryTimeLuma; + dml_float_t DisplayPipeLineDeliveryTimeChroma; + dml_float_t DCFClkDeepSleepPerSurface[__DML_NUM_PLANES__]; + dml_float_t ReadBandwidth = 0.0; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + + if (VRatio[k] <= 1) { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma = 0; + } else { + if (VRatioChroma[k] <= 1) { + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; + } else { + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } + + if (BytePerPixelC[k] > 0) { + DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, + __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); + } else { + DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; + } + DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, PixelClock[k]); + dml_print("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); +#endif + } + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + } + + *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (dml_float_t) ReturnBusWidth); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); + dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); + dml_print("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); + dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); +#endif + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); + } + dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); +} // CalculateDCFCLKDeepSleep + +static void CalculateUrgentBurstFactor( + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange, + dml_uint_t swath_width_luma_ub, + dml_uint_t swath_width_chroma_ub, + dml_uint_t SwathHeightY, + dml_uint_t SwathHeightC, + dml_float_t LineTime, + dml_float_t UrgentLatency, + dml_float_t CursorBufferSize, + dml_uint_t CursorWidth, + dml_uint_t CursorBPP, + dml_float_t VRatio, + dml_float_t VRatioC, + dml_float_t BytePerPixelInDETY, + dml_float_t BytePerPixelInDETC, + dml_uint_t DETBufferSizeY, + dml_uint_t DETBufferSizeC, + // Output + dml_float_t *UrgentBurstFactorCursor, + dml_float_t *UrgentBurstFactorLuma, + dml_float_t *UrgentBurstFactorChroma, + dml_bool_t *NotEnoughUrgentLatencyHiding) +{ + dml_float_t LinesInDETLuma; + dml_float_t LinesInDETChroma; + dml_uint_t LinesInCursorBuffer; + dml_float_t CursorBufferSizeInTime; + dml_float_t DETBufferSizeInTimeLuma; + dml_float_t DETBufferSizeInTimeChroma; + + *NotEnoughUrgentLatencyHiding = 0; + + if (CursorWidth > 0) { + LinesInCursorBuffer = 1 << (dml_uint_t) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); + if 
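+ /* Urgent burst factor: with T the time a buffer (cursor, luma or chroma DET) can hide, the factor is T / (T - UrgentLatency); T <= UrgentLatency sets NotEnoughUrgentLatencyHiding. */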
(VRatio > 0) { + CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; + if (CursorBufferSizeInTime - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorCursor = 0; + } else { + *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); + } + } else { + *UrgentBurstFactorCursor = 1; + } + } + + LinesInDETLuma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; + + if (VRatio > 0) { + DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; + if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorLuma = 0; + } else { + *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); + } + } else { + *UrgentBurstFactorLuma = 1; + } + + if (BytePerPixelInDETC > 0) { + LinesInDETChroma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub; + + if (VRatioC > 0) { + DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC; + if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorChroma = 0; + } else { + *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); + } + } else { + *UrgentBurstFactorChroma = 1; + } + } +} // CalculateUrgentBurstFactor + +static void CalculatePixelDeliveryTimes( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t VRatio[], + dml_float_t VRatioChroma[], + dml_float_t VRatioPrefetchY[], + dml_float_t VRatioPrefetchC[], + dml_uint_t swath_width_luma_ub[], + dml_uint_t swath_width_chroma_ub[], + dml_uint_t DPPPerSurface[], + dml_float_t HRatio[], + dml_float_t HRatioChroma[], + dml_float_t PixelClock[], + dml_float_t PSCL_THROUGHPUT[], + dml_float_t PSCL_THROUGHPUT_CHROMA[], + dml_float_t Dppclk[], + dml_uint_t BytePerPixelC[], + enum dml_rotation_angle SourceScan[], + dml_uint_t NumberOfCursors[], + dml_uint_t CursorWidth[], + dml_uint_t CursorBPP[], + dml_uint_t BlockWidth256BytesY[], + dml_uint_t BlockHeight256BytesY[], + dml_uint_t BlockWidth256BytesC[], + dml_uint_t BlockHeight256BytesC[], + + // Output + dml_float_t DisplayPipeLineDeliveryTimeLuma[], + dml_float_t DisplayPipeLineDeliveryTimeChroma[], + dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[], + dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[], + dml_float_t DisplayPipeRequestDeliveryTimeLuma[], + dml_float_t DisplayPipeRequestDeliveryTimeChroma[], + dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[], + dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[], + dml_float_t CursorRequestDeliveryTime[], + dml_float_t CursorRequestDeliveryTimePrefetch[]) +{ + dml_float_t req_per_swath_ub; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u : HRatio = %f\n", __func__, k, HRatio[k]); + dml_print("DML::%s: k=%u : VRatio = %f\n", __func__, k, VRatio[k]); + dml_print("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); + dml_print("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); + dml_print("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); + dml_print("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); + dml_print("DML::%s: k=%u : 
PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
+		dml_print("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
+		dml_print("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
+		dml_print("DML::%s: k=%u : PixelClock = %f\n", __func__, k, PixelClock[k]);
+		dml_print("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
+#endif
+
+		if (VRatio[k] <= 1) {
+			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
+		} else {
+			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+		}
+
+		if (BytePerPixelC[k] == 0) {
+			DisplayPipeLineDeliveryTimeChroma[k] = 0;
+		} else {
+			if (VRatioChroma[k] <= 1) {
+				DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+			} else {
+				DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+			}
+		}
+
+		if (VRatioPrefetchY[k] <= 1) {
+			DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
+		} else {
+			DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+		}
+
+		if (BytePerPixelC[k] == 0) {
+			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
+		} else {
+			if (VRatioPrefetchC[k] <= 1) {
+				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+			} else {
+				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+			}
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
+#endif
+	}
+
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		if (!dml_is_vertical_rotation(SourceScan[k])) {
+			req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
+		} else {
+			req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
+#endif
+
+		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
+		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
+		if (BytePerPixelC[k] == 0) {
+			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
+		} else {
+			if (!dml_is_vertical_rotation(SourceScan[k])) {
+				req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
+			} else {
+				req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
+			}
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
+#endif
+			DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
+#endif
+	}
+
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		dml_uint_t cursor_req_per_width;
+		cursor_req_per_width = (dml_uint_t)(dml_ceil((dml_float_t) CursorWidth[k] * (dml_float_t) CursorBPP[k] / 256.0 / 8.0, 1.0));
+		if (NumberOfCursors[k] > 0) {
+			if (VRatio[k] <= 1) {
+				CursorRequestDeliveryTime[k] = (dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
+			} else {
+				CursorRequestDeliveryTime[k] = (dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
+			}
+			if (VRatioPrefetchY[k] <= 1) {
+				CursorRequestDeliveryTimePrefetch[k] = (dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
+			} else {
+				CursorRequestDeliveryTimePrefetch[k] = (dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
+			}
+		} else {
+			CursorRequestDeliveryTime[k] = 0;
+			CursorRequestDeliveryTimePrefetch[k] = 0;
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : NumberOfCursors = %u\n", __func__, k, NumberOfCursors[k]);
+		dml_print("DML::%s: k=%u : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
+		dml_print("DML::%s: k=%u : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
+#endif
+	}
+} // CalculatePixelDeliveryTimes
+
+static void CalculateMetaAndPTETimes(
+		dml_bool_t use_one_row_for_frame[],
+		dml_uint_t NumberOfActiveSurfaces,
+		dml_bool_t GPUVMEnable,
+		dml_uint_t MetaChunkSize,
+		dml_uint_t MinMetaChunkSizeBytes,
+		dml_uint_t HTotal[],
+		dml_float_t VRatio[],
+		dml_float_t VRatioChroma[],
+		dml_float_t DestinationLinesToRequestRowInVBlank[],
+		dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
+		dml_bool_t DCCEnable[],
+		dml_float_t PixelClock[],
+		dml_uint_t BytePerPixelY[],
+		dml_uint_t BytePerPixelC[],
+		enum dml_rotation_angle SourceScan[],
+		dml_uint_t dpte_row_height[],
+		dml_uint_t dpte_row_height_chroma[],
+		dml_uint_t meta_row_width[],
+		dml_uint_t meta_row_width_chroma[],
+		dml_uint_t meta_row_height[],
+		dml_uint_t meta_row_height_chroma[],
+		dml_uint_t meta_req_width[],
+		dml_uint_t meta_req_width_chroma[],
+		dml_uint_t meta_req_height[],
+		dml_uint_t meta_req_height_chroma[],
+		dml_uint_t dpte_group_bytes[],
+		dml_uint_t PTERequestSizeY[],
+		dml_uint_t PTERequestSizeC[],
+		dml_uint_t PixelPTEReqWidthY[],
+		dml_uint_t PixelPTEReqHeightY[],
+		dml_uint_t PixelPTEReqWidthC[],
+		dml_uint_t PixelPTEReqHeightC[],
+		dml_uint_t dpte_row_width_luma_ub[],
+		dml_uint_t dpte_row_width_chroma_ub[],
+
+		// Output
+		dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
+		dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
+		dml_float_t DST_Y_PER_META_ROW_NOM_L[],
+		dml_float_t DST_Y_PER_META_ROW_NOM_C[],
+		dml_float_t TimePerMetaChunkNominal[],
+		dml_float_t TimePerChromaMetaChunkNominal[],
+		dml_float_t TimePerMetaChunkVBlank[],
+		dml_float_t TimePerChromaMetaChunkVBlank[],
+		dml_float_t TimePerMetaChunkFlip[],
+		dml_float_t TimePerChromaMetaChunkFlip[],
+		dml_float_t time_per_pte_group_nom_luma[],
+		dml_float_t time_per_pte_group_vblank_luma[],
+		dml_float_t time_per_pte_group_flip_luma[],
+		dml_float_t time_per_pte_group_nom_chroma[],
+		dml_float_t time_per_pte_group_vblank_chroma[],
+		dml_float_t time_per_pte_group_flip_chroma[])
+{
+	dml_uint_t meta_chunk_width;
+	dml_uint_t min_meta_chunk_width;
+	dml_uint_t meta_chunk_per_row_int;
+	dml_uint_t meta_row_remainder;
+	dml_uint_t meta_chunk_threshold;
+	dml_uint_t meta_chunks_per_row_ub;
+	dml_uint_t meta_chunk_width_chroma;
+	dml_uint_t min_meta_chunk_width_chroma;
+	dml_uint_t meta_chunk_per_row_int_chroma;
+	dml_uint_t meta_row_remainder_chroma;
+	dml_uint_t meta_chunk_threshold_chroma;
+	dml_uint_t meta_chunks_per_row_ub_chroma;
+	dml_uint_t dpte_group_width_luma;
+	dml_uint_t dpte_groups_per_row_luma_ub;
+	dml_uint_t dpte_group_width_chroma;
+	dml_uint_t dpte_groups_per_row_chroma_ub;
+
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
+		if (BytePerPixelC[k] == 0) {
+			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
+		} else {
+			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
+		}
+		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
+		if (BytePerPixelC[k] == 0) {
+			DST_Y_PER_META_ROW_NOM_C[k] = 0;
+		} else {
+			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
+		}
+	}
+
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		if (DCCEnable[k] == true) {
+			meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
+			min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
+			meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
+			meta_row_remainder = meta_row_width[k] % meta_chunk_width;
+			if (!dml_is_vertical_rotation(SourceScan[k])) {
+				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
+			} else {
+				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
+			}
+			if (meta_row_remainder <= meta_chunk_threshold) {
+				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+			} else {
+				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+			}
+			TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+			TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+			TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+			if (BytePerPixelC[k] == 0) {
+				TimePerChromaMetaChunkNominal[k] = 0;
+				TimePerChromaMetaChunkVBlank[k] = 0;
+				TimePerChromaMetaChunkFlip[k] = 0;
+			} else {
+				meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
+				min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
+				meta_chunk_per_row_int_chroma = (dml_uint_t)((dml_float_t) meta_row_width_chroma[k] / meta_chunk_width_chroma);
+				meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
+				if (!dml_is_vertical_rotation(SourceScan[k])) {
+					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
+				} else {
+					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
+				}
+				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
+					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
+				} else {
+					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
+				}
+				TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+				TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+				TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+			}
+		} else {
+			TimePerMetaChunkNominal[k] = 0;
+			TimePerMetaChunkVBlank[k] = 0;
+			TimePerMetaChunkFlip[k] = 0;
+			TimePerChromaMetaChunkNominal[k] = 0;
+			TimePerChromaMetaChunkVBlank[k] = 0;
+			TimePerChromaMetaChunkFlip[k] = 0;
+		}
+	}
+
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		if (GPUVMEnable == true) {
+			if (!dml_is_vertical_rotation(SourceScan[k])) {
+				dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqWidthY[k]);
+			} else {
+				dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqHeightY[k]);
+			}
+
+			if (use_one_row_for_frame[k]) {
+				dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma / 2.0, 1.0));
+			} else {
+				dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma, 1.0));
+			}
+
+			dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, use_one_row_for_frame[k]);
+			dml_print("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, dpte_group_bytes[k]);
+			dml_print("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, PTERequestSizeY[k]);
+			dml_print("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, PixelPTEReqWidthY[k]);
+			dml_print("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, PixelPTEReqHeightY[k]);
+			dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, dpte_row_width_luma_ub[k]);
+			dml_print("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
+			dml_print("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
+
+			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+			if (BytePerPixelC[k] == 0) {
+				time_per_pte_group_nom_chroma[k] = 0;
+				time_per_pte_group_vblank_chroma[k] = 0;
+				time_per_pte_group_flip_chroma[k] = 0;
+			} else {
+				if (!dml_is_vertical_rotation(SourceScan[k])) {
+					dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqWidthC[k]);
+				} else {
+					dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqHeightC[k]);
+				}
+
+				if (use_one_row_for_frame[k]) {
+					dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma / 2.0, 1.0));
+				} else {
+					dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma, 1.0));
+				}
+				dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, dpte_row_width_chroma_ub[k]);
+				dml_print("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
+				dml_print("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
+
+				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+			}
+		} else {
+			time_per_pte_group_nom_luma[k] = 0;
+			time_per_pte_group_vblank_luma[k] = 0;
+			time_per_pte_group_flip_luma[k] = 0;
+			time_per_pte_group_nom_chroma[k] = 0;
+			time_per_pte_group_vblank_chroma[k] = 0;
+			time_per_pte_group_flip_chroma[k] = 0;
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInVBlank = %f\n", __func__, k, DestinationLinesToRequestRowInVBlank[k]);
+		dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
+
+		dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
+		dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
+		dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
+		dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
+		dml_print("DML::%s: k=%u, TimePerMetaChunkNominal = %f\n", __func__, k, TimePerMetaChunkNominal[k]);
+		dml_print("DML::%s: k=%u, TimePerMetaChunkVBlank = %f\n", __func__, k, TimePerMetaChunkVBlank[k]);
+		dml_print("DML::%s: k=%u, TimePerMetaChunkFlip = %f\n", __func__, k, TimePerMetaChunkFlip[k]);
+		dml_print("DML::%s: k=%u, TimePerChromaMetaChunkNominal = %f\n", __func__, k, TimePerChromaMetaChunkNominal[k]);
+		dml_print("DML::%s: k=%u, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, TimePerChromaMetaChunkVBlank[k]);
+		dml_print("DML::%s: k=%u, TimePerChromaMetaChunkFlip = %f\n", __func__, k, TimePerChromaMetaChunkFlip[k]);
+		dml_print("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, time_per_pte_group_nom_luma[k]);
+		dml_print("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, time_per_pte_group_vblank_luma[k]);
+		dml_print("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, time_per_pte_group_flip_luma[k]);
+		dml_print("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, time_per_pte_group_nom_chroma[k]);
+		dml_print("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, time_per_pte_group_vblank_chroma[k]);
+		dml_print("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, time_per_pte_group_flip_chroma[k]);
+#endif
+	}
+} // CalculateMetaAndPTETimes
+
+static void CalculateVMGroupAndRequestTimes(
+		dml_uint_t NumberOfActiveSurfaces,
+		dml_bool_t GPUVMEnable,
+		dml_uint_t GPUVMMaxPageTableLevels,
+		dml_uint_t HTotal[],
+		dml_uint_t BytePerPixelC[],
+		dml_float_t DestinationLinesToRequestVMInVBlank[],
+		dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
+		dml_bool_t DCCEnable[],
+		dml_float_t PixelClock[],
+		dml_uint_t dpte_row_width_luma_ub[],
+		dml_uint_t dpte_row_width_chroma_ub[],
+		dml_uint_t vm_group_bytes[],
+		dml_uint_t dpde0_bytes_per_frame_ub_l[],
+		dml_uint_t dpde0_bytes_per_frame_ub_c[],
+		dml_uint_t meta_pte_bytes_per_frame_ub_l[],
+		dml_uint_t meta_pte_bytes_per_frame_ub_c[],
+
+		// Output
+		dml_float_t TimePerVMGroupVBlank[],
+		dml_float_t TimePerVMGroupFlip[],
+		dml_float_t TimePerVMRequestVBlank[],
+		dml_float_t TimePerVMRequestFlip[])
+{
+	dml_uint_t num_group_per_lower_vm_stage;
+	dml_uint_t num_req_per_lower_vm_stage;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+	dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
+#endif
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, DCCEnable = %u\n", __func__, k, DCCEnable[k]);
+		dml_print("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
+		dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
+		dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
+		dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_l = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
+		dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_c = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
+#endif
+
+		if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
+			if (DCCEnable[k] == false) {
+				if (BytePerPixelC[k] > 0) {
+					num_group_per_lower_vm_stage = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0) +
+							dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_c[k] / (dml_float_t) vm_group_bytes[k], 1.0));
+				} else {
+					num_group_per_lower_vm_stage = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0));
+				}
+			} else {
+				if (GPUVMMaxPageTableLevels == 1) {
+					if (BytePerPixelC[k] > 0) {
+						num_group_per_lower_vm_stage = (dml_uint_t)(dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0) +
+								dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0));
+					} else {
+						num_group_per_lower_vm_stage = (dml_uint_t)(dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1.0));
+					}
+				} else {
+					if (BytePerPixelC[k] > 0) {
+						num_group_per_lower_vm_stage = (dml_uint_t)(2.0 + dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) +
+								dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1) +
+								dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) +
+								dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), 1));
+					} else {
+						num_group_per_lower_vm_stage = (dml_uint_t)(1.0 + dml_ceil((dml_float_t) (dpde0_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1) +
+								dml_ceil((dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), 1));
+					}
+				}
+			}
+
+			if (DCCEnable[k] == false) {
+				if (BytePerPixelC[k] > 0) {
+					num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
+				} else {
+					num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
+				}
+			} else {
+				if (GPUVMMaxPageTableLevels == 1) {
+					if (BytePerPixelC[k] > 0) {
+						num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
+					} else {
+						num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
+					}
+				} else {
+					if (BytePerPixelC[k] > 0) {
+						num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
+					} else {
+						num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
+					}
+				}
+			}
+
+			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+
+			if (GPUVMMaxPageTableLevels > 2) {
+				TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
+				TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
+				TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
+				TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
+			}
+
+		} else {
+			TimePerVMGroupVBlank[k] = 0;
+			TimePerVMGroupFlip[k] = 0;
+			TimePerVMRequestVBlank[k] = 0;
+			TimePerVMRequestFlip[k] = 0;
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
+		dml_print("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
+		dml_print("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
+		dml_print("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
+#endif
+	}
+} // CalculateVMGroupAndRequestTimes
+
+static void CalculateStutterEfficiency(struct display_mode_lib_scratch_st *scratch,
+	struct CalculateStutterEfficiency_params_st *p)
+{
+	dml_float_t DETBufferingTimeY = 0;
+	dml_float_t SwathWidthYCriticalSurface = 0;
+	dml_float_t SwathHeightYCriticalSurface = 0;
+	dml_float_t VActiveTimeCriticalSurface = 0;
+	dml_float_t FrameTimeCriticalSurface = 0;
+	dml_uint_t BytePerPixelYCriticalSurface = 0;
+	dml_float_t LinesToFinishSwathTransferStutterCriticalSurface = 0;
+	dml_uint_t DETBufferSizeYCriticalSurface = 0;
+	dml_float_t MinTTUVBlankCriticalSurface = 0;
+	dml_uint_t BlockWidth256BytesYCriticalSurface = 0;
+	dml_bool_t SinglePlaneCriticalSurface = 0;
+	dml_bool_t SinglePipeCriticalSurface = 0;
+	dml_float_t TotalCompressedReadBandwidth = 0;
+	dml_float_t TotalRowReadBandwidth = 0;
+	dml_float_t AverageDCCCompressionRate = 0;
+	dml_float_t EffectiveCompressedBufferSize = 0;
+	dml_float_t PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = 0;
+	dml_float_t StutterBurstTime = 0;
+	dml_uint_t TotalActiveWriteback = 0;
+	dml_float_t LinesInDETY = 0;
+	dml_float_t LinesInDETYRoundedDownToSwath = 0;
+	dml_float_t MaximumEffectiveCompressionLuma = 0;
+	dml_float_t MaximumEffectiveCompressionChroma = 0;
+	dml_float_t TotalZeroSizeRequestReadBandwidth = 0;
+	dml_float_t TotalZeroSizeCompressedReadBandwidth = 0;
+	dml_float_t AverageDCCZeroSizeFraction = 0;
+	dml_float_t AverageZeroSizeCompressionRate = 0;
+
+	dml_bool_t FoundCriticalSurface = false;
+
+	dml_uint_t TotalNumberOfActiveOTG = 0;
+	dml_float_t SinglePixelClock = 0;
+	dml_uint_t SingleHTotal = 0;
+	dml_uint_t SingleVTotal = 0;
+	dml_bool_t SameTiming = true;
+
+	dml_float_t LastStutterPeriod = 0.0;
+	dml_float_t LastZ8StutterPeriod = 0.0;
+
+	dml_uint_t SwathSizeCriticalSurface;
+	dml_uint_t LastChunkOfSwathSize;
+	dml_uint_t MissingPartOfLastSwathOfDETSize;
+
+	TotalZeroSizeRequestReadBandwidth = 0;
+	TotalZeroSizeCompressedReadBandwidth = 0;
+	TotalRowReadBandwidth = 0;
+	TotalCompressedReadBandwidth = 0;
+
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
+			if (p->DCCEnable[k] == true) {
+				if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
+					MaximumEffectiveCompressionLuma = 2;
+				} else {
+					MaximumEffectiveCompressionLuma = 4;
+				}
+				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / dml_min(p->NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
+#ifdef __DML_VBA_DEBUG__
+				dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
+				dml_print("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->NetDCCRateLuma[k]);
+				dml_print("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, MaximumEffectiveCompressionLuma);
+#endif
+				TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k];
+				TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
+
+				if (p->ReadBandwidthSurfaceChroma[k] > 0) {
+					if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
+						MaximumEffectiveCompressionChroma = 2;
+					} else {
+						MaximumEffectiveCompressionChroma = 4;
+					}
+					TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / dml_min(p->NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
+#ifdef __DML_VBA_DEBUG__
+					dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
+					dml_print("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->NetDCCRateChroma[k]);
+					dml_print("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, MaximumEffectiveCompressionChroma);
+#endif
+					TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k];
+					TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
+				}
+			} else {
+				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
+			}
+			TotalRowReadBandwidth = TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
+		}
+	}
+
+	AverageDCCCompressionRate = p->TotalDataReadBandwidth / TotalCompressedReadBandwidth;
+	AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
+	dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
+	dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
+	dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
+	dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
+	dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
+	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+	dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
+	dml_print("DML::%s: CompbufReservedSpace64B = %u\n", __func__, p->CompbufReservedSpace64B);
+	dml_print("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
+	dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, p->CompressedBufferSizeInkByte);
+#endif
+	if (AverageDCCZeroSizeFraction == 1) {
+		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
+		EffectiveCompressedBufferSize = (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * AverageZeroSizeCompressionRate;
+	} else if (AverageDCCZeroSizeFraction > 0) {
+		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
+		EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+			(dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) +
+			dml_min(((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate,
+			((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+		dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
+		dml_print("DML::%s: min 3 = %f\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
+		dml_print("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+#endif
+	} else {
+		EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+			(dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) +
+			((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate;
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+		dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
+#endif
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
+	dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
+	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+#endif
+
+	*p->StutterPeriod = 0;
+
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
+			LinesInDETY = ((dml_float_t)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
+			LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, p->SwathHeightY[k]);
+			DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((dml_float_t)p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k];
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+			dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
+			dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
+			dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
+			dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
+			dml_print("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, LinesInDETY);
+			dml_print("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
+			dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, p->HTotal[k]);
+			dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, p->PixelClock[k]);
+			dml_print("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->VRatio[k]);
+			dml_print("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
+			dml_print("DML::%s: k=%u,PixelClock = %f\n", __func__, k, p->PixelClock[k]);
+#endif
+
+			if (!FoundCriticalSurface || DETBufferingTimeY < *p->StutterPeriod) {
+				dml_bool_t isInterlaceTiming = p->Interlace[k] && !p->ProgressiveToInterlaceUnitInOPP;
+
+				FoundCriticalSurface = true;
+				*p->StutterPeriod = DETBufferingTimeY;
+				FrameTimeCriticalSurface = (isInterlaceTiming ? dml_floor((dml_float_t)p->VTotal[k]/2.0, 1.0) : p->VTotal[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
+				VActiveTimeCriticalSurface = (isInterlaceTiming ? dml_floor((dml_float_t)p->VActive[k]/2.0, 1.0) : p->VActive[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
+				BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
+				SwathWidthYCriticalSurface = p->SwathWidthY[k];
+				SwathHeightYCriticalSurface = p->SwathHeightY[k];
+				BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
+				LinesToFinishSwathTransferStutterCriticalSurface = p->SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
+				DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
+				MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
+				SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
+				SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
+
+#ifdef __DML_VBA_DEBUG__
+				dml_print("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
+				dml_print("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
+				dml_print("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, MinTTUVBlankCriticalSurface);
+				dml_print("DML::%s: k=%u, FrameTimeCriticalSurface = %f\n", __func__, k, FrameTimeCriticalSurface);
+				dml_print("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, VActiveTimeCriticalSurface);
+				dml_print("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, BytePerPixelYCriticalSurface);
+				dml_print("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, SwathWidthYCriticalSurface);
+				dml_print("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, SwathHeightYCriticalSurface);
+				dml_print("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, BlockWidth256BytesYCriticalSurface);
+				dml_print("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, SinglePlaneCriticalSurface);
+				dml_print("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, SinglePipeCriticalSurface);
+				dml_print("DML::%s: k=%u, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
+#endif
+			}
+		}
+	}
+
+	PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*p->StutterPeriod * p->TotalDataReadBandwidth, EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, p->ROBBufferSizeInKByte);
+	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth);
+	dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, p->ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
+	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+	dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
+	dml_print("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
+	dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
+	dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
+	dml_print("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
+#endif
+
+	StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW + (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64) + *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW);
+	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth));
+	dml_print("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
+	dml_print("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW);
+	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+#endif
+	StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW);
+
+	dml_print("DML::%s: Time to finish residue swath=%f\n", __func__, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW);
+
+	TotalActiveWriteback = 0;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (p->WritebackEnable[k]) {
+			TotalActiveWriteback = TotalActiveWriteback + 1;
+		}
+	}
+
+	if (TotalActiveWriteback == 0) {
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
+		dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
+		dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
+		dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
+#endif
+		*p->StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitTime + StutterBurstTime) / *p->StutterPeriod) * 100;
+		*p->Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitZ8Time + StutterBurstTime) / *p->StutterPeriod) * 100;
+		*p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
+		*p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
+	} else {
+		*p->StutterEfficiencyNotIncludingVBlank = 0.;
+		*p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
+		*p->NumberOfStutterBurstsPerFrame = 0;
+		*p->Z8NumberOfStutterBurstsPerFrame = 0;
+	}
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
+	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
+	dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
+	dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
+	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
+			if (p->BlendingAndTiming[k] == k) {
+				if (TotalNumberOfActiveOTG == 0) {
+					SinglePixelClock = p->PixelClock[k];
+					SingleHTotal = p->HTotal[k];
+					SingleVTotal = p->VTotal[k];
+				} else if (SinglePixelClock != p->PixelClock[k] || SingleHTotal != p->HTotal[k] || SingleVTotal != p->VTotal[k]) {
+					SameTiming = false;
+				}
+				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
+			}
+		}
+	}
+
+	if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
+		LastStutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
+
+		if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming &&
+				LastStutterPeriod + MinTTUVBlankCriticalSurface > p->StutterEnterPlusExitWatermark) {
+			*p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100;
+		} else {
+			*p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
+		}
+	} else {
+		*p->StutterEfficiency = 0;
+	}
+
+	if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
+		LastZ8StutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
+		if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + MinTTUVBlankCriticalSurface > p->Z8StutterEnterPlusExitWatermark) {
+			*p->Z8StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100;
+		} else {
+			*p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
+		}
+	} else {
+		*p->Z8StutterEfficiency = 0.;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
+	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
+	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+	dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
+	dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
+	dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
+	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
+	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+	SwathSizeCriticalSurface = (dml_uint_t)(BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface));
+	LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024);
+	MissingPartOfLastSwathOfDETSize = (dml_uint_t)(dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - DETBufferSizeYCriticalSurface);
+
+	*p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && SinglePlaneCriticalSurface && SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) &&
+			(LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface);
+	dml_print("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, DETBufferSizeYCriticalSurface);
+	dml_print("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
+	dml_print("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize);
+	dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize);
+	dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+#endif
+} // CalculateStutterEfficiency
+
+/// \CalculateSwathAndDETConfiguration
+/// @brief Calculates swath width and different return buffers sizing (DET, CDB, etc.)
+static void CalculateSwathAndDETConfiguration(struct display_mode_lib_scratch_st *scratch,
+	struct CalculateSwathAndDETConfiguration_params_st *p)
+{
+	dml_uint_t MaximumSwathHeightY[__DML_NUM_PLANES__];
+	dml_uint_t MaximumSwathHeightC[__DML_NUM_PLANES__];
+	dml_uint_t RoundedUpMaxSwathSizeBytesY[__DML_NUM_PLANES__];
+	dml_uint_t RoundedUpMaxSwathSizeBytesC[__DML_NUM_PLANES__];
+	dml_uint_t RoundedUpSwathSizeBytesY[__DML_NUM_PLANES__];
+	dml_uint_t RoundedUpSwathSizeBytesC[__DML_NUM_PLANES__];
+	dml_uint_t SwathWidthSingleDPP[__DML_NUM_PLANES__];
+	dml_uint_t SwathWidthSingleDPPChroma[__DML_NUM_PLANES__];
+
+	dml_uint_t TotalActiveDPP = 0;
+	dml_bool_t NoChromaOrLinearSurfaces = true;
+	dml_uint_t SurfaceDoingUnboundedRequest = 0;
+
+	dml_uint_t DETBufferSizeInKByteForSwathCalculation;
+
+	const long TTUFIFODEPTH = 8;
+	const long MAXIMUMCOMPRESSION = 4;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		dml_print("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
+	}
+#endif
+	CalculateSwathWidth(p->ForceSingleDPP,
+			p->NumberOfActiveSurfaces,
+			p->SourcePixelFormat,
+			p->SourceScan,
+			p->ViewportStationary,
+			p->ViewportWidth,
+			p->ViewportHeight,
+			p->ViewportXStart,
+			p->ViewportYStart,
+			p->ViewportXStartC,
+			p->ViewportYStartC,
+			p->SurfaceWidthY,
+			p->SurfaceWidthC,
+			p->SurfaceHeightY,
+			p->SurfaceHeightC,
+			p->ODMMode,
+			p->BytePerPixY,
+			p->BytePerPixC,
+			p->Read256BytesBlockHeightY,
+			p->Read256BytesBlockHeightC,
+			p->Read256BytesBlockWidthY,
+			p->Read256BytesBlockWidthC,
+			p->BlendingAndTiming,
+			p->HActive,
+			p->HRatio,
+			p->DPPPerSurface,
+
+			// Output
+			SwathWidthSingleDPP,
+			SwathWidthSingleDPPChroma,
+			p->SwathWidth,
+			p->SwathWidthChroma,
+			MaximumSwathHeightY,
+			MaximumSwathHeightC,
+			p->swath_width_luma_ub,
+			p->swath_width_chroma_ub);
+
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
+		RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
+		dml_print("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
+		dml_print("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
+		dml_print("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
+		dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
+		dml_print("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
+		dml_print("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
+		dml_print("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
+		dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+		if (p->SourcePixelFormat[k] == dml_420_10) {
+			RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesY[k], 256));
+			RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesC[k], 256));
+		}
+	}
+
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
+		if (p->DPPPerSurface[k] > 0)
+			SurfaceDoingUnboundedRequest = k;
+		if (p->SourcePixelFormat[k] == dml_420_8 || p->SourcePixelFormat[k] == dml_420_10 ||
+			p->SourcePixelFormat[k] == dml_420_12 || p->SourcePixelFormat[k] == dml_rgbe_alpha
+			|| p->SurfaceTiling[k] == dml_sw_linear) {
+			NoChromaOrLinearSurfaces = false;
+		}
+	}
+
+	*p->UnboundedRequestEnabled = UnboundedRequest(p->UseUnboundedRequestingFinal, TotalActiveDPP,
+			NoChromaOrLinearSurfaces, p->Output[0]);
+
+	CalculateDETBufferSize(p->DETSizeOverride,
+			p->UseMALLForPStateChange,
+			p->ForceSingleDPP,
+			p->NumberOfActiveSurfaces,
+			*p->UnboundedRequestEnabled,
+			p->nomDETInKByte,
+			p->MaxTotalDETInKByte,
+			p->ConfigReturnBufferSizeInKByte,
+			p->MinCompressedBufferSizeInKByte,
+			p->ConfigReturnBufferSegmentSizeInkByte,
+			p->CompressedBufferSegmentSizeInkByteFinal,
+			p->SourcePixelFormat,
+			p->ReadBandwidthLuma,
+			p->ReadBandwidthChroma,
+			RoundedUpMaxSwathSizeBytesY,
+			RoundedUpMaxSwathSizeBytesC,
+			p->DPPPerSurface,
+
+			// Output
+			p->DETBufferSizeInKByte, // per hubp pipe
+			p->CompressedBufferSizeInkByte);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
+	dml_print("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
+	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
+	dml_print("DML::%s: UseUnboundedRequestingFinal = %u\n", __func__, p->UseUnboundedRequestingFinal);
+	dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
+	dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
+#endif
+
+	*p->ViewportSizeSupport = true;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+
+		DETBufferSizeInKByteForSwathCalculation = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe ? 1024 : p->DETBufferSizeInKByte[k]);
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
+#endif
+
+		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+			p->SwathHeightY[k] = MaximumSwathHeightY[k];
+			p->SwathHeightC[k] = MaximumSwathHeightC[k];
+			RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k];
+			RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k];
+		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+			p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+			p->SwathHeightC[k] = MaximumSwathHeightC[k];
+			RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2;
+			RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k];
+		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+			p->SwathHeightY[k] = MaximumSwathHeightY[k];
+			p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+			RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k];
+			RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2;
+		} else {
+			p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+			p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+			RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2;
+			RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2;
+		}
+
+		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
+			p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
+			*p->ViewportSizeSupport = false;
+			p->ViewportSizeSupportPerSurface[k] = false;
+		} else {
+			p->ViewportSizeSupportPerSurface[k] = true;
+		}
+
+		if (p->SwathHeightC[k] == 0) {
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%u All DET for plane0\n", __func__, k);
+#endif
+			p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
+			p->DETBufferSizeC[k] = 0;
+		} else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%u Half DET for plane0, half for plane1\n", __func__, k);
+#endif
+			p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
+			p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
+		} else {
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%u 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
+#endif
+			p->DETBufferSizeY[k] = (dml_uint_t)(dml_floor(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024));
+			p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
+		dml_print("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
+		dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
+		dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
+		dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
+		dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
+		dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
+		dml_print("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+		dml_print("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
+		dml_print("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
+#endif
+
+	}
+
+	*p->compbuf_reserved_space_64b = 2 * p->PixelChunkSizeInKByte * 1024 / 64;
+	if (*p->UnboundedRequestEnabled) {
+		*p->compbuf_reserved_space_64b = dml_max(*p->compbuf_reserved_space_64b,
+				(dml_float_t)(p->ROBBufferSizeInKByte * 1024/64)
+				- (dml_float_t)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / MAXIMUMCOMPRESSION/64));
+	}
+	*p->compbuf_reserved_space_zs = 2 * p->PixelChunkSizeInKByte * 1024 / 256;
+} // CalculateSwathAndDETConfiguration
+
+static void CalculateSwathWidth(
+		dml_bool_t ForceSingleDPP,
+		dml_uint_t NumberOfActiveSurfaces,
+		enum dml_source_format_class SourcePixelFormat[],
+		enum dml_rotation_angle SourceScan[],
+		dml_bool_t ViewportStationary[],
+		dml_uint_t ViewportWidth[],
+		dml_uint_t ViewportHeight[],
+		dml_uint_t ViewportXStart[],
+		dml_uint_t ViewportYStart[],
+		dml_uint_t ViewportXStartC[],
+		dml_uint_t ViewportYStartC[],
+		dml_uint_t SurfaceWidthY[],
+		dml_uint_t SurfaceWidthC[],
+		dml_uint_t SurfaceHeightY[],
+		dml_uint_t SurfaceHeightC[],
+		enum dml_odm_mode ODMMode[],
+		dml_uint_t BytePerPixY[],
+		dml_uint_t BytePerPixC[],
+		dml_uint_t Read256BytesBlockHeightY[],
+		dml_uint_t Read256BytesBlockHeightC[],
+		dml_uint_t Read256BytesBlockWidthY[],
+		dml_uint_t Read256BytesBlockWidthC[],
+		dml_uint_t BlendingAndTiming[],
+		dml_uint_t HActive[],
+		dml_float_t HRatio[],
+		dml_uint_t DPPPerSurface[],
+
+		// Output
+		dml_uint_t SwathWidthSingleDPPY[],
+		dml_uint_t SwathWidthSingleDPPC[],
+		dml_uint_t SwathWidthY[], // per-pipe
+		dml_uint_t SwathWidthC[], // per-pipe
+		dml_uint_t MaximumSwathHeightY[],
+		dml_uint_t MaximumSwathHeightC[],
+		dml_uint_t swath_width_luma_ub[], // per-pipe
+		dml_uint_t swath_width_chroma_ub[]) // per-pipe
+{
+	enum dml_odm_mode MainSurfaceODMMode;
+	dml_uint_t surface_width_ub_l;
+	dml_uint_t surface_height_ub_l;
+	dml_uint_t surface_width_ub_c = 0;
+	dml_uint_t surface_height_ub_c = 0;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
+	dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+#endif
+
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		if (!dml_is_vertical_rotation(SourceScan[k])) {
+			SwathWidthSingleDPPY[k] = ViewportWidth[k];
+		} else {
+			SwathWidthSingleDPPY[k] = ViewportHeight[k];
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, ViewportWidth[k]);
+		dml_print("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, ViewportHeight[k]);
+		dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
+#endif
+
+		MainSurfaceODMMode = ODMMode[k];
+		for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) {
+			if (BlendingAndTiming[k] == j) {
+				MainSurfaceODMMode = ODMMode[j];
+			}
+		}
+
+		if (ForceSingleDPP) {
+			SwathWidthY[k] = SwathWidthSingleDPPY[k];
+		} else {
+			if (MainSurfaceODMMode == dml_odm_mode_combine_4to1) {
+				SwathWidthY[k] = (dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k], true)));
+			} else if (MainSurfaceODMMode == dml_odm_mode_combine_2to1) {
+				SwathWidthY[k] = (dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k], true)));
+			} else if (DPPPerSurface[k] == 2) {
+				SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
+			} else {
+				SwathWidthY[k] = SwathWidthSingleDPPY[k];
+			}
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u HActive=%u\n", __func__, k, HActive[k]);
+		dml_print("DML::%s: k=%u HRatio=%f\n", __func__, k, HRatio[k]);
+		dml_print("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
+		dml_print("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
+		dml_print("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
+#endif
+
+		if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) {
+			SwathWidthC[k] = SwathWidthY[k] / 2;
+			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
+		} else {
+			SwathWidthC[k] = SwathWidthY[k];
+			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
+		}
+
+		if (ForceSingleDPP == true) {
+			SwathWidthY[k] = SwathWidthSingleDPPY[k];
+			SwathWidthC[k] = SwathWidthSingleDPPC[k];
+		}
+
+		surface_width_ub_l = (dml_uint_t)dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
+		surface_height_ub_l = (dml_uint_t)dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
+
+		if (!dml_is_vertical_rotation(SourceScan[k])) {
+			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
+			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
+			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+				swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_floor(ViewportXStart[k] + SwathWidthY[k] + Read256BytesBlockWidthY[k] - 1, Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStart[k], Read256BytesBlockWidthY[k])));
+			} else {
+				swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]));
+			}
+			if (BytePerPixC[k] > 0) {
+				surface_width_ub_c = (dml_uint_t)dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+					swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_floor(ViewportXStartC[k] + SwathWidthC[k] + Read256BytesBlockWidthC[k] - 1, Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], Read256BytesBlockWidthC[k])));
+				} else {
+					swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]));
+				}
+			} else {
+				swath_width_chroma_ub[k] = 0;
+			}
+		} else {
+			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
+			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
+
+			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+				swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])));
+			} else {
+				swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
+			}
+			if (BytePerPixC[k] > 0) {
+				surface_height_ub_c = (dml_uint_t)dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+					swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_floor(ViewportYStartC[k] + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - dml_floor(ViewportYStartC[k], Read256BytesBlockHeightC[k])));
+				} else {
+					swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
+				}
+			} else {
+				swath_width_chroma_ub[k] = 0;
+			}
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
+		dml_print("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
+		dml_print("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
+		dml_print("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
+		dml_print("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
+		dml_print("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
+		dml_print("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
+		dml_print("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
+		dml_print("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, ViewportStationary[k]);
+		dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
+		dml_print("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
+		dml_print("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
+		dml_print("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
+		dml_print("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
+#endif
+
+	}
+} // CalculateSwathWidth
+
+static noinline_for_stack dml_float_t CalculateExtraLatency(
+		dml_uint_t RoundTripPingLatencyCycles,
+		dml_uint_t ReorderingBytes,
+		dml_float_t DCFCLK,
+		dml_uint_t TotalNumberOfActiveDPP,
+		dml_uint_t PixelChunkSizeInKByte,
+		dml_uint_t TotalNumberOfDCCActiveDPP,
+		dml_uint_t MetaChunkSize,
+		dml_float_t ReturnBW,
+		dml_bool_t GPUVMEnable,
+		dml_bool_t HostVMEnable,
+		dml_uint_t NumberOfActiveSurfaces,
+		dml_uint_t NumberOfDPP[],
+		dml_uint_t dpte_group_bytes[],
+		dml_float_t HostVMInefficiencyFactor,
+		dml_uint_t HostVMMinPageSize,
+		dml_uint_t HostVMMaxNonCachedPageTableLevels)
+{
+	dml_float_t ExtraLatencyBytes;
+	dml_float_t ExtraLatency;
+
+	ExtraLatencyBytes = CalculateExtraLatencyBytes(
+			ReorderingBytes,
+			TotalNumberOfActiveDPP,
+			PixelChunkSizeInKByte,
+			TotalNumberOfDCCActiveDPP,
+			MetaChunkSize,
+			GPUVMEnable,
+			HostVMEnable,
+			NumberOfActiveSurfaces,
+			NumberOfDPP,
+			dpte_group_bytes,
+			HostVMInefficiencyFactor,
+			HostVMMinPageSize,
+			HostVMMaxNonCachedPageTableLevels);
+
+	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
+	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
+	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
+	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
+	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
+#endif
+
+	return ExtraLatency;
+} // CalculateExtraLatency
+
+static dml_uint_t CalculateHostVMDynamicLevels(
+		dml_bool_t GPUVMEnable,
+		dml_bool_t HostVMEnable,
+		dml_uint_t HostVMMinPageSize,
+		dml_uint_t HostVMMaxNonCachedPageTableLevels)
+{
+	dml_uint_t HostVMDynamicLevels = 0;
+
+	if (GPUVMEnable && HostVMEnable) {
+		if (HostVMMinPageSize < 2048)
+			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
+			HostVMDynamicLevels = (dml_uint_t) dml_max(0, (dml_float_t) HostVMMaxNonCachedPageTableLevels - 1);
+		else
+			HostVMDynamicLevels = (dml_uint_t) dml_max(0, (dml_float_t) HostVMMaxNonCachedPageTableLevels - 2);
+	} else {
+		HostVMDynamicLevels = 0;
+	}
+	return HostVMDynamicLevels;
+}
+
+static dml_uint_t CalculateExtraLatencyBytes(dml_uint_t ReorderingBytes,
+		dml_uint_t TotalNumberOfActiveDPP,
+		dml_uint_t PixelChunkSizeInKByte,
+		dml_uint_t TotalNumberOfDCCActiveDPP,
+		dml_uint_t MetaChunkSize,
+		dml_bool_t GPUVMEnable,
+		dml_bool_t HostVMEnable,
+		dml_uint_t NumberOfActiveSurfaces,
+		dml_uint_t NumberOfDPP[],
+		dml_uint_t dpte_group_bytes[],
+		dml_float_t HostVMInefficiencyFactor,
+		dml_uint_t HostVMMinPageSize,
+		dml_uint_t HostVMMaxNonCachedPageTableLevels)
+{
+	dml_uint_t HostVMDynamicLevels = CalculateHostVMDynamicLevels(GPUVMEnable, HostVMEnable, HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
+	dml_float_t ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
+
+	if (GPUVMEnable == true) {
+		for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+			ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
+		}
+	}
+	return (dml_uint_t)(ret);
+}
+
+static dml_float_t CalculateUrgentLatency(
+		dml_float_t UrgentLatencyPixelDataOnly,
+		dml_float_t UrgentLatencyPixelMixedWithVMData,
+		dml_float_t UrgentLatencyVMDataOnly,
+		dml_bool_t DoUrgentLatencyAdjustment,
+		dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
+		dml_float_t UrgentLatencyAdjustmentFabricClockReference,
+		dml_float_t FabricClock)
+{
+	dml_float_t ret;
+
+	ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
+	if (DoUrgentLatencyAdjustment == true) {
+		ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
+	}
+	return ret;
+}
+
+static dml_float_t RequiredDTBCLK(
+		dml_bool_t DSCEnable,
+		dml_float_t PixelClock,
+		enum dml_output_format_class OutputFormat,
+		dml_float_t OutputBpp,
+		dml_uint_t DSCSlices,
+		dml_uint_t HTotal,
+		dml_uint_t HActive,
+		dml_uint_t AudioRate,
+		dml_uint_t AudioLayout)
+{
+	if (DSCEnable != true) {
+		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
+	} else {
+		dml_float_t PixelWordRate = PixelClock / (OutputFormat == dml_444 ? 1 : 2);
+		dml_float_t HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
+		dml_float_t HCBlank = 64 + 32 * dml_ceil(AudioRate * (AudioLayout == 1 ?
1 : 0.25) * HTotal / (PixelClock * 1000), 1); + dml_float_t AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; + dml_float_t HActiveTribyteRate = PixelWordRate * HCActive / HActive; + return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; + } +} + +static void UseMinimumDCFCLK(struct display_mode_lib_scratch_st *scratch, struct UseMinimumDCFCLK_params_st *p) +{ + struct UseMinimumDCFCLK_locals_st *s = &scratch->UseMinimumDCFCLK_locals; + + s->NormalEfficiency = p->PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; + for (dml_uint_t j = 0; j < 2; ++j) { + + + s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = 0; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = s->TotalMaxPrefetchFlipDPTERowBandwidth[j] + p->NoOfDPP[j][k] * p->DPTEBytesPerRow[j][k] / (15.75 * p->HTotal[k] / p->PixelClock[k]); + } + + for (dml_uint_t k = 0; k <= p->NumberOfActiveSurfaces - 1; ++k) { + s->NoOfDPPState[k] = p->NoOfDPP[j][k]; + } + + s->DPTEBandwidth = s->TotalMaxPrefetchFlipDPTERowBandwidth[j]; + + s->DCFCLKRequiredForAverageBandwidth = dml_max(p->ProjectedDCFCLKDeepSleep[j], s->DPTEBandwidth / s->NormalEfficiency / p->ReturnBusWidth); + + s->ExtraLatencyBytes = CalculateExtraLatencyBytes(p->ReorderingBytes, p->TotalNumberOfActiveDPP[j], p->PixelChunkSizeInKByte, p->TotalNumberOfDCCActiveDPP[j], + p->MetaChunkSize, p->GPUVMEnable, p->HostVMEnable, p->NumberOfActiveSurfaces, s->NoOfDPPState, p->dpte_group_bytes, + 1, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels); + s->ExtraLatencyCycles = p->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + s->ExtraLatencyBytes / s->NormalEfficiency / p->ReturnBusWidth; + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + dml_float_t DCFCLKCyclesRequiredInPrefetch; + dml_float_t PrefetchTime; + + s->PixelDCFCLKCyclesRequiredInPrefetch[k] = (p->PrefetchLinesY[j][k] * p->swath_width_luma_ub_all_states[j][k] * p->BytePerPixelY[k] + p->PrefetchLinesC[j][k] * p->swath_width_chroma_ub_all_states[j][k] * p->BytePerPixelC[k]) / s->NormalEfficiency / p->ReturnBusWidth; + DCFCLKCyclesRequiredInPrefetch = 2 * s->ExtraLatencyCycles / s->NoOfDPPState[k] + p->PDEAndMetaPTEBytesPerFrame[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth * (p->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * p->DPTEBytesPerRow[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth + 2 * p->MetaRowBytes[j][k] / s->NormalEfficiency / p->ReturnBusWidth + s->PixelDCFCLKCyclesRequiredInPrefetch[k]; + s->PrefetchPixelLinesTime[k] = dml_max(p->PrefetchLinesY[j][k], p->PrefetchLinesC[j][k]) * p->HTotal[k] / p->PixelClock[k]; + s->DynamicMetadataVMExtraLatency[k] = (p->GPUVMEnable == true && p->DynamicMetadataEnable[k] == true && p->DynamicMetadataVMEnabled == true) ? p->UrgLatency * p->GPUVMMaxPageTableLevels * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; + + s->MinimumTWait = CalculateTWait(p->MaxPrefetchMode, + p->UseMALLForPStateChange[k], + p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + p->DRRDisplay[k], + p->DRAMClockChangeLatencyFinal, + p->FCLKChangeLatency, + p->UrgLatency, + p->SREnterPlusExitTime); + + PrefetchTime = (p->MaximumVStartup[j][k] - 1) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - p->UrgLatency * ((p->GPUVMMaxPageTableLevels <= 2 ? p->GPUVMMaxPageTableLevels : p->GPUVMMaxPageTableLevels - 2) * (p->HostVMEnable == true ? 
p->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - s->DynamicMetadataVMExtraLatency[k]; + + if (PrefetchTime > 0) { + dml_float_t ExpectedVRatioPrefetch; + ExpectedVRatioPrefetch = s->PrefetchPixelLinesTime[k] / (PrefetchTime * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); + s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->NoOfDPPState[k] * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / s->PrefetchPixelLinesTime[k] * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4); + if (p->HostVMEnable == true || p->ImmediateFlipRequirement == true) { + s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->DCFCLKRequiredForPeakBandwidthPerSurface[k] + s->NoOfDPPState[k] * s->DPTEBandwidth / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth; + } + } else { + s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState; + } + if (p->DynamicMetadataEnable[k] == true) { + dml_float_t TSetupPipe; + dml_float_t TdmbfPipe; + dml_float_t TdmsksPipe; + dml_float_t TdmecPipe; + dml_float_t AllowedTimeForUrgentExtraLatency; + + CalculateVUpdateAndDynamicMetadataParameters( + p->MaxInterDCNTileRepeaters, + p->RequiredDPPCLKPerSurface[j][k], + p->RequiredDISPCLK[j], + p->ProjectedDCFCLKDeepSleep[j], + p->PixelClock[k], + p->HTotal[k], + p->VTotal[k] - p->VActive[k], + p->DynamicMetadataTransmittedBytes[k], + p->DynamicMetadataLinesBeforeActiveRequired[k], + p->Interlace[k], + p->ProgressiveToInterlaceUnitInOPP, + + // Output + &TSetupPipe, + &TdmbfPipe, + &TdmecPipe, + &TdmsksPipe, + &s->dummy1, + &s->dummy2, + &s->dummy3); + + AllowedTimeForUrgentExtraLatency = p->MaximumVStartup[j][k] * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe - TdmsksPipe - s->DynamicMetadataVMExtraLatency[k]; + if (AllowedTimeForUrgentExtraLatency > 0) { + s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = dml_max(s->DCFCLKRequiredForPeakBandwidthPerSurface[k], s->ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); + } else { + s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState; + } + } + } + s->DCFCLKRequiredForPeakBandwidth = 0; + for (dml_uint_t k = 0; k <= p->NumberOfActiveSurfaces - 1; ++k) { + s->DCFCLKRequiredForPeakBandwidth = s->DCFCLKRequiredForPeakBandwidth + s->DCFCLKRequiredForPeakBandwidthPerSurface[k]; + } + s->MinimumTvmPlus2Tr0 = p->UrgLatency * (p->GPUVMEnable == true ? (p->HostVMEnable == true ? 
(p->GPUVMMaxPageTableLevels + 2) * (p->HostVMMaxNonCachedPageTableLevels + 1) - 1 : p->GPUVMMaxPageTableLevels + 1) : 0); + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + dml_float_t MaximumTvmPlus2Tr0PlusTsw; + MaximumTvmPlus2Tr0PlusTsw = (p->MaximumVStartup[j][k] - 2) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - s->DynamicMetadataVMExtraLatency[k]; + if (MaximumTvmPlus2Tr0PlusTsw <= s->MinimumTvmPlus2Tr0 + s->PrefetchPixelLinesTime[k] / 4) { + s->DCFCLKRequiredForPeakBandwidth = p->DCFCLKPerState; + } else { + s->DCFCLKRequiredForPeakBandwidth = dml_max3(s->DCFCLKRequiredForPeakBandwidth, + 2 * s->ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0 - s->PrefetchPixelLinesTime[k] / 4), + (2 * s->ExtraLatencyCycles + s->PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0)); + } + } + p->DCFCLKState[j] = dml_min(p->DCFCLKPerState, 1.05 * dml_max(s->DCFCLKRequiredForAverageBandwidth, s->DCFCLKRequiredForPeakBandwidth)); + } +} + + +static dml_bool_t UnboundedRequest(enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal, + dml_uint_t TotalNumberOfActiveDPP, + dml_bool_t NoChromaOrLinear, + enum dml_output_encoder_class Output) +{ + dml_bool_t ret_val = false; + + ret_val = (UseUnboundedRequestingFinal != dml_unbounded_requesting_disable + && TotalNumberOfActiveDPP == 1 && NoChromaOrLinear); + if (UseUnboundedRequestingFinal == dml_unbounded_requesting_edp_only && Output != dml_edp) { + ret_val = false; + } + return (ret_val); +} + +static void CalculateSurfaceSizeInMall( + dml_uint_t NumberOfActiveSurfaces, + dml_uint_t MALLAllocatedForDCN, + enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[], + dml_bool_t DCCEnable[], + dml_bool_t ViewportStationary[], + dml_uint_t ViewportXStartY[], + dml_uint_t ViewportYStartY[], + dml_uint_t ViewportXStartC[], + dml_uint_t ViewportYStartC[], + dml_uint_t ViewportWidthY[], + dml_uint_t ViewportHeightY[], + dml_uint_t BytesPerPixelY[], + dml_uint_t ViewportWidthC[], + dml_uint_t ViewportHeightC[], + dml_uint_t BytesPerPixelC[], + dml_uint_t SurfaceWidthY[], + dml_uint_t SurfaceWidthC[], + dml_uint_t SurfaceHeightY[], + dml_uint_t SurfaceHeightC[], + dml_uint_t Read256BytesBlockWidthY[], + dml_uint_t Read256BytesBlockWidthC[], + dml_uint_t Read256BytesBlockHeightY[], + dml_uint_t Read256BytesBlockHeightC[], + dml_uint_t ReadBlockWidthY[], + dml_uint_t ReadBlockWidthC[], + dml_uint_t ReadBlockHeightY[], + dml_uint_t ReadBlockHeightC[], + + // Output + dml_uint_t SurfaceSizeInMALL[], + dml_bool_t *ExceededMALLSize) +{ + dml_uint_t TotalSurfaceSizeInMALL = 0; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (ViewportStationary[k]) { + SurfaceSizeInMALL[k] = (dml_uint_t)(dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], ReadBlockWidthY[k])) * + dml_min(dml_ceil(SurfaceHeightY[k], ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * + BytesPerPixelY[k]); + + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + + dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * + 
dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * BytesPerPixelC[k]); + } + if (DCCEnable[k] == true) { + SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + + dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) * + dml_min(dml_ceil(SurfaceHeightY[k], 8 * Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256); + if (Read256BytesBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + + dml_min(dml_ceil(SurfaceWidthC[k], 8 * Read256BytesBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1, 8 * Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], 8 * Read256BytesBlockWidthC[k])) * + dml_min(dml_ceil(SurfaceHeightC[k], 8 * Read256BytesBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1, 8 * Read256BytesBlockHeightC[k]) - dml_floor(ViewportYStartC[k], 8 * Read256BytesBlockHeightC[k])) * BytesPerPixelC[k] / 256); + } + } + } else { + SurfaceSizeInMALL[k] = (dml_uint_t)(dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]); + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + + dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * + dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]); + } + if (DCCEnable[k] == true) { + SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + + dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1), 8 * Read256BytesBlockWidthY[k]) * + dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1), 8 * Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256); + + if (Read256BytesBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] + + dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1), 8 * Read256BytesBlockWidthC[k]) * + dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1), 8 * Read256BytesBlockHeightC[k]) * BytesPerPixelC[k] / 256); + } + } + } + } + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable) + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; + } + *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024); +} // CalculateSurfaceSizeInMall + +static void CalculateDETBufferSize( + dml_uint_t DETSizeOverride[], + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + dml_bool_t ForceSingleDPP, + dml_uint_t NumberOfActiveSurfaces, + dml_bool_t UnboundedRequestEnabled, + dml_uint_t nomDETInKByte, + dml_uint_t MaxTotalDETInKByte, + 
dml_uint_t ConfigReturnBufferSizeInKByte, + dml_uint_t MinCompressedBufferSizeInKByte, + dml_uint_t ConfigReturnBufferSegmentSizeInkByte, + dml_uint_t CompressedBufferSegmentSizeInkByteFinal, + enum dml_source_format_class SourcePixelFormat[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_uint_t RoundedUpMaxSwathSizeBytesY[], + dml_uint_t RoundedUpMaxSwathSizeBytesC[], + dml_uint_t DPPPerSurface[], + // Output + dml_uint_t DETBufferSizeInKByte[], + dml_uint_t *CompressedBufferSizeInkByte) +{ + dml_uint_t DETBufferSizePoolInKByte; + dml_uint_t NextDETBufferPieceInKByte; + dml_bool_t DETPieceAssignedToThisSurfaceAlready[__DML_NUM_PLANES__]; + dml_bool_t NextPotentialSurfaceToAssignDETPieceFound; + dml_uint_t NextSurfaceToAssignDETPiece; + dml_float_t TotalBandwidth; + dml_float_t BandwidthOfSurfacesNotAssignedDETPiece; + dml_uint_t max_minDET; + dml_uint_t minDET; + dml_uint_t minDET_pipe; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled); + dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte); + dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte); + dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %u\n", __func__, CompressedBufferSegmentSizeInkByteFinal); +#endif + + // Note: Will use default det size if that fits 2 swaths + if (UnboundedRequestEnabled) { + if (DETSizeOverride[0] > 0) { + DETBufferSizeInKByte[0] = DETSizeOverride[0]; + } else { + DETBufferSizeInKByte[0] = (dml_uint_t) dml_max(128.0, dml_ceil(2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[0] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)); + } + *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; + } else { + DETBufferSizePoolInKByte = MaxTotalDETInKByte; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + DETBufferSizeInKByte[k] = 0; + if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) { + max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte; + } else { + max_minDET = nomDETInKByte; + } + minDET = 128; + minDET_pipe = 0; + + // add DET resource until can hold 2 full swaths + while (minDET <= max_minDET && minDET_pipe == 0) { + if (2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) + minDET_pipe = minDET; + minDET = minDET + ConfigReturnBufferSegmentSizeInkByte; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u minDET = %u\n", __func__, k, minDET); + dml_print("DML::%s: k=%u max_minDET = %u\n", __func__, k, max_minDET); + dml_print("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, minDET_pipe); + dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]); + dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]); +#endif + + if (minDET_pipe == 0) { + minDET_pipe = (dml_uint_t)(dml_max(128, dml_ceil(((dml_float_t)RoundedUpMaxSwathSizeBytesY[k] + 
(dml_float_t)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte))); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u minDET_pipe = %u (assume each plane takes half the DET)\n", __func__, k, minDET_pipe); +#endif + } + + if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) { + DETBufferSizeInKByte[k] = 0; + } else if (DETSizeOverride[k] > 0) { + DETBufferSizeInKByte[k] = DETSizeOverride[k]; + DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; + } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { + DETBufferSizeInKByte[k] = minDET_pipe; + DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); + dml_print("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, DETSizeOverride[k]); + dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + dml_print("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, DETBufferSizePoolInKByte); +#endif + } + + TotalBandwidth = 0; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) + TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + } + dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); + dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); +#endif + BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + + if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + } else if (DETSizeOverride[k] > 0 || (((dml_float_t) (ForceSingleDPP ? 
1 : DPPPerSurface[k]) * (dml_float_t) DETBufferSizeInKByte[k] / (dml_float_t) MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; + } else { + DETPieceAssignedToThisSurfaceAlready[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); + dml_print("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, BandwidthOfSurfacesNotAssignedDETPiece); +#endif + } + + for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) { + NextPotentialSurfaceToAssignDETPieceFound = false; + NextSurfaceToAssignDETPiece = 0; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]); + dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]); + dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); + dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); + dml_print("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, NextSurfaceToAssignDETPiece); +#endif + if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound || + ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { + NextSurfaceToAssignDETPiece = k; + NextPotentialSurfaceToAssignDETPieceFound = true; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); + dml_print("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); +#endif + } + + if (NextPotentialSurfaceToAssignDETPieceFound) { + // Note: To show the banker's rounding behavior in VBA and also the fact that the DET buffer size varies due to precision issue + // + //dml_float_t tmp1 = ((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece / + // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); + //dml_float_t tmp2 = dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece / + // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); + // + //dml_print("DML::%s: j=%u, tmp1 = %f\n", __func__, j, tmp1); + //dml_print("DML::%s: j=%u, tmp2 = %f\n", __func__, j, tmp2); + + NextDETBufferPieceInKByte = (dml_uint_t)(dml_min( + dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece / + ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte), true) + * (ForceSingleDPP ? 
1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte, + dml_floor((dml_float_t) DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, DETBufferSizePoolInKByte); + dml_print("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, NextSurfaceToAssignDETPiece); + dml_print("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); + dml_print("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); + dml_print("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); + dml_print("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, NextDETBufferPieceInKByte); + dml_print("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); +#endif + + DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] + NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); +#ifdef __DML_VBA_DEBUG__ + dml_print("to %u\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); +#endif + + DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; + DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; + BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); + } + } + *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; + } + *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / ConfigReturnBufferSegmentSizeInkByte; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); + dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte); + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + } +#endif +} // CalculateDETBufferSize + + +/// @brief Calculate the bound for return buffer sizing +static void CalculateMaxDETAndMinCompressedBufferSize( + dml_uint_t ConfigReturnBufferSizeInKByte, + dml_uint_t ConfigReturnBufferSegmentSizeInKByte, + dml_uint_t ROBBufferSizeInKByte, + dml_uint_t MaxNumDPP, + dml_bool_t nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size + dml_uint_t nomDETInKByteOverrideValue, // VBA_DELTA + + // Output + dml_uint_t *MaxTotalDETInKByte, + dml_uint_t *nomDETInKByte, + dml_uint_t *MinCompressedBufferSizeInKByte) +{ + *MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte; + *nomDETInKByte = (dml_uint_t)(dml_floor((dml_float_t) *MaxTotalDETInKByte / (dml_float_t) MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte)); + *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + 
dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); + dml_print("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); + dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); + dml_print("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); + dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); +#endif + + if (nomDETInKByteOverrideEnable) { + *nomDETInKByte = nomDETInKByteOverrideValue; + dml_print("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); + } +} // CalculateMaxDETAndMinCompressedBufferSize + +/// @brief Calculate all the RQ request attributes, like row height and # swath +static void CalculateVMRowAndSwath(struct display_mode_lib_scratch_st *scratch, + struct CalculateVMRowAndSwath_params_st *p) +{ + struct CalculateVMRowAndSwath_locals_st *s = &scratch->CalculateVMRowAndSwath_locals; + + s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->GPUVMEnable, p->HostVMEnable, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels); + + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->HostVMEnable == true) { + p->vm_group_bytes[k] = 512; + p->dpte_group_bytes[k] = 512; + } else if (p->GPUVMEnable == true) { + p->vm_group_bytes[k] = 2048; + if (p->GPUVMMinPageSizeKBytes[k] >= 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan)) { + p->dpte_group_bytes[k] = 512; + } else { + p->dpte_group_bytes[k] = 2048; + } + } else { + p->vm_group_bytes[k] = 0; + p->dpte_group_bytes[k] = 0; + } + + if (p->myPipe[k].SourcePixelFormat == dml_420_8 || p->myPipe[k].SourcePixelFormat == dml_420_10 || + p->myPipe[k].SourcePixelFormat == dml_420_12 || p->myPipe[k].SourcePixelFormat == dml_rgbe_alpha) { + if ((p->myPipe[k].SourcePixelFormat == dml_420_10 || p->myPipe[k].SourcePixelFormat == dml_420_12) && !dml_is_vertical_rotation(p->myPipe[k].SourceScan)) { + s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2; + s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k]; + } else { + s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma; + s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma; + } + + s->PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( + p->myPipe[k].ViewportStationary, + p->myPipe[k].DCCEnable, + p->myPipe[k].DPPPerSurface, + p->myPipe[k].BlockHeight256BytesC, + p->myPipe[k].BlockWidth256BytesC, + p->myPipe[k].SourcePixelFormat, + p->myPipe[k].SurfaceTiling, + p->myPipe[k].BytePerPixelC, + p->myPipe[k].SourceScan, + p->SwathWidthC[k], + p->myPipe[k].ViewportHeightChroma, + p->myPipe[k].ViewportXStartC, + p->myPipe[k].ViewportYStartC, + p->GPUVMEnable, + p->GPUVMMaxPageTableLevels, + p->GPUVMMinPageSizeKBytes[k], + s->PTEBufferSizeInRequestsForChroma[k], + p->myPipe[k].PitchC, + p->myPipe[k].DCCMetaPitchC, + p->myPipe[k].BlockWidthC, + p->myPipe[k].BlockHeightC, + + // Output + &s->MetaRowByteC[k], + &s->PixelPTEBytesPerRowC[k], + &s->PixelPTEBytesPerRowStorageC[k], + &p->dpte_row_width_chroma_ub[k], + &p->dpte_row_height_chroma[k], + &p->dpte_row_height_linear_chroma[k], + &s->PixelPTEBytesPerRowC_one_row_per_frame[k], + &s->dpte_row_width_chroma_ub_one_row_per_frame[k], + &s->dpte_row_height_chroma_one_row_per_frame[k], + &p->meta_req_width_chroma[k], + &p->meta_req_height_chroma[k], + &p->meta_row_width_chroma[k], + &p->meta_row_height_chroma[k], + &p->PixelPTEReqWidthC[k], + &p->PixelPTEReqHeightC[k], + 
&p->PTERequestSizeC[k], + &p->dpde0_bytes_per_frame_ub_c[k], + &p->meta_pte_bytes_per_frame_ub_c[k]); + + p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines ( + p->myPipe[k].VRatioChroma, + p->myPipe[k].VTapsChroma, + p->myPipe[k].InterlaceEnable, + p->myPipe[k].ProgressiveToInterlaceUnitInOPP, + p->myPipe[k].SwathHeightC, + p->myPipe[k].SourceScan, + p->myPipe[k].ViewportStationary, + p->SwathWidthC[k], + p->myPipe[k].ViewportHeightChroma, + p->myPipe[k].ViewportXStartC, + p->myPipe[k].ViewportYStartC, + + // Output + &p->VInitPreFillC[k], + &p->MaxNumSwathC[k]); + } else { + s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma; + s->PTEBufferSizeInRequestsForChroma[k] = 0; + s->PixelPTEBytesPerRowC[k] = 0; + s->PixelPTEBytesPerRowStorageC[k] = 0; + s->PDEAndMetaPTEBytesFrameC = 0; + s->MetaRowByteC[k] = 0; + p->MaxNumSwathC[k] = 0; + p->PrefetchSourceLinesC[k] = 0; + s->dpte_row_height_chroma_one_row_per_frame[k] = 0; + s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; + s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; + } + + s->PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( + p->myPipe[k].ViewportStationary, + p->myPipe[k].DCCEnable, + p->myPipe[k].DPPPerSurface, + p->myPipe[k].BlockHeight256BytesY, + p->myPipe[k].BlockWidth256BytesY, + p->myPipe[k].SourcePixelFormat, + p->myPipe[k].SurfaceTiling, + p->myPipe[k].BytePerPixelY, + p->myPipe[k].SourceScan, + p->SwathWidthY[k], + p->myPipe[k].ViewportHeight, + p->myPipe[k].ViewportXStart, + p->myPipe[k].ViewportYStart, + p->GPUVMEnable, + p->GPUVMMaxPageTableLevels, + p->GPUVMMinPageSizeKBytes[k], + s->PTEBufferSizeInRequestsForLuma[k], + p->myPipe[k].PitchY, + p->myPipe[k].DCCMetaPitchY, + p->myPipe[k].BlockWidthY, + p->myPipe[k].BlockHeightY, + + // Output + &s->MetaRowByteY[k], + &s->PixelPTEBytesPerRowY[k], + &s->PixelPTEBytesPerRowStorageY[k], + &p->dpte_row_width_luma_ub[k], + &p->dpte_row_height_luma[k], + &p->dpte_row_height_linear_luma[k], + &s->PixelPTEBytesPerRowY_one_row_per_frame[k], + &s->dpte_row_width_luma_ub_one_row_per_frame[k], + &s->dpte_row_height_luma_one_row_per_frame[k], + &p->meta_req_width[k], + &p->meta_req_height[k], + &p->meta_row_width[k], + &p->meta_row_height[k], + &p->PixelPTEReqWidthY[k], + &p->PixelPTEReqHeightY[k], + &p->PTERequestSizeY[k], + &p->dpde0_bytes_per_frame_ub_l[k], + &p->meta_pte_bytes_per_frame_ub_l[k]); + + p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( + p->myPipe[k].VRatio, + p->myPipe[k].VTaps, + p->myPipe[k].InterlaceEnable, + p->myPipe[k].ProgressiveToInterlaceUnitInOPP, + p->myPipe[k].SwathHeightY, + p->myPipe[k].SourceScan, + p->myPipe[k].ViewportStationary, + p->SwathWidthY[k], + p->myPipe[k].ViewportHeight, + p->myPipe[k].ViewportXStart, + p->myPipe[k].ViewportYStart, + + // Output + &p->VInitPreFillY[k], + &p->MaxNumSwathY[k]); + + p->PDEAndMetaPTEBytesFrame[k] = (s->PDEAndMetaPTEBytesFrameY + s->PDEAndMetaPTEBytesFrameC) * (1 + 8 * s->HostVMDynamicLevels); + p->MetaRowByte[k] = s->MetaRowByteY[k] + s->MetaRowByteC[k]; + + if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) { + p->PTEBufferSizeNotExceeded[k] = true; + } else { + p->PTEBufferSizeNotExceeded[k] = false; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, 
s->PixelPTEBytesPerRowC[k]); + dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); + dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); + dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); + dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); + dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); +#endif + } + s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] && + s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]); + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrame = %u\n", __func__, k, p->PDEAndMetaPTEBytesFrame[k]); + dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameY = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameY); + dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameC = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameC); + dml_print("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); + dml_print("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); + dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); + dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); +#endif + } + + CalculateMALLUseForStaticScreen( + p->NumberOfActiveSurfaces, + p->MALLAllocatedForDCN, + p->UseMALLForStaticScreen, // mode + p->SurfaceSizeInMALL, + s->one_row_per_frame_fits_in_buffer, + // Output + p->UsesMALLForStaticScreen); // boolean + + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->PTEBufferModeOverrideEn[k] == 1) { + p->PTE_BUFFER_MODE[k] = p->PTEBufferModeOverrideVal[k]; + } else { + p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) || + (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64); + } + p->BIGK_FRAGMENT_SIZE[k] = (dml_uint_t)(dml_log2(p->GPUVMMinPageSizeKBytes[k] * 1024) - 12); + } + + for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); + dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, p->UsesMALLForStaticScreen[k]); +#endif + p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) || + (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan)); + + p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame); + + if (p->use_one_row_for_frame[k]) { + p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k]; + p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k]; + s->PixelPTEBytesPerRowY[k] = 
s->PixelPTEBytesPerRowY_one_row_per_frame[k]; + p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k]; + p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k]; + s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k]; + p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k]; + } + + if (p->MetaRowByte[k] <= p->DCCMetaBufferSizeBytes) { + p->DCCMetaBufferSizeNotExceeded[k] = true; + } else { + p->DCCMetaBufferSizeNotExceeded[k] = false; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, MetaRowByte = %u\n", __func__, k, p->MetaRowByte[k]); + dml_print("DML::%s: k=%u, DCCMetaBufferSizeBytes = %u\n", __func__, k, p->DCCMetaBufferSizeBytes); + dml_print("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); +#endif + } + s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels); + s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels); + p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k]; + if (p->use_one_row_for_frame[k]) + p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2; + + CalculateRowBandwidth( + p->GPUVMEnable, + p->myPipe[k].SourcePixelFormat, + p->myPipe[k].VRatio, + p->myPipe[k].VRatioChroma, + p->myPipe[k].DCCEnable, + p->myPipe[k].HTotal / p->myPipe[k].PixelClock, + s->MetaRowByteY[k], + s->MetaRowByteC[k], + p->meta_row_height[k], + p->meta_row_height_chroma[k], + s->PixelPTEBytesPerRowY[k], + s->PixelPTEBytesPerRowC[k], + p->dpte_row_height_luma[k], + p->dpte_row_height_chroma[k], + + // Output + &p->meta_row_bw[k], + &p->dpte_row_bw[k]); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + dml_print("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); + dml_print("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->UseMALLForPStateChange[k]); + dml_print("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); + dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + dml_print("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); + dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + dml_print("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); + dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + dml_print("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); + dml_print("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); +#endif + } +} + +static void CalculateOutputLink( + dml_float_t PHYCLKPerState, + dml_float_t PHYCLKD18PerState, + dml_float_t PHYCLKD32PerState, + dml_float_t Downspreading, + dml_bool_t IsMainSurfaceUsingTheIndicatedTiming, + enum dml_output_encoder_class Output, + enum dml_output_format_class OutputFormat, + dml_uint_t HTotal, + dml_uint_t HActive, + dml_float_t PixelClockBackEnd, + dml_float_t ForcedOutputLinkBPP, + dml_uint_t DSCInputBitPerComponent, + dml_uint_t 
NumberOfDSCSlices, + dml_float_t AudioSampleRate, + dml_uint_t AudioSampleLayout, + enum dml_odm_mode ODMModeNoDSC, + enum dml_odm_mode ODMModeDSC, + enum dml_dsc_enable DSCEnable, + dml_uint_t OutputLinkDPLanes, + enum dml_output_link_dp_rate OutputLinkDPRate, + + // Output + dml_bool_t *RequiresDSC, + dml_bool_t *RequiresFEC, + dml_float_t *OutBpp, + enum dml_output_type_and_rate__type *OutputType, + enum dml_output_type_and_rate__rate *OutputRate, + dml_uint_t *RequiredSlots) +{ + dml_bool_t LinkDSCEnable; + dml_uint_t dummy; + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = 0; + + *OutputType = dml_output_type_unknown; + *OutputRate = dml_output_rate_unknown; + + if (IsMainSurfaceUsingTheIndicatedTiming) { + if (Output == dml_hdmi) { + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = "HDMI"; + *OutputType = dml_output_type_hdmi; + + } else if (Output == dml_dp || Output == dml_dp2p0 || Output == dml_edp) { + if (DSCEnable == dml_dsc_enable) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml_dp || Output == dml_dp2p0) { + *RequiresFEC = true; + } else { + *RequiresFEC = false; + } + } else { + *RequiresDSC = false; + LinkDSCEnable = false; + if (Output == dml_dp2p0) { + *RequiresFEC = true; + } else { + *RequiresFEC = false; + } + } + if (Output == dml_dp2p0) { + *OutBpp = 0; + if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr10) && PHYCLKD32PerState >= 10000 / 32.0) { + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32.0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR10"; + *OutputType = dml_output_type_dp2p0; + *OutputRate = dml_output_rate_dp_rate_uhbr10; + } + if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32.0) { + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, 
(dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR13p5"; + *OutputType = dml_output_type_dp2p0; + *OutputRate = dml_output_rate_dp_rate_uhbr13p5; + } + if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) { + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR20"; + *OutputType = dml_output_type_dp2p0; + *OutputRate = dml_output_rate_dp_rate_uhbr20; + } + } else { // output is dp or edp + *OutBpp = 0; + if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr) && PHYCLKPerState >= 270) { + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR"; + *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp; + *OutputRate = dml_output_rate_dp_rate_hbr; + } + if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr2) && *OutBpp == 0 && PHYCLKPerState >= 540) { + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR2"; + *OutputType = (Output == dml_dp) ? 
dml_output_type_dp : dml_output_type_edp; + *OutputRate = dml_output_rate_dp_rate_hbr2; + } + if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR3"; + *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp; + *OutputRate = dml_output_rate_dp_rate_hbr3; + } + } + } + } +} + +/// @brief Determine the ODM mode and number of DPP used per plane based on dispclk, dsc usage, odm usage policy +static void CalculateODMMode( + dml_uint_t MaximumPixelsPerLinePerDSCUnit, + dml_uint_t HActive, + enum dml_output_encoder_class Output, + enum dml_output_format_class OutputFormat, + enum dml_odm_use_policy ODMUse, + dml_float_t StateDispclk, + dml_float_t MaxDispclk, + dml_bool_t DSCEnable, + dml_uint_t TotalNumberOfActiveDPP, + dml_uint_t MaxNumDPP, + dml_float_t PixelClock, + dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, + dml_float_t DISPCLKRampingMargin, + dml_float_t DISPCLKDPPCLKVCOSpeed, + dml_uint_t NumberOfDSCSlices, + + // Output + dml_bool_t *TotalAvailablePipesSupport, + dml_uint_t *NumberOfDPP, + enum dml_odm_mode *ODMMode, + dml_float_t *RequiredDISPCLKPerSurface) +{ + + dml_float_t SurfaceRequiredDISPCLKWithoutODMCombine; + dml_float_t SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + dml_float_t SurfaceRequiredDISPCLKWithODMCombineFourToOne; + + SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml_odm_mode_bypass, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk); + SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml_odm_mode_combine_2to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk); + SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml_odm_mode_combine_4to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk); + *TotalAvailablePipesSupport = true; + + if (OutputFormat == dml_420) { + if (HActive > 4 * DML2_MAX_FMT_420_BUFFER_WIDTH) + *TotalAvailablePipesSupport = false; + else if (HActive > 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) + ODMUse = dml_odm_use_policy_combine_4to1; + else if (HActive > DML2_MAX_FMT_420_BUFFER_WIDTH && ODMUse != dml_odm_use_policy_combine_4to1) + ODMUse = dml_odm_use_policy_combine_2to1; + if (Output == dml_hdmi && ODMUse == dml_odm_use_policy_combine_2to1) + *TotalAvailablePipesSupport = false; + if ((Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && ODMUse == dml_odm_use_policy_combine_4to1) + *TotalAvailablePipesSupport = false; + } + + if (ODMUse 
== dml_odm_use_policy_bypass || ODMUse == dml_odm_use_policy_combine_as_needed) + *ODMMode = dml_odm_mode_bypass; + else if (ODMUse == dml_odm_use_policy_combine_2to1) + *ODMMode = dml_odm_mode_combine_2to1; + else if (ODMUse == dml_odm_use_policy_combine_4to1) + *ODMMode = dml_odm_mode_combine_4to1; + else if (ODMUse == dml_odm_use_policy_split_1to2) + *ODMMode = dml_odm_mode_split_1to2; + else if (ODMUse == dml_odm_use_policy_mso_1to2) + *ODMMode = dml_odm_mode_mso_1to2; + else if (ODMUse == dml_odm_use_policy_mso_1to4) + *ODMMode = dml_odm_mode_mso_1to4; + + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; + *NumberOfDPP = 0; + + if (!(Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && (ODMUse == dml_odm_use_policy_combine_4to1 || (ODMUse == dml_odm_use_policy_combine_as_needed && + (SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) || NumberOfDSCSlices > 8)))) { + if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { + *ODMMode = dml_odm_mode_combine_4to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; + *NumberOfDPP = 4; + } else { + *TotalAvailablePipesSupport = false; + } + } else if (Output != dml_hdmi && (ODMUse == dml_odm_use_policy_combine_2to1 || (ODMUse == dml_odm_use_policy_combine_as_needed && + ((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) || + (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) { + if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { + *ODMMode = dml_odm_mode_combine_2to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + *NumberOfDPP = 2; + } else { + *TotalAvailablePipesSupport = false; + } + } else { + if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) { + *NumberOfDPP = 1; + } else { + *TotalAvailablePipesSupport = false; + } + } +} + +/// @brief Calculate the required DISPCLK given the odm mode and pixclk +static dml_float_t CalculateRequiredDispclk( + enum dml_odm_mode ODMMode, + dml_float_t PixelClock, + dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading, + dml_float_t DISPCLKRampingMargin, + dml_float_t DISPCLKDPPCLKVCOSpeed, + dml_float_t MaxDispclk) +{ + dml_float_t RequiredDispclk = 0.; + dml_float_t PixelClockAfterODM; + + dml_float_t DISPCLKWithRampingRoundedToDFSGranularity; + dml_float_t DISPCLKWithoutRampingRoundedToDFSGranularity; + dml_float_t MaxDispclkRoundedDownToDFSGranularity; + + if (ODMMode == dml_odm_mode_combine_4to1) { + PixelClockAfterODM = PixelClock / 4; + } else if (ODMMode == dml_odm_mode_combine_2to1) { + PixelClockAfterODM = PixelClock / 2; + } else { + PixelClockAfterODM = PixelClock; + } + + DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularity(PixelClockAfterODM * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + DISPCLKRampingMargin / 100.0), 1, DISPCLKDPPCLKVCOSpeed); + DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularity(PixelClockAfterODM * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0), 1, DISPCLKDPPCLKVCOSpeed); + MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed); + + if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) { + RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity; + } else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) { + RequiredDispclk = 
MaxDispclkRoundedDownToDFSGranularity;
+ } else {
+ RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
+ }
+
+ return RequiredDispclk;
+}
+
+/// @brief Determine DPPCLK if there is only one DPP per plane; the main factors are the pixel rate and the DPP scaling parameters
+static void CalculateSinglePipeDPPCLKAndSCLThroughput(
+ dml_float_t HRatio,
+ dml_float_t HRatioChroma,
+ dml_float_t VRatio,
+ dml_float_t VRatioChroma,
+ dml_float_t MaxDCHUBToPSCLThroughput,
+ dml_float_t MaxPSCLToLBThroughput,
+ dml_float_t PixelClock,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_uint_t HTaps,
+ dml_uint_t HTapsChroma,
+ dml_uint_t VTaps,
+ dml_uint_t VTapsChroma,
+
+ // Output
+ dml_float_t *PSCL_THROUGHPUT,
+ dml_float_t *PSCL_THROUGHPUT_CHROMA,
+ dml_float_t *DPPCLKUsingSingleDPP)
+{
+ dml_float_t DPPCLKUsingSingleDPPLuma;
+ dml_float_t DPPCLKUsingSingleDPPChroma;
+
+ if (HRatio > 1) {
+ *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / dml_ceil((dml_float_t) HTaps / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+
+ DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
+
+ if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
+ DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
+
+ if ((SourcePixelFormat != dml_420_8 && SourcePixelFormat != dml_420_10 && SourcePixelFormat != dml_420_12 && SourcePixelFormat != dml_rgbe_alpha)) {
+ *PSCL_THROUGHPUT_CHROMA = 0;
+ *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
+ } else {
+ if (HRatioChroma > 1) {
+ *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / dml_ceil((dml_float_t) HTapsChroma / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+ DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
+ HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
+ if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
+ DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
+ *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
+ }
+}
+
+/// @brief Calculate the actual dppclk freq
+/// @param DPPCLKUsingSingleDPP DppClk freq required if there is only 1 DPP per plane
+/// @param DPPPerSurface Number of DPP for each plane
+static void CalculateDPPCLK(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+ dml_float_t DISPCLKDPPCLKVCOSpeed,
+ dml_float_t DPPCLKUsingSingleDPP[],
+ dml_uint_t DPPPerSurface[],
+
+ // Output
+ dml_float_t *GlobalDPPCLK,
+ dml_float_t Dppclk[])
+{
+ *GlobalDPPCLK = 0;
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+ *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]);
+ }
+ *GlobalDPPCLK = RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
+
+ dml_print("DML::%s: GlobalDPPCLK = %f\n", __func__, *GlobalDPPCLK);
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ Dppclk[k] = *GlobalDPPCLK / 255.0 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0);
+ dml_print("DML::%s: Dppclk[%0d] = %f\n", __func__, k, Dppclk[k]);
+ }
+}
+
+static void CalculateMALLUseForStaticScreen(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t MALLAllocatedForDCNFinal,
+ enum 
dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen, + dml_uint_t SurfaceSizeInMALL[], + dml_bool_t one_row_per_frame_fits_in_buffer[], + + // Output + dml_bool_t UsesMALLForStaticScreen[]) +{ + + dml_uint_t SurfaceToAddToMALL; + dml_bool_t CanAddAnotherSurfaceToMALL; + dml_uint_t TotalSurfaceSizeInMALL; + + TotalSurfaceSizeInMALL = 0; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable); + if (UsesMALLForStaticScreen[k]) + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, UsesMALLForStaticScreen[k]); + dml_print("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); +#endif + } + + SurfaceToAddToMALL = 0; + CanAddAnotherSurfaceToMALL = true; + while (CanAddAnotherSurfaceToMALL) { + CanAddAnotherSurfaceToMALL = false; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 && + !UsesMALLForStaticScreen[k] && UseMALLForStaticScreen[k] != dml_use_mall_static_screen_disable && one_row_per_frame_fits_in_buffer[k] && + (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { + CanAddAnotherSurfaceToMALL = true; + SurfaceToAddToMALL = k; + dml_print("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, UseMALLForStaticScreen[k]); + } + } + if (CanAddAnotherSurfaceToMALL) { + UsesMALLForStaticScreen[SurfaceToAddToMALL] = true; + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); + dml_print("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); +#endif + } + } +} + +// @brief Calculate return bw for VM only traffic +dml_float_t dml_get_return_bw_mbps_vm_only( + const struct soc_bounding_box_st *soc, + dml_bool_t use_ideal_dram_bw_strobe, + dml_bool_t HostVMEnable, + dml_float_t DCFCLK, + dml_float_t FabricClock, + dml_float_t DRAMSpeed) +{ + dml_float_t VMDataOnlyReturnBW = + dml_min3(soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0, + FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes * soc->pct_ideal_sdp_bw_after_urgent / 100.0, + DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes * + ((use_ideal_dram_bw_strobe && !HostVMEnable) ? 
soc->pct_ideal_dram_bw_after_urgent_strobe : soc->pct_ideal_dram_bw_after_urgent_vm_only) / 100.0); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe); + dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable); + dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); + dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); + dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); + dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); +#endif + return VMDataOnlyReturnBW; +} + +// Function: dml_get_return_bw_mbps +// Megabyte per second +dml_float_t dml_get_return_bw_mbps( + const struct soc_bounding_box_st *soc, + dml_bool_t use_ideal_dram_bw_strobe, + dml_bool_t HostVMEnable, + dml_float_t DCFCLK, + dml_float_t FabricClock, + dml_float_t DRAMSpeed) +{ + dml_float_t ReturnBW = 0.; + dml_float_t IdealSDPPortBandwidth = soc->return_bus_width_bytes * DCFCLK; + dml_float_t IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes; + dml_float_t IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; + dml_float_t PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, + IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, + IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : + soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100); + dml_float_t PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, + IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, + IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : + soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100); + + if (HostVMEnable != true) { + ReturnBW = PixelDataOnlyReturnBW; + } else { + ReturnBW = PixelMixedWithVMDataReturnBW; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe); + dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable); + dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); + dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); + dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); + dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth); + dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth); + dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); + dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); + dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); + dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW); +#endif + return ReturnBW; +} + +// Function: dml_get_return_dram_bw_mbps +// Megabyte per second +static dml_float_t dml_get_return_dram_bw_mbps( + const struct soc_bounding_box_st *soc, + dml_bool_t use_ideal_dram_bw_strobe, + dml_bool_t HostVMEnable, + dml_float_t DRAMSpeed) +{ + dml_float_t ReturnDRAMBW = 0.; + dml_float_t IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; + dml_float_t PixelDataOnlyReturnBW = IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? 
soc->pct_ideal_dram_bw_after_urgent_strobe : + soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100; + dml_float_t PixelMixedWithVMDataReturnBW = IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : + soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100; + + if (HostVMEnable != true) { + ReturnDRAMBW = PixelDataOnlyReturnBW; + } else { + ReturnDRAMBW = PixelMixedWithVMDataReturnBW; + } + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe); + dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable); + dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); + dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); + dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); + dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); + dml_print("DML::%s: ReturnDRAMBW = %f MBps\n", __func__, ReturnDRAMBW); +#endif + return ReturnDRAMBW; +} + +/// @brief BACKEND +static dml_uint_t DSCDelayRequirement( + dml_bool_t DSCEnabled, + enum dml_odm_mode ODMMode, + dml_uint_t DSCInputBitPerComponent, + dml_float_t OutputBpp, + dml_uint_t HActive, + dml_uint_t HTotal, + dml_uint_t NumberOfDSCSlices, + enum dml_output_format_class OutputFormat, + enum dml_output_encoder_class Output, + dml_float_t PixelClock, + dml_float_t PixelClockBackEnd) +{ + dml_uint_t DSCDelayRequirement_val = 0; + + if (DSCEnabled == true && OutputBpp != 0) { + if (ODMMode == dml_odm_mode_combine_4to1) { + DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)(dml_ceil((dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, 1.0)), + (dml_uint_t) (NumberOfDSCSlices / 4.0), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output); + } else if (ODMMode == dml_odm_mode_combine_2to1) { + DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)(dml_ceil((dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, 1.0)), + (dml_uint_t) (NumberOfDSCSlices / 2.0), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output); + } else { + DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)((dml_float_t) dml_ceil(HActive / (dml_float_t) NumberOfDSCSlices, 1.0)), + NumberOfDSCSlices, OutputFormat, Output) + dscComputeDelay(OutputFormat, Output); + } + DSCDelayRequirement_val = (dml_uint_t)(DSCDelayRequirement_val + (HTotal - HActive) * dml_ceil((dml_float_t) DSCDelayRequirement_val / (dml_float_t) HActive, 1.0)); + DSCDelayRequirement_val = (dml_uint_t)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd); + + } else { + DSCDelayRequirement_val = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DSCEnabled = %u\n", __func__, DSCEnabled); + dml_print("DML::%s: ODMMode = %u\n", __func__, ODMMode); + dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); + dml_print("DML::%s: HActive = %u\n", __func__, HActive); + dml_print("DML::%s: HTotal = %u\n", __func__, HTotal); + dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock); + dml_print("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + dml_print("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); + dml_print("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); + dml_print("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); + dml_print("DML::%s: 
DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); +#endif + + return DSCDelayRequirement_val; +} + +static noinline_for_stack dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces, + dml_float_t ReturnBW, + dml_bool_t NotUrgentLatencyHiding[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_float_t cursor_bw[], + dml_float_t meta_row_bandwidth[], + dml_float_t dpte_row_bandwidth[], + dml_uint_t NumberOfDPP[], + dml_float_t UrgentBurstFactorLuma[], + dml_float_t UrgentBurstFactorChroma[], + dml_float_t UrgentBurstFactorCursor[]) +{ + dml_bool_t NotEnoughUrgentLatencyHiding = false; + dml_bool_t CalculateVActiveBandwithSupport_val = false; + dml_float_t VActiveBandwith = 0; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (NotUrgentLatencyHiding[k]) { + NotEnoughUrgentLatencyHiding = true; + } + } + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; + } + + CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %u\n", __func__, NotEnoughUrgentLatencyHiding); + dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); + dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); + dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %u\n", __func__, CalculateVActiveBandwithSupport_val); +#endif + return CalculateVActiveBandwithSupport_val; +} + +static void CalculatePrefetchBandwithSupport( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t ReturnBW, + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + dml_bool_t NotUrgentLatencyHiding[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_float_t PrefetchBandwidthLuma[], + dml_float_t PrefetchBandwidthChroma[], + dml_float_t cursor_bw[], + dml_float_t meta_row_bandwidth[], + dml_float_t dpte_row_bandwidth[], + dml_float_t cursor_bw_pre[], + dml_float_t prefetch_vmrow_bw[], + dml_uint_t NumberOfDPP[], + dml_float_t UrgentBurstFactorLuma[], + dml_float_t UrgentBurstFactorChroma[], + dml_float_t UrgentBurstFactorCursor[], + dml_float_t UrgentBurstFactorLumaPre[], + dml_float_t UrgentBurstFactorChromaPre[], + dml_float_t UrgentBurstFactorCursorPre[], + + // Output + dml_float_t *PrefetchBandwidth, + dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch, + dml_float_t *FractionOfUrgentBandwidth, + dml_bool_t *PrefetchBandwidthSupport) +{ + dml_bool_t NotEnoughUrgentLatencyHiding = false; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (NotUrgentLatencyHiding[k]) { + NotEnoughUrgentLatencyHiding = true; + } + } + + *PrefetchBandwidth = 0; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]), + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * 
UrgentBurstFactorCursorPre[k]); + } + + *PrefetchBandwidthNotIncludingMALLPrefetch = 0; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) + *PrefetchBandwidthNotIncludingMALLPrefetch = *PrefetchBandwidthNotIncludingMALLPrefetch + + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + + cursor_bw[k] * UrgentBurstFactorCursor[k] + + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]), + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + } + + *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding; + *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); + dml_print("DML::%s: PrefetchBandwidth = %f\n", __func__, *PrefetchBandwidth); + dml_print("DML::%s: FractionOfUrgentBandwidth = %f\n", __func__, *FractionOfUrgentBandwidth); + dml_print("DML::%s: PrefetchBandwidthSupport = %u\n", __func__, *PrefetchBandwidthSupport); +#endif +} + +static noinline_for_stack dml_float_t CalculateBandwidthAvailableForImmediateFlip( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t ReturnBW, + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_float_t PrefetchBandwidthLuma[], + dml_float_t PrefetchBandwidthChroma[], + dml_float_t cursor_bw[], + dml_float_t cursor_bw_pre[], + dml_uint_t NumberOfDPP[], + dml_float_t UrgentBurstFactorLuma[], + dml_float_t UrgentBurstFactorChroma[], + dml_float_t UrgentBurstFactorCursor[], + dml_float_t UrgentBurstFactorLumaPre[], + dml_float_t UrgentBurstFactorChromaPre[], + dml_float_t UrgentBurstFactorCursorPre[]) +{ + dml_float_t ret_val = ReturnBW; + + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + ret_val = ret_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u\n", __func__, k); + dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]); + dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]); + dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]); + dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]); + dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]); + dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]); + dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]); + + dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]); + dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]); + dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]); + dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]); + dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]); + dml_print("DML::%s: 
UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]); + dml_print("DML::%s: ret_val = %f\n", __func__, ret_val); +#endif + } + + return ret_val; +} + +static void CalculateImmediateFlipBandwithSupport( + dml_uint_t NumberOfActiveSurfaces, + dml_float_t ReturnBW, + enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[], + enum dml_immediate_flip_requirement ImmediateFlipRequirement[], + dml_float_t final_flip_bw[], + dml_float_t ReadBandwidthLuma[], + dml_float_t ReadBandwidthChroma[], + dml_float_t PrefetchBandwidthLuma[], + dml_float_t PrefetchBandwidthChroma[], + dml_float_t cursor_bw[], + dml_float_t meta_row_bandwidth[], + dml_float_t dpte_row_bandwidth[], + dml_float_t cursor_bw_pre[], + dml_float_t prefetch_vmrow_bw[], + dml_uint_t NumberOfDPP[], + dml_float_t UrgentBurstFactorLuma[], + dml_float_t UrgentBurstFactorChroma[], + dml_float_t UrgentBurstFactorCursor[], + dml_float_t UrgentBurstFactorLumaPre[], + dml_float_t UrgentBurstFactorChromaPre[], + dml_float_t UrgentBurstFactorCursorPre[], + + // Output + dml_float_t *TotalBandwidth, + dml_float_t *TotalBandwidthNotIncludingMALLPrefetch, + dml_float_t *FractionOfUrgentBandwidth, + dml_bool_t *ImmediateFlipBandwidthSupport) +{ + *TotalBandwidth = 0; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) { + + + + *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + } else { + *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k = %u\n", __func__, k); + dml_print("DML::%s: ImmediateFlipRequirement = %u\n", __func__, ImmediateFlipRequirement[k]); + dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth); + dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]); + dml_print("DML::%s: prefetch_vmrow_bw = %f\n", __func__, prefetch_vmrow_bw[k]); + dml_print("DML::%s: final_flip_bw = %f\n", __func__, final_flip_bw[k]); + dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]); + dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]); + dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]); + dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]); + dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]); + dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]); + dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]); + dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]); 
+ dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]); + dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]); + dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]); + dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]); + dml_print("DML::%s: meta_row_bandwidth = %f\n", __func__, meta_row_bandwidth[k]); + dml_print("DML::%s: dpte_row_bandwidth = %f\n", __func__, dpte_row_bandwidth[k]); +#endif + } + + *TotalBandwidthNotIncludingMALLPrefetch = 0; + for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) { + if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) { + if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) + *TotalBandwidthNotIncludingMALLPrefetch = *TotalBandwidthNotIncludingMALLPrefetch + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + else + *TotalBandwidthNotIncludingMALLPrefetch = *TotalBandwidthNotIncludingMALLPrefetch + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); + } + } + + *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); + *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); + dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth); + dml_print("DML::%s: ImmediateFlipBandwidthSupport = %u\n", __func__, *ImmediateFlipBandwidthSupport); +#endif +} + +static dml_uint_t MicroSecToVertLines(dml_uint_t num_us, dml_uint_t h_total, dml_float_t pixel_clock) +{ + dml_uint_t lines_time_in_ns = 1000.0 * (h_total * 1000.0) / (pixel_clock * 1000.0); + + return dml_ceil(1000.0 * num_us / lines_time_in_ns, 1.0); +} + +/// @brief Calculate the maximum vstartup for mode support and mode programming consideration +/// Bounded by min of actual vblank and input vblank_nom, dont want vstartup/ready to start too early if actual vbllank is huge +static dml_uint_t CalculateMaxVStartup( + dml_uint_t plane_idx, + dml_bool_t ptoi_supported, + dml_uint_t vblank_nom_default_us, + struct dml_timing_cfg_st *timing, + dml_float_t write_back_delay_us) +{ + dml_uint_t vblank_size = 0; + dml_uint_t max_vstartup_lines = 0; + const dml_uint_t max_allowed_vblank_nom = 1023; + + dml_float_t line_time_us = (dml_float_t) timing->HTotal[plane_idx] / timing->PixelClock[plane_idx]; + dml_uint_t vblank_actual = timing->VTotal[plane_idx] - timing->VActive[plane_idx]; + + dml_uint_t vblank_nom_default_in_line = MicroSecToVertLines(vblank_nom_default_us, timing->HTotal[plane_idx], + timing->PixelClock[plane_idx]); + dml_uint_t vblank_nom_input = (dml_uint_t)dml_min(vblank_actual, vblank_nom_default_in_line); + + // vblank_nom 
should not be smaller than (VSync (VTotal - VActive - VFrontPorch) + 2) + // + 2 is because + // 1 -> VStartup_start should be 1 line before VSync + // 1 -> always reserve 1 line between start of VBlank to VStartup signal + dml_uint_t vblank_nom_vsync_capped = dml_max(vblank_nom_input, + timing->VTotal[plane_idx] - timing->VActive[plane_idx] - timing->VFrontPorch[plane_idx] + 2); + dml_uint_t vblank_nom_max_allowed_capped = dml_min(vblank_nom_vsync_capped, max_allowed_vblank_nom); + dml_uint_t vblank_avail = (vblank_nom_max_allowed_capped == 0) ? + vblank_nom_default_in_line : vblank_nom_max_allowed_capped; + + vblank_size = (dml_uint_t) dml_min(vblank_actual, vblank_avail); + + if (timing->Interlace[plane_idx] && !ptoi_supported) + max_vstartup_lines = (dml_uint_t) (dml_floor(vblank_size/2.0, 1.0)); + else + max_vstartup_lines = vblank_size - (dml_uint_t) dml_max(1.0, dml_ceil(write_back_delay_us/line_time_us, 1.0)); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: plane_idx = %u\n", __func__, plane_idx); + dml_print("DML::%s: VBlankNom = %u\n", __func__, timing->VBlankNom[plane_idx]); + dml_print("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); + dml_print("DML::%s: line_time_us = %f\n", __func__, line_time_us); + dml_print("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); + dml_print("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); + dml_print("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); +#endif + max_vstartup_lines = (dml_uint_t) dml_min(max_vstartup_lines, DML_MAX_VSTARTUP_START); + return max_vstartup_lines; +} + +static noinline_for_stack void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib, + struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params, + dml_uint_t j, + dml_uint_t k) +{ + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelayPerState[k]; + CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]); + CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k]; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; + CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; + CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; + CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + 
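/* Note added for clarity (not in the original VBA source): hostvm_min_page_size_kbytes is
+ * expressed in KiB, so the multiply by 1024 above hands CalculatePrefetchSchedule a page
+ * size in bytes. */
+ 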
CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k]; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->UrgentExtraLatency = mode_lib->ms.ExtraLatency; + CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; + CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k]; + CalculatePrefetchSchedule_params->MetaRowByte = mode_lib->ms.MetaRowBytes[j][k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[j][k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[j][k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[j][k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait; + CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &mode_lib->ms.LineTimesForPrefetch[k]; + CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &mode_lib->ms.LinesForMetaPTE[k]; + CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &mode_lib->ms.LinesForMetaAndDPTERow[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[j][k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[j][k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.support.NoTimeForDynamicMetadata[j][k]; + CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; +} + +static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mode_lib) +{ + struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals; + struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct DmlPipe *myPipe; + dml_uint_t j, k; + + for (j = 0; j < 2; ++j) { + mode_lib->ms.TimeCalc = 24 / 
mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k]; + mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k]; + mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k]; + mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k]; + mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k]; + mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k]; + mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k]; + mode_lib->ms.UnboundedRequestEnabledThisState = mode_lib->ms.UnboundedRequestEnabledAllStates[j]; + mode_lib->ms.CompressedBufferSizeInkByteThisState = mode_lib->ms.CompressedBufferSizeInkByteAllStates[j]; + mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k]; + mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k]; + mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k]; + } + + mode_lib->ms.support.VActiveBandwithSupport[j] = CalculateVActiveBandwithSupport( + mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.NotUrgentLatencyHiding, + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.meta_row_bandwidth_this_state, + mode_lib->ms.dpte_row_bandwidth_this_state, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j]); + + s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only( + &mode_lib->ms.soc, + mode_lib->ms.state.use_ideal_dram_bw_strobe, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.DCFCLKState[j], + mode_lib->ms.state.fabricclk_mhz, + mode_lib->ms.state.dram_speed_mts); + + s->HostVMInefficiencyFactor = 1; + if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable) + s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBWPerState[j] / s->VMDataOnlyReturnBWPerState; + + mode_lib->ms.ExtraLatency = CalculateExtraLatency( + mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles, + s->ReorderingBytes, + mode_lib->ms.DCFCLKState[j], + mode_lib->ms.TotalNumberOfActiveDPP[j], + mode_lib->ms.ip.pixel_chunk_size_kbytes, + mode_lib->ms.TotalNumberOfDCCActiveDPP[j], + mode_lib->ms.ip.meta_chunk_size_kbytes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.dpte_group_bytes, + s->HostVMInefficiencyFactor, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); + + s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; + s->MaxVStartup = 0; + s->AllPrefetchModeTested = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]); + s->NextPrefetchMode[k] = s->MinPrefetchMode[k]; + } + + do { + s->MaxVStartup = s->NextMaxVStartup; + s->AllPrefetchModeTested = true; + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.PrefetchMode[k] = 
s->NextPrefetchMode[k]; + mode_lib->ms.TWait = CalculateTWait( + mode_lib->ms.PrefetchMode[k], + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], + mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k], + mode_lib->ms.state.dram_clock_change_latency_us, + mode_lib->ms.state.fclk_change_latency_us, + mode_lib->ms.UrgLatency, + mode_lib->ms.state.sr_enter_plus_exit_time_us); + + myPipe = &s->myPipe; + myPipe->Dppclk = mode_lib->ms.RequiredDPPCLKPerSurface[j][k]; + myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK[j]; + myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k]; + myPipe->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; + myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[j][k]; + myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k]; + myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k]; + myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k]; + myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k]; + myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]; + myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k]; + myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k]; + myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k]; + myPipe->ODMMode = mode_lib->ms.ODMModePerState[k]; + myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k]; + myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Calling CalculatePrefetchSchedule for j=%u, k=%u\n", __func__, j, k); + dml_print("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[j][k]); + dml_print("DML::%s: MaxVStartup = %u\n", __func__, s->MaxVStartup); + dml_print("DML::%s: NextPrefetchMode = %u\n", __func__, s->NextPrefetchMode[k]); + dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]); + dml_print("DML::%s: PrefetchMode = %u\n", __func__, mode_lib->ms.PrefetchMode[k]); +#endif + + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->MaxVStartup, s->MaximumVStartup[j][k])); + CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[j][k]; + CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; + CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; + CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; + CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; + CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; + 
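/* Clarifying comment (an inference, not from the original source): the VUpdate/VReady pixel
+ * positions are not consumed during mode support, so they are routed into scratch dummies. */
+ 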
CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; + CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; + + set_calculate_prefetch_schedule_params(mode_lib, CalculatePrefetchSchedule_params, j, k); + + mode_lib->ms.support.NoTimeForPrefetch[j][k] = + CalculatePrefetchSchedule(&mode_lib->scratch, + CalculatePrefetchSchedule_params); + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateUrgentBurstFactor( + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], + mode_lib->ms.swath_width_luma_ub_this_state[k], + mode_lib->ms.swath_width_chroma_ub_this_state[k], + mode_lib->ms.SwathHeightYThisState[k], + mode_lib->ms.SwathHeightCThisState[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.UrgLatency, + mode_lib->ms.ip.cursor_buffer_size, + mode_lib->ms.cache_display_cfg.plane.CursorWidth[k], + mode_lib->ms.cache_display_cfg.plane.CursorBPP[k], + mode_lib->ms.VRatioPreY[j][k], + mode_lib->ms.VRatioPreC[j][k], + mode_lib->ms.BytePerPixelInDETY[k], + mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeYThisState[k], + mode_lib->ms.DETBufferSizeCThisState[k], + /* Output */ + &mode_lib->ms.UrgentBurstFactorCursorPre[k], + &mode_lib->ms.UrgentBurstFactorLumaPre[k], + &mode_lib->ms.UrgentBurstFactorChromaPre[k], + &mode_lib->ms.NotUrgentLatencyHidingPre[k]); + + mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * + mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / + mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.VRatioPreY[j][k]; + } + + { + CalculatePrefetchBandwithSupport( + mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, + mode_lib->ms.NotUrgentLatencyHidingPre, + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.RequiredPrefetchPixelDataBWLuma, + mode_lib->ms.RequiredPrefetchPixelDataBWChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.meta_row_bandwidth_this_state, + mode_lib->ms.dpte_row_bandwidth_this_state, + mode_lib->ms.cursor_bw_pre, + mode_lib->ms.prefetch_vmrow_bw, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], + mode_lib->ms.UrgentBurstFactorLumaPre, + mode_lib->ms.UrgentBurstFactorChromaPre, + mode_lib->ms.UrgentBurstFactorCursorPre, + + /* output */ + &s->dummy_single[0], // dml_float_t *PrefetchBandwidth + &s->dummy_single[1], // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch + &mode_lib->mp.FractionOfUrgentBandwidth, // dml_float_t *FractionOfUrgentBandwidth + &mode_lib->ms.support.PrefetchSupported[j]); + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.LineTimesForPrefetch[k] < 2.0 + || mode_lib->ms.LinesForMetaPTE[k] >= 32.0 + || mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16.0 + || mode_lib->ms.support.NoTimeForPrefetch[j][k] == true) { + mode_lib->ms.support.PrefetchSupported[j] = false; + } + } + + mode_lib->ms.support.DynamicMetadataSupported[j] = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.support.NoTimeForDynamicMetadata[j][k] == true) { + mode_lib->ms.support.DynamicMetadataSupported[j] = false; + } + } + + 
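/* Summary comment added for readability (not in the original VBA): the check below fails the
+ * state when a prefetch VRatio exceeds __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__, or exceeds
+ * the base __DML_MAX_VRATIO_PRE__ limit while vstartup is not yet at its maximum or enhanced
+ * prefetch schedule acceleration is disabled. */
+ 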
mode_lib->ms.support.VRatioInPrefetchSupported[j] = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.support.NoTimeForPrefetch[j][k] == true || + mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || + mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || + ((s->MaxVStartup < s->MaximumVStartup[j][k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) && + (mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE__))) { + mode_lib->ms.support.VRatioInPrefetchSupported[j] = false; + } + } + + s->AnyLinesForVMOrRowTooLarge = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16 || mode_lib->ms.LinesForMetaPTE[k] >= 32) { + s->AnyLinesForVMOrRowTooLarge = true; + } + } + + if (mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true) { + mode_lib->ms.BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip( + mode_lib->ms.num_active_planes, + mode_lib->ms.ReturnBWPerState[j], + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.RequiredPrefetchPixelDataBWLuma, + mode_lib->ms.RequiredPrefetchPixelDataBWChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.cursor_bw_pre, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], + mode_lib->ms.UrgentBurstFactorLumaPre, + mode_lib->ms.UrgentBurstFactorChromaPre, + mode_lib->ms.UrgentBurstFactorCursorPre); + + mode_lib->ms.TotImmediateFlipBytes = 0; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) { + mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]); + if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) { + mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]); + } else { + mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.DPTEBytesPerRow[j][k]; + } + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateFlipSchedule( + s->HostVMInefficiencyFactor, + mode_lib->ms.ExtraLatency, + mode_lib->ms.UrgLatency, + mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels, + mode_lib->ms.cache_display_cfg.plane.HostVMEnable, + mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, + mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], + mode_lib->ms.MetaRowBytes[j][k], + mode_lib->ms.DPTEBytesPerRow[j][k], + mode_lib->ms.BandwidthAvailableForImmediateFlip, + mode_lib->ms.TotImmediateFlipBytes, + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], + (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), + mode_lib->ms.cache_display_cfg.plane.VRatio[k], + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], + mode_lib->ms.Tno_bw[k], + mode_lib->ms.cache_display_cfg.surface.DCCEnable[k], + mode_lib->ms.dpte_row_height[k], + 
mode_lib->ms.meta_row_height[k],
+ mode_lib->ms.dpte_row_height_chroma[k],
+ mode_lib->ms.meta_row_height_chroma[k],
+ mode_lib->ms.use_one_row_for_frame_flip[j][k], // 24
+
+ /* Output */
+ &mode_lib->ms.DestinationLinesToRequestVMInImmediateFlip[k],
+ &mode_lib->ms.DestinationLinesToRequestRowInImmediateFlip[k],
+ &mode_lib->ms.final_flip_bw[k],
+ &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
+ }
+
+ {
+ CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBWPerState[j],
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ mode_lib->ms.policy.ImmediateFlipRequirement,
+ mode_lib->ms.final_flip_bw,
+ mode_lib->ms.ReadBandwidthLuma,
+ mode_lib->ms.ReadBandwidthChroma,
+ mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
+ mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
+ mode_lib->ms.cursor_bw,
+ mode_lib->ms.meta_row_bandwidth_this_state,
+ mode_lib->ms.dpte_row_bandwidth_this_state,
+ mode_lib->ms.cursor_bw_pre,
+ mode_lib->ms.prefetch_vmrow_bw,
+ mode_lib->ms.NoOfDPP[j], // VBA_ERROR: DPPPerSurface is not assigned at this point, so NoOfDPP is used here instead
+ mode_lib->ms.UrgentBurstFactorLuma[j],
+ mode_lib->ms.UrgentBurstFactorChroma[j],
+ mode_lib->ms.UrgentBurstFactorCursor[j],
+ mode_lib->ms.UrgentBurstFactorLumaPre,
+ mode_lib->ms.UrgentBurstFactorChromaPre,
+ mode_lib->ms.UrgentBurstFactorCursorPre,
+
+ /* output */
+ &s->dummy_single[0], // dml_float_t *TotalBandwidth
+ &s->dummy_single[1], // dml_float_t *TotalBandwidthNotIncludingMALLPrefetch
+ &s->dummy_single[2], // dml_float_t *FractionOfUrgentBandwidth
+ &mode_lib->ms.support.ImmediateFlipSupportedForState[j]); // dml_bool_t *ImmediateFlipBandwidthSupport
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required) && (mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false))
+ mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false;
+ }
+
+ } else { // if prefetch is not supported, assume iflip is not supported either
+ mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false;
+ }
+
+ if (s->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || s->AnyLinesForVMOrRowTooLarge == false) {
+ s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
+
+ if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
+ s->AllPrefetchModeTested = false;
+ }
+ } else {
+ s->NextMaxVStartup = s->NextMaxVStartup - 1;
+ }
+ } while (!((mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.DynamicMetadataSupported[j] == true &&
+ mode_lib->ms.support.VRatioInPrefetchSupported[j] == true &&
+ // consider flip support okay when there is no hostvm and the user doesn't require an iflip, OR the flip bw is ok
+ // If there is hostvm, DCN needs to support iflip for invalidation
+ ((s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j] == true)) ||
+ (s->NextMaxVStartup == s->MaxVStartupAllPlanes[j] && s->AllPrefetchModeTested)));
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.use_one_row_for_frame_this_state[k] = mode_lib->ms.use_one_row_for_frame[j][k];
+ }
+
+ s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
+ s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
+ s->mSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
+ s->mSOCParameters.DRAMClockChangeLatency = 
mode_lib->ms.state.dram_clock_change_latency_us; + s->mSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us; + s->mSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us; + s->mSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us; + s->mSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us; + s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us; + s->mSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us; + s->mSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us; + + CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal; + CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateWatermarks_params->PrefetchMode = mode_lib->ms.PrefetchMode; + CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLKState[j]; + CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBWPerState[j]; + CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal; + CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateWatermarks_params->meta_row_height = mode_lib->ms.meta_row_height; + CalculateWatermarks_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma; + CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = mode_lib->ms.state.socclk_mhz; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j]; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState; + CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState; + CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState; + CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel; + CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState; + CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState; + CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio; + CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma; + CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps; + CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma; + CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio; + CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma; + CalculateWatermarks_params->HTotal = 
mode_lib->ms.cache_display_cfg.timing.HTotal; + CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal; + CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive; + CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock; + CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; + CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; + CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable; + CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat; + CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth; + CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight; + CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight; + CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledThisState; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteThisState; + + // Output + CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark + CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j]; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] + CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[] + CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j]; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j]; + CalculateWatermarks_params->ActiveDRAMClockChangeLatencyMargin = mode_lib->ms.support.ActiveDRAMClockChangeLatencyMargin; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, + CalculateWatermarks_params); + + } // for j +} + +/// @brief The Mode Support function. 
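+/// Flow summary (orientation comment added here; the ordering is inferred from this file):
+/// derive the number of active planes and the DET/compressed-buffer bounds, run the
+/// scale-ratio/taps and source-format checks, compute per-plane byte/block parameters, then
+/// evaluate bandwidth and the prefetch/flip iteration (see dml_prefetch_check() above) per state.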
+dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) +{ + struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals; + struct UseMinimumDCFCLK_params_st *UseMinimumDCFCLK_params = &mode_lib->scratch.UseMinimumDCFCLK_params; + struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; + struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; + + dml_uint_t j, k, m; + + mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg); + dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); + + CalculateMaxDETAndMinCompressedBufferSize( + mode_lib->ms.ip.config_return_buffer_size_in_kbytes, + mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes, + mode_lib->ms.ip.rob_buffer_size_kbytes, + mode_lib->ms.ip.max_num_dpp, + mode_lib->ms.policy.NomDETInKByteOverrideEnable, // VBA_DELTA + mode_lib->ms.policy.NomDETInKByteOverrideValue, // VBA_DELTA + + /* Output */ + &mode_lib->ms.MaxTotalDETInKByte, + &mode_lib->ms.NomDETInKByte, + &mode_lib->ms.MinCompressedBufferSizeInKByte); + + PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported); + + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + /*Scale Ratio, taps Support Check*/ + mode_lib->ms.support.ScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k] == false + && ((mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) + || mode_lib->ms.cache_display_cfg.plane.HRatio[k] != 1.0 + || mode_lib->ms.cache_display_cfg.plane.HTaps[k] != 1.0 + || mode_lib->ms.cache_display_cfg.plane.VRatio[k] != 1.0 + || mode_lib->ms.cache_display_cfg.plane.VTaps[k] != 1.0)) { + mode_lib->ms.support.ScaleRatioAndTapsSupport = false; + } else if (mode_lib->ms.cache_display_cfg.plane.VTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.VTaps[k] > 8.0 + || mode_lib->ms.cache_display_cfg.plane.HTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 8.0 + || (mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 1.0 && (mode_lib->ms.cache_display_cfg.plane.HTaps[k] % 2) == 1) + || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.ip.max_hscl_ratio + || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.ip.max_vscl_ratio + || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.cache_display_cfg.plane.HTaps[k] + || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.cache_display_cfg.plane.VTaps[k] + || (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 + && 
mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8 + && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe + && (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] > 8 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 8 || + (mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 1 && mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] % 2 == 1) || + mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.ip.max_hscl_ratio || + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.ip.max_vscl_ratio || + mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] || + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]))) { + mode_lib->ms.support.ScaleRatioAndTapsSupport = false; + } + } + + /*Source Format, Pixel Format and Scan Support Check*/ + mode_lib->ms.support.SourceFormatPixelAndScanSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear && (!(!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) || mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true)) { + mode_lib->ms.support.SourceFormatPixelAndScanSupport = false; + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateBytePerPixelAndBlockSizes( + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k], + + /* Output */ + &mode_lib->ms.BytePerPixelY[k], + &mode_lib->ms.BytePerPixelC[k], + &mode_lib->ms.BytePerPixelInDETY[k], + &mode_lib->ms.BytePerPixelInDETC[k], + &mode_lib->ms.Read256BlockHeightY[k], + &mode_lib->ms.Read256BlockHeightC[k], + &mode_lib->ms.Read256BlockWidthY[k], + &mode_lib->ms.Read256BlockWidthC[k], + &mode_lib->ms.MacroTileHeightY[k], + &mode_lib->ms.MacroTileHeightC[k], + &mode_lib->ms.MacroTileWidthY[k], + &mode_lib->ms.MacroTileWidthC[k]); + } + + /*Bandwidth Support Check*/ + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) { + mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k]; + mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k]; + } else { + mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k]; + mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k]; + } + } + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.ReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * dml_ceil(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k]; + mode_lib->ms.ReadBandwidthChroma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * dml_ceil(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k] / 2.0; + } + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; 
k++) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true + && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_64) { + mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] + * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] + / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] + * mode_lib->ms.cache_display_cfg.timing.HTotal[k] + / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8.0; + } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) { + mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] + * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] + / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] + * mode_lib->ms.cache_display_cfg.timing.HTotal[k] + / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4.0; + } else { + mode_lib->ms.WriteBandwidth[k] = 0.0; + } + } + + /*Writeback Latency support check*/ + mode_lib->ms.support.WritebackLatencySupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true && + (mode_lib->ms.WriteBandwidth[k] > mode_lib->ms.ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->ms.state.writeback_latency_us)) { + mode_lib->ms.support.WritebackLatencySupport = false; + } + } + + /*Writeback Mode Support Check*/ + s->TotalNumberOfActiveWriteback = 0; + for (k = 0; k <= (dml_uint_t) mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) { + s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1; + } + } + + mode_lib->ms.support.EnoughWritebackUnits = 1; + if (s->TotalNumberOfActiveWriteback > (dml_uint_t) mode_lib->ms.ip.max_num_wb) { + mode_lib->ms.support.EnoughWritebackUnits = false; + } + + /*Writeback Scale Ratio and Taps Support Check*/ + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > mode_lib->ms.ip.writeback_max_hscl_ratio + || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > mode_lib->ms.ip.writeback_max_vscl_ratio + || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] < mode_lib->ms.ip.writeback_min_hscl_ratio + || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] < mode_lib->ms.ip.writeback_min_vscl_ratio + || mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_hscl_taps + || mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_vscl_taps + || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] + || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] + || (mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > 2.0 && ((mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] % 2) == 1))) { + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; + } + if (2.0 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * 
(mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] - 1) * 57 > mode_lib->ms.ip.writeback_line_buffer_buffer_size) { + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; + } + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateSinglePipeDPPCLKAndSCLThroughput( + mode_lib->ms.cache_display_cfg.plane.HRatio[k], + mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k], + mode_lib->ms.cache_display_cfg.plane.VRatio[k], + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], + mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk, + mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk, + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], + mode_lib->ms.cache_display_cfg.plane.HTaps[k], + mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k], + mode_lib->ms.cache_display_cfg.plane.VTaps[k], + mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k], + /* Output */ + &mode_lib->ms.PSCL_FACTOR[k], + &mode_lib->ms.PSCL_FACTOR_CHROMA[k], + &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]); + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) { + s->MaximumSwathWidthSupportLuma = 8192; + } else if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) { + s->MaximumSwathWidthSupportLuma = 7680; + } else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) { + s->MaximumSwathWidthSupportLuma = 4320; + } else if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha) { + s->MaximumSwathWidthSupportLuma = 3840; + } else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelY[k] == 8 && mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) { + s->MaximumSwathWidthSupportLuma = 3072; + } else { + s->MaximumSwathWidthSupportLuma = 6144; + } + + if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12) { + s->MaximumSwathWidthSupportChroma = (dml_uint_t)(s->MaximumSwathWidthSupportLuma / 2.0); + } else { + s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma; + } + mode_lib->ms.MaximumSwathWidthInLineBufferLuma = mode_lib->ms.ip.line_buffer_size_bits * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatio[k], 1.0) / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k] / + (mode_lib->ms.cache_display_cfg.plane.VTaps[k] + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatio[k], 1.0) - 2, 0.0)); + if (mode_lib->ms.BytePerPixelC[k] == 0.0) { + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0; + } else { + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = + mode_lib->ms.ip.line_buffer_size_bits + * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k], 1.0) + / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k] + / (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] + + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], 1.0) - 2, 0.0)); + } + mode_lib->ms.MaximumSwathWidthLuma[k] = dml_min(s->MaximumSwathWidthSupportLuma, 
mode_lib->ms.MaximumSwathWidthInLineBufferLuma); + mode_lib->ms.MaximumSwathWidthChroma[k] = dml_min(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); + } + + /*Number Of DSC Slices*/ + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && + mode_lib->ms.cache_display_cfg.output.DSCEnable[k] != dml_dsc_disable) { + mode_lib->ms.support.NumberOfDSCSlices[k] = mode_lib->ms.cache_display_cfg.output.DSCSlices[k]; + + if (mode_lib->ms.support.NumberOfDSCSlices[k] == 0) { + if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 4800) { + mode_lib->ms.support.NumberOfDSCSlices[k] = (dml_uint_t)(dml_ceil(mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 600, 4)); + } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 2400) { + mode_lib->ms.support.NumberOfDSCSlices[k] = 8; + } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 1200) { + mode_lib->ms.support.NumberOfDSCSlices[k] = 4; + } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 340) { + mode_lib->ms.support.NumberOfDSCSlices[k] = 2; + } else { + mode_lib->ms.support.NumberOfDSCSlices[k] = 1; + } + } + } else { + mode_lib->ms.support.NumberOfDSCSlices[k] = 1; + } + } + + CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride; + CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte; + CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1; + CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; + CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma; + CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan; + CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary; + 
CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat; + CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling; + CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth; + CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight; + CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart; + CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart; + CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC; + CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC; + CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY; + CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC; + CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY; + CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC; + CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode; + CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; + CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY; + CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC; + CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive; + CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio; + CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[0]; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[1]; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[2]; + CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[3]; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[4]; + CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[5]; + CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[6]; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[7]; + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; + 
CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0]; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2]; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1]; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1]; + + CalculateSwathAndDETConfiguration(&mode_lib->scratch, + CalculateSwathAndDETConfiguration_params); /* dml_bool_t *ViewportSizeSupport */ + + s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; + s->MPCCombineMethodAsPossible = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_needed_for_pstate_and_voltage) + s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; + if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_possible) + s->MPCCombineMethodAsPossible = true; + } + mode_lib->ms.support.MPCCombineMethodIncompatible = s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && s->MPCCombineMethodAsPossible; + + for (j = 0; j < 2; j++) { + mode_lib->ms.TotalNumberOfActiveDPP[j] = 0; + mode_lib->ms.support.TotalAvailablePipesSupport[j] = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + CalculateODMMode( + mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit, + mode_lib->ms.cache_display_cfg.timing.HActive[k], + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k], + mode_lib->ms.cache_display_cfg.output.OutputFormat[k], + mode_lib->ms.policy.ODMUse[k], + mode_lib->ms.state.dispclk_mhz, + mode_lib->ms.max_state.dispclk_mhz, + false, // DSCEnable + mode_lib->ms.TotalNumberOfActiveDPP[j], + mode_lib->ms.ip.max_num_dpp, + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.soc.dcn_downspread_percent, + mode_lib->ms.ip.dispclk_ramp_margin_percent, + mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz, + mode_lib->ms.support.NumberOfDSCSlices[k], + + /* Output */ + &s->TotalAvailablePipesSupportNoDSC, + &s->NumberOfDPPNoDSC, + &s->ODMModeNoDSC, + &s->RequiredDISPCLKPerSurfaceNoDSC); + + CalculateODMMode( + mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit, + mode_lib->ms.cache_display_cfg.timing.HActive[k], + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k], + mode_lib->ms.cache_display_cfg.output.OutputFormat[k], + mode_lib->ms.policy.ODMUse[k], + mode_lib->ms.state.dispclk_mhz, + mode_lib->ms.max_state.dispclk_mhz, + true, // DSCEnable + mode_lib->ms.TotalNumberOfActiveDPP[j], + mode_lib->ms.ip.max_num_dpp, + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.soc.dcn_downspread_percent, + mode_lib->ms.ip.dispclk_ramp_margin_percent, + mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz, + mode_lib->ms.support.NumberOfDSCSlices[k], + + /* Output */ + &s->TotalAvailablePipesSupportDSC, + &s->NumberOfDPPDSC, + &s->ODMModeDSC, + &s->RequiredDISPCLKPerSurfaceDSC); + + CalculateOutputLink( + mode_lib->ms.state.phyclk_mhz, + mode_lib->ms.state.phyclk_d18_mhz, + mode_lib->ms.state.phyclk_d32_mhz, + mode_lib->ms.soc.phy_downspread_percent, + (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k), + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k], + mode_lib->ms.cache_display_cfg.output.OutputFormat[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k], + 
mode_lib->ms.cache_display_cfg.timing.HActive[k], + mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k], + mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k], + mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k], + mode_lib->ms.support.NumberOfDSCSlices[k], + mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k], + mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k], + s->ODMModeNoDSC, + s->ODMModeDSC, + mode_lib->ms.cache_display_cfg.output.DSCEnable[k], + mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k], + mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k], + + /* Output */ + &mode_lib->ms.RequiresDSC[k], + &mode_lib->ms.RequiresFEC[k], + &mode_lib->ms.OutputBppPerState[k], + &mode_lib->ms.OutputTypePerState[k], // VBA_DELTA, VBA uses a string to represent type and rate, but DML uses enum, don't want to rely on strng + &mode_lib->ms.OutputRatePerState[k], + &mode_lib->ms.RequiredSlots[k]); + + if (mode_lib->ms.RequiresDSC[k] == false) { + mode_lib->ms.ODMModePerState[k] = s->ODMModeNoDSC; + mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceNoDSC; + if (!s->TotalAvailablePipesSupportNoDSC) + mode_lib->ms.support.TotalAvailablePipesSupport[j] = false; + mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPNoDSC; + } else { + mode_lib->ms.ODMModePerState[k] = s->ODMModeDSC; + mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceDSC; + if (!s->TotalAvailablePipesSupportDSC) + mode_lib->ms.support.TotalAvailablePipesSupport[j] = false; + mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPDSC; + } + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) { + mode_lib->ms.MPCCombine[j][k] = false; + mode_lib->ms.NoOfDPP[j][k] = 4; + } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) { + mode_lib->ms.MPCCombine[j][k] = false; + mode_lib->ms.NoOfDPP[j][k] = 2; + } else if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_disabled) { + mode_lib->ms.MPCCombine[j][k] = false; + mode_lib->ms.NoOfDPP[j][k] = 1; + } else if (RoundToDFSGranularity(mode_lib->ms.MinDPPCLKUsingSingleDPP[k] * (1 + mode_lib->ms.soc.dcn_downspread_percent / 100), + 1, mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz) <= mode_lib->ms.state.dppclk_mhz && + mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k] == true) { + mode_lib->ms.MPCCombine[j][k] = false; + mode_lib->ms.NoOfDPP[j][k] = 1; + } else if (mode_lib->ms.TotalNumberOfActiveDPP[j] < (dml_uint_t) mode_lib->ms.ip.max_num_dpp) { + mode_lib->ms.MPCCombine[j][k] = true; + mode_lib->ms.NoOfDPP[j][k] = 2; + mode_lib->ms.TotalNumberOfActiveDPP[j] = (dml_uint_t) mode_lib->ms.TotalNumberOfActiveDPP[j] + 1; + } else { + mode_lib->ms.MPCCombine[j][k] = false; + mode_lib->ms.NoOfDPP[j][k] = 1; + mode_lib->ms.support.TotalAvailablePipesSupport[j] = false; + } + } + + mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = 0; + s->NoChromaOrLinear = true; + for (k = 0; k < (dml_uint_t) mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NoOfDPP[j][k] == 1) + mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] + 1; + if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8 + || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10 + || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12 + || 
mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha + || mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) { + s->NoChromaOrLinear = false; + } + } + + if (j == 1 && !UnboundedRequest(mode_lib->ms.policy.UseUnboundedRequesting, + mode_lib->ms.TotalNumberOfActiveDPP[j], s->NoChromaOrLinear, + mode_lib->ms.cache_display_cfg.output.OutputEncoder[0])) { + while (!(mode_lib->ms.TotalNumberOfActiveDPP[j] >= (dml_uint_t) mode_lib->ms.ip.max_num_dpp || mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] == 0)) { + s->BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; + s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_disabled && mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_as_needed_for_voltage && + mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k] > s->BWOfNonCombinedSurfaceOfMaximumBandwidth && + (mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_2to1 && mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_4to1) && + mode_lib->ms.MPCCombine[j][k] == false) { + s->BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k]; + s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = k; + } + } + mode_lib->ms.MPCCombine[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = true; + mode_lib->ms.NoOfDPP[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = 2; + mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + 1; + mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] - 1; + } + } + + //DISPCLK/DPPCLK + mode_lib->ms.WritebackRequiredDISPCLK = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) { + mode_lib->ms.WritebackRequiredDISPCLK = dml_max(mode_lib->ms.WritebackRequiredDISPCLK, + CalculateWriteBackDISPCLK(mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k], + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k], + mode_lib->ms.ip.writeback_line_buffer_buffer_size, + mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz)); + } + } + + mode_lib->ms.RequiredDISPCLK[j] = mode_lib->ms.WritebackRequiredDISPCLK; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.RequiredDISPCLK[j] = dml_max(mode_lib->ms.RequiredDISPCLK[j], mode_lib->ms.RequiredDISPCLKPerSurface[j][k]); + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k]; + } + + CalculateDPPCLK(mode_lib->ms.num_active_planes, + mode_lib->ms.soc.dcn_downspread_percent, + mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz, + mode_lib->ms.MinDPPCLKUsingSingleDPP, + mode_lib->ms.NoOfDPPThisState, + /* Output */ + &mode_lib->ms.GlobalDPPCLK, + mode_lib->ms.RequiredDPPCLKThisState); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.RequiredDPPCLKPerSurface[j][k] = mode_lib->ms.RequiredDPPCLKThisState[k]; + } + + 
mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] = !((mode_lib->ms.RequiredDISPCLK[j] > mode_lib->ms.state.dispclk_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.state.dppclk_mhz)); + + if (mode_lib->ms.TotalNumberOfActiveDPP[j] > (dml_uint_t) mode_lib->ms.ip.max_num_dpp) { + mode_lib->ms.support.TotalAvailablePipesSupport[j] = false; + } + } // j + + /* Total Available OTG, HDMIFRL, DP Support Check */ + s->TotalNumberOfActiveOTG = 0; + s->TotalNumberOfActiveHDMIFRL = 0; + s->TotalNumberOfActiveDP2p0 = 0; + s->TotalNumberOfActiveDP2p0Outputs = 0; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) { + s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1; + if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) + s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1; + if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0) { + s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1; + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k || mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == false) { + s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1; + } + } + } + } + + mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (dml_uint_t) mode_lib->ms.ip.max_num_otg); + mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (dml_uint_t) mode_lib->ms.ip.max_num_hdmi_frl_outputs); + mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_outputs); + + /* Display IO and DSC Support Check */ + mode_lib->ms.support.NonsupportedDSCInputBPC = false; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false && + !(mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 12.0 + || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 10.0 + || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 8.0 + || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] > (dml_uint_t) mode_lib->ms.ip.maximum_dsc_bits_per_component + )) { + mode_lib->ms.support.NonsupportedDSCInputBPC = true; + } + } + + mode_lib->ms.support.ExceededMultistreamSlots = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k) { + s->TotalSlots = mode_lib->ms.RequiredSlots[k]; + for (j = 0; j < mode_lib->ms.num_active_planes; ++j) { + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[j] == k) + s->TotalSlots = s->TotalSlots + mode_lib->ms.RequiredSlots[j]; + } + if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp && s->TotalSlots > 63) + mode_lib->ms.support.ExceededMultistreamSlots = true; + if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 && s->TotalSlots > 64) + mode_lib->ms.support.ExceededMultistreamSlots = true; + } + } + mode_lib->ms.support.LinkCapacitySupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false && + mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == 
dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) && mode_lib->ms.OutputBppPerState[k] == 0) { + mode_lib->ms.support.LinkCapacitySupport = false; + } + } + + mode_lib->ms.support.P2IWith420 = false; + mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false; + mode_lib->ms.support.DSC422NativeNotSupported = false; + mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false; + mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false; + mode_lib->ms.support.BPPForMultistreamNotIndicated = false; + mode_lib->ms.support.MultistreamWithHDMIOreDP = false; + mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false; + mode_lib->ms.support.NotEnoughLanesForMSO = false; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) { + if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420 && mode_lib->ms.cache_display_cfg.timing.Interlace[k] == 1 && mode_lib->ms.ip.ptoi_supported == true) + mode_lib->ms.support.P2IWith420 = true; + + if (mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] != 0) + mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true; + if ((mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable || mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary) && mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 && !mode_lib->ms.ip.dsc422_native_support) + mode_lib->ms.support.DSC422NativeNotSupported = true; + + if (((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr2 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr3) && + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_edp) || + ((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr10 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr13p5 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr20) && + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp2p0)) + mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true; + + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1) { + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_na) + mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true; + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0) + mode_lib->ms.support.BPPForMultistreamNotIndicated = true; + for (j = 0; j < mode_lib->ms.num_active_planes; ++j) { + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j && 
mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0) + mode_lib->ms.support.BPPForMultistreamNotIndicated = true; + } + } + + if ((mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) { + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k) + mode_lib->ms.support.MultistreamWithHDMIOreDP = true; + for (j = 0; j < mode_lib->ms.num_active_planes; ++j) { + if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j) + mode_lib->ms.support.MultistreamWithHDMIOreDP = true; + } + } + if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_split_1to2 || + mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 || mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4)) + mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true; + + if ((mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 2) || + (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 4)) + mode_lib->ms.support.NotEnoughLanesForMSO = true; + } + } + + mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl && + RequiredDTBCLK( + mode_lib->ms.RequiresDSC[k], + mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k], + mode_lib->ms.cache_display_cfg.output.OutputFormat[k], + mode_lib->ms.OutputBppPerState[k], + mode_lib->ms.support.NumberOfDSCSlices[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k], + mode_lib->ms.cache_display_cfg.timing.HActive[k], + mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k], + mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k]) > mode_lib->ms.state.dtbclk_mhz) { + mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true; + } + } + + mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = true; + mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1 && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi) { + mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = false; + } + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1 && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi)) { + mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = false; + } + } + + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) { + if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == 
dml_dp2p0 || + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) { + if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420) { + s->DSCFormatFactor = 2; + } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444) { + s->DSCFormatFactor = 1; + } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) { + s->DSCFormatFactor = 2; + } else { + s->DSCFormatFactor = 1; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); +#endif + if (mode_lib->ms.RequiresDSC[k] == true) { + if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) { + if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]); + dml_print("DML::%s: k=%u, DSCCLKPerState = %f\n", __func__, k, mode_lib->ms.state.dscclk_mhz); + dml_print("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); +#endif + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true; + } + } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) { + if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) { + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true; + } + } else { + if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) { + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true; + } + } + } + } + } + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: DSCCLKRequiredMoreThanSupported = %u\n", __func__, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); +#endif + + /* Check DSC Unit and Slices Support */ + mode_lib->ms.support.NotEnoughDSCUnits = false; + mode_lib->ms.support.NotEnoughDSCSlices = false; + s->TotalDSCUnitsRequired = 0; + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.RequiresDSC[k] == true) { + if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) { + if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 4 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit) + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false; + s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 4; + if (mode_lib->ms.support.NumberOfDSCSlices[k] > 16) + mode_lib->ms.support.NotEnoughDSCSlices = true; + } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) { + if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 2 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit) + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false; + s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 2; + if (mode_lib->ms.support.NumberOfDSCSlices[k] > 8) + mode_lib->ms.support.NotEnoughDSCSlices = true; + } else { + if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > (dml_uint_t) 
mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit) + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false; + s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 1; + if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4) + mode_lib->ms.support.NotEnoughDSCSlices = true; + } + } + } + if (s->TotalDSCUnitsRequired > (dml_uint_t) mode_lib->ms.ip.num_dsc) { + mode_lib->ms.support.NotEnoughDSCUnits = true; + } + + /*DSC Delay per state*/ + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.DSCDelayPerState[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k], + mode_lib->ms.ODMModePerState[k], + mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k], + mode_lib->ms.OutputBppPerState[k], + mode_lib->ms.cache_display_cfg.timing.HActive[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k], + mode_lib->ms.support.NumberOfDSCSlices[k], + mode_lib->ms.cache_display_cfg.output.OutputFormat[k], + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k], + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]); + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) { + for (j = 0; j <= mode_lib->ms.num_active_planes - 1; j++) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m && mode_lib->ms.RequiresDSC[m] == true) { + mode_lib->ms.DSCDelayPerState[k] = mode_lib->ms.DSCDelayPerState[m]; + } + } + } + } + + //Calculate Swath, DET Configuration, DCFCLKDeepSleep + // + for (j = 0; j < 2; ++j) { + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k]; + mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k]; + mode_lib->ms.ODMModeThisState[k] = mode_lib->ms.ODMModePerState[k]; + } + + CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride; + CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte; + CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; + CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; + CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma; + 
CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma; + CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan; + CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary; + CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat; + CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling; + CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth; + CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight; + CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart; + CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart; + CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC; + CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC; + CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY; + CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC; + CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY; + CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC; + CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMModeThisState; + CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; + CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY; + CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC; + CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive; + CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio; + CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state; + 
CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthYThisState; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthCThisState; + CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState; + CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByteThisState; + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState; + CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabledThisState; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2]; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1]; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByteThisState; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport[j]; + + CalculateSwathAndDETConfiguration(&mode_lib->scratch, + CalculateSwathAndDETConfiguration_params); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.swath_width_luma_ub_all_states[j][k] = mode_lib->ms.swath_width_luma_ub_this_state[k]; + mode_lib->ms.swath_width_chroma_ub_all_states[j][k] = mode_lib->ms.swath_width_chroma_ub_this_state[k]; + mode_lib->ms.SwathWidthYAllStates[j][k] = mode_lib->ms.SwathWidthYThisState[k]; + mode_lib->ms.SwathWidthCAllStates[j][k] = mode_lib->ms.SwathWidthCThisState[k]; + mode_lib->ms.SwathHeightYAllStates[j][k] = mode_lib->ms.SwathHeightYThisState[k]; + mode_lib->ms.SwathHeightCAllStates[j][k] = mode_lib->ms.SwathHeightCThisState[k]; + mode_lib->ms.UnboundedRequestEnabledAllStates[j] = mode_lib->ms.UnboundedRequestEnabledThisState; + mode_lib->ms.CompressedBufferSizeInkByteAllStates[j] = mode_lib->ms.CompressedBufferSizeInkByteThisState; + mode_lib->ms.DETBufferSizeInKByteAllStates[j][k] = mode_lib->ms.DETBufferSizeInKByteThisState[k]; + mode_lib->ms.DETBufferSizeYAllStates[j][k] = mode_lib->ms.DETBufferSizeYThisState[k]; + mode_lib->ms.DETBufferSizeCAllStates[j][k] = mode_lib->ms.DETBufferSizeCThisState[k]; + } + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k]; + } + + CalculateSurfaceSizeInMall( + mode_lib->ms.num_active_planes, + mode_lib->ms.soc.mall_allocated_for_dcn_mbytes, + mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen, + mode_lib->ms.cache_display_cfg.surface.DCCEnable, + mode_lib->ms.cache_display_cfg.plane.ViewportStationary, + mode_lib->ms.cache_display_cfg.plane.ViewportXStart, + mode_lib->ms.cache_display_cfg.plane.ViewportYStart, + mode_lib->ms.cache_display_cfg.plane.ViewportXStartC, + mode_lib->ms.cache_display_cfg.plane.ViewportYStartC, + mode_lib->ms.cache_display_cfg.plane.ViewportWidth, + 
mode_lib->ms.cache_display_cfg.plane.ViewportHeight, + mode_lib->ms.BytePerPixelY, + mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma, + mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY, + mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC, + mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY, + mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC, + mode_lib->ms.Read256BlockWidthY, + mode_lib->ms.Read256BlockWidthC, + mode_lib->ms.Read256BlockHeightY, + mode_lib->ms.Read256BlockHeightC, + mode_lib->ms.MacroTileWidthY, + mode_lib->ms.MacroTileWidthC, + mode_lib->ms.MacroTileHeightY, + mode_lib->ms.MacroTileHeightC, + + /* Output */ + mode_lib->ms.SurfaceSizeInMALL, + &mode_lib->ms.support.ExceededMALLSize); + + for (j = 0; j < 2; j++) { + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k]; + mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k]; + mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k]; + mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k]; + mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k]; + mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k]; + mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k]; + mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k]; + mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k]; + mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k]; + mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k]; + } + + mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) { + mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = mode_lib->ms.TotalNumberOfDCCActiveDPP[j] + mode_lib->ms.NoOfDPP[j][k]; + } + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->SurfParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k]; + s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[j][k]; + s->SurfParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k]; + s->SurfParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k]; + s->SurfParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k]; + s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k]; + s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k]; + s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k]; + s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k]; + s->SurfParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k]; + s->SurfParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k]; + s->SurfParameters[k].DCCEnable = 
mode_lib->ms.cache_display_cfg.surface.DCCEnable[k]; + s->SurfParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k]; + s->SurfParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k]; + s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; + s->SurfParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k]; + s->SurfParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k]; + s->SurfParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k]; + s->SurfParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]; + s->SurfParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k]; + s->SurfParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k]; + s->SurfParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k]; + s->SurfParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]; + s->SurfParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k]; + s->SurfParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k]; + s->SurfParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k]; + s->SurfParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k]; + s->SurfParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k]; + s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k]; + s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightYThisState[k]; + s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightCThisState[k]; + } + + CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateVMRowAndSwath_params->myPipe = s->SurfParameters; + CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes; + CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen; + CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes; + CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState; + CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState; + CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; + CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; + CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; + CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; + CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = 
mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; + CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; + CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceededPerState; + CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[0]; + CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[1]; + CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height; + CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma; + CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[2]; // VBA_DELTA + CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[3]; // VBA_DELTA + CalculateVMRowAndSwath_params->meta_req_width = s->dummy_integer_array[4]; + CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[5]; + CalculateVMRowAndSwath_params->meta_req_height = s->dummy_integer_array[6]; + CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[7]; + CalculateVMRowAndSwath_params->meta_row_width = s->dummy_integer_array[8]; + CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[9]; + CalculateVMRowAndSwath_params->meta_row_height = mode_lib->ms.meta_row_height; + CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma; + CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[10]; + CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[11]; + CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[12]; + CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[13]; + CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[14]; + CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[15]; + CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[16]; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[17]; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[18]; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[19]; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[20]; + CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesYThisState; + CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesCThisState; + CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY; + CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC; + CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY; + CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC; + CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bandwidth_this_state; + CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bandwidth_this_state; + 
CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRowThisState; + CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState; + CalculateVMRowAndSwath_params->MetaRowByte = mode_lib->ms.MetaRowBytesThisState; + CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame_this_state; + CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip_this_state; + CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = s->dummy_boolean_array[0]; + CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1]; + CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[21]; + + CalculateVMRowAndSwath(&mode_lib->scratch, + CalculateVMRowAndSwath_params); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.PrefetchLinesY[j][k] = mode_lib->ms.PrefetchLinesYThisState[k]; + mode_lib->ms.PrefetchLinesC[j][k] = mode_lib->ms.PrefetchLinesCThisState[k]; + mode_lib->ms.meta_row_bandwidth[j][k] = mode_lib->ms.meta_row_bandwidth_this_state[k]; + mode_lib->ms.dpte_row_bandwidth[j][k] = mode_lib->ms.dpte_row_bandwidth_this_state[k]; + mode_lib->ms.DPTEBytesPerRow[j][k] = mode_lib->ms.DPTEBytesPerRowThisState[k]; + mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState[k]; + mode_lib->ms.MetaRowBytes[j][k] = mode_lib->ms.MetaRowBytesThisState[k]; + mode_lib->ms.use_one_row_for_frame[j][k] = mode_lib->ms.use_one_row_for_frame_this_state[k]; + mode_lib->ms.use_one_row_for_frame_flip[j][k] = mode_lib->ms.use_one_row_for_frame_flip_this_state[k]; + } + + mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.PTEBufferSizeNotExceededPerState[k] == false) + mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = false; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: j=%u k=%u, PTEBufferSizeNotExceededPerState[%u] = %u\n", __func__, j, k, k, mode_lib->ms.PTEBufferSizeNotExceededPerState[k]); +#endif + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: PTEBufferSizeNotExceeded[%u] = %u\n", __func__, j, mode_lib->ms.support.PTEBufferSizeNotExceeded[j]); +#endif + + mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.DCCMetaBufferSizeNotExceededPerState[k] == false) + mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = false; + } + + mode_lib->ms.UrgLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us, + mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us, + mode_lib->ms.state.urgent_latency_vm_data_only_us, + mode_lib->ms.soc.do_urgent_latency_adjustment, + mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us, + mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz, + mode_lib->ms.state.fabricclk_mhz); + + /* Getter functions work at mp interface so copy the urgent latency to mp*/ + mode_lib->mp.UrgentLatency = mode_lib->ms.UrgLatency; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + CalculateUrgentBurstFactor( + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k], + mode_lib->ms.swath_width_luma_ub_this_state[k], + mode_lib->ms.swath_width_chroma_ub_this_state[k], + mode_lib->ms.SwathHeightYThisState[k], + mode_lib->ms.SwathHeightCThisState[k], + (dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / 
mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.UrgLatency, + mode_lib->ms.ip.cursor_buffer_size, + mode_lib->ms.cache_display_cfg.plane.CursorWidth[k], + mode_lib->ms.cache_display_cfg.plane.CursorBPP[k], + mode_lib->ms.cache_display_cfg.plane.VRatio[k], + mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], + mode_lib->ms.BytePerPixelInDETY[k], + mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeYThisState[k], + mode_lib->ms.DETBufferSizeCThisState[k], + /* Output */ + &mode_lib->ms.UrgentBurstFactorCursor[j][k], + &mode_lib->ms.UrgentBurstFactorLuma[j][k], + &mode_lib->ms.UrgentBurstFactorChroma[j][k], + &mode_lib->ms.NotUrgentLatencyHiding[k]); + } + + CalculateDCFCLKDeepSleep( + mode_lib->ms.num_active_planes, + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.cache_display_cfg.plane.VRatio, + mode_lib->ms.cache_display_cfg.plane.VRatioChroma, + mode_lib->ms.SwathWidthYThisState, + mode_lib->ms.SwathWidthCThisState, + mode_lib->ms.NoOfDPPThisState, + mode_lib->ms.cache_display_cfg.plane.HRatio, + mode_lib->ms.cache_display_cfg.plane.HRatioChroma, + mode_lib->ms.cache_display_cfg.timing.PixelClock, + mode_lib->ms.PSCL_FACTOR, + mode_lib->ms.PSCL_FACTOR_CHROMA, + mode_lib->ms.RequiredDPPCLKThisState, + mode_lib->ms.ReadBandwidthLuma, + mode_lib->ms.ReadBandwidthChroma, + mode_lib->ms.soc.return_bus_width_bytes, + + /* Output */ + &mode_lib->ms.ProjectedDCFCLKDeepSleep[j]); + } + + //Calculate Return BW + for (j = 0; j < 2; ++j) { + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) { + mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay( + mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k], + mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / mode_lib->ms.RequiredDISPCLK[j]; + } else { + mode_lib->ms.WritebackDelayTime[k] = 0.0; + } + for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) { + if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[m] == k && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[m] == true) { + mode_lib->ms.WritebackDelayTime[k] = dml_max(mode_lib->ms.WritebackDelayTime[k], + mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay( + mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[m], + mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[m], + mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[m], + mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[m], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[m], + mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[m], + mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[m], + mode_lib->ms.cache_display_cfg.timing.HTotal[m]) / mode_lib->ms.RequiredDISPCLK[j]); + } + } + } + } + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) { + if 
(mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m) { + mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.WritebackDelayTime[m]; + } + } + } + s->MaxVStartupAllPlanes[j] = 0; // max vstartup among all planes + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + s->MaximumVStartup[j][k] = CalculateMaxVStartup(k, + mode_lib->ms.ip.ptoi_supported, + mode_lib->ms.ip.vblank_nom_default_us, + &mode_lib->ms.cache_display_cfg.timing, + mode_lib->ms.WritebackDelayTime[k]); + + s->MaxVStartupAllPlanes[j] = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes[j], s->MaximumVStartup[j][k])); +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, MaxVStartupAllPlanes[%u] = %u\n", __func__, k, j, s->MaxVStartupAllPlanes[j]); + dml_print("DML::%s: k=%u, MaximumVStartup[%u][%u] = %u\n", __func__, k, j, k, s->MaximumVStartup[j][k]); +#endif + } + } + + s->ReorderingBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes)); + + for (j = 0; j < 2; ++j) { + mode_lib->ms.DCFCLKState[j] = mode_lib->ms.state.dcfclk_mhz; + } + + /* Immediate Flip and MALL parameters */ + s->ImmediateFlipRequiredFinal = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->ImmediateFlipRequiredFinal = s->ImmediateFlipRequiredFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required); + } + + mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified || + ((mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_required) && + (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required)); + } + mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified && s->ImmediateFlipRequiredFinal; + + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == true || mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) && + (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame || mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)); + } + + mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen || + ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && 
(mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)) || + ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_disable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)); + } + + s->FullFrameMALLPStateMethod = false; + s->SubViewportMALLPStateMethod = false; + s->PhantomPipeMALLPStateMethod = false; + s->SubViewportMALLRefreshGreaterThan120Hz = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame) + s->FullFrameMALLPStateMethod = true; + if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) { + s->SubViewportMALLPStateMethod = true; + if (mode_lib->ms.cache_display_cfg.timing.RefreshRate[k] > 120) + s->SubViewportMALLRefreshGreaterThan120Hz = true; + } + if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) + s->PhantomPipeMALLPStateMethod = true; + } + mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) + || (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz; + + if (mode_lib->ms.policy.UseMinimumRequiredDCFCLK == true) { + UseMinimumDCFCLK_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + UseMinimumDCFCLK_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay; + UseMinimumDCFCLK_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal; + UseMinimumDCFCLK_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters; + UseMinimumDCFCLK_params->MaxPrefetchMode = dml_prefetch_support_stutter; + UseMinimumDCFCLK_params->DRAMClockChangeLatencyFinal = mode_lib->ms.state.dram_clock_change_latency_us; + UseMinimumDCFCLK_params->FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us; + UseMinimumDCFCLK_params->SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us; + UseMinimumDCFCLK_params->ReturnBusWidth = mode_lib->ms.soc.return_bus_width_bytes; + UseMinimumDCFCLK_params->RoundTripPingLatencyCycles = mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles; + UseMinimumDCFCLK_params->ReorderingBytes = s->ReorderingBytes; + UseMinimumDCFCLK_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes; + UseMinimumDCFCLK_params->MetaChunkSize = mode_lib->ms.ip.meta_chunk_size_kbytes; + UseMinimumDCFCLK_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; + UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; + UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; + UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; + UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; + 
UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal;
+		UseMinimumDCFCLK_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+		UseMinimumDCFCLK_params->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation = mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent;
+		UseMinimumDCFCLK_params->PercentOfIdealSDPPortBWReceivedAfterUrgLatency = mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent;
+		UseMinimumDCFCLK_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
+		UseMinimumDCFCLK_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
+		UseMinimumDCFCLK_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes;
+		UseMinimumDCFCLK_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired;
+		UseMinimumDCFCLK_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace;
+		UseMinimumDCFCLK_params->RequiredDPPCLKPerSurface = mode_lib->ms.RequiredDPPCLKPerSurface;
+		UseMinimumDCFCLK_params->RequiredDISPCLK = mode_lib->ms.RequiredDISPCLK;
+		UseMinimumDCFCLK_params->UrgLatency = mode_lib->ms.UrgLatency;
+		UseMinimumDCFCLK_params->NoOfDPP = mode_lib->ms.NoOfDPP;
+		UseMinimumDCFCLK_params->ProjectedDCFCLKDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep;
+		UseMinimumDCFCLK_params->MaximumVStartup = s->MaximumVStartup;
+		UseMinimumDCFCLK_params->TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP;
+		UseMinimumDCFCLK_params->TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP;
+		UseMinimumDCFCLK_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+		UseMinimumDCFCLK_params->PrefetchLinesY = mode_lib->ms.PrefetchLinesY;
+		UseMinimumDCFCLK_params->PrefetchLinesC = mode_lib->ms.PrefetchLinesC;
+		UseMinimumDCFCLK_params->swath_width_luma_ub_all_states = mode_lib->ms.swath_width_luma_ub_all_states;
+		UseMinimumDCFCLK_params->swath_width_chroma_ub_all_states = mode_lib->ms.swath_width_chroma_ub_all_states;
+		UseMinimumDCFCLK_params->BytePerPixelY = mode_lib->ms.BytePerPixelY;
+		UseMinimumDCFCLK_params->BytePerPixelC = mode_lib->ms.BytePerPixelC;
+		UseMinimumDCFCLK_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
+		UseMinimumDCFCLK_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
+		UseMinimumDCFCLK_params->PDEAndMetaPTEBytesPerFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame;
+		UseMinimumDCFCLK_params->DPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
+		UseMinimumDCFCLK_params->MetaRowBytes = mode_lib->ms.MetaRowBytes;
+		UseMinimumDCFCLK_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable;
+		UseMinimumDCFCLK_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
+		UseMinimumDCFCLK_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
+		UseMinimumDCFCLK_params->DCFCLKPerState = mode_lib->ms.state.dcfclk_mhz;
+		UseMinimumDCFCLK_params->DCFCLKState = mode_lib->ms.DCFCLKState;
+
+		UseMinimumDCFCLK(&mode_lib->scratch,
+			UseMinimumDCFCLK_params);
+
+	} // UseMinimumRequiredDCFCLK == true
+
+	for (j = 0; j < 2; ++j) {
+		mode_lib->ms.ReturnBWPerState[j] = dml_get_return_bw_mbps(&mode_lib->ms.soc, mode_lib->ms.state.use_ideal_dram_bw_strobe,
+			mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.DCFCLKState[j], mode_lib->ms.state.fabricclk_mhz,
+			mode_lib->ms.state.dram_speed_mts);
+		mode_lib->ms.ReturnDRAMBWPerState[j] = dml_get_return_dram_bw_mbps(&mode_lib->ms.soc,
+			mode_lib->ms.state.use_ideal_dram_bw_strobe,
+			mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+			mode_lib->ms.state.dram_speed_mts);
+	}
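The re-ordering buffer check that follows compares two times: how long the ROB, less one pixel chunk, can absorb returned data at the state's return bandwidth, versus the round-trip latency (in DCFCLK cycles, plus a fixed 32-cycle margin) plus the time to drain the worst-case out-of-order return bytes. A minimal standalone sketch of that inequality, using made-up numbers rather than any real SoC bounding box:

    #include <stdbool.h>
    #include <stdio.h>

    /* Sketch of the ROB latency-hiding inequality; all inputs are hypothetical. */
    static bool rob_supports(double rob_kbytes, double chunk_kbytes,
                             double return_bw_mbps, double dcfclk_mhz,
                             double rtt_cycles, double reordering_bytes)
    {
        /* time the buffered data lasts, in us (KB * 1024 bytes over MB/s gives us) */
        double buffer_time_us = (rob_kbytes - chunk_kbytes) * 1024 / return_bw_mbps;
        /* latency to hide: round trip in cycles plus reordering drain time */
        double latency_us = (rtt_cycles + 32) / dcfclk_mhz + reordering_bytes / return_bw_mbps;
        return buffer_time_us > latency_us;
    }

    int main(void)
    {
        /* hypothetical: 128 KB ROB, 8 KB chunk, 16000 MB/s, 1200 MHz, 150 cycles, 8192 B */
        printf("ROBSupport = %d\n", rob_supports(128, 8, 16000, 1200, 150, 8192));
        return 0;
    }

Since return bandwidth is in MB/s and DCFCLK in MHz, both sides of the comparison come out in microseconds.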
+
+	//Re-ordering Buffer Support Check
+	for (j = 0; j < 2; ++j) {
+		if ((mode_lib->ms.ip.rob_buffer_size_kbytes - mode_lib->ms.ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->ms.ReturnBWPerState[j] >
+			(mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles + 32) / mode_lib->ms.DCFCLKState[j] + s->ReorderingBytes / mode_lib->ms.ReturnBWPerState[j]) {
+			mode_lib->ms.support.ROBSupport[j] = true;
+		} else {
+			mode_lib->ms.support.ROBSupport[j] = false;
+		}
+		dml_print("DML::%s: DEBUG ROBSupport[%u] = %u (%u)\n", __func__, j, mode_lib->ms.support.ROBSupport[j], __LINE__);
+	}
+
+	//Vertical Active BW support check
+	s->MaxTotalVActiveRDBandwidth = 0;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		s->MaxTotalVActiveRDBandwidth = s->MaxTotalVActiveRDBandwidth + mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k];
+	}
+
+	for (j = 0; j < 2; ++j) {
+		mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j] = dml_min3(mode_lib->ms.soc.return_bus_width_bytes * mode_lib->ms.DCFCLKState[j] * mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent / 100.0,
+			mode_lib->ms.state.fabricclk_mhz * mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes * mode_lib->ms.soc.max_avg_fabric_bw_use_normal_percent / 100.0,
+			mode_lib->ms.state.dram_speed_mts * mode_lib->ms.soc.num_chans * mode_lib->ms.soc.dram_channel_width_bytes *
+			((mode_lib->ms.state.use_ideal_dram_bw_strobe && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable) ?
+				mode_lib->ms.soc.max_avg_dram_bw_use_normal_strobe_percent : mode_lib->ms.soc.max_avg_dram_bw_use_normal_percent) / 100.0);
+
+		if (s->MaxTotalVActiveRDBandwidth <= mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j]) {
+			mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = true;
+		} else {
+			mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = false;
+		}
+	}
+
+	/* Prefetch Check */
+	dml_prefetch_check(mode_lib);
+
+	// End of Prefetch Check
+	dml_print("DML::%s: Done prefetch calculation\n", __func__);
+
+	/*Cursor Support Check*/
+	mode_lib->ms.support.CursorSupport = true;
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		if (mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] > 0.0) {
+			if (mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] == 64 && mode_lib->ms.ip.cursor_64bpp_support == false) {
+				mode_lib->ms.support.CursorSupport = false;
+			}
+		}
+	}
+
+	/*Valid Pitch Check*/
+	mode_lib->ms.support.PitchSupport = true;
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		mode_lib->ms.support.AlignedYPitch[k] = dml_ceil(
+			dml_max(mode_lib->ms.cache_display_cfg.surface.PitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]),
+			mode_lib->ms.MacroTileWidthY[k]);
+		if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
+			mode_lib->ms.support.AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
+		} else {
+			mode_lib->ms.support.AlignedDCCMetaPitchY[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
+		}
+		if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
+			&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
+			&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
+			&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
+			&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
+			&& mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8) {
+			mode_lib->ms.support.AlignedCPitch[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.PitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), mode_lib->ms.MacroTileWidthC[k]);
+			if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
+				mode_lib->ms.support.AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
+			} else {
+				mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
+			}
+		} else {
+			mode_lib->ms.support.AlignedCPitch[k] = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
+			mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
+		}
+		if (mode_lib->ms.support.AlignedYPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchY[k] || mode_lib->ms.support.AlignedCPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchC[k] ||
+			mode_lib->ms.support.AlignedDCCMetaPitchY[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k] || mode_lib->ms.support.AlignedDCCMetaPitchC[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]) {
+			mode_lib->ms.support.PitchSupport = false;
+		}
+	}
+
+	mode_lib->ms.support.ViewportExceedsSurface = false;
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		if (mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k]) {
+			mode_lib->ms.support.ViewportExceedsSurface = true;
+			if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 &&
+				mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_8 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe) {
+				if (mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k]) {
+					mode_lib->ms.support.ViewportExceedsSurface = true;
+				}
+			}
+		}
+	}
+
+	/*Mode Support, Voltage State and SOC Configuration*/
+	for (j = 0; j < 2; j++) { // j iterator is for the combine mode off or on
+		dml_print("DML::%s: checking support for j=%u\n", __func__, j);
+		dml_print("DML::%s: state_idx=%0d max_state_idx=%0d\n", __func__, mode_lib->ms.state_idx, mode_lib->ms.max_state_idx);
+
+		s->is_max_pwr_state = (mode_lib->ms.max_state_idx == mode_lib->ms.state_idx);
+		s->is_max_dram_pwr_state = (mode_lib->ms.max_state.dram_speed_mts == mode_lib->ms.state.dram_speed_mts);
+
+		s->dram_clock_change_support = (!mode_lib->ms.policy.DRAMClockChangeRequirementFinal ||
+			(s->is_max_dram_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported) ||
+			mode_lib->ms.support.DRAMClockChangeSupport[j] != dml_dram_clock_change_unsupported);
+		s->f_clock_change_support = (!mode_lib->ms.policy.FCLKChangeRequirementFinal ||
+			(s->is_max_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported) ||
+			mode_lib->ms.support.FCLKChangeSupport[j] != dml_fclock_change_unsupported);
+
+		if (mode_lib->ms.support.ScaleRatioAndTapsSupport == true
+			&& mode_lib->ms.support.SourceFormatPixelAndScanSupport == true
+			&& mode_lib->ms.support.ViewportSizeSupport[j] == true
+			&& !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
+			&& !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
+			&& !mode_lib->ms.support.BPPForMultistreamNotIndicated
+			&& !mode_lib->ms.support.MultistreamWithHDMIOreDP
+			&& !mode_lib->ms.support.ExceededMultistreamSlots
+			&& !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
+			&& !mode_lib->ms.support.NotEnoughLanesForMSO
+			&& mode_lib->ms.support.LinkCapacitySupport == true
+			&& !mode_lib->ms.support.P2IWith420
+			&& !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP
+			&& !mode_lib->ms.support.DSC422NativeNotSupported
+			&& !mode_lib->ms.support.MPCCombineMethodIncompatible
+			&& mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true
+			&& mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true
+			&& mode_lib->ms.support.NotEnoughDSCUnits == false
+			&& !mode_lib->ms.support.NotEnoughDSCSlices
+			&& !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
+			&& !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
+			&& mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false
+			&& mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
+			&& mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false
+			&& !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
+			&& !mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
+			&& mode_lib->ms.support.ROBSupport[j] == true
+			&& mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true
+			&& mode_lib->ms.support.TotalAvailablePipesSupport[j] == true
+			&& mode_lib->ms.support.NumberOfOTGSupport == true
+			&& mode_lib->ms.support.NumberOfHDMIFRLSupport == true
+			&& mode_lib->ms.support.NumberOfDP2p0Support == true
+			&& mode_lib->ms.support.EnoughWritebackUnits == true
+			&& mode_lib->ms.support.WritebackLatencySupport == true
+			&& mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true
+			&& mode_lib->ms.support.CursorSupport == true
+			&& mode_lib->ms.support.PitchSupport == true
+			&& mode_lib->ms.support.ViewportExceedsSurface == false
+			&& mode_lib->ms.support.PrefetchSupported[j] == true
+			&& mode_lib->ms.support.VActiveBandwithSupport[j] == true
+			&& mode_lib->ms.support.DynamicMetadataSupported[j] == true
+			&& mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true
+			&& mode_lib->ms.support.VRatioInPrefetchSupported[j] == true
+			&& mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true
+			&& mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true
+			&& mode_lib->ms.support.NonsupportedDSCInputBPC == false
+			&& !mode_lib->ms.support.ExceededMALLSize
+			&& ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j])
+			&& s->dram_clock_change_support == true
+			&& s->f_clock_change_support == true
+			&& (!mode_lib->ms.policy.USRRetrainingRequiredFinal || mode_lib->ms.support.USRRetrainingSupport[j])) {
+			dml_print("DML::%s: mode is supported\n", __func__);
+			mode_lib->ms.support.ModeSupport[j] = true;
+		} else {
+			dml_print("DML::%s: mode is NOT supported\n", __func__);
+			mode_lib->ms.support.ModeSupport[j] = false;
+			dml_print_mode_support(mode_lib, j);
+		}
+	}
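Each pass of the loop above (j == 0 without MPC combine, j == 1 with it) reduces several dozen independently computed checks to a single ModeSupport[j] verdict: any one failing term vetoes the pass, and dml_print_mode_support() logs the support state so the failing terms can be found. The shape of the test, reduced to a sketch with illustrative names rather than the real support struct:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Illustrative only: one named flag per check, ANDed into a verdict. */
    struct support_check { const char *name; bool ok; };

    static bool pass_supported(const struct support_check *c, size_t n)
    {
        bool ok = true;
        for (size_t i = 0; i < n; i++) {
            if (!c[i].ok) {
                printf("failed: %s\n", c[i].name); /* analogous to the failure dump */
                ok = false;
            }
        }
        return ok;
    }

dml instead spells the conjunction out inline, which keeps each term visible in the source and in the debug output.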
+
+	mode_lib->ms.support.MaximumMPCCombine = 0;
+	mode_lib->ms.support.ModeIsSupported = 0;
+	if (mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true) { // if the mode is supported by either no combine or mpccombine
+		mode_lib->ms.support.ModeIsSupported = mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true;
+
+		// Determine whether MPC combine is necessary: it depends on whether using MPC combine helps dram clock change or fclk change support, etc.
+		if ((mode_lib->ms.support.ModeSupport[0] == false && mode_lib->ms.support.ModeSupport[1] == true) || s->MPCCombineMethodAsPossible ||
+			(s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.DRAMClockChangeRequirementFinal &&
+			(((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_sub_vp) &&
+			!(mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_sub_vp)) ||
+			((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr
+			|| mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_full_frame
+			|| mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_sub_vp || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_sub_vp
+			) &&
+			mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_unsupported)))
+			|| (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.FCLKChangeRequirementFinal &&
+			((mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vactive && mode_lib->ms.support.FCLKChangeSupport[0] != dml_fclock_change_vactive) ||
+			(mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vblank && mode_lib->ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)))) {
+			mode_lib->ms.support.MaximumMPCCombine = 1;
+		} else {
+			mode_lib->ms.support.MaximumMPCCombine = 0;
+		}
+	}
+
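Everything computed per pass is held in two-entry arrays indexed by the combine mode; once MaximumMPCCombine picks the winning pass, the final values below are simply the winning column of those arrays. In sketch form (the field names here are illustrative, not the real dml members):

    /* Illustrative only: collapse [pass][plane] results to the chosen pass. */
    #define SKETCH_MAX_PLANES 8

    static void collapse_to_final(unsigned int chosen_pass, /* MaximumMPCCombine: 0 or 1 */
                                  const unsigned int det_kbytes[2][SKETCH_MAX_PLANES],
                                  unsigned int det_kbytes_final[SKETCH_MAX_PLANES],
                                  unsigned int num_planes)
    {
        for (unsigned int k = 0; k < num_planes; k++)
            det_kbytes_final[k] = det_kbytes[chosen_pass][k];
    }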
+	// Since mode_support now works on one particular power state, there is only one state idx (index 0).
+	mode_lib->ms.support.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupportedForState[mode_lib->ms.support.MaximumMPCCombine]; // Consider flip support if max combine supports imm flip
+	mode_lib->ms.support.UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational
+	mode_lib->ms.support.CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational
+
+	dml_print("DML::%s: ModeIsSupported = %u\n", __func__, mode_lib->ms.support.ModeIsSupported);
+	dml_print("DML::%s: MaximumMPCCombine = %u\n", __func__, mode_lib->ms.support.MaximumMPCCombine);
+	dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
+	dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, mode_lib->ms.support.UnboundedRequestEnabled);
+	dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, mode_lib->ms.support.CompressedBufferSizeInkByte);
+
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[mode_lib->ms.support.MaximumMPCCombine][k];
+		mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[mode_lib->ms.support.MaximumMPCCombine][k];
+		mode_lib->ms.SwathHeightY[k] = mode_lib->ms.SwathHeightYAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+		mode_lib->ms.SwathHeightC[k] = mode_lib->ms.SwathHeightCAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+		mode_lib->ms.DETBufferSizeInKByte[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+		mode_lib->ms.DETBufferSizeY[k] = mode_lib->ms.DETBufferSizeYAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+		mode_lib->ms.DETBufferSizeC[k] = mode_lib->ms.DETBufferSizeCAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+	}
+
+	mode_lib->ms.DRAMSpeed = mode_lib->ms.state.dram_speed_mts;
+	mode_lib->ms.FabricClock = mode_lib->ms.state.fabricclk_mhz;
+	mode_lib->ms.SOCCLK = mode_lib->ms.state.socclk_mhz;
+	mode_lib->ms.DCFCLK = mode_lib->ms.DCFCLKState[mode_lib->ms.support.MaximumMPCCombine];
+	mode_lib->ms.ReturnBW = mode_lib->ms.ReturnBWPerState[mode_lib->ms.support.MaximumMPCCombine];
+	mode_lib->ms.ReturnDRAMBW = mode_lib->ms.ReturnDRAMBWPerState[mode_lib->ms.support.MaximumMPCCombine];
+
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+			mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMModePerState[k];
+		} else {
+			mode_lib->ms.support.ODMMode[k] = dml_odm_mode_bypass;
+		}
+
+		mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
+		mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
+		mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBppPerState[k];
+		mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputTypePerState[k];
+		mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRatePerState[k];
+		mode_lib->ms.support.SubViewportLinesNeededInMALL[k] = mode_lib->ms.SubViewportLinesNeededInMALL[k];
+	}
+
+	return mode_lib->ms.support.ModeIsSupported;
+} // dml_core_mode_support
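dml_core_mode_support() and the two functions defined next form the sequence the DML2 wrapper is expected to drive: the support check first, then the partial recalculation of derived values such as ReturnBW, then mode programming once a supported configuration has been chosen. A hedged sketch of that order (the real wrapper adds state-search and fallback logic around it):

    /* Sketch: expected calling order for one candidate power state. */
    static void check_and_program(struct display_mode_lib_st *mode_lib,
                                  const struct dml_clk_cfg_st *clk_cfg)
    {
        if (!dml_core_mode_support(mode_lib))
            return; /* caller would move to another state or reject the config */

        dml_core_mode_support_partial(mode_lib); /* refresh ReturnBW and friends */
        dml_core_mode_programming(mode_lib, clk_cfg);
    }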
+
+/// @brief This function calculates some parameters that are needed ahead of the mode programming function call
+void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib)
+{
+	CalculateMaxDETAndMinCompressedBufferSize(
+		mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes,
+		mode_lib->ms.ip.rob_buffer_size_kbytes,
+		mode_lib->ms.ip.max_num_dpp,
+		mode_lib->ms.policy.NomDETInKByteOverrideEnable,
+		mode_lib->ms.policy.NomDETInKByteOverrideValue,
+
+		/* Output */
+		&mode_lib->ms.MaxTotalDETInKByte,
+		&mode_lib->ms.NomDETInKByte,
+		&mode_lib->ms.MinCompressedBufferSizeInKByte);
+
+	PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported);
+
+	mode_lib->ms.ReturnBW = dml_get_return_bw_mbps(&mode_lib->ms.soc,
+		mode_lib->ms.state.use_ideal_dram_bw_strobe,
+		mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+		mode_lib->ms.DCFCLK,
+		mode_lib->ms.FabricClock,
+		mode_lib->ms.DRAMSpeed);
+	dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
+
+} // dml_core_mode_support_partial
+
+/// @brief This is the mode programming function. It is assumed the display cfg is supported at the given power state
+void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg)
+{
+	struct dml_core_mode_programming_locals_st *s = &mode_lib->scratch.dml_core_mode_programming_locals;
+	struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
+	struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
+	struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
+	struct CalculateStutterEfficiency_params_st *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
+	struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
+
+	struct mode_program_st *locals = &mode_lib->mp;
+	struct DmlPipe *myPipe;
+	dml_uint_t j = 0, k = 0;
+	dml_float_t TWait;
+	dml_bool_t isInterlaceTiming;
+
+	mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg);
+	mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(&mode_lib->ms.cache_display_cfg);
+	dml_calc_pipe_plane_mapping(&mode_lib->ms.cache_display_cfg.hw, mode_lib->mp.pipe_plane);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: --- START --- \n", __func__);
+	dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
+	dml_print("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
+#endif
+
+	s->DSCFormatFactor = 0;
+
+	// Unlike dppclk and dispclk, which can be calculated in mode_programming,
+	// DCFCLK is calculated in mode_support (it is the state bbox dcfclk, or the min dcfclk if the min dcfclk option is used in the mode support calculation)
+	if (clk_cfg->dcfclk_option != dml_use_override_freq)
+		locals->Dcfclk = mode_lib->ms.DCFCLK;
+	else
+		locals->Dcfclk = clk_cfg->dcfclk_mhz;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print_dml_policy(&mode_lib->ms.policy);
+	dml_print_soc_state_bounding_box(&mode_lib->ms.state);
+	dml_print_soc_bounding_box(&mode_lib->ms.soc);
+	dml_print_clk_cfg(clk_cfg);
+
+	dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
+	dml_print("DML::%s: Using DCFCLK = %f\n", __func__, locals->Dcfclk);
+	dml_print("DML::%s: Using SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
+#endif
+
+	locals->WritebackDISPCLK = 0.0;
+	locals->GlobalDPPCLK = 0.0;
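As with Dcfclk above, each clock programmed below can come from one of three sources selected by clk_cfg: the freshly calculated requirement, an explicit override, or the bounding-box frequency for the state. The recurring pattern, pulled out into a sketch (the helper name is invented; dml spells this out inline per clock):

    /* Sketch of the per-clock three-way selection used for dispclk and dppclk. */
    static dml_float_t pick_clock(int option, /* a clk_cfg *_option value */
                                  dml_float_t calculated_mhz,
                                  dml_float_t override_mhz,
                                  dml_float_t state_mhz)
    {
        if (option == dml_use_required_freq)
            return calculated_mhz;
        if (option == dml_use_override_freq)
            return override_mhz;
        return state_mhz; /* default: bounding-box frequency of the state */
    }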
+
+	// DISPCLK and DPPCLK Calculation
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) {
+			locals->WritebackDISPCLK =
+				dml_max(
+					locals->WritebackDISPCLK,
+					CalculateWriteBackDISPCLK(
+						mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
+						mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+						mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
+						mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
+						mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k],
+						mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
+						mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k],
+						mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
+						mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+						mode_lib->ms.ip.writeback_line_buffer_buffer_size,
+						mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz));
+		}
+	}
+
+	locals->Dispclk_calculated = locals->WritebackDISPCLK;
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+			locals->Dispclk_calculated = dml_max(locals->Dispclk_calculated, CalculateRequiredDispclk(
+				mode_lib->ms.cache_display_cfg.hw.ODMMode[k],
+				mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+				mode_lib->ms.soc.dcn_downspread_percent,
+				mode_lib->ms.ip.dispclk_ramp_margin_percent,
+				mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
+				mode_lib->ms.max_state.dispclk_mhz));
+		}
+	}
+	if (clk_cfg->dispclk_option == dml_use_required_freq)
+		locals->Dispclk = locals->Dispclk_calculated;
+	else if (clk_cfg->dispclk_option == dml_use_override_freq)
+		locals->Dispclk = clk_cfg->dispclk_mhz;
+	else
+		locals->Dispclk = mode_lib->ms.state.dispclk_mhz;
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: Using Dispclk = %f\n", __func__, locals->Dispclk);
+#endif
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		CalculateSinglePipeDPPCLKAndSCLThroughput(
+			mode_lib->ms.cache_display_cfg.plane.HRatio[k],
+			mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k],
+			mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+			mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+			mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk,
+			mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk,
+			mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+			mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+			mode_lib->ms.cache_display_cfg.plane.HTaps[k],
+			mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k],
+			mode_lib->ms.cache_display_cfg.plane.VTaps[k],
+			mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k],
+
+			/* Output */
+			&locals->PSCL_THROUGHPUT[k],
+			&locals->PSCL_THROUGHPUT_CHROMA[k],
+			&locals->DPPCLKUsingSingleDPP[k]);
+	}
+
+	CalculateDPPCLK(mode_lib->ms.num_active_planes,
+		mode_lib->ms.soc.dcn_downspread_percent,
+		mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
+		locals->DPPCLKUsingSingleDPP,
+		mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+		/* Output */
+		&locals->GlobalDPPCLK,
+		locals->Dppclk_calculated);
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (clk_cfg->dppclk_option[k] == dml_use_required_freq)
+			locals->Dppclk[k] = locals->Dppclk_calculated[k];
+		else if (clk_cfg->dppclk_option[k] == dml_use_override_freq)
+			locals->Dppclk[k] = clk_cfg->dppclk_mhz[k];
+		else
+			locals->Dppclk[k] = mode_lib->ms.state.dppclk_mhz;
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: Using Dppclk[%0d] = %f\n", __func__, k, locals->Dppclk[k]);
+#endif
+	}
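CalculateDPPCLK() above produces one GlobalDPPCLK for the whole chip together with per-plane Dppclk_calculated[] values; conceptually the global clock must cover the largest per-plane requirement after downspread derating, and the per-plane values are then derived from it (the real helper additionally snaps results to frequencies the shared VCO can produce). The core of that idea, as an assumption-labeled sketch:

    /* Sketch: global DPP clock as the downspread-derated max of per-plane needs. */
    static void global_dppclk_sketch(unsigned int num_planes,
                                     double downspread_pct,
                                     const double need_mhz[],
                                     double *global_mhz, double out_mhz[])
    {
        *global_mhz = 0.0;
        for (unsigned int k = 0; k < num_planes; k++) {
            double derated = need_mhz[k] * (1.0 + downspread_pct / 100.0);
            if (derated > *global_mhz)
                *global_mhz = derated;
        }
        for (unsigned int k = 0; k < num_planes; k++)
            out_mhz[k] = *global_mhz; /* real code picks per-plane dividers instead */
    }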
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		CalculateBytePerPixelAndBlockSizes(
+			mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+			mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
+
+			/* Output */
+			&locals->BytePerPixelY[k],
+			&locals->BytePerPixelC[k],
+			&locals->BytePerPixelDETY[k],
+			&locals->BytePerPixelDETC[k],
+			&locals->BlockHeight256BytesY[k],
+			&locals->BlockHeight256BytesC[k],
+			&locals->BlockWidth256BytesY[k],
+			&locals->BlockWidth256BytesC[k],
+			&locals->BlockHeightY[k],
+			&locals->BlockHeightC[k],
+			&locals->BlockWidthY[k],
+			&locals->BlockWidthC[k]);
+	}
+
+	dml_print("DML::%s: %u\n", __func__, __LINE__);
+	CalculateSwathWidth(
+		false, // ForceSingleDPP
+		mode_lib->ms.num_active_planes,
+		mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat,
+		mode_lib->ms.cache_display_cfg.plane.SourceScan,
+		mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
+		mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
+		mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
+		mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
+		mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
+		mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
+		mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
+		mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
+		mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
+		mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
+		mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
+		mode_lib->ms.cache_display_cfg.hw.ODMMode,
+		locals->BytePerPixelY,
+		locals->BytePerPixelC,
+		locals->BlockHeight256BytesY,
+		locals->BlockHeight256BytesC,
+		locals->BlockWidth256BytesY,
+		locals->BlockWidth256BytesC,
+		mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming,
+		mode_lib->ms.cache_display_cfg.timing.HActive,
+		mode_lib->ms.cache_display_cfg.plane.HRatio,
+		mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+
+		/* Output */
+		locals->SwathWidthSingleDPPY,
+		locals->SwathWidthSingleDPPC,
+		locals->SwathWidthY,
+		locals->SwathWidthC,
+		s->dummy_integer_array[0], // dml_uint_t MaximumSwathHeightY[]
+		s->dummy_integer_array[1], // dml_uint_t MaximumSwathHeightC[]
+		locals->swath_width_luma_ub,
+		locals->swath_width_chroma_ub);
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		locals->ReadBandwidthSurfaceLuma[k] = locals->SwathWidthSingleDPPY[k] * locals->BytePerPixelY[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+		locals->ReadBandwidthSurfaceChroma[k] = locals->SwathWidthSingleDPPC[k] * locals->BytePerPixelC[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
+		dml_print("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
+		dml_print("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
+	}
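The per-surface read bandwidth just computed is bytes per line (swath width times bytes per pixel) divided by the line time HTotal / PixelClock, scaled by the vertical ratio. A worked check with made-up plane numbers:

    #include <stdio.h>

    int main(void)
    {
        /* hypothetical plane: 1920-px swath, 4 B/px, HTotal 2200 @ 594 MHz, VRatio 1.0 */
        double swath_width = 1920.0, byte_per_pixel = 4.0;
        double htotal = 2200.0, pixel_clock_mhz = 594.0, vratio = 1.0;

        double line_time_us = htotal / pixel_clock_mhz;        /* ~3.70 us */
        double bw = swath_width * byte_per_pixel / line_time_us * vratio;
        printf("read bandwidth = %.1f MB/s\n", bw);            /* ~2073.6 MB/s */
        return 0;
    }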
+
+	CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
+	CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+	CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
+	CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
+	CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
+	CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
+	CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
+	CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
+	CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+	CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
+	CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
+	CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
+	CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
+	CalculateSwathAndDETConfiguration_params->Output = s->dummy_output_encoder_array;
+	CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = locals->ReadBandwidthSurfaceLuma;
+	CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = locals->ReadBandwidthSurfaceChroma;
+	CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
+	CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
+	CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
+	CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
+	CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
+	CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
+	CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
+	CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
+	CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
+	CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
+	CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
+	CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
+	CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
+	CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
+	CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
+	CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
+	CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = locals->BlockHeight256BytesY;
+	CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = locals->BlockHeight256BytesC;
+	CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = locals->BlockWidth256BytesY;
+	CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = locals->BlockWidth256BytesC;
+	CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode;
+	CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+	CalculateSwathAndDETConfiguration_params->BytePerPixY = locals->BytePerPixelY;
+	CalculateSwathAndDETConfiguration_params->BytePerPixC = locals->BytePerPixelC;
+	CalculateSwathAndDETConfiguration_params->BytePerPixDETY = locals->BytePerPixelDETY;
+	CalculateSwathAndDETConfiguration_params->BytePerPixDETC = locals->BytePerPixelDETC;
+	CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
+	CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
+	CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
+	CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
+	CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
+	CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
+	CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
+	CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
+	CalculateSwathAndDETConfiguration_params->SwathHeightY = locals->SwathHeightY;
+	CalculateSwathAndDETConfiguration_params->SwathHeightC = locals->SwathHeightC;
+	CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = locals->DETBufferSizeInKByte;
+	CalculateSwathAndDETConfiguration_params->DETBufferSizeY = locals->DETBufferSizeY;
+	CalculateSwathAndDETConfiguration_params->DETBufferSizeC = locals->DETBufferSizeC;
+	CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &locals->UnboundedRequestEnabled;
+	CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &locals->compbuf_reserved_space_64b;
+	CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &locals->compbuf_reserved_space_zs;
+	CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &locals->CompressedBufferSizeInkByte;
+	CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
+	CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
+
+	// VBA_DELTA
+	// Calculate DET size, swath height here. In VBA, they are calculated in the mode check stage.
+	CalculateSwathAndDETConfiguration(&mode_lib->scratch,
+		CalculateSwathAndDETConfiguration_params);
+
+	// DCFCLK Deep Sleep
+	CalculateDCFCLKDeepSleep(
+		mode_lib->ms.num_active_planes,
+		locals->BytePerPixelY,
+		locals->BytePerPixelC,
+		mode_lib->ms.cache_display_cfg.plane.VRatio,
+		mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
+		locals->SwathWidthY,
+		locals->SwathWidthC,
+		mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+		mode_lib->ms.cache_display_cfg.plane.HRatio,
+		mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
+		mode_lib->ms.cache_display_cfg.timing.PixelClock,
+		locals->PSCL_THROUGHPUT,
+		locals->PSCL_THROUGHPUT_CHROMA,
+		locals->Dppclk,
+		locals->ReadBandwidthSurfaceLuma,
+		locals->ReadBandwidthSurfaceChroma,
+		mode_lib->ms.soc.return_bus_width_bytes,
+
+		/* Output */
+		&locals->DCFCLKDeepSleep);
+
+	// DSCCLK
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if ((mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] != k) || !mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k]) {
+			locals->DSCCLK_calculated[k] = 0.0;
+		} else {
+			if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420)
+				s->DSCFormatFactor = 2;
+			else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444)
+				s->DSCFormatFactor = 1;
+			else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
+				s->DSCFormatFactor = 2;
+			else
+				s->DSCFormatFactor = 1;
+			if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_4to1)
+				locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
+			else if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_2to1)
+				locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
+			else
+				locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
+		}
+	}
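The DSCCLK values above encode a simple rule: a DSC engine processes three pixels per clock, so the base requirement is PixelClockBackEnd / 3; each ODM split halves the per-engine share (divide by 6 for 2:1, by 12 for 4:1); the format factor of 2 for 4:2:0, native 4:2:2 and HDMI FRL halves it again; and dividing by (1 - downspread%) adds clock margin. A worked example with assumed numbers:

    #include <stdio.h>

    int main(void)
    {
        /* hypothetical: 1188 MHz back-end pixel clock, 4:4:4, 2:1 ODM, 0.5% downspread */
        double pixel_clock_backend_mhz = 1188.0;
        int odm_divider = 6;          /* 3 = no ODM, 6 = combine 2:1, 12 = combine 4:1 */
        int dsc_format_factor = 1;    /* 2 for 4:2:0, n422 or hdmifrl */
        double downspread_pct = 0.5;

        double dscclk = pixel_clock_backend_mhz / odm_divider / dsc_format_factor
                        / (1 - downspread_pct / 100);
        printf("DSCCLK = %.2f MHz\n", dscclk); /* ~198.99 MHz */
        return 0;
    }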
In VBA, they are calculated in mode check stage + CalculateSwathAndDETConfiguration(&mode_lib->scratch, + CalculateSwathAndDETConfiguration_params); + + // DCFCLK Deep Sleep + CalculateDCFCLKDeepSleep( + mode_lib->ms.num_active_planes, + locals->BytePerPixelY, + locals->BytePerPixelC, + mode_lib->ms.cache_display_cfg.plane.VRatio, + mode_lib->ms.cache_display_cfg.plane.VRatioChroma, + locals->SwathWidthY, + locals->SwathWidthC, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + mode_lib->ms.cache_display_cfg.plane.HRatio, + mode_lib->ms.cache_display_cfg.plane.HRatioChroma, + mode_lib->ms.cache_display_cfg.timing.PixelClock, + locals->PSCL_THROUGHPUT, + locals->PSCL_THROUGHPUT_CHROMA, + locals->Dppclk, + locals->ReadBandwidthSurfaceLuma, + locals->ReadBandwidthSurfaceChroma, + mode_lib->ms.soc.return_bus_width_bytes, + + /* Output */ + &locals->DCFCLKDeepSleep); + + // DSCCLK + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if ((mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] != k) || !mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k]) { + locals->DSCCLK_calculated[k] = 0.0; + } else { + if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420) + s->DSCFormatFactor = 2; + else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444) + s->DSCFormatFactor = 1; + else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) + s->DSCFormatFactor = 2; + else + s->DSCFormatFactor = 1; + if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_4to1) + locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100); + else if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_2to1) + locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100); + else + locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100); + } + } + + // DSC Delay + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + locals->DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k], + mode_lib->ms.cache_display_cfg.hw.ODMMode[k], + mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k], + mode_lib->ms.cache_display_cfg.output.OutputBpp[k], + mode_lib->ms.cache_display_cfg.timing.HActive[k], + mode_lib->ms.cache_display_cfg.timing.HTotal[k], + mode_lib->ms.cache_display_cfg.hw.NumberOfDSCSlices[k], + mode_lib->ms.cache_display_cfg.output.OutputFormat[k], + mode_lib->ms.cache_display_cfg.output.OutputEncoder[k], + mode_lib->ms.cache_display_cfg.timing.PixelClock[k], + mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]); + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) + for (j = 0; j < mode_lib->ms.num_active_planes; ++j) // NumberOfSurfaces + if (j != k && mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j && mode_lib->ms.cache_display_cfg.hw.DSCEnabled[j]) + locals->DSCDelay[k] = locals->DSCDelay[j]; + + // Prefetch + CalculateSurfaceSizeInMall( + mode_lib->ms.num_active_planes, + mode_lib->ms.soc.mall_allocated_for_dcn_mbytes, + mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen, + mode_lib->ms.cache_display_cfg.surface.DCCEnable, + 
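+    // Each DSC engine consumes three pixels per DSCCLK cycle, so the base engine
+    // clock is PixelClockBackEnd / 3; ODM combine 2:1 and 4:1 split the stream
+    // across two or four engines (hence / 6 and / 12), DSCFormatFactor = 2 halves
+    // the clock for formats carrying two pixels per container (4:2:0, n422 or
+    // HDMI FRL), and the (1 - dcn_downspread_percent / 100) term adds
+    // spread-spectrum margin.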
+    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+        if ((mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] != k) || !mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k]) {
+            locals->DSCCLK_calculated[k] = 0.0;
+        } else {
+            if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420)
+                s->DSCFormatFactor = 2;
+            else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444)
+                s->DSCFormatFactor = 1;
+            else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
+                s->DSCFormatFactor = 2;
+            else
+                s->DSCFormatFactor = 1;
+            if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_4to1)
+                locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
+            else if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_2to1)
+                locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
+            else
+                locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
+        }
+    }
+
+    // DSC Delay
+    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+        locals->DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k],
+            mode_lib->ms.cache_display_cfg.hw.ODMMode[k],
+            mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
+            mode_lib->ms.cache_display_cfg.output.OutputBpp[k],
+            mode_lib->ms.cache_display_cfg.timing.HActive[k],
+            mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+            mode_lib->ms.cache_display_cfg.hw.NumberOfDSCSlices[k],
+            mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+            mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
+            mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+            mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
+    }
+
+    for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+        for (j = 0; j < mode_lib->ms.num_active_planes; ++j) // NumberOfSurfaces
+            if (j != k && mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j && mode_lib->ms.cache_display_cfg.hw.DSCEnabled[j])
+                locals->DSCDelay[k] = locals->DSCDelay[j];
+
+    // Prefetch
+    CalculateSurfaceSizeInMall(
+        mode_lib->ms.num_active_planes,
+        mode_lib->ms.soc.mall_allocated_for_dcn_mbytes,
+        mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen,
+        mode_lib->ms.cache_display_cfg.surface.DCCEnable,
+        mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
+        mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
+        mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
+        mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
+        mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
+        mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
+        mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
+        locals->BytePerPixelY,
+        mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma,
+        mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma,
+        locals->BytePerPixelC,
+        mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
+        mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
+        mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
+        mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
+        locals->BlockWidth256BytesY,
+        locals->BlockWidth256BytesC,
+        locals->BlockHeight256BytesY,
+        locals->BlockHeight256BytesC,
+        locals->BlockWidthY,
+        locals->BlockWidthC,
+        locals->BlockHeightY,
+        locals->BlockHeightC,
+
+        /* Output */
+        locals->SurfaceSizeInTheMALL,
+        &s->dummy_boolean[0]); /* dml_bool_t *ExceededMALLSize */
+
+    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+        s->SurfaceParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
+        s->SurfaceParameters[k].DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
+        s->SurfaceParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
+        s->SurfaceParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
+        s->SurfaceParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
+        s->SurfaceParameters[k].BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
+        s->SurfaceParameters[k].BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
+        s->SurfaceParameters[k].BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
+        s->SurfaceParameters[k].BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
+        s->SurfaceParameters[k].BlockWidthY = locals->BlockWidthY[k];
+        s->SurfaceParameters[k].BlockHeightY = locals->BlockHeightY[k];
+        s->SurfaceParameters[k].BlockWidthC = locals->BlockWidthC[k];
+        s->SurfaceParameters[k].BlockHeightC = locals->BlockHeightC[k];
+        s->SurfaceParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
+        s->SurfaceParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
+        s->SurfaceParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
+        s->SurfaceParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
+        s->SurfaceParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k];
+        s->SurfaceParameters[k].BytePerPixelY = locals->BytePerPixelY[k];
+        s->SurfaceParameters[k].BytePerPixelC = locals->BytePerPixelC[k];
+        s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+        s->SurfaceParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+        s->SurfaceParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
+        s->SurfaceParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k];
+        s->SurfaceParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k];
+        s->SurfaceParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k];
+        s->SurfaceParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
+        s->SurfaceParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
+        s->SurfaceParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
+        s->SurfaceParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
+        s->SurfaceParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
+        s->SurfaceParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
+        s->SurfaceParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
+        s->SurfaceParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
+        s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
+        s->SurfaceParameters[k].SwathHeightY = locals->SwathHeightY[k];
+        s->SurfaceParameters[k].SwathHeightC = locals->SwathHeightC[k];
+    }
+
+    CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+    CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
+    CalculateVMRowAndSwath_params->SurfaceSizeInMALL = locals->SurfaceSizeInTheMALL;
+    CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
+    CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
+    CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
+    CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
+    CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+    CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
+    CalculateVMRowAndSwath_params->SwathWidthY = locals->SwathWidthY;
+    CalculateVMRowAndSwath_params->SwathWidthC = locals->SwathWidthC;
+    CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+    CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+    CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+    CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+    CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
+    CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+    CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
+    CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
+    CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
+    CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
+    CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = locals->dpte_row_width_luma_ub;
+    CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = locals->dpte_row_width_chroma_ub;
+    CalculateVMRowAndSwath_params->dpte_row_height_luma = locals->dpte_row_height;
+    CalculateVMRowAndSwath_params->dpte_row_height_chroma = locals->dpte_row_height_chroma;
+    CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = locals->dpte_row_height_linear;
+    CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = locals->dpte_row_height_linear_chroma;
+    CalculateVMRowAndSwath_params->meta_req_width = locals->meta_req_width;
+    CalculateVMRowAndSwath_params->meta_req_width_chroma = locals->meta_req_width_chroma;
+    CalculateVMRowAndSwath_params->meta_req_height = locals->meta_req_height;
+    CalculateVMRowAndSwath_params->meta_req_height_chroma = locals->meta_req_height_chroma;
+    CalculateVMRowAndSwath_params->meta_row_width = locals->meta_row_width;
+    CalculateVMRowAndSwath_params->meta_row_width_chroma = locals->meta_row_width_chroma;
+    CalculateVMRowAndSwath_params->meta_row_height = locals->meta_row_height;
+    CalculateVMRowAndSwath_params->meta_row_height_chroma = locals->meta_row_height_chroma;
+    CalculateVMRowAndSwath_params->vm_group_bytes = locals->vm_group_bytes;
+    CalculateVMRowAndSwath_params->dpte_group_bytes = locals->dpte_group_bytes;
+    CalculateVMRowAndSwath_params->PixelPTEReqWidthY = locals->PixelPTEReqWidthY;
+    CalculateVMRowAndSwath_params->PixelPTEReqHeightY = locals->PixelPTEReqHeightY;
+    CalculateVMRowAndSwath_params->PTERequestSizeY = locals->PTERequestSizeY;
+    CalculateVMRowAndSwath_params->PixelPTEReqWidthC = locals->PixelPTEReqWidthC;
+    CalculateVMRowAndSwath_params->PixelPTEReqHeightC = locals->PixelPTEReqHeightC;
+    CalculateVMRowAndSwath_params->PTERequestSizeC = locals->PTERequestSizeC;
+    CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = locals->dpde0_bytes_per_frame_ub_l;
+    CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = locals->meta_pte_bytes_per_frame_ub_l;
+    CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = locals->dpde0_bytes_per_frame_ub_c;
+    CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = locals->meta_pte_bytes_per_frame_ub_c;
+    CalculateVMRowAndSwath_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY;
+    CalculateVMRowAndSwath_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC;
+    CalculateVMRowAndSwath_params->VInitPreFillY = locals->VInitPreFillY;
+    CalculateVMRowAndSwath_params->VInitPreFillC = locals->VInitPreFillC;
+    CalculateVMRowAndSwath_params->MaxNumSwathY = locals->MaxNumSwathY;
+    CalculateVMRowAndSwath_params->MaxNumSwathC = locals->MaxNumSwathC;
+    CalculateVMRowAndSwath_params->meta_row_bw = locals->meta_row_bw;
+    CalculateVMRowAndSwath_params->dpte_row_bw = locals->dpte_row_bw;
+    CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow;
+    CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame;
+    CalculateVMRowAndSwath_params->MetaRowByte = locals->MetaRowByte;
+    CalculateVMRowAndSwath_params->use_one_row_for_frame = locals->use_one_row_for_frame;
+    CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = locals->use_one_row_for_frame_flip;
+    CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = locals->UsesMALLForStaticScreen;
+    CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = locals->PTE_BUFFER_MODE;
+    CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = locals->BIGK_FRAGMENT_SIZE;
+
+    CalculateVMRowAndSwath(&mode_lib->scratch,
+        CalculateVMRowAndSwath_params);
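+
+    // Worst-case reorder buffer footprint: the largest per-channel out-of-order
+    // return allocation across the three traffic mixes (pixel only, pixel + vm,
+    // vm only), multiplied by the channel count.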
+    s->ReorderBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(
+        mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
+        mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
+        mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
+
+    s->VMDataOnlyReturnBW = dml_get_return_bw_mbps_vm_only(&mode_lib->ms.soc,
+        mode_lib->ms.state.use_ideal_dram_bw_strobe,
+        mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+        locals->Dcfclk,
+        mode_lib->ms.FabricClock,
+        mode_lib->ms.DRAMSpeed);
+
+#ifdef __DML_VBA_DEBUG__
+    dml_print("DML::%s: locals->Dcfclk = %f\n", __func__, locals->Dcfclk);
+    dml_print("DML::%s: mode_lib->ms.soc.return_bus_width_bytes = %u\n", __func__, mode_lib->ms.soc.return_bus_width_bytes);
+    dml_print("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
+    dml_print("DML::%s: mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes = %u\n", __func__, mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes);
+    dml_print("DML::%s: mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent = %f\n", __func__, mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent);
+    dml_print("DML::%s: mode_lib->ms.DRAMSpeed = %f\n", __func__, mode_lib->ms.DRAMSpeed);
+    dml_print("DML::%s: mode_lib->ms.soc.num_chans = %u\n", __func__, mode_lib->ms.soc.num_chans);
+    dml_print("DML::%s: mode_lib->ms.soc.dram_channel_width_bytes = %u\n", __func__, mode_lib->ms.soc.dram_channel_width_bytes);
+    dml_print("DML::%s: mode_lib->ms.state_idx = %u\n", __func__, mode_lib->ms.state_idx);
+    dml_print("DML::%s: mode_lib->ms.max_state_idx = %u\n", __func__, mode_lib->ms.max_state_idx);
+    dml_print("DML::%s: mode_lib->ms.state.use_ideal_dram_bw_strobe = %u\n", __func__, mode_lib->ms.state.use_ideal_dram_bw_strobe);
+    dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, s->VMDataOnlyReturnBW);
+    dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
+#endif
+
+    s->HostVMInefficiencyFactor = 1.0;
+    if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable)
+        s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBW / s->VMDataOnlyReturnBW;
+
+    s->TotalDCCActiveDPP = 0;
+    s->TotalActiveDPP = 0;
+    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+        s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
+        if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k])
+            s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
+    }
+
+    locals->UrgentExtraLatency = CalculateExtraLatency(
+        mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles,
+        s->ReorderBytes,
+        locals->Dcfclk,
+        s->TotalActiveDPP,
+        mode_lib->ms.ip.pixel_chunk_size_kbytes,
+        s->TotalDCCActiveDPP,
+        mode_lib->ms.ip.meta_chunk_size_kbytes,
+        mode_lib->ms.ReturnBW,
+        mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+        mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+        mode_lib->ms.num_active_planes,
+        mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+        locals->dpte_group_bytes,
+        s->HostVMInefficiencyFactor,
+        mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+        mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
+
+    locals->TCalc = 24.0 / locals->DCFCLKDeepSleep;
+
+    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+        if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+            if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+                locals->WritebackDelay[k] =
+                    mode_lib->ms.state.writeback_latency_us +
+                    CalculateWriteBackDelay(
+                        mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
+                        mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
+                        mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
+                        mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
+                        mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
+                        mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
+                        mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
+                        mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk;
+            } else
+                locals->WritebackDelay[k] = 0;
+            for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
+                if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[j] == k
+                    && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[j] == true) {
+                    locals->WritebackDelay[k] =
+                        dml_max(
+                            locals->WritebackDelay[k],
+                            mode_lib->ms.state.writeback_latency_us +
+                            CalculateWriteBackDelay(
+                                mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[j],
+                                mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[j],
+                                mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[j],
+                                mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[j],
+                                mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[j],
+                                mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[j],
+                                mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[j],
+                                mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk);
+                }
+            }
+        }
+    }
+
+    for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+        for (j = 0; j < mode_lib->ms.num_active_planes; ++j)
+            if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j)
+                locals->WritebackDelay[k] = locals->WritebackDelay[j];
+
+    locals->UrgentLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us,
+        mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
+        mode_lib->ms.state.urgent_latency_vm_data_only_us,
+        mode_lib->ms.soc.do_urgent_latency_adjustment,
+        mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
+        mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
+        mode_lib->ms.FabricClock);
+
+    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+        CalculateUrgentBurstFactor(mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+            locals->swath_width_luma_ub[k],
+            locals->swath_width_chroma_ub[k],
+            locals->SwathHeightY[k],
+            locals->SwathHeightC[k],
+            mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+            locals->UrgentLatency,
+            mode_lib->ms.ip.cursor_buffer_size,
+            mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
+            mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
+            mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+            mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+            locals->BytePerPixelDETY[k],
+            locals->BytePerPixelDETC[k],
+            locals->DETBufferSizeY[k],
+            locals->DETBufferSizeC[k],
+
+            /* output */
+            &locals->UrgBurstFactorCursor[k],
+            &locals->UrgBurstFactorLuma[k],
+            &locals->UrgBurstFactorChroma[k],
+            &locals->NoUrgentLatencyHiding[k]);
+
+        locals->cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 /
+            ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+    }
+
+    s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
+    s->MaxVStartupAllPlanes = 0;
+
+    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+        s->MaxVStartupLines[k] = CalculateMaxVStartup(k,
+            mode_lib->ms.ip.ptoi_supported,
+            mode_lib->ms.ip.vblank_nom_default_us,
+            &mode_lib->ms.cache_display_cfg.timing,
+            locals->WritebackDelay[k]);
+
+#ifdef __DML_VBA_DEBUG__
+        dml_print("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
+        dml_print("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, locals->WritebackDelay[k]);
+#endif
+    }
+
+    for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+        s->MaxVStartupAllPlanes = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes, s->MaxVStartupLines[k]));
+
+    s->ImmediateFlipRequirementFinal = false;
+    for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+        s->ImmediateFlipRequirementFinal = s->ImmediateFlipRequirementFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
+    }
+#ifdef __DML_VBA_DEBUG__
+    dml_print("DML::%s: ImmediateFlipRequirementFinal = %u\n", __func__, s->ImmediateFlipRequirementFinal);
+#endif
+
+    // The prefetch schedule should only be calculated once, as per the AllowForPStateChangeOrStutterInVBlank requirement.
+    // If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. the power saving features are only tried
+    // if possible), then try to program for the best power saving features in order of difficulty (dram, fclk, stutter).
+    s->iteration = 0;
+    s->MaxTotalRDBandwidth = 0;
+    s->AllPrefetchModeTested = false;
+    for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+        CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]);
+        s->NextPrefetchMode[k] = s->MinPrefetchMode[k];
+    }
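+
+    // Outer scheduling search: sweep VStartupLines upward each iteration; once every
+    // plane's MaxVStartupLines is exhausted, restart from __DML_VBA_MIN_VSTARTUP__
+    // with the next prefetch mode per plane (demanding fewer power-saving features),
+    // until a schedule fits or all prefetch modes have been tested, with a hard stop
+    // after 2500 iterations.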
+    do {
+        s->MaxTotalRDBandwidthNoUrgentBurst = 0.0;
+        s->DestinationLineTimesForPrefetchLessThan2 = false;
+        s->VRatioPrefetchMoreThanMax = false;
+
+        dml_print("DML::%s: Start one iteration: VStartupLines = %u\n", __func__, s->VStartupLines);
+
+        s->AllPrefetchModeTested = true;
+        s->MaxTotalRDBandwidth = 0;
+        for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+            locals->PrefetchMode[k] = s->NextPrefetchMode[k];
+            TWait = CalculateTWait(
+                locals->PrefetchMode[k],
+                mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+                mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+                mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k],
+                mode_lib->ms.state.dram_clock_change_latency_us,
+                mode_lib->ms.state.fclk_change_latency_us,
+                locals->UrgentLatency,
+                mode_lib->ms.state.sr_enter_plus_exit_time_us);
+
+            myPipe = &s->myPipe;
+            myPipe->Dppclk = locals->Dppclk[k];
+            myPipe->Dispclk = locals->Dispclk;
+            myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
+            myPipe->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
+            myPipe->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
+            myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k];
+            myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
+            myPipe->BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
+            myPipe->BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
+            myPipe->BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
+            myPipe->BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
+            myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
+            myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k];
+            myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k];
+            myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
+            myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k];
+            myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
+            myPipe->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode[k];
+            myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
+            myPipe->BytePerPixelY = locals->BytePerPixelY[k];
+            myPipe->BytePerPixelC = locals->BytePerPixelC[k];
+            myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+
+#ifdef __DML_VBA_DEBUG__
+            dml_print("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
+            dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]);
+            dml_print("DML::%s: PrefetchMode[k] = %u (Min=%u Max=%u)\n", __func__, locals->PrefetchMode[k], s->MinPrefetchMode[k], s->MaxPrefetchMode[k]);
+#endif
+
+            CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
+            CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor;
+            CalculatePrefetchSchedule_params->myPipe = myPipe;
+            CalculatePrefetchSchedule_params->DSCDelay = locals->DSCDelay[k];
+            CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
+            CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl;
+            CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only;
+            CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
+            CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal;
+            CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(locals->SwathWidthY[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
+            CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
+            CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
+            CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k]));
+            CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k];
+            CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+            CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+            CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+            CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+            CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+            CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
+            CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
+            CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
+            CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
+            CalculatePrefetchSchedule_params->UrgentLatency = locals->UrgentLatency;
+            CalculatePrefetchSchedule_params->UrgentExtraLatency = locals->UrgentExtraLatency;
+            CalculatePrefetchSchedule_params->TCalc = locals->TCalc;
+            CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame[k];
+            CalculatePrefetchSchedule_params->MetaRowByte = locals->MetaRowByte[k];
+            CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow[k];
+            CalculatePrefetchSchedule_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY[k];
+            CalculatePrefetchSchedule_params->VInitPreFillY = locals->VInitPreFillY[k];
+            CalculatePrefetchSchedule_params->MaxNumSwathY = locals->MaxNumSwathY[k];
+            CalculatePrefetchSchedule_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC[k];
+            CalculatePrefetchSchedule_params->VInitPreFillC = locals->VInitPreFillC[k];
+            CalculatePrefetchSchedule_params->MaxNumSwathC = locals->MaxNumSwathC[k];
+            CalculatePrefetchSchedule_params->swath_width_luma_ub = locals->swath_width_luma_ub[k];
+            CalculatePrefetchSchedule_params->swath_width_chroma_ub = locals->swath_width_chroma_ub[k];
+            CalculatePrefetchSchedule_params->SwathHeightY = locals->SwathHeightY[k];
+            CalculatePrefetchSchedule_params->SwathHeightC = locals->SwathHeightC[k];
+            CalculatePrefetchSchedule_params->TWait = TWait;
+            CalculatePrefetchSchedule_params->DSTXAfterScaler = &locals->DSTXAfterScaler[k];
+            CalculatePrefetchSchedule_params->DSTYAfterScaler = &locals->DSTYAfterScaler[k];
+            CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &locals->DestinationLinesForPrefetch[k];
+            CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &locals->DestinationLinesToRequestVMInVBlank[k];
+            CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &locals->DestinationLinesToRequestRowInVBlank[k];
+            CalculatePrefetchSchedule_params->VRatioPrefetchY = &locals->VRatioPrefetchY[k];
+            CalculatePrefetchSchedule_params->VRatioPrefetchC = &locals->VRatioPrefetchC[k];
+            CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &locals->RequiredPrefetchPixDataBWLuma[k];
+            CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &locals->RequiredPrefetchPixDataBWChroma[k];
+            CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &locals->NotEnoughTimeForDynamicMetadata[k];
+            CalculatePrefetchSchedule_params->Tno_bw = &locals->Tno_bw[k];
+            CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &locals->prefetch_vmrow_bw[k];
+            CalculatePrefetchSchedule_params->Tdmdl_vm = &locals->Tdmdl_vm[k];
+            CalculatePrefetchSchedule_params->Tdmdl = &locals->Tdmdl[k];
+            CalculatePrefetchSchedule_params->TSetup = &locals->TSetup[k];
+            CalculatePrefetchSchedule_params->VUpdateOffsetPix = &locals->VUpdateOffsetPix[k];
+            CalculatePrefetchSchedule_params->VUpdateWidthPix = &locals->VUpdateWidthPix[k];
+            CalculatePrefetchSchedule_params->VReadyOffsetPix = &locals->VReadyOffsetPix[k];
+
+            locals->NoTimeToPrefetch[k] =
+                CalculatePrefetchSchedule(&mode_lib->scratch,
+                    CalculatePrefetchSchedule_params);
+
+#ifdef __DML_VBA_DEBUG__
+            dml_print("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
+#endif
+            locals->VStartup[k] = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k]));
+            locals->VStartupMin[k] = locals->VStartup[k];
+        }
+
+        for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+            CalculateUrgentBurstFactor(
+                mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+                locals->swath_width_luma_ub[k],
+                locals->swath_width_chroma_ub[k],
+                locals->SwathHeightY[k],
+                locals->SwathHeightC[k],
+                mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+                locals->UrgentLatency,
+                mode_lib->ms.ip.cursor_buffer_size,
+                mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
+                mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
+                locals->VRatioPrefetchY[k],
+                locals->VRatioPrefetchC[k],
+                locals->BytePerPixelDETY[k],
+                locals->BytePerPixelDETC[k],
+                locals->DETBufferSizeY[k],
+                locals->DETBufferSizeC[k],
+                /* Output */
+                &locals->UrgBurstFactorCursorPre[k],
+                &locals->UrgBurstFactorLumaPre[k],
+                &locals->UrgBurstFactorChromaPre[k],
+                &locals->NoUrgentLatencyHidingPre[k]);
+
+            locals->cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * locals->VRatioPrefetchY[k];
+
+#ifdef __DML_VBA_DEBUG__
+            dml_print("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
+            dml_print("DML::%s: k=%0u UrgBurstFactorLuma=%f\n", __func__, k, locals->UrgBurstFactorLuma[k]);
+            dml_print("DML::%s: k=%0u UrgBurstFactorChroma=%f\n", __func__, k, locals->UrgBurstFactorChroma[k]);
+            dml_print("DML::%s: k=%0u UrgBurstFactorLumaPre=%f\n", __func__, k, locals->UrgBurstFactorLumaPre[k]);
+            dml_print("DML::%s: k=%0u UrgBurstFactorChromaPre=%f\n", __func__, k, locals->UrgBurstFactorChromaPre[k]);
+
+            dml_print("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, locals->VRatioPrefetchY[k]);
+            dml_print("DML::%s: k=%0u VRatioY=%f\n", __func__, k, mode_lib->ms.cache_display_cfg.plane.VRatio[k]);
+
+            dml_print("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, locals->prefetch_vmrow_bw[k]);
+            dml_print("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
+            dml_print("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
+            dml_print("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, locals->cursor_bw[k]);
+            dml_print("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, locals->meta_row_bw[k]);
+            dml_print("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, locals->dpte_row_bw[k]);
+            dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWLuma[k]);
+            dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWChroma[k]);
+            dml_print("DML::%s: k=%0u cursor_bw_pre=%f\n", __func__, k, locals->cursor_bw_pre[k]);
+            dml_print("DML::%s: k=%0u MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, s->MaxTotalRDBandwidthNoUrgentBurst);
+#endif
+            if (locals->DestinationLinesForPrefetch[k] < 2)
+                s->DestinationLineTimesForPrefetchLessThan2 = true;
+
+            if (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
+                locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
+                ((s->VStartupLines < s->MaxVStartupLines[k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) &&
+                (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE__ || locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE__)))
+                s->VRatioPrefetchMoreThanMax = true;
+
+            //dml_bool_t DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = false;
+            //dml_bool_t DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = false;
+            //if (locals->DestinationLinesToRequestVMInVBlank[k] >= 32) {
+            //    DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = true;
+            //}
+
+            //if (locals->DestinationLinesToRequestRowInVBlank[k] >= 16) {
+            //    DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = true;
+            //}
+        }
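+
+        // FractionOfUrgentBandwidth tracks how much of ReturnBW the no-burst totals
+        // consume. CalculatePrefetchBandwithSupport is then evaluated twice: once
+        // with the real urgent burst factors to decide PrefetchModeSupported, and
+        // once with all factors forced to 1.0 (dummy_unit_vector) to obtain the
+        // non-urgent bandwidth totals.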
+        locals->FractionOfUrgentBandwidth = s->MaxTotalRDBandwidthNoUrgentBurst / mode_lib->ms.ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+        dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, s->MaxTotalRDBandwidthNoUrgentBurst);
+        dml_print("DML::%s: ReturnBW=%f \n", __func__, mode_lib->ms.ReturnBW);
+        dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, locals->FractionOfUrgentBandwidth);
+#endif
+
+        CalculatePrefetchBandwithSupport(
+            mode_lib->ms.num_active_planes,
+            mode_lib->ms.ReturnBW,
+            mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+            locals->NoUrgentLatencyHidingPre,
+            locals->ReadBandwidthSurfaceLuma,
+            locals->ReadBandwidthSurfaceChroma,
+            locals->RequiredPrefetchPixDataBWLuma,
+            locals->RequiredPrefetchPixDataBWChroma,
+            locals->cursor_bw,
+            locals->meta_row_bw,
+            locals->dpte_row_bw,
+            locals->cursor_bw_pre,
+            locals->prefetch_vmrow_bw,
+            mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+            locals->UrgBurstFactorLuma,
+            locals->UrgBurstFactorChroma,
+            locals->UrgBurstFactorCursor,
+            locals->UrgBurstFactorLumaPre,
+            locals->UrgBurstFactorChromaPre,
+            locals->UrgBurstFactorCursorPre,
+
+            /* output */
+            &s->MaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
+            &s->MaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
+            &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
+            &locals->PrefetchModeSupported);
+
+        for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+            s->dummy_unit_vector[k] = 1.0;
+
+        CalculatePrefetchBandwithSupport(mode_lib->ms.num_active_planes,
+            mode_lib->ms.ReturnBW,
+            mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+            locals->NoUrgentLatencyHidingPre,
+            locals->ReadBandwidthSurfaceLuma,
+            locals->ReadBandwidthSurfaceChroma,
+            locals->RequiredPrefetchPixDataBWLuma,
+            locals->RequiredPrefetchPixDataBWChroma,
+            locals->cursor_bw,
+            locals->meta_row_bw,
+            locals->dpte_row_bw,
+            locals->cursor_bw_pre,
+            locals->prefetch_vmrow_bw,
+            mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+            s->dummy_unit_vector,
+            s->dummy_unit_vector,
+            s->dummy_unit_vector,
+            s->dummy_unit_vector,
+            s->dummy_unit_vector,
+            s->dummy_unit_vector,
+
+            /* output */
+            &s->NonUrgentMaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
+            &s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
+            &locals->FractionOfUrgentBandwidth,
+            &s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport
+
+        if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) {
+            dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
+            dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
+            locals->PrefetchModeSupported = false;
+        }
+
+        for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+            if (locals->NoTimeToPrefetch[k] == true || locals->NotEnoughTimeForDynamicMetadata[k]) {
+                dml_print("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
+                dml_print("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, locals->NotEnoughTimeForDynamicMetadata[k]);
+                locals->PrefetchModeSupported = false;
+            }
+        }
+
+        if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) {
+            locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
+                mode_lib->ms.num_active_planes,
+                mode_lib->ms.ReturnBW,
+                locals->ReadBandwidthSurfaceLuma,
+                locals->ReadBandwidthSurfaceChroma,
+                locals->RequiredPrefetchPixDataBWLuma,
+                locals->RequiredPrefetchPixDataBWChroma,
+                locals->cursor_bw,
+                locals->cursor_bw_pre,
+                mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+                locals->UrgBurstFactorLuma,
+                locals->UrgBurstFactorChroma,
+                locals->UrgBurstFactorCursor,
+                locals->UrgBurstFactorLumaPre,
+                locals->UrgBurstFactorChromaPre,
+                locals->UrgBurstFactorCursorPre);
+
+            locals->TotImmediateFlipBytes = 0;
+            for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+                if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) {
+                    locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k]);
+                    if (locals->use_one_row_for_frame_flip[k]) {
+                        locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (2 * locals->PixelPTEBytesPerRow[k]);
+                    } else {
+                        locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * locals->PixelPTEBytesPerRow[k];
+                    }
+#ifdef __DML_VBA_DEBUG__
+                    dml_print("DML::%s: k = %u\n", __func__, k);
+                    dml_print("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
+                    dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, locals->PDEAndMetaPTEBytesFrame[k]);
+                    dml_print("DML::%s: MetaRowByte = %u\n", __func__, locals->MetaRowByte[k]);
+                    dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, locals->PixelPTEBytesPerRow[k]);
+                    dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, locals->TotImmediateFlipBytes);
+#endif
+                }
+            }
+            for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+                CalculateFlipSchedule(
+                    s->HostVMInefficiencyFactor,
+                    locals->UrgentExtraLatency,
+                    locals->UrgentLatency,
+                    mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
+                    mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+                    mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
+                    mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+                    mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+                    locals->PDEAndMetaPTEBytesFrame[k],
+                    locals->MetaRowByte[k],
+                    locals->PixelPTEBytesPerRow[k],
+                    locals->BandwidthAvailableForImmediateFlip,
+                    locals->TotImmediateFlipBytes,
+                    mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+                    mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+                    mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+                    mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+                    locals->Tno_bw[k],
+                    mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
+                    locals->dpte_row_height[k],
+                    locals->meta_row_height[k],
+                    locals->dpte_row_height_chroma[k],
+                    locals->meta_row_height_chroma[k],
+                    locals->use_one_row_for_frame_flip[k],
+
+                    /* Output */
+                    &locals->DestinationLinesToRequestVMInImmediateFlip[k],
+                    &locals->DestinationLinesToRequestRowInImmediateFlip[k],
+                    &locals->final_flip_bw[k],
+                    &locals->ImmediateFlipSupportedForPipe[k]);
+            }
+
+            CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
+                mode_lib->ms.ReturnBW,
+                mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+                mode_lib->ms.policy.ImmediateFlipRequirement,
+                locals->final_flip_bw,
+                locals->ReadBandwidthSurfaceLuma,
+                locals->ReadBandwidthSurfaceChroma,
+                locals->RequiredPrefetchPixDataBWLuma,
+                locals->RequiredPrefetchPixDataBWChroma,
+                locals->cursor_bw,
+                locals->meta_row_bw,
+                locals->dpte_row_bw,
+                locals->cursor_bw_pre,
+                locals->prefetch_vmrow_bw,
+                mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+                locals->UrgBurstFactorLuma,
+                locals->UrgBurstFactorChroma,
+                locals->UrgBurstFactorCursor,
+                locals->UrgBurstFactorLumaPre,
+                locals->UrgBurstFactorChromaPre,
+                locals->UrgBurstFactorCursorPre,
+
+                /* output */
+                &locals->total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
+                &locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
+                &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
+                &locals->ImmediateFlipSupported); // dml_bool_t *ImmediateFlipBandwidthSupport
+
+            CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
+                mode_lib->ms.ReturnBW,
+                mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+                mode_lib->ms.policy.ImmediateFlipRequirement,
+                locals->final_flip_bw,
+                locals->ReadBandwidthSurfaceLuma,
+                locals->ReadBandwidthSurfaceChroma,
+                locals->RequiredPrefetchPixDataBWLuma,
+                locals->RequiredPrefetchPixDataBWChroma,
+                locals->cursor_bw,
+                locals->meta_row_bw,
+                locals->dpte_row_bw,
+                locals->cursor_bw_pre,
+                locals->prefetch_vmrow_bw,
+                mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+                s->dummy_unit_vector,
+                s->dummy_unit_vector,
+                s->dummy_unit_vector,
+                s->dummy_unit_vector,
+                s->dummy_unit_vector,
+                s->dummy_unit_vector,
+
+                /* output */
+                &locals->non_urgent_total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
+                &locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
+                &locals->FractionOfUrgentBandwidthImmediateFlip, // dml_float_t *FractionOfUrgentBandwidth
+                &s->dummy_boolean[0]); // dml_bool_t *ImmediateFlipBandwidthSupport
+
+            for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+                if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required && locals->ImmediateFlipSupportedForPipe[k] == false) {
+                    locals->ImmediateFlipSupported = false;
+#ifdef __DML_VBA_DEBUG__
+                    dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
+#endif
+                }
+            }
+        } else {
+            locals->ImmediateFlipSupported = false;
+            locals->total_dcn_read_bw_with_flip = s->MaxTotalRDBandwidth;
+            locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->MaxTotalRDBandwidthNotIncludingMALLPrefetch;
+            locals->non_urgent_total_dcn_read_bw_with_flip = s->NonUrgentMaxTotalRDBandwidth;
+            locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch;
+        }
+
+        /* consider flip support okay if the flip bw is ok, or when the user doesn't require an iflip and there is no host vm */
+        locals->PrefetchAndImmediateFlipSupported = (locals->PrefetchModeSupported == true &&
+            ((!mode_lib->ms.support.ImmediateFlipSupport && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable && !s->ImmediateFlipRequirementFinal) ||
+            locals->ImmediateFlipSupported)) ? true : false;
+
+#ifdef __DML_VBA_DEBUG__
+        dml_print("DML::%s: PrefetchModeSupported = %u\n", __func__, locals->PrefetchModeSupported);
+        for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+            dml_print("DML::%s: ImmediateFlipRequirement[%u] = %u\n", __func__, k, mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
+        dml_print("DML::%s: HostVMEnable = %u\n", __func__, mode_lib->ms.cache_display_cfg.plane.HostVMEnable);
+        dml_print("DML::%s: ImmediateFlipSupport = %u (from mode_support)\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
+        dml_print("DML::%s: ImmediateFlipSupported = %u\n", __func__, locals->ImmediateFlipSupported);
+        dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, locals->PrefetchAndImmediateFlipSupported);
+#endif
+        dml_print("DML::%s: Done one iteration: VStartupLines=%u, MaxVStartupAllPlanes=%u\n", __func__, s->VStartupLines, s->MaxVStartupAllPlanes);
+
+        s->VStartupLines = s->VStartupLines + 1;
+
+        if (s->VStartupLines > s->MaxVStartupAllPlanes) {
+            s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
+
+            for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+                s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
+
+                if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
+                    s->AllPrefetchModeTested = false;
+                dml_print("DML::%s: VStartupLines=%u, reaches max vstartup, try next prefetch mode=%u\n", __func__, s->VStartupLines-1, s->AllPrefetchModeTested);
+            }
+        } else {
+            s->AllPrefetchModeTested = false;
+        }
+        s->iteration++;
+        if (s->iteration > 2500) {
+            dml_print("ERROR: DML::%s: Too many iterations, exit now\n", __func__);
+            ASSERT(0);
+        }
+    } while (!(locals->PrefetchAndImmediateFlipSupported || s->AllPrefetchModeTested));
+
+    if (locals->PrefetchAndImmediateFlipSupported) {
+        dml_print("DML::%s: Good, Prefetch and flip scheduling solution found at VStartupLines=%u (MaxVStartupAllPlanes=%u)\n", __func__, s->VStartupLines-1, s->MaxVStartupAllPlanes);
+    } else {
+        dml_print("DML::%s: Bad, Prefetch and flip scheduling did NOT find a solution! (MaxVStartupAllPlanes=%u)\n", __func__, s->MaxVStartupAllPlanes);
+    }
+
+    //Watermarks and NB P-State/DRAM Clock Change Support
+    {
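+        // Watermarks are evaluated against the per-state SoC latency set (urgent,
+        // writeback, DRAM/FCLK clock change, SR enter/exit, USR retraining, SMN)
+        // gathered into mmSOCParameters below.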
+        s->mmSOCParameters.UrgentLatency = locals->UrgentLatency;
+        s->mmSOCParameters.ExtraLatency = locals->UrgentExtraLatency;
+        s->mmSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
+        s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us;
+        s->mmSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
+        s->mmSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us;
+        s->mmSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
+        s->mmSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
+        s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us;
+        s->mmSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us;
+        s->mmSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us;
+
+        CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal;
+        CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+        CalculateWatermarks_params->PrefetchMode = locals->PrefetchMode;
+        CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+        CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines;
+        CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits;
+        CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes;
+        CalculateWatermarks_params->DCFCLK = locals->Dcfclk;
+        CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBW;
+        CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
+        CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+        CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
+        CalculateWatermarks_params->dpte_group_bytes = locals->dpte_group_bytes;
+        CalculateWatermarks_params->meta_row_height = locals->meta_row_height;
+        CalculateWatermarks_params->meta_row_height_chroma = locals->meta_row_height_chroma;
+        CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
+        CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes;
+        CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
+        CalculateWatermarks_params->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
+        CalculateWatermarks_params->DETBufferSizeY = locals->DETBufferSizeY;
+        CalculateWatermarks_params->DETBufferSizeC = locals->DETBufferSizeC;
+        CalculateWatermarks_params->SwathHeightY = locals->SwathHeightY;
+        CalculateWatermarks_params->SwathHeightC = locals->SwathHeightC;
+        CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel;
+        CalculateWatermarks_params->SwathWidthY = locals->SwathWidthY;
+        CalculateWatermarks_params->SwathWidthC = locals->SwathWidthC;
+        CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
+        CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
+        CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps;
+        CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma;
+        CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
+        CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma;
+        CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
+        CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
+        CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
+        CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
+        CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+        CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
+        CalculateWatermarks_params->BytePerPixelDETY = locals->BytePerPixelDETY;
+        CalculateWatermarks_params->BytePerPixelDETC = locals->BytePerPixelDETC;
+        CalculateWatermarks_params->DSTXAfterScaler = locals->DSTXAfterScaler;
+        CalculateWatermarks_params->DSTYAfterScaler = locals->DSTYAfterScaler;
+        CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
+        CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat;
+        CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth;
+        CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight;
+        CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight;
+        CalculateWatermarks_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
+        CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
+
+        // Output
+        CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark
+        CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport;
+        CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
+        CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[]
+        CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport;
+        CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported
+        CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport;
+
+        CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+            &mode_lib->scratch,
+            CalculateWatermarks_params);
+
+        /* Copy the calculated watermarks to mp.Watermark as the getter functions are
+         * implemented by the DML team to copy the calculated values from the mp.Watermark interface.
+         * &mode_lib->mp.Watermark and &locals->Watermark are the same address, memcpy may lead to
+         * unexpected behavior. memmove should be used.
+         */
+        memmove(&mode_lib->mp.Watermark, CalculateWatermarks_params->Watermark, sizeof(struct Watermarks));
+
+        for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+            if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+                locals->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
+                    mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackDRAMClockChangeWatermark);
+                locals->WritebackAllowFCLKChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
+                    mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackFCLKChangeWatermark);
+            } else {
+                locals->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
+                locals->WritebackAllowFCLKChangeEndPosition[k] = 0;
+            }
+        }
+    }
+
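+    // Delivery times are reported per plane both at the active VRatio and at the
+    // (typically higher) prefetch VRatio, via the *Prefetch output variants below.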
locals->dpte_row_width_luma_ub, + locals->dpte_row_width_chroma_ub, + + /* Output */ + locals->DST_Y_PER_PTE_ROW_NOM_L, + locals->DST_Y_PER_PTE_ROW_NOM_C, + locals->DST_Y_PER_META_ROW_NOM_L, + locals->DST_Y_PER_META_ROW_NOM_C, + locals->TimePerMetaChunkNominal, + locals->TimePerChromaMetaChunkNominal, + locals->TimePerMetaChunkVBlank, + locals->TimePerChromaMetaChunkVBlank, + locals->TimePerMetaChunkFlip, + locals->TimePerChromaMetaChunkFlip, + locals->time_per_pte_group_nom_luma, + locals->time_per_pte_group_vblank_luma, + locals->time_per_pte_group_flip_luma, + locals->time_per_pte_group_nom_chroma, + locals->time_per_pte_group_vblank_chroma, + locals->time_per_pte_group_flip_chroma); + + CalculateVMGroupAndRequestTimes( + mode_lib->ms.num_active_planes, + mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, + mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels, + mode_lib->ms.cache_display_cfg.timing.HTotal, + locals->BytePerPixelC, + locals->DestinationLinesToRequestVMInVBlank, + locals->DestinationLinesToRequestVMInImmediateFlip, + mode_lib->ms.cache_display_cfg.surface.DCCEnable, + mode_lib->ms.cache_display_cfg.timing.PixelClock, + locals->dpte_row_width_luma_ub, + locals->dpte_row_width_chroma_ub, + locals->vm_group_bytes, + locals->dpde0_bytes_per_frame_ub_l, + locals->dpde0_bytes_per_frame_ub_c, + locals->meta_pte_bytes_per_frame_ub_l, + locals->meta_pte_bytes_per_frame_ub_c, + + /* Output */ + locals->TimePerVMGroupVBlank, + locals->TimePerVMGroupFlip, + locals->TimePerVMRequestVBlank, + locals->TimePerVMRequestFlip); + + // Min TTUVBlank + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (locals->PrefetchMode[k] == 0) { + locals->MinTTUVBlank[k] = dml_max4( + locals->Watermark.DRAMClockChangeWatermark, + locals->Watermark.FCLKChangeWatermark, + locals->Watermark.StutterEnterPlusExitWatermark, + locals->Watermark.UrgentWatermark); + } else if (locals->PrefetchMode[k] == 1) { + locals->MinTTUVBlank[k] = dml_max3( + locals->Watermark.FCLKChangeWatermark, + locals->Watermark.StutterEnterPlusExitWatermark, + locals->Watermark.UrgentWatermark); + } else if (locals->PrefetchMode[k] == 2) { + locals->MinTTUVBlank[k] = dml_max( + locals->Watermark.StutterEnterPlusExitWatermark, + locals->Watermark.UrgentWatermark); + } else { + locals->MinTTUVBlank[k] = locals->Watermark.UrgentWatermark; + } + if (!mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]) + locals->MinTTUVBlank[k] = locals->TCalc + locals->MinTTUVBlank[k]; + } + + // DCC Configuration + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); +#endif + CalculateDCCConfiguration( + mode_lib->ms.cache_display_cfg.surface.DCCEnable[k], + mode_lib->ms.policy.DCCProgrammingAssumesScanDirectionUnknownFinal, + mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k], + mode_lib->ms.NomDETInKByte, + locals->BlockHeight256BytesY[k], + locals->BlockHeight256BytesC[k], + mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k], + locals->BytePerPixelY[k], + locals->BytePerPixelC[k], + locals->BytePerPixelDETY[k], + locals->BytePerPixelDETC[k], + mode_lib->ms.cache_display_cfg.plane.SourceScan[k], + /* Output */ + &locals->DCCYMaxUncompressedBlock[k], + 
&locals->DCCCMaxUncompressedBlock[k], + &locals->DCCYMaxCompressedBlock[k], + &locals->DCCCMaxCompressedBlock[k], + &locals->DCCYIndependentBlock[k], + &locals->DCCCIndependentBlock[k]); + } + + // VStartup Adjustment + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->Tvstartup_margin = (s->MaxVStartupLines[k] - locals->VStartupMin[k]) * mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]; +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, locals->MinTTUVBlank[k]); +#endif + + locals->MinTTUVBlank[k] = locals->MinTTUVBlank[k] + s->Tvstartup_margin; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); + dml_print("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + dml_print("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, locals->MinTTUVBlank[k]); +#endif + + locals->Tdmdl[k] = locals->Tdmdl[k] + s->Tvstartup_margin; + if (mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k] && mode_lib->ms.ip.dynamic_metadata_vm_enabled) { + locals->Tdmdl_vm[k] = locals->Tdmdl_vm[k] + s->Tvstartup_margin; + } + + isInterlaceTiming = (mode_lib->ms.cache_display_cfg.timing.Interlace[k] && !mode_lib->ms.ip.ptoi_supported); + + // The actual positioning of the vstartup + locals->VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]); + + s->dlg_vblank_start = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) : + mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]); + s->LSetup = dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0; + s->blank_lines_remaining = (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]) - locals->VStartup[k]; + + if (s->blank_lines_remaining < 0) { + dml_print("ERROR: Vstartup is larger than vblank!?\n"); + s->blank_lines_remaining = 0; + ASSERT(0); + } + locals->MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup; + + // debug only + s->old_MIN_DST_Y_NEXT_START = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) : + mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) + + dml_max(1.0, dml_ceil((dml_float_t) locals->WritebackDelay[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0)) + + dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0; + + if (((locals->VUpdateOffsetPix[k] + locals->VUpdateWidthPix[k] + locals->VReadyOffsetPix[k]) / (double) mode_lib->ms.cache_display_cfg.timing.HTotal[k]) <= + (isInterlaceTiming ? 
+ dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]) / 2.0, 1.0) : + (int) (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]))) { + locals->VREADY_AT_OR_AFTER_VSYNC[k] = true; + } else { + locals->VREADY_AT_OR_AFTER_VSYNC[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, locals->VStartup[k]); + dml_print("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, locals->VStartupMin[k]); + dml_print("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, locals->VUpdateOffsetPix[k]); + dml_print("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, locals->VUpdateWidthPix[k]); + dml_print("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, locals->VReadyOffsetPix[k]); + dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.HTotal[k]); + dml_print("DML::%s: k=%u, VTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VTotal[k]); + dml_print("DML::%s: k=%u, VActive = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VActive[k]); + dml_print("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]); + dml_print("DML::%s: k=%u, TSetup = %f\n", __func__, k, locals->TSetup[k]); + dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, locals->MIN_DST_Y_NEXT_START[k]); + dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f (old)\n", __func__, k, s->old_MIN_DST_Y_NEXT_START); + dml_print("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, locals->VREADY_AT_OR_AFTER_VSYNC[k]); +#endif + } + + //Maximum Bandwidth Used + s->TotalWRBandwidth = 0; + s->WRBandwidth = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_32) { + s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] / + (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4; + } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) { + s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] / + (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8; + } + s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth; + } + + locals->TotalDataReadBandwidth = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + locals->TotalDataReadBandwidth = locals->TotalDataReadBandwidth + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k]; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, locals->TotalDataReadBandwidth); + dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]); + dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, 
locals->ReadBandwidthSurfaceChroma[k]); +#endif + } + + locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) { + locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = locals->TotalDataReadBandwidthNotIncludingMALLPrefetch + + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k]; + } + } + + CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte; + CalculateStutterEfficiency_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange; + CalculateStutterEfficiency_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled; + CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ms.ip.meta_fifo_size_in_kentries; + CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ms.ip.zero_size_buffer_entries; + CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes; + CalculateStutterEfficiency_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes; + CalculateStutterEfficiency_params->TotalDataReadBandwidth = locals->TotalDataReadBandwidth; + CalculateStutterEfficiency_params->DCFCLK = locals->Dcfclk; + CalculateStutterEfficiency_params->ReturnBW = mode_lib->ms.ReturnBW; + CalculateStutterEfficiency_params->CompbufReservedSpace64B = locals->compbuf_reserved_space_64b; + CalculateStutterEfficiency_params->CompbufReservedSpaceZs = locals->compbuf_reserved_space_zs; + CalculateStutterEfficiency_params->SRExitTime = mode_lib->ms.state.sr_exit_time_us; + CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us; + CalculateStutterEfficiency_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal; + CalculateStutterEfficiency_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming; + CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = locals->Watermark.StutterEnterPlusExitWatermark; + CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = locals->Watermark.Z8StutterEnterPlusExitWatermark; + CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported; + CalculateStutterEfficiency_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace; + CalculateStutterEfficiency_params->MinTTUVBlank = locals->MinTTUVBlank; + CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface; + CalculateStutterEfficiency_params->DETBufferSizeY = locals->DETBufferSizeY; + CalculateStutterEfficiency_params->BytePerPixelY = locals->BytePerPixelY; + CalculateStutterEfficiency_params->BytePerPixelDETY = locals->BytePerPixelDETY; + CalculateStutterEfficiency_params->SwathWidthY = locals->SwathWidthY; + CalculateStutterEfficiency_params->SwathHeightY = locals->SwathHeightY; + CalculateStutterEfficiency_params->SwathHeightC = locals->SwathHeightC; + CalculateStutterEfficiency_params->NetDCCRateLuma = mode_lib->ms.cache_display_cfg.surface.DCCRateLuma; + CalculateStutterEfficiency_params->NetDCCRateChroma = mode_lib->ms.cache_display_cfg.surface.DCCRateChroma; + CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsLuma = 
mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma; + CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsChroma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma; + CalculateStutterEfficiency_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal; + CalculateStutterEfficiency_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal; + CalculateStutterEfficiency_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock; + CalculateStutterEfficiency_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio; + CalculateStutterEfficiency_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan; + CalculateStutterEfficiency_params->BlockHeight256BytesY = locals->BlockHeight256BytesY; + CalculateStutterEfficiency_params->BlockWidth256BytesY = locals->BlockWidth256BytesY; + CalculateStutterEfficiency_params->BlockHeight256BytesC = locals->BlockHeight256BytesC; + CalculateStutterEfficiency_params->BlockWidth256BytesC = locals->BlockWidth256BytesC; + CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = locals->DCCYMaxUncompressedBlock; + CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = locals->DCCCMaxUncompressedBlock; + CalculateStutterEfficiency_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive; + CalculateStutterEfficiency_params->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable; + CalculateStutterEfficiency_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = locals->ReadBandwidthSurfaceLuma; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = locals->ReadBandwidthSurfaceChroma; + CalculateStutterEfficiency_params->meta_row_bw = locals->meta_row_bw; + CalculateStutterEfficiency_params->dpte_row_bw = locals->dpte_row_bw; + CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &locals->StutterEfficiencyNotIncludingVBlank; + CalculateStutterEfficiency_params->StutterEfficiency = &locals->StutterEfficiency; + CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &locals->NumberOfStutterBurstsPerFrame; + CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &locals->Z8StutterEfficiencyNotIncludingVBlank; + CalculateStutterEfficiency_params->Z8StutterEfficiency = &locals->Z8StutterEfficiency; + CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &locals->Z8NumberOfStutterBurstsPerFrame; + CalculateStutterEfficiency_params->StutterPeriod = &locals->StutterPeriod; + CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &locals->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; + + // Stutter Efficiency + CalculateStutterEfficiency(&mode_lib->scratch, + CalculateStutterEfficiency_params); + +#ifdef __DML_VBA_ALLOW_DELTA__ + { + dml_float_t dummy_single[2]; + dml_uint_t dummy_integer[1]; + dml_bool_t dummy_boolean[1]; + + // Calculate z8 stutter eff assuming 0 reserved space + CalculateStutterEfficiency( + locals->CompressedBufferSizeInkByte, + mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange, + locals->UnboundedRequestEnabled, + mode_lib->ms.ip.meta_fifo_size_in_kentries, + mode_lib->ms.ip.zero_size_buffer_entries, + mode_lib->ms.ip.pixel_chunk_size_kbytes, + mode_lib->ms.num_active_planes, + mode_lib->ms.ip.rob_buffer_size_kbytes, + locals->TotalDataReadBandwidth, + locals->Dcfclk, + mode_lib->ms.ReturnBW, + 0, //mode_lib->ms.ip.compbuf_reserved_space_64b, + 0, 
//mode_lib->ms.ip.compbuf_reserved_space_zs, + mode_lib->ms.state.sr_exit_time_us, + mode_lib->ms.state.sr_exit_z8_time_us, + mode_lib->ms.policy.SynchronizeTimingsFinal, + mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming, + locals->Watermark.StutterEnterPlusExitWatermark, + locals->Watermark.Z8StutterEnterPlusExitWatermark, + mode_lib->ms.ip.ptoi_supported, + mode_lib->ms.cache_display_cfg.timing.Interlace, + locals->MinTTUVBlank, + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, + mode_lib->ms.DETBufferSizeY, + locals->BytePerPixelY, + locals->BytePerPixelDETY, + locals->SwathWidthY, + mode_lib->ms.SwathHeightY, + mode_lib->ms.SwathHeightC, + mode_lib->ms.cache_display_cfg.surface.DCCRateLuma, + mode_lib->ms.cache_display_cfg.surface.DCCRateChroma, + mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma, + mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma, + mode_lib->ms.cache_display_cfg.timing.HTotal, + mode_lib->ms.cache_display_cfg.timing.VTotal, + mode_lib->ms.cache_display_cfg.timing.PixelClock, + mode_lib->ms.cache_display_cfg.plane.VRatio, + mode_lib->ms.cache_display_cfg.plane.SourceScan, + locals->BlockHeight256BytesY, + locals->BlockWidth256BytesY, + locals->BlockHeight256BytesC, + locals->BlockWidth256BytesC, + locals->DCCYMaxUncompressedBlock, + locals->DCCCMaxUncompressedBlock, + mode_lib->ms.cache_display_cfg.timing.VActive, + mode_lib->ms.cache_display_cfg.surface.DCCEnable, + mode_lib->ms.cache_display_cfg.writeback.WritebackEnable, + locals->ReadBandwidthSurfaceLuma, + locals->ReadBandwidthSurfaceChroma, + locals->meta_row_bw, + locals->dpte_row_bw, + + /* Output */ + &dummy_single[0], + &dummy_single[1], + &dummy_integer[0], + &locals->Z8StutterEfficiencyNotIncludingVBlankBestCase, + &locals->Z8StutterEfficiencyBestCase, + &locals->Z8NumberOfStutterBurstsPerFrameBestCase, + &locals->StutterPeriodBestCase, + &dummy_boolean[0]); + } +#else + locals->Z8StutterEfficiencyNotIncludingVBlankBestCase = locals->Z8StutterEfficiencyNotIncludingVBlank; + locals->Z8StutterEfficiencyBestCase = locals->Z8StutterEfficiency; + locals->Z8NumberOfStutterBurstsPerFrameBestCase = locals->Z8NumberOfStutterBurstsPerFrame; + locals->StutterPeriodBestCase = locals->StutterPeriod; +#endif + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: --- END --- \n", __func__); +#endif +} // dml_core_mode_programming + +/// Function: dml_core_get_row_heights +/// @brief Get row height for DPTE and META with minimal input. 
+void dml_core_get_row_heights(
+ dml_uint_t *dpte_row_height,
+ dml_uint_t *meta_row_height,
+ const struct display_mode_lib_st *mode_lib,
+ dml_bool_t is_plane1,
+ enum dml_source_format_class SourcePixelFormat,
+ enum dml_swizzle_mode SurfaceTiling,
+ enum dml_rotation_angle ScanDirection,
+ dml_uint_t pitch,
+ dml_uint_t GPUVMMinPageSizeKBytes)
+{
+ dml_uint_t BytePerPixelY;
+ dml_uint_t BytePerPixelC;
+ dml_float_t BytePerPixelInDETY;
+ dml_float_t BytePerPixelInDETC;
+ dml_uint_t BlockHeight256BytesY;
+ dml_uint_t BlockHeight256BytesC;
+ dml_uint_t BlockWidth256BytesY;
+ dml_uint_t BlockWidth256BytesC;
+ dml_uint_t MacroTileWidthY;
+ dml_uint_t MacroTileWidthC;
+ dml_uint_t MacroTileHeightY;
+ dml_uint_t MacroTileHeightC;
+
+ dml_uint_t BytePerPixel;
+ dml_uint_t BlockHeight256Bytes;
+ dml_uint_t BlockWidth256Bytes;
+ dml_uint_t MacroTileWidth;
+ dml_uint_t MacroTileHeight;
+ dml_uint_t PTEBufferSizeInRequests;
+
+ dml_uint_t dummy_integer[16];
+
+ CalculateBytePerPixelAndBlockSizes(
+ SourcePixelFormat,
+ SurfaceTiling,
+
+ /* Output */
+ &BytePerPixelY,
+ &BytePerPixelC,
+ &BytePerPixelInDETY,
+ &BytePerPixelInDETC,
+ &BlockHeight256BytesY,
+ &BlockHeight256BytesC,
+ &BlockWidth256BytesY,
+ &BlockWidth256BytesC,
+ &MacroTileHeightY,
+ &MacroTileHeightC,
+ &MacroTileWidthY,
+ &MacroTileWidthC);
+
+ BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY;
+ BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY;
+ BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY;
+ MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY;
+ MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY;
+ PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: is_plane1 = %u\n", __func__, is_plane1);
+ dml_print("DML_DLG: %s: BytePerPixel = %u\n", __func__, BytePerPixel);
+ dml_print("DML_DLG: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
+ dml_print("DML_DLG: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
+ dml_print("DML_DLG: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
+ dml_print("DML_DLG: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
+ dml_print("DML_DLG: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
+ dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma);
+ dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
+ dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
+#endif
+
+ // just supply enough parameters to calculate the meta and dpte row heights
+ CalculateVMAndRowBytes(
+ 0, // dml_bool_t ViewportStationary,
+ 1, // dml_bool_t DCCEnable,
+ 1, // dml_uint_t NumberOfDPPs,
+ BlockHeight256Bytes,
+ BlockWidth256Bytes,
+ SourcePixelFormat,
+ SurfaceTiling,
+ BytePerPixel,
+ ScanDirection,
+ 0, // dml_uint_t SwathWidth,
+ 0, // dml_uint_t ViewportHeight, (Note: DML calculates the one_row_for_frame height regardless; a test input would be needed if that height is useful)
+ 0, // dml_uint_t ViewportXStart,
+ 0, // dml_uint_t ViewportYStart,
+ 1, // dml_bool_t GPUVMEnable,
+ 4, // dml_uint_t GPUVMMaxPageTableLevels,
+ GPUVMMinPageSizeKBytes,
+ PTEBufferSizeInRequests,
+ pitch,
+ 0, // dml_uint_t DCCMetaPitch,
+ MacroTileWidth,
+ MacroTileHeight,
+
+ /* Output */
+ &dummy_integer[0], // dml_uint_t *MetaRowByte,
+ &dummy_integer[1], // dml_uint_t *PixelPTEBytesPerRow,
+ &dummy_integer[2], // dml_uint_t *PixelPTEBytesPerRowStorage,
+ &dummy_integer[3], // dml_uint_t *dpte_row_width_ub,
+ dpte_row_height,
+ &dummy_integer[4], // dml_uint_t *dpte_row_height_linear
+ &dummy_integer[5], // dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
+ &dummy_integer[6], // dml_uint_t *dpte_row_width_ub_one_row_per_frame,
+ &dummy_integer[7], // dml_uint_t *dpte_row_height_one_row_per_frame,
+ &dummy_integer[8], // dml_uint_t *MetaRequestWidth,
+ &dummy_integer[9], // dml_uint_t *MetaRequestHeight,
+ &dummy_integer[10], // dml_uint_t *meta_row_width,
+ meta_row_height,
+ &dummy_integer[11], // dml_uint_t *PixelPTEReqWidth,
+ &dummy_integer[12], // dml_uint_t *PixelPTEReqHeight,
+ &dummy_integer[13], // dml_uint_t *PTERequestSize,
+ &dummy_integer[14], // dml_uint_t *DPDE0BytesFrame,
+ &dummy_integer[15]); // dml_uint_t *MetaPTEBytesFrame)
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+ dml_print("DML_DLG: %s: dpte_row_height = %u\n", __func__, *dpte_row_height);
+ dml_print("DML_DLG: %s: meta_row_height = %u\n", __func__, *meta_row_height);
+#endif
+}
+
+static struct soc_state_bounding_box_st dml_get_soc_state_bounding_box(
+ const struct soc_states_st *states,
+ dml_uint_t state_idx)
+{
+ dml_print("DML::%s: state_idx=%u (num_states=%u)\n", __func__, state_idx, states->num_states);
+
+ if (state_idx >= (dml_uint_t)states->num_states) {
+ dml_print("DML::%s: ERROR: Invalid state_idx=%u! num_states=%u\n", __func__, state_idx, states->num_states);
+ ASSERT(0);
+ }
+ return (states->state_array[state_idx]);
+}
+
+/// @brief Copy the parameters to a calculation struct; this is actually only needed when the DML needs to have
+/// the intelligence to re-calculate when any of the display cfg, bbox, or policy has changed since the last calculation.
+///
+static void cache_ip_soc_cfg(struct display_mode_lib_st *mode_lib,
+ dml_uint_t state_idx)
+{
+ mode_lib->ms.state_idx = state_idx;
+ mode_lib->ms.max_state_idx = mode_lib->states.num_states - 1;
+ mode_lib->ms.soc = mode_lib->soc;
+ mode_lib->ms.ip = mode_lib->ip;
+ mode_lib->ms.policy = mode_lib->policy;
+ mode_lib->ms.state = dml_get_soc_state_bounding_box(&mode_lib->states, state_idx);
+ mode_lib->ms.max_state = dml_get_soc_state_bounding_box(&mode_lib->states, mode_lib->states.num_states - 1);
+}
+
+static void cache_display_cfg(struct display_mode_lib_st *mode_lib,
+ const struct dml_display_cfg_st *display_cfg)
+{
+ mode_lib->ms.cache_display_cfg = *display_cfg;
+}
+
+static void fetch_socbb_params(struct display_mode_lib_st *mode_lib)
+{
+ struct soc_state_bounding_box_st *state = &mode_lib->ms.state;
+
+ // Default values; SOCCLK, DRAMSpeed, and FabricClock will be reassigned to the same state value in the mode_check step
+ // If UseMinimumRequiredDCFCLK is used, the DCFCLK will be the min dcfclk for the mode support
+ mode_lib->ms.SOCCLK = (dml_float_t)state->socclk_mhz;
+ mode_lib->ms.DRAMSpeed = (dml_float_t)state->dram_speed_mts;
+ mode_lib->ms.FabricClock = (dml_float_t)state->fabricclk_mhz;
+ mode_lib->ms.DCFCLK = (dml_float_t)state->dcfclk_mhz;
+}
+
+/// @brief Use display_cfg directly for the mode_support calculation
+/// Calculated values and informational output are stored in the mode_lib.vba data struct
+/// The display configuration is described with the pipes struct and num_pipes
+/// This function is used when the physical resource mapping is not finalized (for example,
+/// it is not yet known how many pipes will represent a surface)
+/// @param mode_lib Contains the bounding box and policy setting.
+/// @param state_idx Power state index
+/// @param display_cfg Display configurations.
+dml_bool_t dml_mode_support(
+ struct display_mode_lib_st *mode_lib,
+ dml_uint_t state_idx,
+ const struct dml_display_cfg_st *display_cfg)
+{
+ dml_bool_t is_mode_support;
+
+ dml_print("DML::%s: ------------- START ----------\n", __func__);
+ cache_ip_soc_cfg(mode_lib, state_idx);
+ cache_display_cfg(mode_lib, display_cfg);
+
+ fetch_socbb_params(mode_lib);
+
+ dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
+
+ is_mode_support = dml_core_mode_support(mode_lib);
+
+ dml_print("DML::%s: is_mode_support = %u\n", __func__, is_mode_support);
+ dml_print("DML::%s: ------------- DONE ----------\n", __func__);
+ return is_mode_support;
+}
+
+/// @brief A function to calculate the programming values for the DCN DCHUB (assumes the mode is supported)
+/// The output will be stored in the mode_lib.mp (mode_program_st) data struct and can be accessed via the getter functions
+/// Calculated values include: watermarks, dlg, rq reg, and the different clock frequencies
+/// This function returns 1 when there is no error.
+/// Note: In this function, it is assumed that the DCFCLK and SOCCLK freq are the state values, and mode_program will just use the DML-calculated DPPCLK and DISPCLK
+/// @param mode_lib mode_lib data struct that houses all the input/output/bbox and calculation values.
+/// @param state_idx Power state idx chosen
+/// @param display_cfg Display Configuration
+/// @param call_standalone Calling mode_programming without calling mode support; some of the "support"
+///                        struct members will be pre-calculated before doing mode programming.
+/// TODO: Add clk_cfg input, could be useful for standalone mode
+dml_bool_t dml_mode_programming(
+ struct display_mode_lib_st *mode_lib,
+ dml_uint_t state_idx,
+ const struct dml_display_cfg_st *display_cfg,
+ bool call_standalone)
+{
+ struct dml_clk_cfg_st clk_cfg;
+ memset(&clk_cfg, 0, sizeof(clk_cfg));
+
+ clk_cfg.dcfclk_option = dml_use_required_freq;
+ clk_cfg.dispclk_option = dml_use_required_freq;
+ for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; ++k)
+ clk_cfg.dppclk_option[k] = dml_use_required_freq;
+
+ dml_print("DML::%s: ------------- START ----------\n", __func__);
+ dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
+ dml_print("DML::%s: call_standalone = %u\n", __func__, call_standalone);
+
+ cache_ip_soc_cfg(mode_lib, state_idx);
+ cache_display_cfg(mode_lib, display_cfg);
+
+ fetch_socbb_params(mode_lib);
+ if (call_standalone) {
+ mode_lib->ms.support.ImmediateFlipSupport = 1; // assume mode support says immediate flip is ok at max state/combine
+ dml_core_mode_support_partial(mode_lib);
+ }
+
+ dml_core_mode_programming(mode_lib, &clk_cfg);
+
+ dml_print("DML::%s: ------------- DONE ----------\n", __func__);
+ dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %0d\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
+ return mode_lib->mp.PrefetchAndImmediateFlipSupported;
+}
+
+static dml_uint_t mode_support_pwr_states(
+ dml_uint_t *lowest_state_idx,
+ struct display_mode_lib_st *mode_lib,
+ const struct dml_display_cfg_st *display_cfg,
+ dml_uint_t start_state_idx,
+ dml_uint_t end_state_idx)
+{
+ dml_uint_t state_idx = 0;
+ dml_bool_t mode_is_supported = 0;
+ *lowest_state_idx = end_state_idx;
+
+ if (end_state_idx < start_state_idx)
+ ASSERT(0);
+
+ if (end_state_idx >= mode_lib->states.num_states) // idx is 0-based
+ ASSERT(0);
+
+ for (state_idx = start_state_idx; state_idx <= end_state_idx; state_idx++) {
+ if (dml_mode_support(mode_lib, state_idx, display_cfg)) {
+ dml_print("DML::%s: Mode is supported at power state_idx = %u\n", __func__, state_idx);
+ mode_is_supported = 1;
+ *lowest_state_idx = state_idx;
+ break;
+ }
+ }
+
+ return mode_is_supported;
+}
+
+dml_uint_t dml_mode_support_ex(struct dml_mode_support_ex_params_st *in_out_params)
+{
+ dml_uint_t result;
+
+ result = mode_support_pwr_states(&in_out_params->out_lowest_state_idx,
+ in_out_params->mode_lib,
+ in_out_params->in_display_cfg,
+ in_out_params->in_start_state_idx,
+ in_out_params->mode_lib->states.num_states - 1);
+
+ if (result)
+ *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
+
+ return result;
+}
+
+dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx)
+{
+ dml_uint_t plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
+ dml_print("DML::%s: pipe_idx=%d UseMALLForPStateChange=%0d\n", __func__, pipe_idx, mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx]);
+ return (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx] == dml_use_mall_pstate_change_phantom_pipe);
+}
+
+
+#define dml_get_per_surface_var_func(variable, type, interval_var) type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx) \
+{ \
+ dml_uint_t plane_idx; \
+ plane_idx = mode_lib->mp.pipe_plane[surface_idx]; \
+ return (type) interval_var[plane_idx]; \
+}
+
+#define dml_get_var_func(var, type, internal_var) type dml_get_##var(struct display_mode_lib_st *mode_lib) \
+{ \
+ return (type) internal_var; \
+}
+
+dml_get_var_func(wm_urgent, dml_float_t, mode_lib->mp.Watermark.UrgentWatermark);
+dml_get_var_func(wm_stutter_exit, dml_float_t, mode_lib->mp.Watermark.StutterExitWatermark);
+dml_get_var_func(wm_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
+dml_get_var_func(wm_memory_trip, dml_float_t, mode_lib->mp.UrgentLatency);
+dml_get_var_func(wm_fclk_change, dml_float_t, mode_lib->mp.Watermark.FCLKChangeWatermark);
+dml_get_var_func(wm_usr_retraining, dml_float_t, mode_lib->mp.Watermark.USRRetrainingWatermark);
+dml_get_var_func(wm_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
+dml_get_var_func(wm_z8_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
+dml_get_var_func(wm_z8_stutter, dml_float_t, mode_lib->mp.Watermark.Z8StutterExitWatermark);
+dml_get_var_func(fraction_of_urgent_bandwidth, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidth);
+dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
+dml_get_var_func(urgent_latency, dml_float_t, mode_lib->mp.UrgentLatency);
+dml_get_var_func(clk_dcf_deepsleep, dml_float_t, mode_lib->mp.DCFCLKDeepSleep);
+dml_get_var_func(wm_writeback_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
+dml_get_var_func(wm_writeback_urgent, dml_float_t, mode_lib->mp.Watermark.WritebackUrgentWatermark);
+dml_get_var_func(stutter_efficiency, dml_float_t, mode_lib->mp.StutterEfficiency);
+dml_get_var_func(stutter_efficiency_no_vblank, dml_float_t, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
+dml_get_var_func(stutter_efficiency_z8, dml_float_t, mode_lib->mp.Z8StutterEfficiency);
+dml_get_var_func(stutter_num_bursts_z8, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
+dml_get_var_func(stutter_period, dml_float_t, mode_lib->mp.StutterPeriod);
+dml_get_var_func(stutter_efficiency_z8_bestcase, dml_float_t, mode_lib->mp.Z8StutterEfficiencyBestCase);
+dml_get_var_func(stutter_num_bursts_z8_bestcase, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
+dml_get_var_func(stutter_period_bestcase, dml_float_t, mode_lib->mp.StutterPeriodBestCase);
+dml_get_var_func(urgent_extra_latency, dml_float_t, mode_lib->mp.UrgentExtraLatency);
+dml_get_var_func(fclk_change_latency, dml_float_t, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
+dml_get_var_func(dispclk_calculated, dml_float_t, mode_lib->mp.Dispclk_calculated);
+dml_get_var_func(total_data_read_bw, dml_float_t, mode_lib->mp.TotalDataReadBandwidth);
+dml_get_var_func(return_bw, dml_float_t, mode_lib->ms.ReturnBW);
+dml_get_var_func(return_dram_bw, dml_float_t, mode_lib->ms.ReturnDRAMBW);
+dml_get_var_func(tcalc, dml_float_t, mode_lib->mp.TCalc);
+dml_get_var_func(comp_buffer_size_kbytes, dml_uint_t, mode_lib->mp.CompressedBufferSizeInkByte);
+dml_get_var_func(pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.pixel_chunk_size_kbytes);
+dml_get_var_func(alpha_pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.alpha_pixel_chunk_size_kbytes);
+dml_get_var_func(meta_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.meta_chunk_size_kbytes);
+dml_get_var_func(min_pixel_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_pixel_chunk_size_bytes);
+dml_get_var_func(min_meta_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_meta_chunk_size_bytes);
+dml_get_var_func(total_immediate_flip_bytes, dml_uint_t, mode_lib->mp.TotImmediateFlipBytes);
+
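To make the generated API concrete, here is a minimal sketch of how a caller is expected to drive the functions above; the helper name and the wm_urgent_us out-parameter are illustrative only (not part of this patch), and error handling is elided. It probes the power states with dml_mode_support(), programs the lowest supporting state with dml_mode_programming(), and reads a result back through one of the accessors generated by dml_get_var_func():

	/* Hypothetical caller sketch, not part of this patch. */
	static dml_bool_t example_program_lowest_state(struct display_mode_lib_st *mode_lib,
						       const struct dml_display_cfg_st *display_cfg,
						       dml_float_t *wm_urgent_us)
	{
		dml_uint_t state_idx;

		/* Walk the power states from lowest to highest, as
		 * mode_support_pwr_states() does, until one supports the mode. */
		for (state_idx = 0; state_idx < mode_lib->states.num_states; state_idx++) {
			if (dml_mode_support(mode_lib, state_idx, display_cfg))
				break;
		}
		if (state_idx == mode_lib->states.num_states)
			return 0; /* no power state supports this configuration */

		/* call_standalone == false: mode support already ran above. */
		if (!dml_mode_programming(mode_lib, state_idx, display_cfg, false))
			return 0;

		/* dml_get_wm_urgent() is generated by
		 * dml_get_var_func(wm_urgent, dml_float_t, mode_lib->mp.Watermark.UrgentWatermark)
		 * and simply reads the value that mode programming stored in mode_lib->mp. */
		*wm_urgent_us = dml_get_wm_urgent(mode_lib);
		return 1;
	}

The same pattern applies to the per-surface getters that follow: they take an additional surface index and look the plane up through mode_lib->mp.pipe_plane[] before reading the cached result.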
+dml_get_per_surface_var_func(dsc_delay, dml_uint_t, mode_lib->mp.DSCDelay); // this is the dsc latency +dml_get_per_surface_var_func(dppclk_calculated, dml_float_t, mode_lib->mp.Dppclk_calculated); +dml_get_per_surface_var_func(dscclk_calculated, dml_float_t, mode_lib->mp.DSCCLK_calculated); +dml_get_per_surface_var_func(min_ttu_vblank_in_us, dml_float_t, mode_lib->mp.MinTTUVBlank); +dml_get_per_surface_var_func(vratio_prefetch_l, dml_float_t, mode_lib->mp.VRatioPrefetchY); +dml_get_per_surface_var_func(vratio_prefetch_c, dml_float_t, mode_lib->mp.VRatioPrefetchC); +dml_get_per_surface_var_func(dst_x_after_scaler, dml_uint_t, mode_lib->mp.DSTXAfterScaler); +dml_get_per_surface_var_func(dst_y_after_scaler, dml_uint_t, mode_lib->mp.DSTYAfterScaler); +dml_get_per_surface_var_func(dst_y_per_vm_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInVBlank); +dml_get_per_surface_var_func(dst_y_per_row_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInVBlank); +dml_get_per_surface_var_func(dst_y_prefetch, dml_float_t, mode_lib->mp.DestinationLinesForPrefetch); +dml_get_per_surface_var_func(dst_y_per_vm_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInImmediateFlip); +dml_get_per_surface_var_func(dst_y_per_row_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInImmediateFlip); +dml_get_per_surface_var_func(dst_y_per_pte_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L); +dml_get_per_surface_var_func(dst_y_per_pte_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C); +dml_get_per_surface_var_func(dst_y_per_meta_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_L); +dml_get_per_surface_var_func(dst_y_per_meta_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_C); +dml_get_per_surface_var_func(refcyc_per_vm_group_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupVBlank); +dml_get_per_surface_var_func(refcyc_per_vm_group_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupFlip); +dml_get_per_surface_var_func(refcyc_per_vm_req_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestVBlank); +dml_get_per_surface_var_func(refcyc_per_vm_req_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestFlip); +dml_get_per_surface_var_func(refcyc_per_vm_dmdata_in_us, dml_float_t, mode_lib->mp.Tdmdl_vm); +dml_get_per_surface_var_func(dmdata_dl_delta_in_us, dml_float_t, mode_lib->mp.Tdmdl); +dml_get_per_surface_var_func(refcyc_per_line_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLuma); +dml_get_per_surface_var_func(refcyc_per_line_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChroma); +dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch); +dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch); +dml_get_per_surface_var_func(refcyc_per_req_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma); +dml_get_per_surface_var_func(refcyc_per_req_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma); +dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch); +dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch); +dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_in_us, dml_float_t, 
mode_lib->mp.CursorRequestDeliveryTime); +dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTimePrefetch); +dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkNominal); +dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkNominal); +dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkVBlank); +dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkVBlank); +dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkFlip); +dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkFlip); +dml_get_per_surface_var_func(refcyc_per_pte_group_nom_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_luma); +dml_get_per_surface_var_func(refcyc_per_pte_group_nom_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_chroma); +dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_luma); +dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_chroma); +dml_get_per_surface_var_func(refcyc_per_pte_group_flip_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_luma); +dml_get_per_surface_var_func(refcyc_per_pte_group_flip_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_chroma); +dml_get_per_surface_var_func(dpte_group_size_in_bytes, dml_uint_t, mode_lib->mp.dpte_group_bytes); +dml_get_per_surface_var_func(vm_group_size_in_bytes, dml_uint_t, mode_lib->mp.vm_group_bytes); +dml_get_per_surface_var_func(swath_height_l, dml_uint_t, mode_lib->ms.SwathHeightY); +dml_get_per_surface_var_func(swath_height_c, dml_uint_t, mode_lib->ms.SwathHeightC); +dml_get_per_surface_var_func(dpte_row_height_l, dml_uint_t, mode_lib->mp.dpte_row_height); +dml_get_per_surface_var_func(dpte_row_height_c, dml_uint_t, mode_lib->mp.dpte_row_height_chroma); +dml_get_per_surface_var_func(dpte_row_height_linear_l, dml_uint_t, mode_lib->mp.dpte_row_height_linear); +dml_get_per_surface_var_func(dpte_row_height_linear_c, dml_uint_t, mode_lib->mp.dpte_row_height_linear_chroma); +dml_get_per_surface_var_func(meta_row_height_l, dml_uint_t, mode_lib->mp.meta_row_height); +dml_get_per_surface_var_func(meta_row_height_c, dml_uint_t, mode_lib->mp.meta_row_height_chroma); + +dml_get_per_surface_var_func(vstartup_calculated, dml_uint_t, mode_lib->mp.VStartup); +dml_get_per_surface_var_func(vupdate_offset, dml_uint_t, mode_lib->mp.VUpdateOffsetPix); +dml_get_per_surface_var_func(vupdate_width, dml_uint_t, mode_lib->mp.VUpdateWidthPix); +dml_get_per_surface_var_func(vready_offset, dml_uint_t, mode_lib->mp.VReadyOffsetPix); +dml_get_per_surface_var_func(vready_at_or_after_vsync, dml_uint_t, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC); +dml_get_per_surface_var_func(min_dst_y_next_start, dml_uint_t, mode_lib->mp.MIN_DST_Y_NEXT_START); +dml_get_per_surface_var_func(det_stored_buffer_size_l_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeY); +dml_get_per_surface_var_func(det_stored_buffer_size_c_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeC); +dml_get_per_surface_var_func(use_mall_for_static_screen, dml_uint_t, mode_lib->mp.UsesMALLForStaticScreen); +dml_get_per_surface_var_func(surface_size_for_mall, dml_uint_t, 
mode_lib->mp.SurfaceSizeInTheMALL); +dml_get_per_surface_var_func(dcc_max_uncompressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxUncompressedBlock); +dml_get_per_surface_var_func(dcc_max_compressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxCompressedBlock); +dml_get_per_surface_var_func(dcc_independent_block_l, dml_uint_t, mode_lib->mp.DCCYIndependentBlock); +dml_get_per_surface_var_func(dcc_max_uncompressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxUncompressedBlock); +dml_get_per_surface_var_func(dcc_max_compressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxCompressedBlock); +dml_get_per_surface_var_func(dcc_independent_block_c, dml_uint_t, mode_lib->mp.DCCCIndependentBlock); +dml_get_per_surface_var_func(max_active_dram_clock_change_latency_supported, dml_uint_t, mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported); +dml_get_per_surface_var_func(pte_buffer_mode, dml_uint_t, mode_lib->mp.PTE_BUFFER_MODE); +dml_get_per_surface_var_func(bigk_fragment_size, dml_uint_t, mode_lib->mp.BIGK_FRAGMENT_SIZE); +dml_get_per_surface_var_func(dpte_bytes_per_row, dml_uint_t, mode_lib->mp.PixelPTEBytesPerRow); +dml_get_per_surface_var_func(meta_bytes_per_row, dml_uint_t, mode_lib->mp.MetaRowByte); +dml_get_per_surface_var_func(det_buffer_size_kbytes, dml_uint_t, mode_lib->ms.DETBufferSizeInKByte); + diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h new file mode 100644 index 000000000000..a38ed89c47a9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h @@ -0,0 +1,204 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DISPLAY_MODE_CORE_H__ +#define __DISPLAY_MODE_CORE_H__ + +#include "display_mode_core_structs.h" + +struct display_mode_lib_st; + +dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib); +void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib); +void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg); + +void dml_core_get_row_heights( + dml_uint_t *dpte_row_height, + dml_uint_t *meta_row_height, + const struct display_mode_lib_st *mode_lib, + dml_bool_t is_plane1, + enum dml_source_format_class SourcePixelFormat, + enum dml_swizzle_mode SurfaceTiling, + enum dml_rotation_angle ScanDirection, + dml_uint_t pitch, + dml_uint_t GPUVMMinPageSizeKBytes); + +dml_float_t dml_get_return_bw_mbps_vm_only( + const struct soc_bounding_box_st *soc, + dml_bool_t use_ideal_dram_bw_strobe, + dml_bool_t HostVMEnable, + dml_float_t DCFCLK, + dml_float_t FabricClock, + dml_float_t DRAMSpeed); + +dml_float_t dml_get_return_bw_mbps( + const struct soc_bounding_box_st *soc, + dml_bool_t use_ideal_dram_bw_strobe, + dml_bool_t HostVMEnable, + dml_float_t DCFCLK, + dml_float_t FabricClock, + dml_float_t DRAMSpeed); + +dml_bool_t dml_mode_support( + struct display_mode_lib_st *mode_lib, + dml_uint_t state_idx, + const struct dml_display_cfg_st *display_cfg); + +dml_bool_t dml_mode_programming( + struct display_mode_lib_st *mode_lib, + dml_uint_t state_idx, + const struct dml_display_cfg_st *display_cfg, + bool call_standalone); + +dml_uint_t dml_mode_support_ex( + struct dml_mode_support_ex_params_st *in_out_params); + +dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx); + +#define dml_get_per_surface_var_decl(variable, type) type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx) +#define dml_get_var_decl(var, type) type dml_get_##var(struct display_mode_lib_st *mode_lib) + +dml_get_var_decl(wm_urgent, dml_float_t); +dml_get_var_decl(wm_stutter_exit, dml_float_t); +dml_get_var_decl(wm_stutter_enter_exit, dml_float_t); +dml_get_var_decl(wm_memory_trip, dml_float_t); +dml_get_var_decl(wm_dram_clock_change, dml_float_t); +dml_get_var_decl(wm_z8_stutter_enter_exit, dml_float_t); +dml_get_var_decl(wm_z8_stutter, dml_float_t); +dml_get_var_decl(urgent_latency, dml_float_t); +dml_get_var_decl(clk_dcf_deepsleep, dml_float_t); +dml_get_var_decl(wm_fclk_change, dml_float_t); +dml_get_var_decl(wm_usr_retraining, dml_float_t); +dml_get_var_decl(urgent_latency, dml_float_t); + +dml_get_var_decl(wm_writeback_dram_clock_change, dml_float_t); +dml_get_var_decl(wm_writeback_urgent, dml_float_t); +dml_get_var_decl(stutter_efficiency_no_vblank, dml_float_t); +dml_get_var_decl(stutter_efficiency, dml_float_t); +dml_get_var_decl(stutter_efficiency_z8, dml_float_t); +dml_get_var_decl(stutter_num_bursts_z8, dml_float_t); +dml_get_var_decl(stutter_period, dml_float_t); +dml_get_var_decl(stutter_efficiency_z8_bestcase, dml_float_t); +dml_get_var_decl(stutter_num_bursts_z8_bestcase, dml_float_t); +dml_get_var_decl(stutter_period_bestcase, dml_float_t); +dml_get_var_decl(urgent_latency, dml_float_t); +dml_get_var_decl(urgent_extra_latency, dml_float_t); +dml_get_var_decl(fclk_change_latency, dml_float_t); +dml_get_var_decl(nonurgent_latency, dml_float_t); +dml_get_var_decl(dispclk_calculated, dml_float_t); +dml_get_var_decl(total_data_read_bw, dml_float_t); +dml_get_var_decl(return_bw, dml_float_t); +dml_get_var_decl(return_dram_bw, 
dml_float_t); +dml_get_var_decl(tcalc, dml_float_t); +dml_get_var_decl(fraction_of_urgent_bandwidth, dml_float_t); +dml_get_var_decl(fraction_of_urgent_bandwidth_imm_flip, dml_float_t); +dml_get_var_decl(comp_buffer_size_kbytes, dml_uint_t); +dml_get_var_decl(pixel_chunk_size_in_kbyte, dml_uint_t); +dml_get_var_decl(alpha_pixel_chunk_size_in_kbyte, dml_uint_t); +dml_get_var_decl(meta_chunk_size_in_kbyte, dml_uint_t); +dml_get_var_decl(min_pixel_chunk_size_in_byte, dml_uint_t); +dml_get_var_decl(min_meta_chunk_size_in_byte, dml_uint_t); +dml_get_var_decl(total_immediate_flip_bytes, dml_uint_t); + +dml_get_per_surface_var_decl(dsc_delay, dml_uint_t); +dml_get_per_surface_var_decl(dppclk_calculated, dml_float_t); +dml_get_per_surface_var_decl(dscclk_calculated, dml_float_t); +dml_get_per_surface_var_decl(min_ttu_vblank_in_us, dml_float_t); +dml_get_per_surface_var_decl(vratio_prefetch_l, dml_float_t); +dml_get_per_surface_var_decl(vratio_prefetch_c, dml_float_t); +dml_get_per_surface_var_decl(dst_x_after_scaler, dml_uint_t); +dml_get_per_surface_var_decl(dst_y_after_scaler, dml_uint_t); +dml_get_per_surface_var_decl(dst_y_per_vm_vblank, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_row_vblank, dml_float_t); +dml_get_per_surface_var_decl(dst_y_prefetch, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_vm_flip, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_row_flip, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_pte_row_nom_l, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_pte_row_nom_c, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_meta_row_nom_l, dml_float_t); +dml_get_per_surface_var_decl(dst_y_per_meta_row_nom_c, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_vm_group_vblank_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_vm_group_flip_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_vm_req_vblank_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_vm_req_flip_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_vm_dmdata_in_us, dml_float_t); +dml_get_per_surface_var_decl(dmdata_dl_delta_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_line_delivery_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_line_delivery_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_line_delivery_pre_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_line_delivery_pre_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_req_delivery_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_req_delivery_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_req_delivery_pre_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_req_delivery_pre_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_cursor_req_delivery_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_pte_group_nom_l_in_us, dml_float_t); 
+dml_get_per_surface_var_decl(refcyc_per_pte_group_nom_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_pte_group_vblank_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_pte_group_vblank_c_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_pte_group_flip_l_in_us, dml_float_t); +dml_get_per_surface_var_decl(refcyc_per_pte_group_flip_c_in_us, dml_float_t); + +dml_get_per_surface_var_decl(dpte_group_size_in_bytes, dml_uint_t); +dml_get_per_surface_var_decl(vm_group_size_in_bytes, dml_uint_t); +dml_get_per_surface_var_decl(swath_height_l, dml_uint_t); +dml_get_per_surface_var_decl(swath_height_c, dml_uint_t); +dml_get_per_surface_var_decl(dpte_row_height_l, dml_uint_t); +dml_get_per_surface_var_decl(dpte_row_height_c, dml_uint_t); +dml_get_per_surface_var_decl(dpte_row_height_linear_l, dml_uint_t); +dml_get_per_surface_var_decl(dpte_row_height_linear_c, dml_uint_t); +dml_get_per_surface_var_decl(meta_row_height_l, dml_uint_t); +dml_get_per_surface_var_decl(meta_row_height_c, dml_uint_t); +dml_get_per_surface_var_decl(vstartup_calculated, dml_uint_t); +dml_get_per_surface_var_decl(vupdate_offset, dml_uint_t); +dml_get_per_surface_var_decl(vupdate_width, dml_uint_t); +dml_get_per_surface_var_decl(vready_offset, dml_uint_t); +dml_get_per_surface_var_decl(vready_at_or_after_vsync, dml_uint_t); +dml_get_per_surface_var_decl(min_dst_y_next_start, dml_uint_t); +dml_get_per_surface_var_decl(det_stored_buffer_size_l_bytes, dml_uint_t); +dml_get_per_surface_var_decl(det_stored_buffer_size_c_bytes, dml_uint_t); +dml_get_per_surface_var_decl(use_mall_for_static_screen, dml_uint_t); +dml_get_per_surface_var_decl(surface_size_for_mall, dml_uint_t); +dml_get_per_surface_var_decl(dcc_max_uncompressed_block_l, dml_uint_t); +dml_get_per_surface_var_decl(dcc_max_uncompressed_block_c, dml_uint_t); +dml_get_per_surface_var_decl(dcc_max_compressed_block_l, dml_uint_t); +dml_get_per_surface_var_decl(dcc_max_compressed_block_c, dml_uint_t); +dml_get_per_surface_var_decl(dcc_independent_block_l, dml_uint_t); +dml_get_per_surface_var_decl(dcc_independent_block_c, dml_uint_t); +dml_get_per_surface_var_decl(max_active_dram_clock_change_latency_supported, dml_uint_t); +dml_get_per_surface_var_decl(pte_buffer_mode, dml_uint_t); +dml_get_per_surface_var_decl(bigk_fragment_size, dml_uint_t); +dml_get_per_surface_var_decl(dpte_bytes_per_row, dml_uint_t); +dml_get_per_surface_var_decl(meta_bytes_per_row, dml_uint_t); +dml_get_per_surface_var_decl(det_buffer_size_kbytes, dml_uint_t); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h new file mode 100644 index 000000000000..3b1d92e7697f --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h @@ -0,0 +1,2032 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DISPLAY_MODE_CORE_STRUCT_H__ +#define __DISPLAY_MODE_CORE_STRUCT_H__ + +#include "display_mode_lib_defines.h" +#include "dml_top_display_cfg_types.h" + +enum dml_project_id { + dml_project_invalid = 0, + dml_project_default = 1, + dml_project_dcn32 = dml_project_default, + dml_project_dcn321 = 2, + dml_project_dcn35 = 3, + dml_project_dcn351 = 4, + dml_project_dcn401 = 5, + dml_project_dcn36 = 6, +}; +enum dml_prefetch_modes { + dml_prefetch_support_uclk_fclk_and_stutter_if_possible = 0, + dml_prefetch_support_uclk_fclk_and_stutter = 1, + dml_prefetch_support_fclk_and_stutter = 2, + dml_prefetch_support_stutter = 3, + dml_prefetch_support_none = 4 +}; +enum dml_use_mall_for_pstate_change_mode { + dml_use_mall_pstate_change_disable = 0, + dml_use_mall_pstate_change_full_frame = 1, + dml_use_mall_pstate_change_sub_viewport = 2, + dml_use_mall_pstate_change_phantom_pipe = 3, + dml_use_mall_pstate_change_phantom_pipe_no_data_return = 4, + dml_use_mall_pstate_change_imall = 5 +}; +enum dml_use_mall_for_static_screen_mode { + dml_use_mall_static_screen_disable = 0, + dml_use_mall_static_screen_enable = 1, + dml_use_mall_static_screen_optimize = 2 +}; +enum dml_output_encoder_class { + dml_dp = 0, + dml_edp = 1, + dml_dp2p0 = 2, + dml_hdmi = 3, + dml_hdmifrl = 4, + dml_none = 5 +}; +enum dml_output_link_dp_rate{ + dml_dp_rate_na = 0, + dml_dp_rate_hbr = 1, + dml_dp_rate_hbr2 = 2, + dml_dp_rate_hbr3 = 3, + dml_dp_rate_uhbr10 = 4, + dml_dp_rate_uhbr13p5 = 5, + dml_dp_rate_uhbr20 = 6 +}; +enum dml_output_type_and_rate__type{ + dml_output_type_unknown = 0, + dml_output_type_dp = 1, + dml_output_type_edp = 2, + dml_output_type_dp2p0 = 3, + dml_output_type_hdmi = 4, + dml_output_type_hdmifrl = 5 +}; +enum dml_output_type_and_rate__rate { + dml_output_rate_unknown = 0, + dml_output_rate_dp_rate_hbr = 1, + dml_output_rate_dp_rate_hbr2 = 2, + dml_output_rate_dp_rate_hbr3 = 3, + dml_output_rate_dp_rate_uhbr10 = 4, + dml_output_rate_dp_rate_uhbr13p5 = 5, + dml_output_rate_dp_rate_uhbr20 = 6, + dml_output_rate_hdmi_rate_3x3 = 7, + dml_output_rate_hdmi_rate_6x3 = 8, + dml_output_rate_hdmi_rate_6x4 = 9, + dml_output_rate_hdmi_rate_8x4 = 10, + dml_output_rate_hdmi_rate_10x4 = 11, + dml_output_rate_hdmi_rate_12x4 = 12 +}; +enum dml_output_format_class { + dml_444 = 0, + dml_s422 = 1, + dml_n422 = 2, + dml_420 = 3 +}; +enum dml_source_format_class { + dml_444_8 = 0, + dml_444_16 = 1, + dml_444_32 = 2, + dml_444_64 = 3, + dml_420_8 = 4, + dml_420_10 = 5, + dml_420_12 = 6, + dml_422_8 = 7, + dml_422_10 = 8, + dml_rgbe_alpha = 9, + dml_rgbe = 10, + dml_mono_8 = 11, + dml_mono_16 = 12 +}; +enum dml_output_bpc_class { + dml_out_6 = 0, + dml_out_8 = 1, + dml_out_10 = 2, + dml_out_12 = 3, + dml_out_16 = 4 +}; +enum dml_output_standard_class { + dml_std_cvt = 0, + dml_std_cea = 1, + dml_std_cvtr2 = 2 +}; +enum dml_rotation_angle { + dml_rotation_0 = 0, + dml_rotation_90 = 1, + dml_rotation_180 = 2, + dml_rotation_270 = 3, + dml_rotation_0m = 4, + dml_rotation_90m = 5, + dml_rotation_180m = 
6, + dml_rotation_270m = 7 +}; +enum dml_swizzle_mode { + dml_sw_linear = 0, + dml_sw_256b_s = 1, + dml_sw_256b_d = 2, + dml_sw_256b_r = 3, + dml_sw_4kb_z = 4, + dml_sw_4kb_s = 5, + dml_sw_4kb_d = 6, + dml_sw_4kb_r = 7, + dml_sw_64kb_z = 8, + dml_sw_64kb_s = 9, + dml_sw_64kb_d = 10, + dml_sw_64kb_r = 11, + dml_sw_256kb_z = 12, + dml_sw_256kb_s = 13, + dml_sw_256kb_d = 14, + dml_sw_256kb_r = 15, + dml_sw_64kb_z_t = 16, + dml_sw_64kb_s_t = 17, + dml_sw_64kb_d_t = 18, + dml_sw_64kb_r_t = 19, + dml_sw_4kb_z_x = 20, + dml_sw_4kb_s_x = 21, + dml_sw_4kb_d_x = 22, + dml_sw_4kb_r_x = 23, + dml_sw_64kb_z_x = 24, + dml_sw_64kb_s_x = 25, + dml_sw_64kb_d_x = 26, + dml_sw_64kb_r_x = 27, + dml_sw_256kb_z_x = 28, + dml_sw_256kb_s_x = 29, + dml_sw_256kb_d_x = 30, + dml_sw_256kb_r_x = 31, + dml_sw_256b_2d = 32, + dml_sw_4kb_2d = 33, + dml_sw_64kb_2d = 34, + dml_sw_256kb_2d = 35 +}; +enum dml_lb_depth { + dml_lb_6 = 0, + dml_lb_8 = 1, + dml_lb_10 = 2, + dml_lb_12 = 3, + dml_lb_16 = 4 +}; +enum dml_voltage_state { + dml_vmin_lv = 0, + dml_vmin = 1, + dml_vmid = 2, + dml_vnom = 3, + dml_vmax = 4 +}; +enum dml_source_macro_tile_size { + dml_4k_tile = 0, + dml_64k_tile = 1, + dml_256k_tile = 2 +}; +enum dml_cursor_bpp { + dml_cur_2bit = 0, + dml_cur_32bit = 1, + dml_cur_64bit = 2 +}; +enum dml_dram_clock_change_support { + dml_dram_clock_change_vactive = 0, + dml_dram_clock_change_vblank = 1, + dml_dram_clock_change_vblank_drr = 2, + dml_dram_clock_change_vactive_w_mall_full_frame = 3, + dml_dram_clock_change_vactive_w_mall_sub_vp = 4, + dml_dram_clock_change_vblank_w_mall_full_frame = 5, + dml_dram_clock_change_vblank_drr_w_mall_full_frame = 6, + dml_dram_clock_change_vblank_w_mall_sub_vp = 7, + dml_dram_clock_change_vblank_drr_w_mall_sub_vp = 8, + dml_dram_clock_change_unsupported = 9 +}; +enum dml_fclock_change_support { + dml_fclock_change_vactive = 0, + dml_fclock_change_vblank = 1, + dml_fclock_change_unsupported = 2 +}; +enum dml_dsc_enable { + dml_dsc_disable = 0, + dml_dsc_enable = 1, + dml_dsc_enable_if_necessary = 2 +}; +enum dml_mpc_use_policy { + dml_mpc_disabled = 0, + dml_mpc_as_possible = 1, + dml_mpc_as_needed_for_voltage = 2, + dml_mpc_as_needed_for_pstate_and_voltage = 3, + dml_mpc_as_needed = 4, + dml_mpc_2to1 = 5 +}; +enum dml_odm_use_policy { + dml_odm_use_policy_bypass = 0, + dml_odm_use_policy_combine_as_needed = 1, + dml_odm_use_policy_combine_2to1 = 2, + dml_odm_use_policy_combine_3to1 = 3, + dml_odm_use_policy_combine_4to1 = 4, + dml_odm_use_policy_split_1to2 = 5, + dml_odm_use_policy_mso_1to2 = 6, + dml_odm_use_policy_mso_1to4 = 7 +}; +enum dml_odm_mode { + dml_odm_mode_bypass = 0, + dml_odm_mode_combine_2to1 = 1, + dml_odm_mode_combine_3to1 = 2, + dml_odm_mode_combine_4to1 = 3, + dml_odm_mode_split_1to2 = 4, + dml_odm_mode_mso_1to2 = 5, + dml_odm_mode_mso_1to4 = 6 +}; +enum dml_writeback_configuration { + dml_whole_buffer_for_single_stream_no_interleave = 0, + dml_whole_buffer_for_single_stream_interleave = 1 +}; +enum dml_immediate_flip_requirement { + dml_immediate_flip_not_required = 0, + dml_immediate_flip_required = 1, + dml_immediate_flip_if_possible = 2 +}; +enum dml_unbounded_requesting_policy { + dml_unbounded_requesting_enable = 0, + dml_unbounded_requesting_edp_only = 1, + dml_unbounded_requesting_disable = 2 +}; +enum dml_clk_cfg_policy { + dml_use_required_freq = 0, + dml_use_override_freq = 1, + dml_use_state_freq = 2 +}; + +struct soc_state_bounding_box_st { + dml_float_t socclk_mhz; + dml_float_t dscclk_mhz; + dml_float_t phyclk_mhz; + dml_float_t phyclk_d18_mhz; + 
dml_float_t phyclk_d32_mhz; + dml_float_t dtbclk_mhz; + dml_float_t fabricclk_mhz; + dml_float_t dcfclk_mhz; + dml_float_t dispclk_mhz; + dml_float_t dppclk_mhz; + dml_float_t dram_speed_mts; + dml_float_t urgent_latency_pixel_data_only_us; + dml_float_t urgent_latency_pixel_mixed_with_vm_data_us; + dml_float_t urgent_latency_vm_data_only_us; + dml_float_t writeback_latency_us; + dml_float_t urgent_latency_adjustment_fabric_clock_component_us; + dml_float_t urgent_latency_adjustment_fabric_clock_reference_mhz; + dml_float_t sr_exit_time_us; + dml_float_t sr_enter_plus_exit_time_us; + dml_float_t sr_exit_z8_time_us; + dml_float_t sr_enter_plus_exit_z8_time_us; + dml_float_t dram_clock_change_latency_us; + dml_float_t fclk_change_latency_us; + dml_float_t usr_retraining_latency_us; + dml_bool_t use_ideal_dram_bw_strobe; + dml_float_t g6_temp_read_blackout_us; + + struct { + dml_uint_t urgent_ramp_uclk_cycles; + dml_uint_t trip_to_memory_uclk_cycles; + dml_uint_t meta_trip_to_memory_uclk_cycles; + dml_uint_t maximum_latency_when_urgent_uclk_cycles; + dml_uint_t average_latency_when_urgent_uclk_cycles; + dml_uint_t maximum_latency_when_non_urgent_uclk_cycles; + dml_uint_t average_latency_when_non_urgent_uclk_cycles; + } dml_dcn401_uclk_dpm_dependent_soc_qos_params; +}; + +struct soc_bounding_box_st { + dml_float_t dprefclk_mhz; + dml_float_t xtalclk_mhz; + dml_float_t pcierefclk_mhz; + dml_float_t refclk_mhz; + dml_float_t amclk_mhz; + dml_uint_t max_outstanding_reqs; + dml_float_t pct_ideal_sdp_bw_after_urgent; + dml_float_t pct_ideal_fabric_bw_after_urgent; + dml_float_t pct_ideal_dram_bw_after_urgent_pixel_only; + dml_float_t pct_ideal_dram_bw_after_urgent_pixel_and_vm; + dml_float_t pct_ideal_dram_bw_after_urgent_vm_only; + dml_float_t pct_ideal_dram_bw_after_urgent_strobe; + dml_float_t max_avg_sdp_bw_use_normal_percent; + dml_float_t max_avg_fabric_bw_use_normal_percent; + dml_float_t max_avg_dram_bw_use_normal_percent; + dml_float_t max_avg_dram_bw_use_normal_strobe_percent; + + dml_float_t svp_prefetch_pct_ideal_sdp_bw_after_urgent; + dml_float_t svp_prefetch_pct_ideal_fabric_bw_after_urgent; + dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_only; + dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_and_vm; + dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_vm_only; + dml_float_t svp_prefetch_max_avg_sdp_bw_use_normal_percent; + dml_float_t svp_prefetch_max_avg_fabric_bw_use_normal_percent; + dml_float_t svp_prefetch_max_avg_dram_bw_use_normal_percent; + + dml_uint_t round_trip_ping_latency_dcfclk_cycles; + dml_uint_t urgent_out_of_order_return_per_channel_pixel_only_bytes; + dml_uint_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes; + dml_uint_t urgent_out_of_order_return_per_channel_vm_only_bytes; + dml_uint_t num_chans; + dml_uint_t return_bus_width_bytes; + dml_uint_t dram_channel_width_bytes; + dml_uint_t fabric_datapath_to_dcn_data_return_bytes; + dml_uint_t hostvm_min_page_size_kbytes; + dml_uint_t gpuvm_min_page_size_kbytes; + dml_float_t phy_downspread_percent; + dml_float_t dcn_downspread_percent; + dml_float_t smn_latency_us; + dml_uint_t mall_allocated_for_dcn_mbytes; + dml_float_t dispclk_dppclk_vco_speed_mhz; + dml_bool_t do_urgent_latency_adjustment; + + dml_uint_t mem_word_bytes; + dml_uint_t num_dcc_mcaches; + dml_uint_t mcache_size_bytes; + dml_uint_t mcache_line_size_bytes; + + struct { + dml_bool_t UseNewDCN401SOCParameters; + dml_uint_t df_qos_response_time_fclk_cycles; + dml_uint_t 
max_round_trip_to_furthest_cs_fclk_cycles; + dml_uint_t mall_overhead_fclk_cycles; + dml_uint_t meta_trip_adder_fclk_cycles; + dml_uint_t average_transport_distance_fclk_cycles; + dml_float_t umc_urgent_ramp_latency_margin; + dml_float_t umc_max_latency_margin; + dml_float_t umc_average_latency_margin; + dml_float_t fabric_max_transport_latency_margin; + dml_float_t fabric_average_transport_latency_margin; + } dml_dcn401_soc_qos_params; + +}; + +struct ip_params_st { + dml_uint_t vblank_nom_default_us; + dml_uint_t rob_buffer_size_kbytes; + dml_uint_t config_return_buffer_size_in_kbytes; + dml_uint_t config_return_buffer_segment_size_in_kbytes; + dml_uint_t compressed_buffer_segment_size_in_kbytes; + dml_uint_t meta_fifo_size_in_kentries; + dml_uint_t zero_size_buffer_entries; + dml_uint_t dpte_buffer_size_in_pte_reqs_luma; + dml_uint_t dpte_buffer_size_in_pte_reqs_chroma; + dml_uint_t dcc_meta_buffer_size_bytes; + dml_bool_t gpuvm_enable; + dml_bool_t hostvm_enable; + dml_uint_t gpuvm_max_page_table_levels; + dml_uint_t hostvm_max_page_table_levels; + dml_uint_t pixel_chunk_size_kbytes; + dml_uint_t alpha_pixel_chunk_size_kbytes; + dml_uint_t min_pixel_chunk_size_bytes; + dml_uint_t meta_chunk_size_kbytes; + dml_uint_t min_meta_chunk_size_bytes; + dml_uint_t writeback_chunk_size_kbytes; + dml_uint_t line_buffer_size_bits; + dml_uint_t max_line_buffer_lines; + dml_uint_t writeback_interface_buffer_size_kbytes; + dml_uint_t max_num_dpp; + dml_uint_t max_num_otg; + dml_uint_t max_num_wb; + dml_uint_t max_dchub_pscl_bw_pix_per_clk; + dml_uint_t max_pscl_lb_bw_pix_per_clk; + dml_uint_t max_lb_vscl_bw_pix_per_clk; + dml_uint_t max_vscl_hscl_bw_pix_per_clk; + dml_float_t max_hscl_ratio; + dml_float_t max_vscl_ratio; + dml_uint_t max_hscl_taps; + dml_uint_t max_vscl_taps; + dml_uint_t num_dsc; + dml_uint_t maximum_dsc_bits_per_component; + dml_uint_t maximum_pixels_per_line_per_dsc_unit; + dml_bool_t dsc422_native_support; + dml_bool_t cursor_64bpp_support; + dml_float_t dispclk_ramp_margin_percent; + dml_uint_t dppclk_delay_subtotal; + dml_uint_t dppclk_delay_scl; + dml_uint_t dppclk_delay_scl_lb_only; + dml_uint_t dppclk_delay_cnvc_formatter; + dml_uint_t dppclk_delay_cnvc_cursor; + dml_uint_t cursor_buffer_size; + dml_uint_t cursor_chunk_size; + dml_uint_t dispclk_delay_subtotal; + dml_bool_t dynamic_metadata_vm_enabled; + dml_uint_t max_inter_dcn_tile_repeaters; + dml_uint_t max_num_hdmi_frl_outputs; + dml_uint_t max_num_dp2p0_outputs; + dml_uint_t max_num_dp2p0_streams; + dml_bool_t dcc_supported; + dml_bool_t ptoi_supported; + dml_float_t writeback_max_hscl_ratio; + dml_float_t writeback_max_vscl_ratio; + dml_float_t writeback_min_hscl_ratio; + dml_float_t writeback_min_vscl_ratio; + dml_uint_t writeback_max_hscl_taps; + dml_uint_t writeback_max_vscl_taps; + dml_uint_t writeback_line_buffer_buffer_size; +}; + +struct DmlPipe { + dml_float_t Dppclk; + dml_float_t Dispclk; + dml_float_t PixelClock; + dml_float_t DCFClkDeepSleep; + dml_uint_t DPPPerSurface; + dml_bool_t ScalerEnabled; + enum dml_rotation_angle SourceScan; + dml_uint_t ViewportHeight; + dml_uint_t ViewportHeightChroma; + dml_uint_t BlockWidth256BytesY; + dml_uint_t BlockHeight256BytesY; + dml_uint_t BlockWidth256BytesC; + dml_uint_t BlockHeight256BytesC; + dml_uint_t BlockWidthY; + dml_uint_t BlockHeightY; + dml_uint_t BlockWidthC; + dml_uint_t BlockHeightC; + dml_uint_t InterlaceEnable; + dml_uint_t NumberOfCursors; + dml_uint_t VBlank; + dml_uint_t HTotal; + dml_uint_t HActive; + dml_bool_t DCCEnable; + enum dml_odm_mode 
ODMMode;
+	enum dml_source_format_class SourcePixelFormat;
+	enum dml_swizzle_mode SurfaceTiling;
+	dml_uint_t BytePerPixelY;
+	dml_uint_t BytePerPixelC;
+	dml_bool_t ProgressiveToInterlaceUnitInOPP;
+	dml_float_t VRatio;
+	dml_float_t VRatioChroma;
+	dml_uint_t VTaps;
+	dml_uint_t VTapsChroma;
+	dml_uint_t PitchY;
+	dml_uint_t DCCMetaPitchY;
+	dml_uint_t PitchC;
+	dml_uint_t DCCMetaPitchC;
+	dml_bool_t ViewportStationary;
+	dml_uint_t ViewportXStart;
+	dml_uint_t ViewportYStart;
+	dml_uint_t ViewportXStartC;
+	dml_uint_t ViewportYStartC;
+	dml_bool_t FORCE_ONE_ROW_FOR_FRAME;
+	dml_uint_t SwathHeightY;
+	dml_uint_t SwathHeightC;
+};
+
+struct Watermarks {
+	dml_float_t UrgentWatermark;
+	dml_float_t WritebackUrgentWatermark;
+	dml_float_t DRAMClockChangeWatermark;
+	dml_float_t FCLKChangeWatermark;
+	dml_float_t WritebackDRAMClockChangeWatermark;
+	dml_float_t WritebackFCLKChangeWatermark;
+	dml_float_t StutterExitWatermark;
+	dml_float_t StutterEnterPlusExitWatermark;
+	dml_float_t Z8StutterExitWatermark;
+	dml_float_t Z8StutterEnterPlusExitWatermark;
+	dml_float_t USRRetrainingWatermark;
+};
+
+struct SOCParametersList {
+	dml_float_t UrgentLatency;
+	dml_float_t ExtraLatency;
+	dml_float_t WritebackLatency;
+	dml_float_t DRAMClockChangeLatency;
+	dml_float_t FCLKChangeLatency;
+	dml_float_t SRExitTime;
+	dml_float_t SREnterPlusExitTime;
+	dml_float_t SRExitZ8Time;
+	dml_float_t SREnterPlusExitZ8Time;
+	dml_float_t USRRetrainingLatency;
+	dml_float_t SMNLatency;
+};
+
+/// @brief Struct that represents the plane configuration of a display cfg
+struct dml_plane_cfg_st {
+	//
+	// Pipe/Surface Parameters
+	//
+	dml_bool_t GPUVMEnable;

diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c
new file mode 100644
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c
+/*
+ * _log() approximates log2(in): the IEEE-754 exponent is extracted from
+ * the bit pattern, the mantissa is renormalized into [1, 2) and corrected
+ * with a quadratic polynomial.
+ */
+static inline float _log(float in)
+{
+	int *exp_ptr = (int *)(&in);
+	int x = *exp_ptr;
+	const int log_2 = ((x >> 23) & 255) - 128;
+
+	x &= ~(255 << 23);
+	x += 127 << 23;
+	*exp_ptr = x;
+
+	in = ((-1.0f / 3) * in + 2) * in - 2.0f / 3;
+
+	return (in + log_2);
+}
+
+dml_bool_t dml_util_is_420(enum dml_source_format_class source_format)
+{
+	dml_bool_t val = false;
+
+	switch (source_format) {
+	case dml_444_16:
+		val = 0;
+		break;
+	case dml_444_32:
+		val = 0;
+		break;
+	case dml_444_64:
+		val = 0;
+		break;
+	case dml_420_8:
+		val = 1;
+		break;
+	case dml_420_10:
+		val = 1;
+		break;
+	case dml_422_8:
+		val = 0;
+		break;
+	case dml_422_10:
+		val = 0;
+		break;
+	default:
+		ASSERT(0);
+		break;
+	}
+	return val;
+}
+
+/* Integer power by recursive squaring; exp is truncated toward zero. */
+static inline float dcn_bw_pow(float a, float exp)
+{
+	float temp;
+	/*ASSERT(exp == (int)exp);*/
+	if ((int)exp == 0)
+		return 1;
+	temp = dcn_bw_pow(a, (int)(exp / 2));
+	if (((int)exp % 2) == 0) {
+		return temp * temp;
+	} else {
+		if ((int)exp > 0)
+			return a * temp * temp;
+		else
+			return (temp * temp) / a;
+	}
+}
+
+static inline float dcn_bw_ceil2(const float arg, const float significance)
+{
+	ASSERT(significance != 0);
+
+	return ((int)(arg / significance + 0.99999)) * significance;
+}
+
+static inline float dcn_bw_floor2(const float arg, const float significance)
+{
+	ASSERT(significance != 0);
+
+	return ((int)(arg / significance)) * significance;
+}
+
+dml_float_t dml_ceil(dml_float_t x, dml_float_t granularity)
+{
+	if (granularity == 0)
+		return 0;
+	//return (dml_float_t) (ceil(x / granularity) * granularity);
+	return (dml_float_t)dcn_bw_ceil2(x, granularity);
+}
+
+dml_float_t dml_floor(dml_float_t x, dml_float_t granularity)
+{
+	if (granularity == 0)
+		return 0;
+	//return (dml_float_t) (floor(x / granularity) * granularity);
+	return (dml_float_t)dcn_bw_floor2(x, granularity);
+}
+
+dml_float_t dml_min(dml_float_t x, dml_float_t y)
+{
+	if (x != x)
+		return y;
+	if (y != y)
+		return x;
+	if (x < y)
+		return x;
+	else
+		return y;
+}
+
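+/*
+ * dml_min()/dml_max() and the 3/4/5-operand variants below are NaN-aware:
+ * NaN is the only value for which (x != x) is true, so a NaN argument is
+ * dropped and the other operand is returned instead of letting NaN
+ * propagate through the reduction.
+ */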
+dml_float_t dml_min3(dml_float_t x, dml_float_t y, dml_float_t z)
+{
+	return dml_min(dml_min(x, y), z);
+}
+
+dml_float_t dml_min4(dml_float_t x, dml_float_t y, dml_float_t z, dml_float_t w)
+{
+	return dml_min(dml_min(dml_min(x, y), z), w);
+}
+
+dml_float_t dml_max(dml_float_t x, dml_float_t y)
+{
+	if (x != x)
+		return y;
+	if (y != y)
+		return x;
+	if (x > y)
+		return x;
+	else
+		return y;
+}
+
+dml_float_t dml_max3(dml_float_t x, dml_float_t y, dml_float_t z)
+{
+	return dml_max(dml_max(x, y), z);
+}
+
+dml_float_t dml_max4(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d)
+{
+	return dml_max(dml_max(a, b), dml_max(c, d));
+}
+
+dml_float_t dml_max5(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d, dml_float_t e)
+{
+	return dml_max(dml_max4(a, b, c, d), e);
+}
+
+dml_float_t dml_log(dml_float_t x, dml_float_t base)
+{
+	return (dml_float_t) (_log(x) / _log(base));
+}
+
+dml_float_t dml_log2(dml_float_t x)
+{
+	return (dml_float_t) (_log(x) / _log(2));
+}
+
+/*
+ * Round-half-up rounding. Note that the bankers_rounding argument is
+ * currently unused; the lrint()-based variant is kept commented out
+ * below for reference.
+ */
+dml_float_t dml_round(dml_float_t val, dml_bool_t bankers_rounding)
+{
+// if (bankers_rounding)
+// return (dml_float_t) lrint(val);
+// else {
+// return round(val);
+	double round_pt = 0.5;
+	double ceil = dml_ceil(val, 1);
+	double floor = dml_floor(val, 1);
+
+	if (val - floor >= round_pt)
+		return ceil;
+	else
+		return floor;
+// }
+}
+
+dml_float_t dml_pow(dml_float_t base, int exp)
+{
+	return (dml_float_t) dcn_bw_pow(base, exp);
+}
+
+/* Round num to the nearest multiple, upward or downward; multiple == 0 leaves num unchanged. */
+dml_uint_t dml_round_to_multiple(dml_uint_t num, dml_uint_t multiple, dml_bool_t up)
+{
+	dml_uint_t remainder;
+
+	if (multiple == 0)
+		return num;
+
+	remainder = num % multiple;
+	if (remainder == 0)
+		return num;
+
+	if (up)
+		return (num + multiple - remainder);
+	else
+		return (num - remainder);
+}
+
+void dml_print_data_rq_regs_st(const dml_display_plane_rq_regs_st *rq_regs)
+{
+	dml_print("DML: ===================================== \n");
+	dml_print("DML: DISPLAY_PLANE_RQ_REGS_ST\n");
+	dml_print("DML: chunk_size = 0x%x\n", rq_regs->chunk_size);
+	dml_print("DML: min_chunk_size = 0x%x\n", rq_regs->min_chunk_size);
+	dml_print("DML: meta_chunk_size = 0x%x\n", rq_regs->meta_chunk_size);
+	dml_print("DML: min_meta_chunk_size = 0x%x\n", rq_regs->min_meta_chunk_size);
+	dml_print("DML: dpte_group_size = 0x%x\n", rq_regs->dpte_group_size);
+	dml_print("DML: mpte_group_size = 0x%x\n", rq_regs->mpte_group_size);
+	dml_print("DML: swath_height = 0x%x\n", rq_regs->swath_height);
+	dml_print("DML: pte_row_height_linear = 0x%x\n", rq_regs->pte_row_height_linear);
+	dml_print("DML: ===================================== \n");
+}
+
+void dml_print_rq_regs_st(const dml_display_rq_regs_st *rq_regs)
+{
+	dml_print("DML: ===================================== \n");
+	dml_print("DML: DISPLAY_RQ_REGS_ST\n");
+	dml_print("DML: \n");
+	dml_print_data_rq_regs_st(&rq_regs->rq_regs_l);
+	dml_print("DML: \n");
+	dml_print_data_rq_regs_st(&rq_regs->rq_regs_c);
+	dml_print("DML: drq_expansion_mode = 0x%x\n", rq_regs->drq_expansion_mode);
+	dml_print("DML: prq_expansion_mode = 0x%x\n", rq_regs->prq_expansion_mode);
+	dml_print("DML: mrq_expansion_mode = 0x%x\n", rq_regs->mrq_expansion_mode);
+	dml_print("DML: crq_expansion_mode = 0x%x\n", rq_regs->crq_expansion_mode);
+	dml_print("DML: plane1_base_address = 0x%x\n", rq_regs->plane1_base_address);
+	dml_print("DML: ===================================== \n");
+}
+
+void dml_print_dlg_regs_st(const dml_display_dlg_regs_st *dlg_regs)
+{
+	dml_print("DML: ===================================== \n");
+	dml_print("DML: DISPLAY_DLG_REGS_ST \n");
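+	/* All DLG fields below are raw register encodings, dumped in hex. */
+	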
dml_print("DML: refcyc_h_blank_end = 0x%x\n", dlg_regs->refcyc_h_blank_end); + dml_print("DML: dlg_vblank_end = 0x%x\n", dlg_regs->dlg_vblank_end); + dml_print("DML: min_dst_y_next_start = 0x%x\n", dlg_regs->min_dst_y_next_start); + dml_print("DML: refcyc_per_htotal = 0x%x\n", dlg_regs->refcyc_per_htotal); + dml_print("DML: refcyc_x_after_scaler = 0x%x\n", dlg_regs->refcyc_x_after_scaler); + dml_print("DML: dst_y_after_scaler = 0x%x\n", dlg_regs->dst_y_after_scaler); + dml_print("DML: dst_y_prefetch = 0x%x\n", dlg_regs->dst_y_prefetch); + dml_print("DML: dst_y_per_vm_vblank = 0x%x\n", dlg_regs->dst_y_per_vm_vblank); + dml_print("DML: dst_y_per_row_vblank = 0x%x\n", dlg_regs->dst_y_per_row_vblank); + dml_print("DML: dst_y_per_vm_flip = 0x%x\n", dlg_regs->dst_y_per_vm_flip); + dml_print("DML: dst_y_per_row_flip = 0x%x\n", dlg_regs->dst_y_per_row_flip); + dml_print("DML: ref_freq_to_pix_freq = 0x%x\n", dlg_regs->ref_freq_to_pix_freq); + dml_print("DML: vratio_prefetch = 0x%x\n", dlg_regs->vratio_prefetch); + dml_print("DML: vratio_prefetch_c = 0x%x\n", dlg_regs->vratio_prefetch_c); + dml_print("DML: refcyc_per_pte_group_vblank_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_vblank_l); + dml_print("DML: refcyc_per_pte_group_vblank_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_vblank_c); + dml_print("DML: refcyc_per_meta_chunk_vblank_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_vblank_l); + dml_print("DML: refcyc_per_meta_chunk_vblank_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_vblank_c); + dml_print("DML: refcyc_per_pte_group_flip_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_flip_l); + dml_print("DML: refcyc_per_pte_group_flip_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_flip_c); + dml_print("DML: refcyc_per_meta_chunk_flip_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_flip_l); + dml_print("DML: refcyc_per_meta_chunk_flip_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_flip_c); + dml_print("DML: dst_y_per_pte_row_nom_l = 0x%x\n", dlg_regs->dst_y_per_pte_row_nom_l); + dml_print("DML: dst_y_per_pte_row_nom_c = 0x%x\n", dlg_regs->dst_y_per_pte_row_nom_c); + dml_print("DML: refcyc_per_pte_group_nom_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_nom_l); + dml_print("DML: refcyc_per_pte_group_nom_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_nom_c); + dml_print("DML: dst_y_per_meta_row_nom_l = 0x%x\n", dlg_regs->dst_y_per_meta_row_nom_l); + dml_print("DML: dst_y_per_meta_row_nom_c = 0x%x\n", dlg_regs->dst_y_per_meta_row_nom_c); + dml_print("DML: refcyc_per_meta_chunk_nom_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_nom_l); + dml_print("DML: refcyc_per_meta_chunk_nom_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_nom_c); + dml_print("DML: refcyc_per_line_delivery_pre_l = 0x%x\n", dlg_regs->refcyc_per_line_delivery_pre_l); + dml_print("DML: refcyc_per_line_delivery_pre_c = 0x%x\n", dlg_regs->refcyc_per_line_delivery_pre_c); + dml_print("DML: refcyc_per_line_delivery_l = 0x%x\n", dlg_regs->refcyc_per_line_delivery_l); + dml_print("DML: refcyc_per_line_delivery_c = 0x%x\n", dlg_regs->refcyc_per_line_delivery_c); + dml_print("DML: refcyc_per_vm_group_vblank = 0x%x\n", dlg_regs->refcyc_per_vm_group_vblank); + dml_print("DML: refcyc_per_vm_group_flip = 0x%x\n", dlg_regs->refcyc_per_vm_group_flip); + dml_print("DML: refcyc_per_vm_req_vblank = 0x%x\n", dlg_regs->refcyc_per_vm_req_vblank); + dml_print("DML: refcyc_per_vm_req_flip = 0x%x\n", dlg_regs->refcyc_per_vm_req_flip); + dml_print("DML: chunk_hdl_adjust_cur0 = 0x%x\n", dlg_regs->chunk_hdl_adjust_cur0); + dml_print("DML: dst_y_offset_cur1 = 0x%x\n", 
dlg_regs->dst_y_offset_cur1); + dml_print("DML: chunk_hdl_adjust_cur1 = 0x%x\n", dlg_regs->chunk_hdl_adjust_cur1); + dml_print("DML: vready_after_vcount0 = 0x%x\n", dlg_regs->vready_after_vcount0); + dml_print("DML: dst_y_delta_drq_limit = 0x%x\n", dlg_regs->dst_y_delta_drq_limit); + dml_print("DML: refcyc_per_vm_dmdata = 0x%x\n", dlg_regs->refcyc_per_vm_dmdata); + dml_print("DML: ===================================== \n"); +} + +void dml_print_ttu_regs_st(const dml_display_ttu_regs_st *ttu_regs) +{ + dml_print("DML: ===================================== \n"); + dml_print("DML: DISPLAY_TTU_REGS_ST \n"); + dml_print("DML: qos_level_low_wm = 0x%x\n", ttu_regs->qos_level_low_wm); + dml_print("DML: qos_level_high_wm = 0x%x\n", ttu_regs->qos_level_high_wm); + dml_print("DML: min_ttu_vblank = 0x%x\n", ttu_regs->min_ttu_vblank); + dml_print("DML: qos_level_flip = 0x%x\n", ttu_regs->qos_level_flip); + dml_print("DML: refcyc_per_req_delivery_pre_l = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_l); + dml_print("DML: refcyc_per_req_delivery_l = 0x%x\n", ttu_regs->refcyc_per_req_delivery_l); + dml_print("DML: refcyc_per_req_delivery_pre_c = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_c); + dml_print("DML: refcyc_per_req_delivery_c = 0x%x\n", ttu_regs->refcyc_per_req_delivery_c); + dml_print("DML: refcyc_per_req_delivery_cur0 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_cur0); + dml_print("DML: refcyc_per_req_delivery_pre_cur0 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_cur0); + dml_print("DML: refcyc_per_req_delivery_cur1 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_cur1); + dml_print("DML: refcyc_per_req_delivery_pre_cur1 = 0x%x\n", ttu_regs->refcyc_per_req_delivery_pre_cur1); + dml_print("DML: qos_level_fixed_l = 0x%x\n", ttu_regs->qos_level_fixed_l); + dml_print("DML: qos_ramp_disable_l = 0x%x\n", ttu_regs->qos_ramp_disable_l); + dml_print("DML: qos_level_fixed_c = 0x%x\n", ttu_regs->qos_level_fixed_c); + dml_print("DML: qos_ramp_disable_c = 0x%x\n", ttu_regs->qos_ramp_disable_c); + dml_print("DML: qos_level_fixed_cur0 = 0x%x\n", ttu_regs->qos_level_fixed_cur0); + dml_print("DML: qos_ramp_disable_cur0 = 0x%x\n", ttu_regs->qos_ramp_disable_cur0); + dml_print("DML: qos_level_fixed_cur1 = 0x%x\n", ttu_regs->qos_level_fixed_cur1); + dml_print("DML: qos_ramp_disable_cur1 = 0x%x\n", ttu_regs->qos_ramp_disable_cur1); + dml_print("DML: ===================================== \n"); +} + +void dml_print_dml_policy(const struct dml_mode_eval_policy_st *policy) +{ + dml_print("DML: ===================================== \n"); + dml_print("DML: DML_MODE_EVAL_POLICY_ST\n"); + dml_print("DML: Policy: UseUnboundedRequesting = 0x%x\n", policy->UseUnboundedRequesting); + dml_print("DML: Policy: UseMinimumRequiredDCFCLK = 0x%x\n", policy->UseMinimumRequiredDCFCLK); + dml_print("DML: Policy: DRAMClockChangeRequirementFinal = 0x%x\n", policy->DRAMClockChangeRequirementFinal); + dml_print("DML: Policy: FCLKChangeRequirementFinal = 0x%x\n", policy->FCLKChangeRequirementFinal); + dml_print("DML: Policy: USRRetrainingRequiredFinal = 0x%x\n", policy->USRRetrainingRequiredFinal); + dml_print("DML: Policy: EnhancedPrefetchScheduleAccelerationFinal = 0x%x\n", policy->EnhancedPrefetchScheduleAccelerationFinal); + dml_print("DML: Policy: NomDETInKByteOverrideEnable = 0x%x\n", policy->NomDETInKByteOverrideEnable); + dml_print("DML: Policy: NomDETInKByteOverrideValue = 0x%x\n", policy->NomDETInKByteOverrideValue); + dml_print("DML: Policy: DCCProgrammingAssumesScanDirectionUnknownFinal = 0x%x\n", 
policy->DCCProgrammingAssumesScanDirectionUnknownFinal);
+	dml_print("DML: Policy: SynchronizeTimingsFinal = 0x%x\n", policy->SynchronizeTimingsFinal);
+	dml_print("DML: Policy: SynchronizeDRRDisplaysForUCLKPStateChangeFinal = 0x%x\n", policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal);
+	dml_print("DML: Policy: AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = 0x%x\n", policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported);
+	dml_print("DML: Policy: AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = 0x%x\n", policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported);
+
+	for (dml_uint_t i = 0; i < DCN_DML__NUM_PLANE; i++) {
+		dml_print("DML: i=%0d, Policy: MPCCombineUse = 0x%x\n", i, policy->MPCCombineUse[i]);
+		dml_print("DML: i=%0d, Policy: ODMUse = 0x%x\n", i, policy->ODMUse[i]);
+		dml_print("DML: i=%0d, Policy: ImmediateFlipRequirement = 0x%x\n", i, policy->ImmediateFlipRequirement[i]);
+		dml_print("DML: i=%0d, Policy: AllowForPStateChangeOrStutterInVBlank = 0x%x\n", i, policy->AllowForPStateChangeOrStutterInVBlank[i]);
+	}
+	dml_print("DML: ===================================== \n");
+}
+
+void dml_print_mode_support(struct display_mode_lib_st *mode_lib, dml_uint_t j)
+{
+	dml_print("DML: MODE SUPPORT: ===============================================\n");
+	dml_print("DML: MODE SUPPORT: Voltage State %d\n", j);
+	dml_print("DML: MODE SUPPORT: Mode Supported : %s\n", mode_lib->ms.support.ModeSupport[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Scale Ratio And Taps : %s\n", mode_lib->ms.support.ScaleRatioAndTapsSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Source Format Pixel And Scan : %s\n", mode_lib->ms.support.SourceFormatPixelAndScanSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Viewport Size : %s\n", mode_lib->ms.support.ViewportSizeSupport[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Link Rate Does Not Match DP Version : %s\n", mode_lib->ms.support.LinkRateDoesNotMatchDPVersion == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Link Rate For Multistream Not Indicated : %s\n", mode_lib->ms.support.LinkRateForMultistreamNotIndicated == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: BPP For Multi stream Not Indicated : %s\n", mode_lib->ms.support.BPPForMultistreamNotIndicated == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Multistream With HDMI Or eDP : %s\n", mode_lib->ms.support.MultistreamWithHDMIOreDP == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Exceeded Multistream Slots : %s\n", mode_lib->ms.support.ExceededMultistreamSlots == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: MSO Or ODM Split With Non DP Link : %s\n", mode_lib->ms.support.MSOOrODMSplitWithNonDPLink == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Not Enough Lanes For MSO : %s\n", mode_lib->ms.support.NotEnoughLanesForMSO == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: LinkCapacitySupport : %s\n", mode_lib->ms.support.LinkCapacitySupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: P2IWith420 : %s\n", mode_lib->ms.support.P2IWith420 == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: DSCOnlyIfNecessaryWithBPP : %s\n", mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: DSC422NativeNotSupported : %s\n", mode_lib->ms.support.DSC422NativeNotSupported == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: MPCCombineMethodIncompatible : %s\n", mode_lib->ms.support.MPCCombineMethodIncompatible == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: ODMCombineTwoToOneSupportCheckOK : %s\n", mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: ODMCombineFourToOneSupportCheckOK : %s\n", mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: NotEnoughDSCUnits : %s\n", mode_lib->ms.support.NotEnoughDSCUnits == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: NotEnoughDSCSlices : %s\n", mode_lib->ms.support.NotEnoughDSCSlices == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe : %s\n", mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: InvalidCombinationOfMALLUseForPStateAndStaticScreen : %s\n", mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: DSCCLKRequiredMoreThanSupported : %s\n", mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: PixelsPerLinePerDSCUnitSupport : %s\n", mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: DTBCLKRequiredMoreThanSupported : %s\n", mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: InvalidCombinationOfMALLUseForPState : %s\n", mode_lib->ms.support.InvalidCombinationOfMALLUseForPState == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified : %s\n", mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: ROB Support : %s\n", mode_lib->ms.support.ROBSupport[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: DISPCLK DPPCLK Support : %s\n", mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Total Available Pipes Support : %s\n", mode_lib->ms.support.TotalAvailablePipesSupport[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Number Of OTG Support : %s\n", mode_lib->ms.support.NumberOfOTGSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Number Of DP2p0 Support : %s\n", mode_lib->ms.support.NumberOfDP2p0Support == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Writeback Latency Support : %s\n", mode_lib->ms.support.WritebackLatencySupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Writeback Scale Ratio And Taps Support : %s\n", mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Cursor Support : %s\n", mode_lib->ms.support.CursorSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Pitch Support : %s\n", mode_lib->ms.support.PitchSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Viewport Exceeds Surface : %s\n", mode_lib->ms.support.ViewportExceedsSurface == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Prefetch Supported : %s\n", mode_lib->ms.support.PrefetchSupported[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: VActive Bandwidth Support : %s\n", mode_lib->ms.support.VActiveBandwithSupport[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Dynamic Metadata Supported : %s\n", mode_lib->ms.support.DynamicMetadataSupported[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Total Vertical Active Bandwidth Support : %s\n", mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: VRatio In Prefetch Supported : %s\n", mode_lib->ms.support.VRatioInPrefetchSupported[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: PTE Buffer Size Not Exceeded : %s\n", mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: DCC Meta Buffer Size Not Exceeded : %s\n", mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Non supported DSC Input BPC : %s\n", mode_lib->ms.support.NonsupportedDSCInputBPC == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Exceeded MALL Size : %s\n", mode_lib->ms.support.ExceededMALLSize == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Host VM or Immediate Flip Supported : %s\n", ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !mode_lib->scratch.dml_core_mode_support_locals.ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j]) ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: dram clock change support : %s\n", mode_lib->scratch.dml_core_mode_support_locals.dram_clock_change_support == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: f_clock change support : %s\n", mode_lib->scratch.dml_core_mode_support_locals.f_clock_change_support == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: USR Retraining Support : %s\n", (!mode_lib->ms.policy.USRRetrainingRequiredFinal || mode_lib->ms.support.USRRetrainingSupport[j]) ? 
"Supported" : "NOT Supported"); + dml_print("DML: MODE SUPPORT: ===============================================\n"); +} + +void dml_print_dml_mode_support_info(const struct dml_mode_support_info_st *support, dml_bool_t fail_only) +{ + dml_print("DML: ===================================== \n"); + dml_print("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ModeIsSupported == 0) + dml_print("DML: support: ModeIsSupported = 0x%x\n", support->ModeIsSupported); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml_print("DML: support: ImmediateFlipSupport = 0x%x\n", support->ImmediateFlipSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml_print("DML: support: WritebackLatencySupport = 0x%x\n", support->WritebackLatencySupport); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + dml_print("DML: support: ScaleRatioAndTapsSupport = 0x%x\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + dml_print("DML: support: SourceFormatPixelAndScanSupport = 0x%x\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->MPCCombineMethodIncompatible == 1) + dml_print("DML: support: MPCCombineMethodIncompatible = 0x%x\n", support->MPCCombineMethodIncompatible); + if (!fail_only || support->P2IWith420 == 1) + dml_print("DML: support: P2IWith420 = 0x%x\n", support->P2IWith420); + if (!fail_only || support->DSCOnlyIfNecessaryWithBPP == 1) + dml_print("DML: support: DSCOnlyIfNecessaryWithBPP = 0x%x\n", support->DSCOnlyIfNecessaryWithBPP); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml_print("DML: support: DSC422NativeNotSupported = 0x%x\n", support->DSC422NativeNotSupported); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + dml_print("DML: support: LinkRateDoesNotMatchDPVersion = 0x%x\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) + dml_print("DML: support: LinkRateForMultistreamNotIndicated = 0x%x\n", support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + dml_print("DML: support: BPPForMultistreamNotIndicated = 0x%x\n", support->BPPForMultistreamNotIndicated); + if (!fail_only || support->MultistreamWithHDMIOreDP == 1) + dml_print("DML: support: MultistreamWithHDMIOreDP = 0x%x\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + dml_print("DML: support: MSOOrODMSplitWithNonDPLink = 0x%x\n", support->MSOOrODMSplitWithNonDPLink); + if (!fail_only || support->NotEnoughLanesForMSO == 1) + dml_print("DML: support: NotEnoughLanesForMSO = 0x%x\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml_print("DML: support: NumberOfOTGSupport = 0x%x\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml_print("DML: support: NumberOfDP2p0Support = 0x%x\n", support->NumberOfDP2p0Support); + if (!fail_only || support->NonsupportedDSCInputBPC == 1) + dml_print("DML: support: NonsupportedDSCInputBPC = 0x%x\n", support->NonsupportedDSCInputBPC); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml_print("DML: support: WritebackScaleRatioAndTapsSupport = 0x%x\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->CursorSupport == 0) + dml_print("DML: support: CursorSupport = 0x%x\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + 
dml_print("DML: support: PitchSupport = 0x%x\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml_print("DML: support: ViewportExceedsSurface = 0x%x\n", support->ViewportExceedsSurface); + if (!fail_only || support->ExceededMALLSize == 1) + dml_print("DML: support: ExceededMALLSize = 0x%x\n", support->ExceededMALLSize); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml_print("DML: support: EnoughWritebackUnits = 0x%x\n", support->EnoughWritebackUnits); + if (!fail_only || support->ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified == 1) + dml_print("DML: support: ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = 0x%x\n", support->ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml_print("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = 0x%x\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml_print("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = 0x%x\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml_print("DML: support: InvalidCombinationOfMALLUseForPState = 0x%x\n", support->InvalidCombinationOfMALLUseForPState); + + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml_print("DML: support: ExceededMultistreamSlots = 0x%x\n", support->ExceededMultistreamSlots); + if (!fail_only || support->ODMCombineTwoToOneSupportCheckOK == 0) + dml_print("DML: support: ODMCombineTwoToOneSupportCheckOK = 0x%x\n", support->ODMCombineTwoToOneSupportCheckOK); + if (!fail_only || support->ODMCombineFourToOneSupportCheckOK == 0) + dml_print("DML: support: ODMCombineFourToOneSupportCheckOK = 0x%x\n", support->ODMCombineFourToOneSupportCheckOK); + if (!fail_only || support->NotEnoughDSCUnits == 1) + dml_print("DML: support: NotEnoughDSCUnits = 0x%x\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + dml_print("DML: support: NotEnoughDSCSlices = 0x%x\n", support->NotEnoughDSCSlices); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml_print("DML: support: PixelsPerLinePerDSCUnitSupport = 0x%x\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + dml_print("DML: support: DSCCLKRequiredMoreThanSupported = 0x%x\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + dml_print("DML: support: DTBCLKRequiredMoreThanSupported = 0x%x\n", support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->LinkCapacitySupport == 0) + dml_print("DML: support: LinkCapacitySupport = 0x%x\n", support->LinkCapacitySupport); + + for (dml_uint_t j = 0; j < 2; j++) { + if (!fail_only || support->DRAMClockChangeSupport[j] == dml_dram_clock_change_unsupported) + dml_print("DML: support: combine=%d, DRAMClockChangeSupport = %d\n", j, support->DRAMClockChangeSupport[j]); + if (!fail_only || support->FCLKChangeSupport[j] == dml_fclock_change_unsupported) + dml_print("DML: support: combine=%d, FCLKChangeSupport = %d\n", j, support->FCLKChangeSupport[j]); + if (!fail_only || support->ROBSupport[j] == 0) + dml_print("DML: support: combine=%d, ROBSupport = %d\n", j, support->ROBSupport[j]); + 
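+		/*
+		 * Per-state entries: with fail_only set, an entry is printed
+		 * only when it reports a failure for that index.
+		 */
+		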
if (!fail_only || support->PTEBufferSizeNotExceeded[j] == 0) + dml_print("DML: support: combine=%d, PTEBufferSizeNotExceeded = %d\n", j, support->PTEBufferSizeNotExceeded[j]); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded[j] == 0) + dml_print("DML: support: combine=%d, DCCMetaBufferSizeNotExceeded = %d\n", j, support->DCCMetaBufferSizeNotExceeded[j]); + if (!fail_only || support->TotalVerticalActiveBandwidthSupport[j] == 0) + dml_print("DML: support: combine=%d, TotalVerticalActiveBandwidthSupport = %d\n", j, support->TotalVerticalActiveBandwidthSupport[j]); + if (!fail_only || support->USRRetrainingSupport[j] == 0) + dml_print("DML: support: combine=%d, USRRetrainingSupport = %d\n", j, support->USRRetrainingSupport[j]); + if (!fail_only || support->VActiveBandwithSupport[j] == 0) + dml_print("DML: support: combine=%d, VActiveBandwithSupport = %d\n", j, support->VActiveBandwithSupport[j]); + if (!fail_only || support->PrefetchSupported[j] == 0) + dml_print("DML: support: combine=%d, PrefetchSupported = %d\n", j, support->PrefetchSupported[j]); + if (!fail_only || support->DynamicMetadataSupported[j] == 0) + dml_print("DML: support: combine=%d, DynamicMetadataSupported = %d\n", j, support->DynamicMetadataSupported[j]); + if (!fail_only || support->VRatioInPrefetchSupported[j] == 0) + dml_print("DML: support: combine=%d, VRatioInPrefetchSupported = %d\n", j, support->VRatioInPrefetchSupported[j]); + if (!fail_only || support->DISPCLK_DPPCLK_Support[j] == 0) + dml_print("DML: support: combine=%d, DISPCLK_DPPCLK_Support = %d\n", j, support->DISPCLK_DPPCLK_Support[j]); + if (!fail_only || support->TotalAvailablePipesSupport[j] == 0) + dml_print("DML: support: combine=%d, TotalAvailablePipesSupport = %d\n", j, support->TotalAvailablePipesSupport[j]); + if (!fail_only || support->ModeSupport[j] == 0) + dml_print("DML: support: combine=%d, ModeSupport = %d\n", j, support->ModeSupport[j]); + if (!fail_only || support->ViewportSizeSupport[j] == 0) + dml_print("DML: support: combine=%d, ViewportSizeSupport = %d\n", j, support->ViewportSizeSupport[j]); + if (!fail_only || support->ImmediateFlipSupportedForState[j] == 0) + dml_print("DML: support: combine=%d, ImmediateFlipSupportedForState = %d\n", j, support->ImmediateFlipSupportedForState[j]); + } +} + +void dml_print_dml_display_cfg_timing(const struct dml_timing_cfg_st *timing, dml_uint_t num_plane) +{ + for (dml_uint_t i = 0; i < num_plane; i++) { + dml_print("DML: timing_cfg: plane=%d, HTotal = %d\n", i, timing->HTotal[i]); + dml_print("DML: timing_cfg: plane=%d, VTotal = %d\n", i, timing->VTotal[i]); + dml_print("DML: timing_cfg: plane=%d, HActive = %d\n", i, timing->HActive[i]); + dml_print("DML: timing_cfg: plane=%d, VActive = %d\n", i, timing->VActive[i]); + dml_print("DML: timing_cfg: plane=%d, VFrontPorch = %d\n", i, timing->VFrontPorch[i]); + dml_print("DML: timing_cfg: plane=%d, VBlankNom = %d\n", i, timing->VBlankNom[i]); + dml_print("DML: timing_cfg: plane=%d, RefreshRate = %d\n", i, timing->RefreshRate[i]); + dml_print("DML: timing_cfg: plane=%d, PixelClock = %f\n", i, timing->PixelClock[i]); + dml_print("DML: timing_cfg: plane=%d, Interlace = %d\n", i, timing->Interlace[i]); + dml_print("DML: timing_cfg: plane=%d, DRRDisplay = %d\n", i, timing->DRRDisplay[i]); + } +} + +void dml_print_dml_display_cfg_plane(const struct dml_plane_cfg_st *plane, dml_uint_t num_plane) +{ + dml_print("DML: plane_cfg: num_plane = %d\n", num_plane); + dml_print("DML: plane_cfg: GPUVMEnable = %d\n", plane->GPUVMEnable); + dml_print("DML: 
plane_cfg: HostVMEnable = %d\n", plane->HostVMEnable); + dml_print("DML: plane_cfg: GPUVMMaxPageTableLevels = %d\n", plane->GPUVMMaxPageTableLevels); + dml_print("DML: plane_cfg: HostVMMaxPageTableLevels = %d\n", plane->HostVMMaxPageTableLevels); + + for (dml_uint_t i = 0; i < num_plane; i++) { + dml_print("DML: plane_cfg: plane=%d, GPUVMMinPageSizeKBytes = %d\n", i, plane->GPUVMMinPageSizeKBytes[i]); + dml_print("DML: plane_cfg: plane=%d, ForceOneRowForFrame = %d\n", i, plane->ForceOneRowForFrame[i]); + dml_print("DML: plane_cfg: plane=%d, PTEBufferModeOverrideEn = %d\n", i, plane->PTEBufferModeOverrideEn[i]); + dml_print("DML: plane_cfg: plane=%d, PTEBufferMode = %d\n", i, plane->PTEBufferMode[i]); + dml_print("DML: plane_cfg: plane=%d, DETSizeOverride = %d\n", i, plane->DETSizeOverride[i]); + dml_print("DML: plane_cfg: plane=%d, UseMALLForStaticScreen = %d\n", i, plane->UseMALLForStaticScreen[i]); + dml_print("DML: plane_cfg: plane=%d, UseMALLForPStateChange = %d\n", i, plane->UseMALLForPStateChange[i]); + dml_print("DML: plane_cfg: plane=%d, BlendingAndTiming = %d\n", i, plane->BlendingAndTiming[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportWidth = %d\n", i, plane->ViewportWidth[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportHeight = %d\n", i, plane->ViewportHeight[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportWidthChroma = %d\n", i, plane->ViewportWidthChroma[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportHeightChroma = %d\n", i, plane->ViewportHeightChroma[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportXStart = %d\n", i, plane->ViewportXStart[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportXStartC = %d\n", i, plane->ViewportXStartC[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportYStart = %d\n", i, plane->ViewportYStart[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportYStartC = %d\n", i, plane->ViewportYStartC[i]); + dml_print("DML: plane_cfg: plane=%d, ViewportStationary = %d\n", i, plane->ViewportStationary[i]); + dml_print("DML: plane_cfg: plane=%d, ScalerEnabled = %d\n", i, plane->ScalerEnabled[i]); + dml_print("DML: plane_cfg: plane=%d, HRatio = %3.2f\n", i, plane->HRatio[i]); + dml_print("DML: plane_cfg: plane=%d, VRatio = %3.2f\n", i, plane->VRatio[i]); + dml_print("DML: plane_cfg: plane=%d, HRatioChroma = %3.2f\n", i, plane->HRatioChroma[i]); + dml_print("DML: plane_cfg: plane=%d, VRatioChroma = %3.2f\n", i, plane->VRatioChroma[i]); + dml_print("DML: plane_cfg: plane=%d, HTaps = %d\n", i, plane->HTaps[i]); + dml_print("DML: plane_cfg: plane=%d, VTaps = %d\n", i, plane->VTaps[i]); + dml_print("DML: plane_cfg: plane=%d, HTapsChroma = %d\n", i, plane->HTapsChroma[i]); + dml_print("DML: plane_cfg: plane=%d, VTapsChroma = %d\n", i, plane->VTapsChroma[i]); + dml_print("DML: plane_cfg: plane=%d, LBBitPerPixel = %d\n", i, plane->LBBitPerPixel[i]); + dml_print("DML: plane_cfg: plane=%d, SourceScan = %d\n", i, plane->SourceScan[i]); + dml_print("DML: plane_cfg: plane=%d, ScalerRecoutWidth = %d\n", i, plane->ScalerRecoutWidth[i]); + dml_print("DML: plane_cfg: plane=%d, NumberOfCursors = %d\n", i, plane->NumberOfCursors[i]); + dml_print("DML: plane_cfg: plane=%d, CursorWidth = %d\n", i, plane->CursorWidth[i]); + dml_print("DML: plane_cfg: plane=%d, CursorBPP = %d\n", i, plane->CursorBPP[i]); + + dml_print("DML: plane_cfg: plane=%d, DynamicMetadataEnable = %d\n", i, plane->DynamicMetadataEnable[i]); + dml_print("DML: plane_cfg: plane=%d, DynamicMetadataLinesBeforeActiveRequired = %d\n", i, plane->DynamicMetadataLinesBeforeActiveRequired[i]); + 
dml_print("DML: plane_cfg: plane=%d, DynamicMetadataTransmittedBytes = %d\n", i, plane->DynamicMetadataTransmittedBytes[i]); + } +} + +void dml_print_dml_display_cfg_surface(const struct dml_surface_cfg_st *surface, dml_uint_t num_plane) +{ + for (dml_uint_t i = 0; i < num_plane; i++) { + dml_print("DML: surface_cfg: plane=%d, PitchY = %d\n", i, surface->PitchY[i]); + dml_print("DML: surface_cfg: plane=%d, SurfaceWidthY = %d\n", i, surface->SurfaceWidthY[i]); + dml_print("DML: surface_cfg: plane=%d, SurfaceHeightY = %d\n", i, surface->SurfaceHeightY[i]); + dml_print("DML: surface_cfg: plane=%d, PitchC = %d\n", i, surface->PitchC[i]); + dml_print("DML: surface_cfg: plane=%d, SurfaceWidthC = %d\n", i, surface->SurfaceWidthC[i]); + dml_print("DML: surface_cfg: plane=%d, SurfaceHeightC = %d\n", i, surface->SurfaceHeightC[i]); + dml_print("DML: surface_cfg: plane=%d, DCCEnable = %d\n", i, surface->DCCEnable[i]); + dml_print("DML: surface_cfg: plane=%d, DCCMetaPitchY = %d\n", i, surface->DCCMetaPitchY[i]); + dml_print("DML: surface_cfg: plane=%d, DCCMetaPitchC = %d\n", i, surface->DCCMetaPitchC[i]); + dml_print("DML: surface_cfg: plane=%d, DCCRateLuma = %f\n", i, surface->DCCRateLuma[i]); + dml_print("DML: surface_cfg: plane=%d, DCCRateChroma = %f\n", i, surface->DCCRateChroma[i]); + dml_print("DML: surface_cfg: plane=%d, DCCFractionOfZeroSizeRequestsLuma = %f\n", i, surface->DCCFractionOfZeroSizeRequestsLuma[i]); + dml_print("DML: surface_cfg: plane=%d, DCCFractionOfZeroSizeRequestsChroma= %f\n", i, surface->DCCFractionOfZeroSizeRequestsChroma[i]); + } +} + +void dml_print_dml_display_cfg_hw_resource(const struct dml_hw_resource_st *hw, dml_uint_t num_plane) +{ + for (dml_uint_t i = 0; i < num_plane; i++) { + dml_print("DML: hw_resource: plane=%d, ODMMode = %d\n", i, hw->ODMMode[i]); + dml_print("DML: hw_resource: plane=%d, DPPPerSurface = %d\n", i, hw->DPPPerSurface[i]); + dml_print("DML: hw_resource: plane=%d, DSCEnabled = %d\n", i, hw->DSCEnabled[i]); + dml_print("DML: hw_resource: plane=%d, NumberOfDSCSlices = %d\n", i, hw->NumberOfDSCSlices[i]); + } + dml_print("DML: hw_resource: DLGRefClkFreqMHz = %f\n", hw->DLGRefClkFreqMHz); +} + +__DML_DLL_EXPORT__ void dml_print_soc_state_bounding_box(const struct soc_state_bounding_box_st *state) +{ + dml_print("DML: state_bbox: socclk_mhz = %f\n", state->socclk_mhz); + dml_print("DML: state_bbox: dscclk_mhz = %f\n", state->dscclk_mhz); + dml_print("DML: state_bbox: phyclk_mhz = %f\n", state->phyclk_mhz); + dml_print("DML: state_bbox: phyclk_d18_mhz = %f\n", state->phyclk_d18_mhz); + dml_print("DML: state_bbox: phyclk_d32_mhz = %f\n", state->phyclk_d32_mhz); + dml_print("DML: state_bbox: dtbclk_mhz = %f\n", state->dtbclk_mhz); + dml_print("DML: state_bbox: dispclk_mhz = %f\n", state->dispclk_mhz); + dml_print("DML: state_bbox: dppclk_mhz = %f\n", state->dppclk_mhz); + dml_print("DML: state_bbox: fabricclk_mhz = %f\n", state->fabricclk_mhz); + dml_print("DML: state_bbox: dcfclk_mhz = %f\n", state->dcfclk_mhz); + dml_print("DML: state_bbox: dram_speed_mts = %f\n", state->dram_speed_mts); + dml_print("DML: state_bbox: urgent_latency_pixel_data_only_us = %f\n", state->urgent_latency_pixel_data_only_us); + dml_print("DML: state_bbox: urgent_latency_pixel_mixed_with_vm_data_us = %f\n", state->urgent_latency_pixel_mixed_with_vm_data_us); + dml_print("DML: state_bbox: urgent_latency_vm_data_only_us = %f\n", state->urgent_latency_vm_data_only_us); + dml_print("DML: state_bbox: writeback_latency_us = %f\n", state->writeback_latency_us); + dml_print("DML: 
state_bbox: urgent_latency_adjustment_fabric_clock_component_us = %f\n", state->urgent_latency_adjustment_fabric_clock_component_us);
+	dml_print("DML: state_bbox: urgent_latency_adjustment_fabric_clock_reference_mhz = %f\n", state->urgent_latency_adjustment_fabric_clock_reference_mhz);
+	dml_print("DML: state_bbox: sr_exit_time_us = %f\n", state->sr_exit_time_us);
+	dml_print("DML: state_bbox: sr_enter_plus_exit_time_us = %f\n", state->sr_enter_plus_exit_time_us);
+	dml_print("DML: state_bbox: sr_exit_z8_time_us = %f\n", state->sr_exit_z8_time_us);
+	dml_print("DML: state_bbox: sr_enter_plus_exit_z8_time_us = %f\n", state->sr_enter_plus_exit_z8_time_us);
+	dml_print("DML: state_bbox: dram_clock_change_latency_us = %f\n", state->dram_clock_change_latency_us);
+	dml_print("DML: state_bbox: fclk_change_latency_us = %f\n", state->fclk_change_latency_us);
+	dml_print("DML: state_bbox: usr_retraining_latency_us = %f\n", state->usr_retraining_latency_us);
+	dml_print("DML: state_bbox: use_ideal_dram_bw_strobe = %d\n", state->use_ideal_dram_bw_strobe);
+}
+
+__DML_DLL_EXPORT__ void dml_print_soc_bounding_box(const struct soc_bounding_box_st *soc)
+{
+	dml_print("DML: soc_bbox: dprefclk_mhz = %f\n", soc->dprefclk_mhz);
+	dml_print("DML: soc_bbox: xtalclk_mhz = %f\n", soc->xtalclk_mhz);
+	dml_print("DML: soc_bbox: pcierefclk_mhz = %f\n", soc->pcierefclk_mhz);
+	dml_print("DML: soc_bbox: refclk_mhz = %f\n", soc->refclk_mhz);
+	dml_print("DML: soc_bbox: amclk_mhz = %f\n", soc->amclk_mhz);
+
+	dml_print("DML: soc_bbox: max_outstanding_reqs = %d\n", soc->max_outstanding_reqs);
+	dml_print("DML: soc_bbox: pct_ideal_sdp_bw_after_urgent = %f\n", soc->pct_ideal_sdp_bw_after_urgent);
+	dml_print("DML: soc_bbox: pct_ideal_fabric_bw_after_urgent = %f\n", soc->pct_ideal_fabric_bw_after_urgent);
+	dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_pixel_only = %f\n", soc->pct_ideal_dram_bw_after_urgent_pixel_only);
+	dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_pixel_and_vm = %f\n", soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm);
+	dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_vm_only = %f\n", soc->pct_ideal_dram_bw_after_urgent_vm_only);
+	dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_strobe = %f\n", soc->pct_ideal_dram_bw_after_urgent_strobe);
+	dml_print("DML: soc_bbox: max_avg_sdp_bw_use_normal_percent = %f\n", soc->max_avg_sdp_bw_use_normal_percent);
+	dml_print("DML: soc_bbox: max_avg_fabric_bw_use_normal_percent = %f\n", soc->max_avg_fabric_bw_use_normal_percent);
+	dml_print("DML: soc_bbox: max_avg_dram_bw_use_normal_percent = %f\n", soc->max_avg_dram_bw_use_normal_percent);
+	dml_print("DML: soc_bbox: max_avg_dram_bw_use_normal_strobe_percent = %f\n", soc->max_avg_dram_bw_use_normal_strobe_percent);
+	dml_print("DML: soc_bbox: round_trip_ping_latency_dcfclk_cycles = %d\n", soc->round_trip_ping_latency_dcfclk_cycles);
+	dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_pixel_only_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_pixel_only_bytes);
+	dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes);
+	dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_vm_only_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_vm_only_bytes);
+	dml_print("DML: soc_bbox: num_chans = %d\n", soc->num_chans);
+	dml_print("DML: soc_bbox: return_bus_width_bytes = %d\n", soc->return_bus_width_bytes);
+	dml_print("DML: soc_bbox: 
dram_channel_width_bytes = %d\n", soc->dram_channel_width_bytes);
+	dml_print("DML: soc_bbox: fabric_datapath_to_dcn_data_return_bytes = %d\n", soc->fabric_datapath_to_dcn_data_return_bytes);
+	dml_print("DML: soc_bbox: hostvm_min_page_size_kbytes = %d\n", soc->hostvm_min_page_size_kbytes);
+	dml_print("DML: soc_bbox: gpuvm_min_page_size_kbytes = %d\n", soc->gpuvm_min_page_size_kbytes);
+	dml_print("DML: soc_bbox: phy_downspread_percent = %f\n", soc->phy_downspread_percent);
+	dml_print("DML: soc_bbox: dcn_downspread_percent = %f\n", soc->dcn_downspread_percent);
+	dml_print("DML: soc_bbox: smn_latency_us = %f\n", soc->smn_latency_us);
+	dml_print("DML: soc_bbox: mall_allocated_for_dcn_mbytes = %d\n", soc->mall_allocated_for_dcn_mbytes);
+	dml_print("DML: soc_bbox: dispclk_dppclk_vco_speed_mhz = %f\n", soc->dispclk_dppclk_vco_speed_mhz);
+	dml_print("DML: soc_bbox: do_urgent_latency_adjustment = %d\n", soc->do_urgent_latency_adjustment);
+}
+
+__DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg)
+{
+	dml_print("DML: clk_cfg: 0-use_required, 1-use pipe.clks_cfg, 2-use state bbox\n");
+	dml_print("DML: clk_cfg: dcfclk_option = %d\n", clk_cfg->dcfclk_option);
+	dml_print("DML: clk_cfg: dispclk_option = %d\n", clk_cfg->dispclk_option);
+
+	dml_print("DML: clk_cfg: dcfclk_mhz = %f\n", clk_cfg->dcfclk_mhz);
+	dml_print("DML: clk_cfg: dispclk_mhz = %f\n", clk_cfg->dispclk_mhz);
+
+	for (dml_uint_t i = 0; i < DCN_DML__NUM_PLANE; i++) {
+		dml_print("DML: clk_cfg: i=%d, dppclk_option = %d\n", i, clk_cfg->dppclk_option[i]);
+		dml_print("DML: clk_cfg: i=%d, dppclk_mhz = %f\n", i, clk_cfg->dppclk_mhz[i]);
+	}
+}
+
+dml_bool_t dml_is_vertical_rotation(enum dml_rotation_angle Scan)
+{
+	dml_bool_t is_vert = false;
+	if (Scan == dml_rotation_90 || Scan == dml_rotation_90m || Scan == dml_rotation_270 || Scan == dml_rotation_270m) {
+		is_vert = true;
+	} else {
+		is_vert = false;
+	}
+	return is_vert;
+} // dml_is_vertical_rotation
+
+dml_uint_t dml_get_cursor_bit_per_pixel(enum dml_cursor_bpp ebpp)
+{
+	switch (ebpp) {
+	case dml_cur_2bit:
+		return 2;
+	case dml_cur_32bit:
+		return 32;
+	case dml_cur_64bit:
+		return 64;
+	default:
+		return 0;
+	}
+}
+
+/// @brief Determine the number of active planes using the display_cfg
+dml_uint_t dml_get_num_active_planes(const struct dml_display_cfg_st *display_cfg)
+{
+	dml_uint_t num_active_planes = 0;
+
+	for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; k++) {
+		if (display_cfg->plane.ViewportWidth[k] > 0)
+			num_active_planes = num_active_planes + 1;
+	}
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: num_active_planes = %d\n", __func__, num_active_planes);
+#endif
+	return num_active_planes;
+}
+
+/// @brief Determine the number of active pipes using the display_cfg
+dml_uint_t dml_get_num_active_pipes(const struct dml_display_cfg_st *display_cfg)
+{
+	dml_uint_t num_active_pipes = 0;
+
+	for (dml_uint_t j = 0; j < dml_get_num_active_planes(display_cfg); j++) {
+		num_active_pipes = num_active_pipes + display_cfg->hw.DPPPerSurface[j];
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes);
+#endif
+	return num_active_pipes;
+}
+
+dml_uint_t dml_get_plane_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx)
+{
+	dml_uint_t plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
+	return plane_idx;
+}
+
+dml_uint_t dml_get_pipe_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t plane_idx)
+{
+	dml_uint_t pipe_idx = 0;
+	dml_bool_t pipe_found = 
0; + + ASSERT(plane_idx < __DML_NUM_PLANES__); + + for (dml_uint_t i = 0; i < __DML_NUM_PLANES__; i++) { + if (plane_idx == mode_lib->mp.pipe_plane[i]) { + pipe_idx = i; + pipe_found = 1; + break; + } + } + ASSERT(pipe_found != 0); + + return pipe_idx; +} + +void dml_calc_pipe_plane_mapping(const struct dml_hw_resource_st *hw, dml_uint_t *pipe_plane) +{ + dml_uint_t pipe_idx = 0; + + for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; ++k) { + pipe_plane[k] = __DML_PIPE_NO_PLANE__; + } + + for (dml_uint_t plane_idx = 0; plane_idx < __DML_NUM_PLANES__; plane_idx++) { + for (dml_uint_t i = 0; i < hw->DPPPerSurface[plane_idx]; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + + diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h new file mode 100644 index 000000000000..a82b49cf7fb0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DISPLAY_MODE_UTIL_H__ +#define __DISPLAY_MODE_UTIL_H__ + +#include "display_mode_core_structs.h" +#include "cmntypes.h" + +#include "dml_assert.h" +#include "dml_logging.h" + +__DML_DLL_EXPORT__ dml_bool_t dml_util_is_420(enum dml_source_format_class source_format); +__DML_DLL_EXPORT__ dml_float_t dml_ceil(dml_float_t x, dml_float_t granularity); +__DML_DLL_EXPORT__ dml_float_t dml_floor(dml_float_t x, dml_float_t granularity); +__DML_DLL_EXPORT__ dml_float_t dml_min(dml_float_t x, dml_float_t y); +__DML_DLL_EXPORT__ dml_float_t dml_min3(dml_float_t x, dml_float_t y, dml_float_t z); +__DML_DLL_EXPORT__ dml_float_t dml_min4(dml_float_t x, dml_float_t y, dml_float_t z, dml_float_t w); +__DML_DLL_EXPORT__ dml_float_t dml_max(dml_float_t x, dml_float_t y); +__DML_DLL_EXPORT__ dml_float_t dml_max3(dml_float_t x, dml_float_t y, dml_float_t z); +__DML_DLL_EXPORT__ dml_float_t dml_max4(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d); +__DML_DLL_EXPORT__ dml_float_t dml_max5(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d, dml_float_t e); +__DML_DLL_EXPORT__ dml_float_t dml_log(dml_float_t x, dml_float_t base); +__DML_DLL_EXPORT__ dml_float_t dml_log2(dml_float_t x); +__DML_DLL_EXPORT__ dml_float_t dml_round(dml_float_t val, dml_bool_t bankers_rounding); +__DML_DLL_EXPORT__ dml_float_t dml_pow(dml_float_t base, int exp); +__DML_DLL_EXPORT__ dml_uint_t dml_round_to_multiple(dml_uint_t num, dml_uint_t multiple, dml_bool_t up); +__DML_DLL_EXPORT__ dml_bool_t dml_is_vertical_rotation(enum dml_rotation_angle scan); +__DML_DLL_EXPORT__ dml_uint_t dml_get_cursor_bit_per_pixel(enum dml_cursor_bpp ebpp); +__DML_DLL_EXPORT__ void dml_print_data_rq_regs_st(const dml_display_plane_rq_regs_st *data_rq_regs); +__DML_DLL_EXPORT__ void dml_print_rq_regs_st(const dml_display_rq_regs_st *rq_regs); +__DML_DLL_EXPORT__ void dml_print_dlg_regs_st(const dml_display_dlg_regs_st *dlg_regs); +__DML_DLL_EXPORT__ void dml_print_ttu_regs_st(const dml_display_ttu_regs_st *ttu_regs); +__DML_DLL_EXPORT__ void dml_print_dml_policy(const struct dml_mode_eval_policy_st *policy); +__DML_DLL_EXPORT__ void dml_print_mode_support(struct display_mode_lib_st *mode_lib, dml_uint_t j); +__DML_DLL_EXPORT__ void dml_print_dml_mode_support_info(const struct dml_mode_support_info_st *support, dml_bool_t fail_only); +__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_timing(const struct dml_timing_cfg_st *timing, dml_uint_t num_plane); +__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_plane(const struct dml_plane_cfg_st *plane, dml_uint_t num_plane); +__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_surface(const struct dml_surface_cfg_st *surface, dml_uint_t num_plane); +__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_hw_resource(const struct dml_hw_resource_st *hw, dml_uint_t num_plane); +__DML_DLL_EXPORT__ void dml_print_soc_state_bounding_box(const struct soc_state_bounding_box_st *state); +__DML_DLL_EXPORT__ void dml_print_soc_bounding_box(const struct soc_bounding_box_st *soc); +__DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg); + +__DML_DLL_EXPORT__ dml_uint_t dml_get_num_active_planes(const struct dml_display_cfg_st *display_cfg); +__DML_DLL_EXPORT__ dml_uint_t dml_get_num_active_pipes(const struct dml_display_cfg_st *display_cfg); +__DML_DLL_EXPORT__ dml_uint_t dml_get_plane_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx); +__DML_DLL_EXPORT__ dml_uint_t dml_get_pipe_idx(const struct display_mode_lib_st *mode_lib, 
dml_uint_t plane_idx); +__DML_DLL_EXPORT__ void dml_calc_pipe_plane_mapping(const struct dml_hw_resource_st *hw, dml_uint_t *pipe_plane); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c new file mode 100644 index 000000000000..bf5e7f4e0416 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c @@ -0,0 +1,929 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml21_wrapper.h" +#include "dml2_core_dcn4_calcs.h" +#include "dml2_internal_shared_types.h" +#include "dml2_internal_types.h" +#include "dml21_utils.h" +#include "dml21_translation_helper.h" +#include "soc_and_ip_translator.h" + +static void dml21_populate_pmo_options(struct dml2_pmo_options *pmo_options, + const struct dc *in_dc, + const struct dml2_configuration_options *config) +{ + bool disable_fams2 = !in_dc->debug.fams2_config.bits.enable; + + /* ODM options */ + pmo_options->disable_dyn_odm = !config->minimize_dispclk_using_odm; + pmo_options->disable_dyn_odm_for_multi_stream = true; + pmo_options->disable_dyn_odm_for_stream_with_svp = true; + + pmo_options->disable_vblank = ((in_dc->debug.dml21_disable_pstate_method_mask >> 1) & 1); + + /* NOTE: DRR and SubVP Require FAMS2 */ + pmo_options->disable_svp = ((in_dc->debug.dml21_disable_pstate_method_mask >> 2) & 1) || + in_dc->debug.force_disable_subvp || + disable_fams2; + pmo_options->disable_drr_clamped = ((in_dc->debug.dml21_disable_pstate_method_mask >> 3) & 1) || + disable_fams2; + pmo_options->disable_drr_var = ((in_dc->debug.dml21_disable_pstate_method_mask >> 4) & 1) || + disable_fams2; + pmo_options->disable_fams2 = disable_fams2; + + pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE || + in_dc->debug.disable_fams_gaming == INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY; + pmo_options->disable_drr_clamped_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE; +} + +static enum dml2_project_id dml21_dcn_revision_to_dml2_project_id(enum dce_version dcn_version) +{ + enum dml2_project_id project_id; + switch (dcn_version) { + case DCN_VERSION_4_01: + project_id = dml2_project_dcn4x_stage2_auto_drr_svp; + break; + default: + project_id = dml2_project_invalid; + DC_ERR("unsupported dcn version for DML21!"); + break; + } + + return project_id; +} + +void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init, + const struct dml2_configuration_options *config, + const struct dc *in_dc) +{ + dml_init->options.project_id = dml21_dcn_revision_to_dml2_project_id(in_dc->ctx->dce_version); + + if (config->use_native_soc_bb_construction) { + in_dc->soc_and_ip_translator->translator_funcs->get_soc_bb(&dml_init->soc_bb, in_dc, config); + in_dc->soc_and_ip_translator->translator_funcs->get_ip_caps(&dml_init->ip_caps); + } else { + dml_init->soc_bb = config->external_socbb_ip_params->soc_bb; + dml_init->ip_caps = config->external_socbb_ip_params->ip_params; + } + + dml21_populate_pmo_options(&dml_init->options.pmo_options, in_dc, config); +} + +static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream) +{ + unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total; + + if (stream->ctx->dc->caps.vtotal_limited_by_fp2) { + max_hw_v_total -= stream->timing.v_front_porch + 1; + } + + return max_hw_v_total; +} + +static void 
populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing, + struct dc_stream_state *stream, + struct pipe_ctx *pipe_ctx, + struct dml2_context *dml_ctx) +{ + unsigned int hblank_start, vblank_start, min_hardware_refresh_in_uhz; + uint32_t pix_clk_100hz; + + timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding; + timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; + timing->h_front_porch = stream->timing.h_front_porch; + timing->v_front_porch = stream->timing.v_front_porch; + timing->pixel_clock_khz = stream->timing.pix_clk_100hz / 10; + if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) + timing->pixel_clock_khz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz / 10; + if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + timing->pixel_clock_khz *= 2; + timing->h_total = stream->timing.h_total + pipe_ctx->dsc_padding_params.dsc_htotal_padding; + timing->v_total = stream->timing.v_total; + timing->h_sync_width = stream->timing.h_sync_width; + timing->interlaced = stream->timing.flags.INTERLACE; + + hblank_start = stream->timing.h_total - stream->timing.h_front_porch; + + timing->h_blank_end = hblank_start - stream->timing.h_addressable - pipe_ctx->dsc_padding_params.dsc_hactive_padding + - stream->timing.h_border_left - stream->timing.h_border_right; + + if (hblank_start < stream->timing.h_addressable) + timing->h_blank_end = 0; + + vblank_start = stream->timing.v_total - stream->timing.v_front_porch; + + timing->v_blank_end = vblank_start - stream->timing.v_addressable + - stream->timing.v_border_top - stream->timing.v_border_bottom; + + timing->drr_config.enabled = stream->ignore_msa_timing_param; + timing->drr_config.drr_active_variable = stream->vrr_active_variable; + timing->drr_config.drr_active_fixed = stream->vrr_active_fixed; + timing->drr_config.disallowed = !stream->allow_freesync; + + /* limit min refresh rate to DC cap */ + min_hardware_refresh_in_uhz = stream->timing.min_refresh_in_uhz; + if (stream->ctx->dc->caps.max_v_total != 0) { + if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) { + pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz; + } else { + pix_clk_100hz = stream->timing.pix_clk_100hz; + } + min_hardware_refresh_in_uhz = div64_u64((pix_clk_100hz * 100000000ULL), + (timing->h_total * (long long)calc_max_hardware_v_total(stream))); + } + + timing->drr_config.min_refresh_uhz = max(stream->timing.min_refresh_in_uhz, min_hardware_refresh_in_uhz); + + if (dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase && + stream->ctx->dc->config.enable_fpo_flicker_detection == 1) + timing->drr_config.max_instant_vtotal_delta = dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase(stream, false); + else + timing->drr_config.max_instant_vtotal_delta = 0; + + if (stream->timing.flags.DSC) { + timing->dsc.enable = dml2_dsc_enable; + timing->dsc.overrides.num_slices = stream->timing.dsc_cfg.num_slices_h; + timing->dsc.dsc_compressed_bpp_x16 = stream->timing.dsc_cfg.bits_per_pixel; + } else + timing->dsc.enable = dml2_dsc_disable; + + switch (stream->timing.display_color_depth) { + case COLOR_DEPTH_666: + timing->bpc = 6; + break; + case COLOR_DEPTH_888: + timing->bpc = 8; + break; + case COLOR_DEPTH_101010: + timing->bpc = 10; + break; + case COLOR_DEPTH_121212: + timing->bpc = 12; + break; + case COLOR_DEPTH_141414: 
+ timing->bpc = 14; + break; + case COLOR_DEPTH_161616: + timing->bpc = 16; + break; + case COLOR_DEPTH_999: + timing->bpc = 9; + break; + case COLOR_DEPTH_111111: + timing->bpc = 11; + break; + default: + timing->bpc = 8; + break; + } + + timing->vblank_nom = timing->v_total - timing->v_active; +} + +static void populate_dml21_output_config_from_stream_state(struct dml2_link_output_cfg *output, + struct dc_stream_state *stream, const struct pipe_ctx *pipe) +{ + output->output_dp_lane_count = 4; + + switch (stream->signal) { + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_DISPLAY_PORT: + output->output_encoder = dml2_dp; + if (check_dp2p0_output_encoder(pipe)) + output->output_encoder = dml2_dp2p0; + break; + case SIGNAL_TYPE_EDP: + output->output_encoder = dml2_edp; + break; + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + output->output_encoder = dml2_hdmi; + break; + default: + output->output_encoder = dml2_dp; + } + + switch (stream->timing.pixel_encoding) { + case PIXEL_ENCODING_RGB: + case PIXEL_ENCODING_YCBCR444: + output->output_format = dml2_444; + break; + case PIXEL_ENCODING_YCBCR420: + output->output_format = dml2_420; + break; + case PIXEL_ENCODING_YCBCR422: + if (stream->timing.flags.DSC && !stream->timing.dsc_cfg.ycbcr422_simple) + output->output_format = dml2_n422; + else + output->output_format = dml2_s422; + break; + default: + output->output_format = dml2_444; + break; + } + + switch (stream->signal) { + case SIGNAL_TYPE_NONE: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_LVDS: + case SIGNAL_TYPE_RGB: + case SIGNAL_TYPE_DISPLAY_PORT: + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_EDP: + case SIGNAL_TYPE_VIRTUAL: + default: + output->output_dp_link_rate = dml2_dp_rate_na; + break; + } + + output->audio_sample_layout = stream->audio_info.modes->sample_size; + output->audio_sample_rate = stream->audio_info.modes->max_bit_rate; + output->output_disabled = true; + + //TODO: New to DML2.1. How do we populate this? 
+ // output->validate_output +} + +static void populate_dml21_stream_overrides_from_stream_state( + struct dml2_stream_parameters *stream_desc, + struct dc_stream_state *stream, + struct dc_stream_status *stream_status) +{ + switch (stream->debug.force_odm_combine_segments) { + case 0: + stream_desc->overrides.odm_mode = dml2_odm_mode_auto; + break; + case 1: + stream_desc->overrides.odm_mode = dml2_odm_mode_bypass; + break; + case 2: + stream_desc->overrides.odm_mode = dml2_odm_mode_combine_2to1; + break; + case 3: + stream_desc->overrides.odm_mode = dml2_odm_mode_combine_3to1; + break; + case 4: + stream_desc->overrides.odm_mode = dml2_odm_mode_combine_4to1; + break; + default: + stream_desc->overrides.odm_mode = dml2_odm_mode_auto; + break; + } + if (!stream->ctx->dc->debug.enable_single_display_2to1_odm_policy || + stream->debug.force_odm_combine_segments > 0) + stream_desc->overrides.disable_dynamic_odm = true; + stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp || + stream->hw_cursor_req || + stream_status->mall_stream_config.cursor_size_limit_subvp; +} + +static enum dml2_swizzle_mode gfx_addr3_to_dml2_swizzle_mode(enum swizzle_mode_addr3_values addr3_mode) +{ + enum dml2_swizzle_mode dml2_mode = dml2_sw_linear; + + switch (addr3_mode) { + case DC_ADDR3_SW_LINEAR: + dml2_mode = dml2_sw_linear; + break; + case DC_ADDR3_SW_256B_2D: + dml2_mode = dml2_sw_256b_2d; + break; + case DC_ADDR3_SW_4KB_2D: + dml2_mode = dml2_sw_4kb_2d; + break; + case DC_ADDR3_SW_64KB_2D: + dml2_mode = dml2_sw_64kb_2d; + break; + case DC_ADDR3_SW_256KB_2D: + dml2_mode = dml2_sw_256kb_2d; + break; + default: + /* invalid swizzle mode for DML2.1 */ + ASSERT(false); + dml2_mode = dml2_sw_linear; + } + + return dml2_mode; +} + +static enum dml2_swizzle_mode gfx9_to_dml2_swizzle_mode(enum swizzle_mode_values gfx9_mode) +{ + enum dml2_swizzle_mode dml2_mode = dml2_sw_64kb_2d; + + switch (gfx9_mode) { + case DC_SW_LINEAR: + dml2_mode = dml2_sw_linear; + break; + case DC_SW_256_D: + case DC_SW_256_R: + dml2_mode = dml2_sw_256b_2d; + break; + case DC_SW_4KB_D: + case DC_SW_4KB_R: + case DC_SW_4KB_R_X: + dml2_mode = dml2_sw_4kb_2d; + break; + case DC_SW_64KB_D: + case DC_SW_64KB_D_X: + case DC_SW_64KB_R: + case DC_SW_64KB_R_X: + dml2_mode = dml2_sw_64kb_2d; + break; + case DC_SW_256B_S: + case DC_SW_4KB_S: + case DC_SW_64KB_S: + case DC_SW_VAR_S: + case DC_SW_VAR_D: + case DC_SW_VAR_R: + case DC_SW_64KB_S_T: + case DC_SW_64KB_D_T: + case DC_SW_4KB_S_X: + case DC_SW_4KB_D_X: + case DC_SW_64KB_S_X: + case DC_SW_VAR_S_X: + case DC_SW_VAR_D_X: + case DC_SW_VAR_R_X: + default: + /* + * invalid swizzle mode for DML2.1. This could happen because + * DML21 is not intended to be used by N-1 in production. To + * properly filter out unsupported swizzle modes, we will need + * to fix capability reporting when DML2.1 is used for N-1 in + * dc. So DML will only receive DML21 supported swizzle modes. + * This implementation is not added and has a low value because + * the supported swizzle modes should already cover most of our + * N-1 test cases. 
+ */ + return dml2_sw_64kb_2d; + } + + return dml2_mode; +} + +static void populate_dml21_dummy_surface_cfg(struct dml2_surface_cfg *surface, const struct dc_stream_state *stream) +{ + surface->plane0.width = stream->timing.h_addressable; + surface->plane0.height = stream->timing.v_addressable; + surface->plane1.width = stream->timing.h_addressable; + surface->plane1.height = stream->timing.v_addressable; + surface->plane0.pitch = ((surface->plane0.width + 127) / 128) * 128; + surface->plane1.pitch = 0; + surface->dcc.enable = false; + surface->dcc.informative.dcc_rate_plane0 = 1.0; + surface->dcc.informative.dcc_rate_plane1 = 1.0; + surface->dcc.informative.fraction_of_zero_size_request_plane0 = 0; + surface->dcc.informative.fraction_of_zero_size_request_plane1 = 0; + surface->tiling = dml2_sw_64kb_2d; +} + +static void populate_dml21_dummy_plane_cfg(struct dml2_plane_parameters *plane, const struct dc_stream_state *stream) +{ + unsigned int width, height; + + if (stream->timing.h_addressable > 3840) + width = 3840; + else + width = stream->timing.h_addressable; // 4K max + + if (stream->timing.v_addressable > 2160) + height = 2160; + else + height = stream->timing.v_addressable; // 4K max + + plane->cursor.cursor_bpp = 32; + + plane->cursor.cursor_width = 256; + plane->cursor.num_cursors = 1; + + plane->composition.viewport.plane0.width = width; + plane->composition.viewport.plane0.height = height; + plane->composition.viewport.plane1.width = 0; + plane->composition.viewport.plane1.height = 0; + + plane->composition.viewport.stationary = false; + plane->composition.viewport.plane0.x_start = 0; + plane->composition.viewport.plane0.y_start = 0; + plane->composition.viewport.plane1.x_start = 0; + plane->composition.viewport.plane1.y_start = 0; + + plane->composition.scaler_info.enabled = false; + plane->composition.rotation_angle = dml2_rotation_0; + plane->composition.scaler_info.plane0.h_ratio = 1.0; + plane->composition.scaler_info.plane0.v_ratio = 1.0; + plane->composition.scaler_info.plane1.h_ratio = 0; + plane->composition.scaler_info.plane1.v_ratio = 0; + plane->composition.scaler_info.plane0.h_taps = 1; + plane->composition.scaler_info.plane0.v_taps = 1; + plane->composition.scaler_info.plane1.h_taps = 0; + plane->composition.scaler_info.plane1.v_taps = 0; + plane->composition.scaler_info.rect_out_width = width; + plane->pixel_format = dml2_444_32; + + plane->dynamic_meta_data.enable = false; + plane->overrides.gpuvm_min_page_size_kbytes = 256; +} + +static void populate_dml21_surface_config_from_plane_state( + const struct dc *in_dc, + struct dml2_surface_cfg *surface, + const struct dc_plane_state *plane_state) +{ + surface->plane0.pitch = plane_state->plane_size.surface_pitch; + surface->plane1.pitch = plane_state->plane_size.chroma_pitch; + surface->plane0.height = plane_state->plane_size.surface_size.height; + surface->plane0.width = plane_state->plane_size.surface_size.width; + surface->plane1.height = plane_state->plane_size.chroma_size.height; + surface->plane1.width = plane_state->plane_size.chroma_size.width; + surface->dcc.enable = plane_state->dcc.enable; + surface->dcc.informative.dcc_rate_plane0 = 1.0; + surface->dcc.informative.dcc_rate_plane1 = 1.0; + surface->dcc.informative.fraction_of_zero_size_request_plane0 = plane_state->dcc.independent_64b_blks; + surface->dcc.informative.fraction_of_zero_size_request_plane1 = plane_state->dcc.independent_64b_blks_c; + surface->dcc.plane0.pitch = plane_state->dcc.meta_pitch; + surface->dcc.plane1.pitch = 
plane_state->dcc.meta_pitch_c; + + // Update swizzle / array mode based on the gfx_format + switch (plane_state->tiling_info.gfxversion) { + case DcGfxVersion7: + case DcGfxVersion8: + break; + case DcGfxVersion9: + case DcGfxVersion10: + case DcGfxVersion11: + surface->tiling = gfx9_to_dml2_swizzle_mode(plane_state->tiling_info.gfx9.swizzle); + break; + case DcGfxAddr3: + surface->tiling = gfx_addr3_to_dml2_swizzle_mode(plane_state->tiling_info.gfx_addr3.swizzle); + break; + } +} + +static const struct scaler_data *get_scaler_data_for_plane( + struct dml2_context *dml_ctx, + const struct dc_plane_state *in, + const struct dc_state *context) +{ + int i; + struct pipe_ctx *temp_pipe = &dml_ctx->v21.scratch.temp_pipe; + + memset(temp_pipe, 0, sizeof(struct pipe_ctx)); + + for (i = 0; i < MAX_PIPES; i++) { + const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state == in && !pipe->prev_odm_pipe) { + temp_pipe->stream = pipe->stream; + temp_pipe->plane_state = pipe->plane_state; + temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; + temp_pipe->stream_res = pipe->stream_res; + temp_pipe->dsc_padding_params.dsc_hactive_padding = pipe->dsc_padding_params.dsc_hactive_padding; + temp_pipe->dsc_padding_params.dsc_htotal_padding = pipe->dsc_padding_params.dsc_htotal_padding; + temp_pipe->dsc_padding_params.dsc_pix_clk_100hz = pipe->dsc_padding_params.dsc_pix_clk_100hz; + dml_ctx->config.callbacks.build_scaling_params(temp_pipe); + break; + } + } + + ASSERT(i < MAX_PIPES); + return &temp_pipe->plane_res.scl_data; +} + +static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dml_ctx, + struct dml2_plane_parameters *plane, const struct dc_plane_state *plane_state, + const struct dc_state *context, unsigned int stream_index) +{ + const struct scaler_data *scaler_data = get_scaler_data_for_plane(dml_ctx, plane_state, context); + struct dc_stream_state *stream = context->streams[stream_index]; + + plane->cursor.cursor_bpp = 32; + plane->cursor.cursor_width = 256; + plane->cursor.num_cursors = 1; + + switch (plane_state->format) { + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + plane->pixel_format = dml2_420_8; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + plane->pixel_format = dml2_420_10; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + plane->pixel_format = dml2_444_64; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + plane->pixel_format = dml2_444_16; + break; + case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: + plane->pixel_format = dml2_444_8; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + plane->pixel_format = dml2_rgbe_alpha; + break; + default: + plane->pixel_format = dml2_444_32; + break; + } + + plane->composition.viewport.plane0.height = scaler_data->viewport.height; + plane->composition.viewport.plane0.width = scaler_data->viewport.width; + plane->composition.viewport.plane1.height = scaler_data->viewport_c.height; + plane->composition.viewport.plane1.width = scaler_data->viewport_c.width; + plane->composition.viewport.plane0.x_start = scaler_data->viewport.x; + plane->composition.viewport.plane0.y_start = scaler_data->viewport.y; + plane->composition.viewport.plane1.x_start = scaler_data->viewport_c.x; + 
plane->composition.viewport.plane1.y_start = scaler_data->viewport_c.y; + plane->composition.viewport.stationary = false; + plane->composition.scaler_info.enabled = scaler_data->ratios.horz.value != dc_fixpt_one.value || + scaler_data->ratios.horz_c.value != dc_fixpt_one.value || + scaler_data->ratios.vert.value != dc_fixpt_one.value || + scaler_data->ratios.vert_c.value != dc_fixpt_one.value; + + if (!scaler_data->taps.h_taps) { + /* Above logic determines scaling should be enabled even when there are no taps for + * certain cases. Hence take corrective action and disable scaling. + */ + plane->composition.scaler_info.enabled = false; + } else if ((plane_state->ctx->dc->config.use_spl == true) && + (plane->composition.scaler_info.enabled == false)) { + /* To enable sharpener for 1:1, scaler must be enabled. If use_spl is set, then + * allow case where ratio is 1 but taps > 1 + */ + if ((scaler_data->taps.h_taps > 1) || (scaler_data->taps.v_taps > 1) || + (scaler_data->taps.h_taps_c > 1) || (scaler_data->taps.v_taps_c > 1)) + plane->composition.scaler_info.enabled = true; + } + + /* always_scale is only used for debug purposes, not used in production, but has to be + * maintained for certain compliances. */ + if (plane_state->ctx->dc->debug.always_scale == true) { + plane->composition.scaler_info.enabled = true; + } + + if (plane->composition.scaler_info.enabled == false) { + plane->composition.scaler_info.plane0.h_ratio = 1.0; + plane->composition.scaler_info.plane0.v_ratio = 1.0; + plane->composition.scaler_info.plane1.h_ratio = 1.0; + plane->composition.scaler_info.plane1.v_ratio = 1.0; + } else { + plane->composition.scaler_info.plane0.h_ratio = (double)scaler_data->ratios.horz.value / (1ULL << 32); + plane->composition.scaler_info.plane0.v_ratio = (double)scaler_data->ratios.vert.value / (1ULL << 32); + plane->composition.scaler_info.plane1.h_ratio = (double)scaler_data->ratios.horz_c.value / (1ULL << 32); + plane->composition.scaler_info.plane1.v_ratio = (double)scaler_data->ratios.vert_c.value / (1ULL << 32); + } + + if (!scaler_data->taps.h_taps) { + plane->composition.scaler_info.plane0.h_taps = 1; + plane->composition.scaler_info.plane1.h_taps = 1; + } else { + plane->composition.scaler_info.plane0.h_taps = scaler_data->taps.h_taps; + plane->composition.scaler_info.plane1.h_taps = scaler_data->taps.h_taps_c; + } + if (!scaler_data->taps.v_taps) { + plane->composition.scaler_info.plane0.v_taps = 1; + plane->composition.scaler_info.plane1.v_taps = 1; + } else { + plane->composition.scaler_info.plane0.v_taps = scaler_data->taps.v_taps; + plane->composition.scaler_info.plane1.v_taps = scaler_data->taps.v_taps_c; + } + + plane->composition.viewport.stationary = false; + + if (plane_state->mcm_luts.lut3d_data.lut3d_src == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) { + plane->tdlut.setup_for_tdlut = true; + + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.layout) { + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR: + plane->tdlut.tdlut_addressing_mode = dml2_tdlut_sw_linear; + break; + case DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR: + plane->tdlut.tdlut_addressing_mode = dml2_tdlut_simple_linear; + break; + } + + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.size) { + case DC_CM2_GPU_MEM_SIZE_171717: + plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube; + break; + case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: + default: + //plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined + break; + } 
+ } + plane->tdlut.setup_for_tdlut |= dml_ctx->config.force_tdlut_enable; + + plane->dynamic_meta_data.enable = false; + plane->dynamic_meta_data.lines_before_active_required = 0; + plane->dynamic_meta_data.transmitted_bytes = 0; + + plane->composition.scaler_info.rect_out_width = plane_state->dst_rect.width; + plane->composition.rotation_angle = (enum dml2_rotation_angle) plane_state->rotation; + plane->stream_index = stream_index; + + plane->overrides.gpuvm_min_page_size_kbytes = 256; + + plane->immediate_flip = plane_state->flip_immediate; + + plane->composition.rect_out_height_spans_vactive = + plane_state->dst_rect.height >= stream->src.height && + stream->dst.height >= stream->timing.v_addressable; +} + +//TODO: Could possibly be moved to a common helper layer. +static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id) +{ + int i, j; + + if (!plane_id) + return false; + + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->stream_id == stream_id) { + for (j = 0; j < context->stream_status[i].plane_count; j++) { + if (context->stream_status[i].plane_states[j] == plane) { + *plane_id = (i << 16) | j; /* stream index in high 16 bits, plane index in low 16 */ + return true; + } + } + } + } + + return false; +} + +static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *dml_ctx, const struct dc_stream_state *stream) +{ + int i = 0; + int location = -1; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] && dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i] == stream->stream_id) { + location = i; + break; + } + } + + return location; +} + +unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, + const struct dc_plane_state *plane, const struct dc_state *context) +{ + unsigned int plane_id; + int i = 0; + int location = -1; + + if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &plane_id)) { + ASSERT(false); + return -1; + } + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[i] && dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i] == plane_id) { + location = i; + break; + } + } + + return location; +} + +static enum dml2_uclk_pstate_change_strategy dml21_force_pstate_method_to_uclk_state_change_strategy(enum dml2_force_pstate_methods force_pstate_method) +{ + enum dml2_uclk_pstate_change_strategy val = dml2_uclk_pstate_change_strategy_auto; + + switch (force_pstate_method) { + case dml2_force_pstate_method_vactive: + val = dml2_uclk_pstate_change_strategy_force_vactive; + break; + case dml2_force_pstate_method_vblank: + val = dml2_uclk_pstate_change_strategy_force_vblank; + break; + case dml2_force_pstate_method_drr: + val = dml2_uclk_pstate_change_strategy_force_drr; + break; + case dml2_force_pstate_method_subvp: + val = dml2_uclk_pstate_change_strategy_force_mall_svp; + break; + case dml2_force_pstate_method_auto: + default: + val = dml2_uclk_pstate_change_strategy_auto; + } + + return val; +} + +bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx) +{ + int stream_index, plane_index; + int disp_cfg_stream_location, disp_cfg_plane_location; + struct dml2_display_cfg *dml_dispcfg = &dml_ctx->v21.display_config; + unsigned int plane_count = 0; + + memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, 
sizeof(struct dml2_dml_to_dc_pipe_mapping)); + + dml_dispcfg->gpuvm_enable = dml_ctx->config.gpuvm_enable; + dml_dispcfg->gpuvm_max_page_table_levels = 4; + dml_dispcfg->hostvm_enable = false; + dml_dispcfg->minimize_det_reallocation = true; + dml_dispcfg->overrides.enable_subvp_implicit_pmo = true; + + if (in_dc->debug.disable_unbounded_requesting) { + dml_dispcfg->overrides.hw.force_unbounded_requesting.enable = true; + dml_dispcfg->overrides.hw.force_unbounded_requesting.value = false; + } + + for (stream_index = 0; stream_index < context->stream_count; stream_index++) { + disp_cfg_stream_location = map_stream_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]); + + if (disp_cfg_stream_location < 0) + disp_cfg_stream_location = dml_dispcfg->num_streams++; + + ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); + populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index], dml_ctx); + populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); + populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index], &context->stream_status[stream_index]); + + dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.fclk_pstate = dml2_twait_budgeting_setting_if_needed; + dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.uclk_pstate = dml2_twait_budgeting_setting_if_needed; + dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.stutter_enter_exit = dml2_twait_budgeting_setting_if_needed; + + dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_stream_location] = context->streams[stream_index]->stream_id; + dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_stream_location] = true; + + if (context->stream_status[stream_index].plane_count == 0) { + disp_cfg_plane_location = dml_dispcfg->num_planes++; + populate_dml21_dummy_surface_cfg(&dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->streams[stream_index]); + populate_dml21_dummy_plane_cfg(&dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->streams[stream_index]); + dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; + } else { + for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) { + disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], context); + + if (disp_cfg_plane_location < 0) + disp_cfg_plane_location = dml_dispcfg->num_planes++; + + ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); + + populate_dml21_surface_config_from_plane_state(in_dc, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->stream_status[stream_index].plane_states[plane_index]); + populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); + 
dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; + + if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) + dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; + + /* apply forced pstate policy */ + if (dml_ctx->config.pmo.force_pstate_method_enable) { + dml_dispcfg->plane_descriptors[disp_cfg_plane_location].overrides.uclk_pstate_change_strategy = + dml21_force_pstate_method_to_uclk_state_change_strategy(dml_ctx->config.pmo.force_pstate_method_values[stream_index]); + } + + plane_count++; + } + } + } + + if (plane_count == 0) { + dml_dispcfg->overrides.all_streams_blanked = true; + } + + return true; +} + +void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context) +{ + /* TODO: these should be the max of active and svp prefetch; idle should be tracked separately */ + context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dispclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.dcfclk_khz; + context->bw_ctx.bw.dcn.clk.dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.uclk_khz; + context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.fclk_khz; + context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.uclk_khz; + context->bw_ctx.bw.dcn.clk.idle_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.fclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.deepsleep_dcfclk_khz; + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = in_ctx->v21.mode_programming.programming->fclk_pstate_supported; + context->bw_ctx.bw.dcn.clk.p_state_change_support = in_ctx->v21.mode_programming.programming->uclk_pstate_supported; + context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0; + context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz; + context->bw_ctx.bw.dcn.clk.socclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.socclk_khz; + context->bw_ctx.bw.dcn.clk.subvp_prefetch_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz; + context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz; + context->bw_ctx.bw.dcn.clk.stutter_efficiency.base_efficiency = in_ctx->v21.mode_programming.programming->stutter.base_percent_efficiency; + context->bw_ctx.bw.dcn.clk.stutter_efficiency.low_power_efficiency = in_ctx->v21.mode_programming.programming->stutter.low_power_percent_efficiency; +} + +static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_watermark_set *watermarks, const enum dml2_dchub_watermark_reg_set_index wm_index) +{ + struct dml2_dchub_watermark_regs *wm_regs = NULL; + + switch (wm_index) { + case DML2_DCHUB_WATERMARK_SET_A: + wm_regs = &watermarks->dcn4x.a; + break; + case DML2_DCHUB_WATERMARK_SET_B: + wm_regs = &watermarks->dcn4x.b; + break; + case DML2_DCHUB_WATERMARK_SET_C: + 
wm_regs = &watermarks->dcn4x.c; + break; + case DML2_DCHUB_WATERMARK_SET_D: + wm_regs = &watermarks->dcn4x.d; + break; + case DML2_DCHUB_WATERMARK_SET_NUM: + default: + /* invalid wm set index */ + wm_regs = NULL; + } + + return wm_regs; +} + +void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx) +{ + const struct dml2_display_cfg_programming *programming = in_ctx->v21.mode_programming.programming; + + unsigned int wm_index; + + /* copy watermark sets from DML */ + for (wm_index = 0; wm_index < programming->global_regs.num_watermark_sets; wm_index++) { + struct dml2_dchub_watermark_regs *wm_regs = wm_set_index_to_dc_wm_set(watermarks, wm_index); + + if (wm_regs) + memcpy(wm_regs, + &programming->global_regs.wm_regs[wm_index], + sizeof(struct dml2_dchub_watermark_regs)); + } +} + +void dml21_map_hw_resources(struct dml2_context *dml_ctx) +{ + unsigned int i = 0; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[i] = dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i]; + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] = true; + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[i] = dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i]; + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] = true; + } + +} + +void dml21_get_pipe_mcache_config( + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dml2_per_plane_programming *pln_prog, + struct dml2_pipe_configuration_descriptor *mcache_pipe_config) +{ + mcache_pipe_config->plane0.viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x; + mcache_pipe_config->plane0.viewport_width = pipe_ctx->plane_res.scl_data.viewport.width; + + mcache_pipe_config->plane1.viewport_x_start = pipe_ctx->plane_res.scl_data.viewport_c.x; + mcache_pipe_config->plane1.viewport_width = pipe_ctx->plane_res.scl_data.viewport_c.width; + + mcache_pipe_config->plane1_enabled = + dml21_is_plane1_enabled(pln_prog->plane_descriptor->pixel_format); +} + +void dml21_set_dc_p_state_type( + struct pipe_ctx *pipe_ctx, + struct dml2_per_stream_programming *stream_programming, + bool sub_vp_enabled) +{ + switch (stream_programming->uclk_pstate_method) { + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: + pipe_ctx->p_state_type = P_STATE_V_ACTIVE; + break; + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: + if (sub_vp_enabled) + pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP; + else + pipe_ctx->p_state_type = P_STATE_V_BLANK; + break; + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: + pipe_ctx->p_state_type = P_STATE_SUB_VP; + break; + case dml2_pstate_method_fw_drr: + if (sub_vp_enabled) + pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP; + else + pipe_ctx->p_state_type = P_STATE_FPO; + break; + default: + pipe_ctx->p_state_type = P_STATE_UNKNOWN; + break; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h new file mode 100644 index 000000000000..9880d3e0398e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
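+// Helpers that translate DC stream/plane state into DML2.1 display configuration and copy DML mode-programming results (clocks, watermarks) back into DC state.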
+ + +#ifndef _DML21_TRANSLATION_HELPER_H_ +#define _DML21_TRANSLATION_HELPER_H_ + +struct dc; +struct dc_state; +struct dcn_watermarks; +union dcn_watermark_set; +struct pipe_ctx; +struct dc_plane_state; + +struct dml2_context; +struct dml2_configuration_options; +struct dml2_initialize_instance_in_out; + +void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc); +bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); +void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context); +void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx); +void dml21_map_hw_resources(struct dml2_context *dml_ctx); +void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config); +void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming, bool sub_vp_enabled); +unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context); +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c new file mode 100644 index 000000000000..ee721606b883 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c @@ -0,0 +1,516 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_internal_shared_types.h" +#include "dml21_translation_helper.h" +#include "dml2_internal_types.h" +#include "dml21_utils.h" +#include "dml2_dc_resource_mgmt.h" + +#include "dml2_core_dcn4_calcs.h" + +int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id) +{ + int i; + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] && ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[i] == stream_id) + return i; + } + + return -1; +} + +int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id) +{ + int i; + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] && ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[i] == plane_id) + return i; + } + + return -1; +} + +bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id) +{ + int i, j; + + if (!plane_id) + return false; + + for (i = 0; i < state->stream_count; i++) { + for (j = 0; j < state->stream_status[i].plane_count; j++) { + if (state->stream_status[i].plane_states[j] == plane) { + *plane_id = (i << 16) | j; + return true; + } + } + } + + return false; +} + +unsigned int dml21_get_dc_plane_idx_from_plane_id(unsigned int plane_id) +{ + return 0xffff & plane_id; +} + +void find_valid_pipe_idx_for_stream_index(const struct dml2_context *dml_ctx, unsigned int *dml_pipe_idx, unsigned int stream_index) +{ + unsigned int i = 0; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml_ctx->v21.mode_programming.programming->plane_programming[i].plane_descriptor->stream_index == stream_index) { + 
*dml_pipe_idx = i; + return; + } + } +} + +void find_pipe_regs_idx(const struct dml2_context *dml_ctx, + struct pipe_ctx *pipe, unsigned int *pipe_regs_idx) +{ + struct pipe_ctx *opp_head = dml_ctx->config.callbacks.get_opp_head(pipe); + + *pipe_regs_idx = dml_ctx->config.callbacks.get_odm_slice_index(opp_head); + + if (pipe->plane_state) + *pipe_regs_idx += dml_ctx->config.callbacks.get_mpc_slice_index(pipe); +} + +/* places pipe references into pipes arrays and returns number of pipes */ +int dml21_find_dc_pipes_for_plane(const struct dc *in_dc, + struct dc_state *context, + struct dml2_context *dml_ctx, + struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], + struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], + int dml_plane_idx) +{ + unsigned int dml_stream_index; + unsigned int main_stream_id; + unsigned int dc_plane_index; + struct dc_stream_state *dc_main_stream; + struct dc_stream_status *dc_main_stream_status; + struct dc_plane_state *dc_main_plane; + struct dc_stream_state *dc_phantom_stream; + struct dc_stream_status *dc_phantom_stream_status; + struct dc_plane_state *dc_phantom_plane; + int num_pipes = 0; + + memset(dc_main_pipes, 0, sizeof(struct pipe_ctx *) * __DML2_WRAPPER_MAX_STREAMS_PLANES__); + memset(dc_phantom_pipes, 0, sizeof(struct pipe_ctx *) * __DML2_WRAPPER_MAX_STREAMS_PLANES__); + + dml_stream_index = dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_idx].plane_descriptor->stream_index; + main_stream_id = dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_stream_index]; + + dc_main_stream = dml_ctx->config.callbacks.get_stream_from_id(context, main_stream_id); + dc_main_stream_status = dml_ctx->config.callbacks.get_stream_status(context, dc_main_stream); + if (!dc_main_stream_status) + return num_pipes; + + /* find main plane based on id */ + dc_plane_index = dml21_get_dc_plane_idx_from_plane_id(dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[dml_plane_idx]); + dc_main_plane = dc_main_stream_status->plane_states[dc_plane_index]; + + if (dc_main_plane) { + num_pipes = dml_ctx->config.callbacks.get_dpp_pipes_for_plane(dc_main_plane, &context->res_ctx, dc_main_pipes); + } else { + /* stream was configured with dummy plane, so get pipes from opp head */ + struct pipe_ctx *otg_master_pipe = dml_ctx->config.callbacks.get_otg_master_for_stream(&context->res_ctx, dc_main_stream); + if (otg_master_pipe != NULL) + num_pipes = dml_ctx->config.callbacks.get_opp_heads_for_otg_master(otg_master_pipe, &context->res_ctx, dc_main_pipes); + } + + /* if phantom exists, find associated pipes */ + dc_phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, dc_main_stream); + if (dc_phantom_stream && num_pipes > 0) { + dc_phantom_stream_status = dml_ctx->config.callbacks.get_stream_status(context, dc_phantom_stream); + + if (dc_phantom_stream_status) { + /* phantom plane will have same index as main */ + dc_phantom_plane = dc_phantom_stream_status->plane_states[dc_plane_index]; + + if (dc_phantom_plane) { + /* only care about phantom pipes if they contain the phantom plane */ + dml_ctx->config.callbacks.get_dpp_pipes_for_plane(dc_phantom_plane, &context->res_ctx, dc_phantom_pipes); + } + } + } + + return num_pipes; +} + +void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx, + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dml2_per_stream_programming *stream_programming) +{ + union dml2_global_sync_programming *global_sync = 
&stream_programming->global_sync; + + if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { + /* phantom has its own global sync */ + global_sync = &stream_programming->phantom_stream.global_sync; + } + + memcpy(&pipe_ctx->global_sync, + global_sync, + sizeof(union dml2_global_sync_programming)); +} + +void dml21_populate_mall_allocation_size(struct dc_state *context, + struct dml2_context *in_ctx, + struct dml2_per_plane_programming *pln_prog, + struct pipe_ctx *dc_pipe) +{ + + /* Reuse MALL Allocation Sizes logic from dcn32_fpu.c */ + /* Count from active, top pipes per plane only. Only add mall_ss_size_bytes for each unique plane. */ + if (dc_pipe->stream && dc_pipe->plane_state && + (dc_pipe->top_pipe == NULL || + dc_pipe->plane_state != dc_pipe->top_pipe->plane_state) && + dc_pipe->prev_odm_pipe == NULL) { + /* SS: all active surfaces stored in MALL */ + if (in_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, dc_pipe) != SUBVP_PHANTOM) { + dc_pipe->surface_size_in_mall_bytes = pln_prog->surface_size_mall_bytes; + context->bw_ctx.bw.dcn.mall_ss_size_bytes += dc_pipe->surface_size_in_mall_bytes; + } else { + /* SUBVP: phantom surfaces only stored in MALL */ + dc_pipe->surface_size_in_mall_bytes = pln_prog->svp_size_mall_bytes; + context->bw_ctx.bw.dcn.mall_subvp_size_bytes += dc_pipe->surface_size_in_mall_bytes; + } + } +} + +bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) +{ + /* If this assert is hit then we have a link encoder dynamic management issue */ + ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); + return (pipe_ctx->stream_res.hpo_dp_stream_enc && + pipe_ctx->link_res.hpo_dp_link_enc && + dc_is_dp_signal(pipe_ctx->stream->signal)); +} + + +static bool is_sub_vp_enabled(struct dc *dc, struct dc_state *context) +{ + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && dc_state_get_paired_subvp_stream(context, pipe_ctx->stream) && + dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { + return true; + } + } + return false; +} + + +void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, + struct dml2_per_stream_programming *stream_prog) +{ + unsigned int pipe_reg_index = 0; + + dml21_pipe_populate_global_sync(dml_ctx, context, pipe_ctx, stream_prog); + find_pipe_regs_idx(dml_ctx, pipe_ctx, &pipe_reg_index); + + if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { + memcpy(&pipe_ctx->hubp_regs, pln_prog->phantom_plane.pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set)); + pipe_ctx->unbounded_req = false; + pipe_ctx->det_buffer_size_kb = 0; + } else { + memcpy(&pipe_ctx->hubp_regs, pln_prog->pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set)); + pipe_ctx->unbounded_req = pln_prog->pipe_regs[pipe_reg_index]->rq_regs.unbounded_request_enabled; + pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64; + } + + pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4x.dppclk_khz; + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipe_ctx->plane_res.bw.dppclk_khz) + context->bw_ctx.bw.dcn.clk.dppclk_khz = pipe_ctx->plane_res.bw.dppclk_khz; + + dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx); + + bool 
sub_vp_enabled = is_sub_vp_enabled(pipe_ctx->stream->ctx->dc, context); + + dml21_set_dc_p_state_type(pipe_ctx, stream_prog, sub_vp_enabled); +} + +static struct dc_stream_state *dml21_add_phantom_stream(struct dml2_context *dml_ctx, + const struct dc *dc, + struct dc_state *context, + struct dc_stream_state *main_stream, + struct dml2_per_stream_programming *stream_programming) +{ + struct dc_stream_state *phantom_stream; + struct dml2_stream_parameters *phantom_stream_descriptor = &stream_programming->phantom_stream.descriptor; + + phantom_stream = dml_ctx->config.svp_pstate.callbacks.create_phantom_stream(dc, context, main_stream); + if (!phantom_stream) + return NULL; + + /* copy details of phantom stream from main */ + memcpy(&phantom_stream->timing, &main_stream->timing, sizeof(phantom_stream->timing)); + memcpy(&phantom_stream->src, &main_stream->src, sizeof(phantom_stream->src)); + memcpy(&phantom_stream->dst, &main_stream->dst, sizeof(phantom_stream->dst)); + + /* modify timing for phantom */ + phantom_stream->timing.v_front_porch = phantom_stream_descriptor->timing.v_front_porch; + phantom_stream->timing.v_addressable = phantom_stream_descriptor->timing.v_active; + phantom_stream->timing.v_total = phantom_stream_descriptor->timing.v_total; + phantom_stream->timing.flags.DSC = 0; // phantom always has DSC disabled + + phantom_stream->dst.y = 0; + phantom_stream->dst.height = stream_programming->phantom_stream.descriptor.timing.v_active; + + phantom_stream->src.y = 0; + phantom_stream->src.height = (double)phantom_stream_descriptor->timing.v_active * (double)main_stream->src.height / (double)main_stream->dst.height; + + phantom_stream->use_dynamic_meta = false; + + dml_ctx->config.svp_pstate.callbacks.add_phantom_stream(dc, context, phantom_stream, main_stream); + + return phantom_stream; +} + +static struct dc_plane_state *dml21_add_phantom_plane(struct dml2_context *dml_ctx, + const struct dc *dc, + struct dc_state *context, + struct dc_stream_state *phantom_stream, + struct dc_plane_state *main_plane, + struct dml2_per_plane_programming *plane_programming) +{ + struct dc_plane_state *phantom_plane; + + phantom_plane = dml_ctx->config.svp_pstate.callbacks.create_phantom_plane(dc, context, main_plane); + if (!phantom_plane) + return NULL; + + phantom_plane->format = main_plane->format; + phantom_plane->rotation = main_plane->rotation; + phantom_plane->visible = main_plane->visible; + + memcpy(&phantom_plane->address, &main_plane->address, sizeof(phantom_plane->address)); + memcpy(&phantom_plane->scaling_quality, &main_plane->scaling_quality, + sizeof(phantom_plane->scaling_quality)); + memcpy(&phantom_plane->src_rect, &main_plane->src_rect, sizeof(phantom_plane->src_rect)); + memcpy(&phantom_plane->dst_rect, &main_plane->dst_rect, sizeof(phantom_plane->dst_rect)); + memcpy(&phantom_plane->clip_rect, &main_plane->clip_rect, sizeof(phantom_plane->clip_rect)); + memcpy(&phantom_plane->plane_size, &main_plane->plane_size, + sizeof(phantom_plane->plane_size)); + memcpy(&phantom_plane->tiling_info, &main_plane->tiling_info, + sizeof(phantom_plane->tiling_info)); + memcpy(&phantom_plane->dcc, &main_plane->dcc, sizeof(phantom_plane->dcc)); + + phantom_plane->format = main_plane->format; + phantom_plane->rotation = main_plane->rotation; + phantom_plane->visible = main_plane->visible; + + /* Shadow pipe has small viewport. 
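+ * The phantom plane is clipped below to the phantom stream's source height, + * so only the lines SubVP actually prefetches are fetched.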
*/ + phantom_plane->clip_rect.y = 0; + phantom_plane->clip_rect.height = phantom_stream->src.height; + + dml_ctx->config.svp_pstate.callbacks.add_phantom_plane(dc, phantom_stream, phantom_plane, context); + + return phantom_plane; +} + +void dml21_handle_phantom_streams_planes(const struct dc *dc, struct dc_state *context, struct dml2_context *dml_ctx) +{ + unsigned int dml_stream_index, dml_plane_index, dc_plane_index; + struct dc_stream_state *main_stream; + struct dc_stream_status *main_stream_status; + struct dc_stream_state *phantom_stream; + struct dc_plane_state *main_plane; + bool phantoms_added = false; + + /* create phantom streams and planes and add to context */ + for (dml_stream_index = 0; dml_stream_index < dml_ctx->v21.mode_programming.programming->display_config.num_streams; dml_stream_index++) { + /* iterate through DML streams looking for phantoms */ + if (dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_index].phantom_stream.enabled) { + /* find associated dc stream */ + main_stream = dml_ctx->config.callbacks.get_stream_from_id(context, + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_stream_index]); + + main_stream_status = dml_ctx->config.callbacks.get_stream_status(context, main_stream); + + if (!main_stream_status || main_stream_status->plane_count == 0) + continue; + + /* create phantom stream for subvp enabled stream */ + phantom_stream = dml21_add_phantom_stream(dml_ctx, + dc, + context, + main_stream, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_index]); + + if (!phantom_stream) + continue; + + /* iterate through DML planes associated with this stream */ + for (dml_plane_index = 0; dml_plane_index < dml_ctx->v21.mode_programming.programming->display_config.num_planes; dml_plane_index++) { + if (dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_index].plane_descriptor->stream_index == dml_stream_index) { + /* find associated dc plane */ + dc_plane_index = dml21_get_dc_plane_idx_from_plane_id(dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[dml_plane_index]); + main_plane = main_stream_status->plane_states[dc_plane_index]; + + /* create phantom planes for subvp enabled plane */ + dml21_add_phantom_plane(dml_ctx, + dc, + context, + phantom_stream, + main_plane, + &dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_index]); + + phantoms_added = true; + } + } + } + } + + if (phantoms_added) + dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, dc->current_state); +} + +void dml21_build_fams2_programming(const struct dc *dc, + struct dc_state *context, + struct dml2_context *dml_ctx) +{ + int i, j, k; + unsigned int num_fams2_streams = 0; + + /* reset fams2 data */ + memset(&context->bw_ctx.bw.dcn.fams2_stream_base_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2, 0, sizeof(union dmub_fams2_stream_static_sub_state_v2) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config)); + + if (dml_ctx->v21.mode_programming.programming->fams2_required) { + for (i = 0; i < context->stream_count; i++) { + int dml_stream_idx; + struct dc_stream_state *phantom_stream; + struct dc_stream_status *phantom_status; + enum fams2_stream_type type = 0; + + union dmub_cmd_fams2_config 
*static_base_state = &context->bw_ctx.bw.dcn.fams2_stream_base_params[num_fams2_streams]; + union dmub_cmd_fams2_config *static_sub_state = &context->bw_ctx.bw.dcn.fams2_stream_sub_params[num_fams2_streams]; + + struct dc_stream_state *stream = context->streams[i]; + + if (context->stream_status[i].plane_count == 0 || + dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) { + /* can ignore blanked or phantom streams */ + continue; + } + + dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id); + if (dml_stream_idx < 0) { + ASSERT(dml_stream_idx >= 0); + continue; + } + + /* copy static state from PMO */ + memcpy(static_base_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_base_params, + sizeof(union dmub_cmd_fams2_config)); + + if (dc->debug.fams_version.major == 3) { + memcpy(&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2[num_fams2_streams], + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params_v2, + sizeof(union dmub_fams2_stream_static_sub_state_v2)); + } else { + memcpy(static_sub_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params, + sizeof(union dmub_cmd_fams2_config)); + } + + switch (dc->debug.fams_version.minor) { + case 1: + default: + type = static_base_state->stream_v1.base.type; + + /* get information from context */ + static_base_state->stream_v1.base.num_planes = context->stream_status[i].plane_count; + static_base_state->stream_v1.base.otg_inst = context->stream_status[i].primary_otg_inst; + + /* populate pipe masks for planes */ + for (j = 0; j < context->stream_status[i].plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { + static_base_state->stream_v1.base.pipe_mask |= (1 << k); + static_base_state->stream_v1.base.plane_pipe_masks[j] |= (1 << k); + } + } + } + } + + + /* get per method programming */ + switch (type) { + case FAMS2_STREAM_TYPE_VBLANK: + case FAMS2_STREAM_TYPE_VACTIVE: + case FAMS2_STREAM_TYPE_DRR: + break; + case FAMS2_STREAM_TYPE_SUBVP: + phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream); + if (!phantom_stream) + break; + + phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream); + + /* phantom status should always be present */ + ASSERT(phantom_status); + if (!phantom_status) + break; + + switch (dc->debug.fams_version.minor) { + case 1: + default: + static_sub_state->stream_v1.sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + + /* populate pipe masks for phantom planes */ + for (j = 0; j < phantom_status->plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { + switch (dc->debug.fams_version.minor) { + case 1: + default: + static_sub_state->stream_v1.sub_state.subvp.phantom_pipe_mask |= (1 << k); + static_sub_state->stream_v1.sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + } + } + } + } + } + break; + default: + ASSERT(false); + break; + } + + num_fams2_streams++; + } + } + + if 
(num_fams2_streams > 0) { + /* copy FAMS2 configuration */ + memcpy(&context->bw_ctx.bw.dcn.fams2_global_config, + &dml_ctx->v21.mode_programming.programming->fams2_global_config, + sizeof(struct dmub_cmd_fams2_global_config)); + + context->bw_ctx.bw.dcn.fams2_global_config.num_streams = num_fams2_streams; + } + + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; +} + +bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format) +{ + return source_format >= dml2_420_8 && source_format <= dml2_rgbe_alpha; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h new file mode 100644 index 000000000000..4bff52eaaef8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef _DML21_UTILS_H_ +#define _DML21_UTILS_H_ + +struct dc_state; +struct dc_plane_state; +struct pipe_ctx; + +struct dml2_context; +struct dml2_display_rq_regs; +struct dml2_display_dlg_regs; +struct dml2_display_ttu_regs; + +int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id); +int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id); +bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id); +void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx, + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dml2_per_stream_programming *stream_programming); +void dml21_populate_mall_allocation_size(struct dc_state *context, + struct dml2_context *in_ctx, + struct dml2_per_plane_programming *pln_prog, + struct pipe_ctx *dc_pipe); +bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx); +void find_valid_pipe_idx_for_stream_index(const struct dml2_context *dml_ctx, unsigned int *dml_pipe_idx, unsigned int stream_index); +void find_pipe_regs_idx(const struct dml2_context *dml_ctx, + struct pipe_ctx *pipe, unsigned int *pipe_regs_idx); +int dml21_find_dc_pipes_for_plane(const struct dc *in_dc, + struct dc_state *context, + struct dml2_context *dml_ctx, + struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], + struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__], + int dml_plane_idx); +void dml21_program_dc_pipe(struct dml2_context *dml_ctx, + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dml2_per_plane_programming *pln_prog, + struct dml2_per_stream_programming *stream_prog); +void dml21_handle_phantom_streams_planes(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); +unsigned int dml21_get_dc_plane_idx_from_plane_id(unsigned int plane_id); +void dml21_build_fams2_programming(const struct dc *dc, + struct dc_state *context, + struct dml2_context *dml_ctx); +bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format); +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c new file mode 100644 index 000000000000..798abb2b2e67 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c @@ -0,0 +1,466 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
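+
+/*
+ * Minimal lifecycle sketch (illustrative only, assuming the caller owns the
+ * dc, dc_state and dml2_configuration_options objects): the wrapper entry
+ * points in this file are expected to be driven roughly as
+ *
+ *	struct dml2_context *dml_ctx = NULL;
+ *
+ *	if (dml21_create(dc, &dml_ctx, &config)) {
+ *		dml21_validate(dc, context, dml_ctx,
+ *			       DC_VALIDATE_MODE_AND_PROGRAMMING);
+ *		dml21_destroy(dml_ctx);
+ *	}
+ */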
+ +#include "dml2_internal_types.h" +#include "dml_top.h" +#include "dml2_core_dcn4_calcs.h" +#include "dml2_internal_shared_types.h" +#include "dml21_utils.h" +#include "dml21_translation_helper.h" +#include "dml2_dc_resource_mgmt.h" + +#define INVALID -1 + +static bool dml21_allocate_memory(struct dml2_context **dml_ctx) +{ + *dml_ctx = vzalloc(sizeof(struct dml2_context)); + if (!(*dml_ctx)) + return false; + + (*dml_ctx)->v21.dml_init.dml2_instance = vzalloc(sizeof(struct dml2_instance)); + if (!((*dml_ctx)->v21.dml_init.dml2_instance)) + return false; + + (*dml_ctx)->v21.mode_support.dml2_instance = (*dml_ctx)->v21.dml_init.dml2_instance; + (*dml_ctx)->v21.mode_programming.dml2_instance = (*dml_ctx)->v21.dml_init.dml2_instance; + + (*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config; + (*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config; + + (*dml_ctx)->v21.mode_programming.programming = vzalloc(sizeof(struct dml2_display_cfg_programming)); + if (!((*dml_ctx)->v21.mode_programming.programming)) + return false; + + return true; +} + +static void dml21_populate_configuration_options(const struct dc *in_dc, + struct dml2_context *dml_ctx, + const struct dml2_configuration_options *config) +{ + dml_ctx->config = *config; + + /* UCLK P-State options */ + if (in_dc->debug.dml21_force_pstate_method) { + dml_ctx->config.pmo.force_pstate_method_enable = true; + for (int i = 0; i < MAX_PIPES; i++) + dml_ctx->config.pmo.force_pstate_method_values[i] = in_dc->debug.dml21_force_pstate_method_values[i]; + } else { + dml_ctx->config.pmo.force_pstate_method_enable = false; + } +} + +static void dml21_init(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config) +{ + + dml_ctx->architecture = dml2_architecture_21; + + dml21_populate_configuration_options(in_dc, dml_ctx, config); + + DC_FP_START(); + + dml21_populate_dml_init_params(&dml_ctx->v21.dml_init, &dml_ctx->config, in_dc); + + dml2_initialize_instance(&dml_ctx->v21.dml_init); + + DC_FP_END(); +} + +bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) +{ + /* Allocate memory for initializing DML21 instance */ + if (!dml21_allocate_memory(dml_ctx)) + return false; + + dml21_init(in_dc, *dml_ctx, config); + + return true; +} + +void dml21_destroy(struct dml2_context *dml2) +{ + vfree(dml2->v21.dml_init.dml2_instance); + vfree(dml2->v21.mode_programming.programming); +} + +static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, + struct dml2_context *in_ctx, unsigned int pipe_cnt) +{ + unsigned int dml_prog_idx = 0, dc_pipe_index = 0, num_dpps_required = 0; + struct dml2_per_plane_programming *pln_prog = NULL; + struct dml2_per_stream_programming *stream_prog = NULL; + struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}; + int num_pipes; + unsigned int dml_phantom_prog_idx; + + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + + /* copy global DCHUBBUB arbiter registers */ + memcpy(&context->bw_ctx.bw.dcn.arb_regs, &in_ctx->v21.mode_programming.programming->global_regs.arb_regs, sizeof(struct dml2_display_arb_regs)); + + /* legacy only */ + context->bw_ctx.bw.dcn.compbuf_size_kb = (int)in_ctx->v21.mode_programming.programming->global_regs.arb_regs.compbuf_size * 64; + + 
context->bw_ctx.bw.dcn.mall_ss_size_bytes = 0;
+	context->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes = 0;
+	context->bw_ctx.bw.dcn.mall_subvp_size_bytes = 0;
+
+	/* phantoms start after main planes */
+	dml_phantom_prog_idx = in_ctx->v21.mode_programming.programming->display_config.num_planes;
+
+	for (dml_prog_idx = 0; dml_prog_idx < DML2_MAX_PLANES; dml_prog_idx++) {
+		pln_prog = &in_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx];
+
+		if (!pln_prog->plane_descriptor)
+			continue;
+
+		stream_prog = &in_ctx->v21.mode_programming.programming->stream_programming[pln_prog->plane_descriptor->stream_index];
+		num_dpps_required = pln_prog->num_dpps_required;
+
+		if (num_dpps_required == 0) {
+			continue;
+		}
+		num_pipes = dml21_find_dc_pipes_for_plane(dc, context, in_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
+
+		if (num_pipes <= 0)
+			continue;
+
+		/* program each pipe */
+		for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+			dml21_program_dc_pipe(in_ctx, context, dc_main_pipes[dc_pipe_index], pln_prog, stream_prog);
+
+			if (pln_prog->phantom_plane.valid && dc_phantom_pipes[dc_pipe_index]) {
+				dml21_program_dc_pipe(in_ctx, context, dc_phantom_pipes[dc_pipe_index], pln_prog, stream_prog);
+			}
+		}
+
+		/* copy per plane mcache allocation */
+		memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[dml_prog_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation));
+		if (pln_prog->phantom_plane.valid) {
+			memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[dml_phantom_prog_idx],
+					&pln_prog->phantom_plane.mcache_allocation,
+					sizeof(struct dml2_mcache_surface_allocation));
+
+			dml_phantom_prog_idx++;
+		}
+	}
+
+	/* assign global clocks */
+	context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
+	context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
+	if (in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.num_clk_values > 1) {
+		context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz =
+			in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.clk_values_khz[in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.num_clk_values - 1] * 1000;
+	} else {
+		context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.clk_values_khz[0] * 1000;
+	}
+
+	if (in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.num_clk_values > 1) {
+		context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz =
+			in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.clk_values_khz[in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.num_clk_values - 1] * 1000;
+	} else {
+		context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.clk_values_khz[0] * 1000;
+	}
+
+	/* get global mall allocation */
+	if (dc->res_pool->funcs->calculate_mall_ways_from_bytes) {
+		context->bw_ctx.bw.dcn.clk.num_ways = dc->res_pool->funcs->calculate_mall_ways_from_bytes(dc, context->bw_ctx.bw.dcn.mall_subvp_size_bytes);
+	} else {
+		context->bw_ctx.bw.dcn.clk.num_ways = 0;
+	}
+}
+
+static void dml21_prepare_mcache_params(struct dml2_context *dml_ctx, struct dc_state *context, struct dc_mcache_params *mcache_params)
+{
+	int dc_plane_idx = 0;
+	int dml_prog_idx, stream_idx, plane_idx;
+	struct dml2_per_plane_programming *pln_prog = NULL;
+
+	for (stream_idx = 0; stream_idx < context->stream_count; stream_idx++) {
+		for (plane_idx = 0; plane_idx < context->stream_status[stream_idx].plane_count; plane_idx++) {
+			dml_prog_idx = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_idx]->stream_id,
+					context->stream_status[stream_idx].plane_states[plane_idx], context);
+			if (dml_prog_idx == INVALID) {
+				continue;
+			}
+			pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx];
+			mcache_params[dc_plane_idx].valid = pln_prog->mcache_allocation.valid;
+			mcache_params[dc_plane_idx].num_mcaches_plane0 = pln_prog->mcache_allocation.num_mcaches_plane0;
+			mcache_params[dc_plane_idx].num_mcaches_plane1 = pln_prog->mcache_allocation.num_mcaches_plane1;
+			mcache_params[dc_plane_idx].requires_dedicated_mall_mcache = pln_prog->mcache_allocation.requires_dedicated_mall_mcache;
+			mcache_params[dc_plane_idx].last_slice_sharing.plane0_plane1 = pln_prog->mcache_allocation.last_slice_sharing.plane0_plane1;
+			memcpy(mcache_params[dc_plane_idx].mcache_x_offsets_plane0,
+				pln_prog->mcache_allocation.mcache_x_offsets_plane0,
+				sizeof(int) * (DML2_MAX_MCACHES + 1));
+			memcpy(mcache_params[dc_plane_idx].mcache_x_offsets_plane1,
+				pln_prog->mcache_allocation.mcache_x_offsets_plane1,
+				sizeof(int) * (DML2_MAX_MCACHES + 1));
+			dc_plane_idx++;
+		}
+	}
+}
+
+static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+	bool result = false;
+	struct dml2_build_mode_programming_in_out *mode_programming = &dml_ctx->v21.mode_programming;
+	struct dc_mcache_params mcache_params[MAX_PLANES] = {0};
+
+	memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg));
+	memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
+	memset(&dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params, 0, sizeof(struct dml2_core_mode_programming_in_out));
+
+	if (!context)
+		return true;
+
+	if (context->stream_count == 0) {
+		dml21_build_fams2_programming(in_dc, context, dml_ctx);
+		return true;
+	}
+
+	/* scrub phantoms from current dc_state */
+	dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context);
+	dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context);
+
+	/* Populate stream, plane mappings and other fields in display config. */
+	result = dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx);
+	if (!result)
+		return false;
+
+	DC_FP_START();
+	result = dml2_build_mode_programming(mode_programming);
+	DC_FP_END();
+	if (!result)
+		return false;
+
+	/* Check and map HW resources */
+	if (result && !dml_ctx->config.skip_hw_state_mapping) {
+		dml21_map_hw_resources(dml_ctx);
+		dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, in_dc->current_state);
+		/* if subvp phantoms are present, expand them into dc context */
+		dml21_handle_phantom_streams_planes(in_dc, context, dml_ctx);
+
+		if (in_dc->res_pool->funcs->program_mcache_pipe_config) {
+			// Prepare mcache params for each plane based on mcache output from DML
+			dml21_prepare_mcache_params(dml_ctx, context, mcache_params);
+
+			// populate mcache regs to each pipe
+			dml_ctx->config.callbacks.allocate_mcache(context, mcache_params);
+		}
+	}
+
+	/* Copy DML CLK, WM and REG outputs to bandwidth context */
+	if (result && !dml_ctx->config.skip_hw_state_mapping) {
+		dml21_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml_ctx, in_dc->res_pool->pipe_count);
+		dml21_copy_clocks_to_dc_state(dml_ctx, context);
+		dml21_extract_watermark_sets(in_dc, &context->bw_ctx.bw.dcn.watermarks, dml_ctx);
+		dml21_build_fams2_programming(in_dc, context, dml_ctx);
+	}
+
+	return true;
+}
+
+static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+	bool is_supported = false;
+	struct dml2_initialize_instance_in_out *dml_init = &dml_ctx->v21.dml_init;
+	struct dml2_check_mode_supported_in_out *mode_support = &dml_ctx->v21.mode_support;
+
+	memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg));
+	memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
+	memset(&dml_ctx->v21.mode_programming.dml2_instance->scratch.check_mode_supported_locals.mode_support_params, 0, sizeof(struct dml2_core_mode_support_in_out));
+
+	if (!context || context->stream_count == 0)
+		return true;
+
+	/* Scrub phantoms from current dc_state */
+	dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context);
+	dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context);
+
+	mode_support->dml2_instance = dml_init->dml2_instance;
+	dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx);
+	dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming;
+	DC_FP_START();
+	is_supported = dml2_check_mode_supported(mode_support);
+	DC_FP_END();
+	if (!is_supported)
+		return false;
+
+	return true;
+}
+
+bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx,
+		enum dc_validate_mode validate_mode)
+{
+	bool out = false;
+
+	/* Use dml21_check_mode_support for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX path */
+	if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING)
+		out = dml21_check_mode_support(in_dc, context, dml_ctx);
+	else
+		out = dml21_mode_check_and_programming(in_dc, context, dml_ctx);
+
+	return out;
+}
+
+void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+	unsigned int dml_prog_idx, dml_phantom_prog_idx, dc_pipe_index;
+	int num_pipes;
+	struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+	struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0};
+
+	struct dml2_per_plane_programming *pln_prog = NULL;
+	struct dml2_plane_mcache_configuration_descriptor *mcache_config = NULL;
+	struct prepare_mcache_programming_locals *l = &dml_ctx->v21.scratch.prepare_mcache_locals;
+
+	if (context->stream_count == 0) {
+		return;
+	}
+
+	memset(&l->build_mcache_programming_params, 0, sizeof(struct dml2_build_mcache_programming_in_out));
+	l->build_mcache_programming_params.dml2_instance = dml_ctx->v21.dml_init.dml2_instance;
+
+	/* phantoms start after main planes */
+	dml_phantom_prog_idx = dml_ctx->v21.mode_programming.programming->display_config.num_planes;
+
+	/* Build mcache programming parameters per plane per pipe */
+	for (dml_prog_idx = 0; dml_prog_idx < dml_ctx->v21.mode_programming.programming->display_config.num_planes; dml_prog_idx++) {
+		pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx];
+
+		mcache_config = &l->build_mcache_programming_params.mcache_configurations[dml_prog_idx];
+		memset(mcache_config, 0, sizeof(struct dml2_plane_mcache_configuration_descriptor));
+		mcache_config->plane_descriptor = pln_prog->plane_descriptor;
+		mcache_config->mcache_allocation = &context->bw_ctx.bw.dcn.mcache_allocations[dml_prog_idx];
+		mcache_config->num_pipes = pln_prog->num_dpps_required;
+		l->build_mcache_programming_params.num_configurations++;
+
+		if (pln_prog->num_dpps_required == 0) {
+			continue;
+		}
+
+		num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
+		if (num_pipes <= 0 || dc_main_pipes[0]->stream == NULL ||
+				dc_main_pipes[0]->plane_state == NULL)
+			continue;
+
+		/* get config for each pipe */
+		for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+			ASSERT(dc_main_pipes[dc_pipe_index]);
+			dml21_get_pipe_mcache_config(context, dc_main_pipes[dc_pipe_index], pln_prog, &mcache_config->pipe_configurations[dc_pipe_index]);
+		}
+
+		/* get config for each phantom pipe */
+		if (pln_prog->phantom_plane.valid &&
+				dc_phantom_pipes[0] &&
+				dc_main_pipes[0]->stream &&
+				dc_phantom_pipes[0]->plane_state) {
+			mcache_config = &l->build_mcache_programming_params.mcache_configurations[dml_phantom_prog_idx];
+			memset(mcache_config, 0, sizeof(struct dml2_plane_mcache_configuration_descriptor));
+			mcache_config->plane_descriptor = pln_prog->plane_descriptor;
+			mcache_config->mcache_allocation = &context->bw_ctx.bw.dcn.mcache_allocations[dml_phantom_prog_idx];
+			mcache_config->num_pipes = pln_prog->num_dpps_required;
+			l->build_mcache_programming_params.num_configurations++;
+
+			for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+				ASSERT(dc_phantom_pipes[dc_pipe_index]);
+				dml21_get_pipe_mcache_config(context, dc_phantom_pipes[dc_pipe_index], pln_prog, &mcache_config->pipe_configurations[dc_pipe_index]);
+			}
+
+			/* increment phantom index */
+			dml_phantom_prog_idx++;
+		}
+	}
+
+	/* Call to generate mcache programming per plane per pipe for the given display configuration */
+	dml2_build_mcache_programming(&l->build_mcache_programming_params);
+
+	/* get per plane per pipe mcache programming */
+	for (dml_prog_idx = 0; dml_prog_idx < dml_ctx->v21.mode_programming.programming->display_config.num_planes; dml_prog_idx++) {
+		pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx];
+
+		num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
+		if (num_pipes <= 0 ||
dc_main_pipes[0]->stream == NULL || + dc_main_pipes[0]->plane_state == NULL) + continue; + + /* get config for each pipe */ + for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { + ASSERT(dc_main_pipes[dc_pipe_index]); + if (l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_prog_idx][dc_pipe_index]) { + memcpy(&dc_main_pipes[dc_pipe_index]->mcache_regs, + l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_prog_idx][dc_pipe_index], + sizeof(struct dml2_hubp_pipe_mcache_regs)); + } + } + + /* get config for each phantom pipe */ + if (pln_prog->phantom_plane.valid && + dc_phantom_pipes[0] && + dc_main_pipes[0]->stream && + dc_phantom_pipes[0]->plane_state) { + for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { + ASSERT(dc_phantom_pipes[dc_pipe_index]); + if (l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_phantom_prog_idx][dc_pipe_index]) { + memcpy(&dc_phantom_pipes[dc_pipe_index]->mcache_regs, + l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_phantom_prog_idx][dc_pipe_index], + sizeof(struct dml2_hubp_pipe_mcache_regs)); + } + } + /* increment phantom index */ + dml_phantom_prog_idx++; + } + } +} + +void dml21_copy(struct dml2_context *dst_dml_ctx, + struct dml2_context *src_dml_ctx) +{ + /* Preserve references to internals */ + struct dml2_instance *dst_dml2_instance = dst_dml_ctx->v21.dml_init.dml2_instance; + struct dml2_display_cfg_programming *dst_dml2_programming = dst_dml_ctx->v21.mode_programming.programming; + + /* Copy context */ + memcpy(dst_dml_ctx, src_dml_ctx, sizeof(struct dml2_context)); + + /* Copy Internals */ + memcpy(dst_dml2_instance, src_dml_ctx->v21.dml_init.dml2_instance, sizeof(struct dml2_instance)); + memcpy(dst_dml2_programming, src_dml_ctx->v21.mode_programming.programming, sizeof(struct dml2_display_cfg_programming)); + + /* Restore references to internals */ + dst_dml_ctx->v21.dml_init.dml2_instance = dst_dml2_instance; + + dst_dml_ctx->v21.mode_support.dml2_instance = dst_dml2_instance; + dst_dml_ctx->v21.mode_programming.dml2_instance = dst_dml2_instance; + + dst_dml_ctx->v21.mode_support.display_config = &dst_dml_ctx->v21.display_config; + dst_dml_ctx->v21.mode_programming.display_config = dst_dml_ctx->v21.mode_support.display_config; + + dst_dml_ctx->v21.mode_programming.programming = dst_dml2_programming; + + DC_FP_START(); + + /* need to initialize copied instance for internal references to be correct */ + dml2_initialize_instance(&dst_dml_ctx->v21.dml_init); + + DC_FP_END(); +} + +bool dml21_create_copy(struct dml2_context **dst_dml_ctx, + struct dml2_context *src_dml_ctx) +{ + /* Allocate memory for initializing DML21 instance */ + if (!dml21_allocate_memory(dst_dml_ctx)) + return false; + + dml21_copy(*dst_dml_ctx, src_dml_ctx); + + return true; +} + +void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config) +{ + dml21_init(in_dc, dml_ctx, config); +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h new file mode 100644 index 000000000000..15f92029d2e5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
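+
+/*
+ * Copy/teardown sketch (illustrative only; src_dml_ctx is assumed to have
+ * been created with dml21_create). dml21_copy() deep-copies the context and
+ * then re-initializes the copied instance so its internal references point
+ * at the copy rather than the source:
+ *
+ *	struct dml2_context *dup = NULL;
+ *
+ *	if (dml21_create_copy(&dup, src_dml_ctx))
+ *		dml21_destroy(dup);
+ */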
+
+
+#ifndef _DML21_WRAPPER_H_
+#define _DML21_WRAPPER_H_
+
+#include "os_types.h"
+#include "dml_top_soc_parameter_types.h"
+#include "dml_top_display_cfg_types.h"
+
+struct dc;
+struct dc_state;
+struct dml2_configuration_options;
+struct dml2_context;
+enum dc_validate_mode;
+
+/**
+ * dml21_create - Creates the dml21 context.
+ * @in_dc: dc.
+ * @dml_ctx: Created dml21 context.
+ * @config: dml21 configuration options.
+ *
+ * Creation of DML21 is done as part of dc_state creation.
+ * DML21 IP, SOC and STATES are initialized at
+ * creation time.
+ *
+ * Return: True if dml2 is successfully created, false otherwise.
+ */
+bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config);
+void dml21_destroy(struct dml2_context *dml2);
+void dml21_copy(struct dml2_context *dst_dml_ctx,
+	struct dml2_context *src_dml_ctx);
+bool dml21_create_copy(struct dml2_context **dst_dml_ctx,
+	struct dml2_context *src_dml_ctx);
+void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config);
+
+/**
+ * dml21_validate - Determines if a display configuration is supported or not.
+ * @in_dc: dc.
+ * @context: dc_state to be validated.
+ * @validate_mode: DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX
+ *	will not populate context.res_ctx.
+ *
+ * Based on the validate_mode option, this internally calls:
+ *
+ * - dml21_mode_check_and_programming - for the DC_VALIDATE_MODE_AND_PROGRAMMING option
+ *	Calculates if dc_state can be supported on the input display
+ *	configuration. If supported, generates the necessary HW
+ *	programming for the new dc_state.
+ *
+ * - dml21_check_mode_support - for the DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX options
+ *	Calculates if dc_state can be supported for the input display
+ *	config.
+ *
+ * Context: Two threads may not invoke this function concurrently unless they reference
+ *	separate dc_states for validation.
+ * Return: True if mode is supported, false otherwise.
+ */
+bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx,
+	enum dc_validate_mode validate_mode);
+
+/* Prepare hubp mcache_regs for hubp mcache ID and split coordinate programming */
+void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx);
+
+/* Structure for inputting external SOCBB and DCNIP values for tool based debugging. */
+struct socbb_ip_params_external {
+	struct dml2_ip_capabilities ip_params;
+	struct dml2_soc_bb soc_bb;
+};
+
+/* mcache parameters decided by DML */
+struct dc_mcache_params {
+	bool valid;
+	/*
+	 * For iMALL, dedicated mall mcaches are required (sharing of the last
+	 * slice is possible); for legacy phantom or phantom without return,
+	 * only the mall mcaches need to be valid.
+	 */
+	bool requires_dedicated_mall_mcache;
+	unsigned int num_mcaches_plane0;
+	unsigned int num_mcaches_plane1;
+	/*
+	 * Generally, plane0/1 slices must use a disjoint set of caches,
+	 * but in some cases the final segment of the two planes can
+	 * use the same cache. If plane0_plane1 is set, then this is
+	 * allowed.
+	 *
+	 * Similarly, the caches allocated to the MALL prefetcher are generally
+	 * disjoint, but if mall_prefetch is set, then the final segment
+	 * between the main and the mall pixel requestor can use the same
+	 * cache.
+	 *
+	 * Note that both bits may be set at the same time.
+	 */
+	struct {
+		bool mall_comb_mcache_p0;
+		bool mall_comb_mcache_p1;
+		bool plane0_plane1;
+	} last_slice_sharing;
+	/*
+	 * A plane is divided into vertical slices of mcaches,
+	 * which wrap on the surface width.
+	 *
+	 * For example, if the surface width is 7680, and split into
+	 * three slices of equal width, the boundary array would contain
+	 * [2560, 5120, 7680]
+	 *
+	 * The assignments are
+	 * 0 = [0 .. 2559]
+	 * 1 = [2560 .. 5119]
+	 * 2 = [5120 .. 7679]
+	 * 0 = [7680 .. INF]
+	 * The final element is implicitly the same as the first, and
+	 * at first seems invalid since it is never referenced (it lies
+	 * outside the surface). However, it is useful when shifting
+	 * (see below).
+	 *
+	 * For any given valid mcache assignment, a shifted version,
+	 * wrapped on the surface width boundary, is also assumed to be valid.
+	 *
+	 * For example, shifting [2560, 5120, 7680] by -50 results in
+	 * [2510, 5070, 7630].
+	 *
+	 * The assignments are now:
+	 * 0 = [0 .. 2509]
+	 * 1 = [2510 .. 5069]
+	 * 2 = [5070 .. 7629]
+	 * 0 = [7630 .. INF]
+	 */
+	int mcache_x_offsets_plane0[DML2_MAX_MCACHES + 1];
+	int mcache_x_offsets_plane1[DML2_MAX_MCACHES + 1];
+};
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h
new file mode 100644
index 000000000000..16a4f97bca4e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_DML_DCN4_SOC_BB__
+#define __DML_DML_DCN4_SOC_BB__
+
+#include "dml_top_soc_parameter_types.h"
+
+static const struct dml2_soc_qos_parameters dml_dcn4_variant_a_soc_qos_params = {
+	.derate_table = {
+		.system_active_urgent = {
+			.dram_derate_percent_pixel = 22,
+			.dram_derate_percent_vm = 0,
+			.dram_derate_percent_pixel_and_vm = 0,
+			.fclk_derate_percent = 76,
+			.dcfclk_derate_percent = 100,
+		},
+		.system_active_average = {
+			.dram_derate_percent_pixel = 17,
+			.dram_derate_percent_vm = 0,
+			.dram_derate_percent_pixel_and_vm = 0,
+			.fclk_derate_percent = 57,
+			.dcfclk_derate_percent = 75,
+		},
+		.dcn_mall_prefetch_urgent = {
+			.dram_derate_percent_pixel = 40,
+			.dram_derate_percent_vm = 0,
+			.dram_derate_percent_pixel_and_vm = 0,
+			.fclk_derate_percent = 83,
+			.dcfclk_derate_percent = 100,
+		},
+		.dcn_mall_prefetch_average = {
+			.dram_derate_percent_pixel = 33,
+			.dram_derate_percent_vm = 0,
+			.dram_derate_percent_pixel_and_vm = 0,
+			.fclk_derate_percent = 62,
+			.dcfclk_derate_percent = 83,
+		},
+		.system_idle_average = {
+			.dram_derate_percent_pixel = 70,
+			.dram_derate_percent_vm = 0,
+			.dram_derate_percent_pixel_and_vm = 0,
+			.fclk_derate_percent = 83,
+			.dcfclk_derate_percent = 100,
+		},
+	},
+	.writeback = {
+		.base_latency_us = 12,
+		.scaling_factor_us = 0,
+		.scaling_factor_mhz = 0,
+	},
+	.qos_params = {
+		.dcn4x = {
+			.df_qos_response_time_fclk_cycles = 300,
+			.max_round_trip_to_furthest_cs_fclk_cycles = 350,
+			.mall_overhead_fclk_cycles = 50,
+			.meta_trip_adder_fclk_cycles = 36,
+			.average_transport_distance_fclk_cycles = 257,
+			.umc_urgent_ramp_latency_margin = 50,
+			.umc_max_latency_margin = 30,
+			.umc_average_latency_margin = 20,
+			.fabric_max_transport_latency_margin = 20,
+			.fabric_average_transport_latency_margin = 10,
+
+			.per_uclk_dpm_params = {
+				{
+					.minimum_uclk_khz = 97 * 1000,
+					.urgent_ramp_uclk_cycles = 472,
+					.trip_to_memory_uclk_cycles = 827,
+					.meta_trip_to_memory_uclk_cycles = 827,
.maximum_latency_when_urgent_uclk_cycles = 72, + .average_latency_when_urgent_uclk_cycles = 61, + .maximum_latency_when_non_urgent_uclk_cycles = 827, + .average_latency_when_non_urgent_uclk_cycles = 118, + }, + }, + }, + }, + .qos_type = dml2_qos_param_type_dcn4x, +}; + +static const struct dml2_soc_bb dml2_socbb_dcn401 = { + .clk_table = { + .uclk = { + .clk_values_khz = {97000}, + .num_clk_values = 1, + }, + .fclk = { + .clk_values_khz = {300000, 2500000}, + .num_clk_values = 2, + }, + .dcfclk = { + .clk_values_khz = {200000, 1564000}, + .num_clk_values = 2, + }, + .dispclk = { + .clk_values_khz = {100000, 2000000}, + .num_clk_values = 2, + }, + .dppclk = { + .clk_values_khz = {100000, 2000000}, + .num_clk_values = 2, + }, + .dtbclk = { + .clk_values_khz = {100000, 1564000}, + .num_clk_values = 2, + }, + .phyclk = { + .clk_values_khz = {810000, 810000}, + .num_clk_values = 2, + }, + .socclk = { + .clk_values_khz = {300000, 1200000}, + .num_clk_values = 2, + }, + .dscclk = { + .clk_values_khz = {666667, 666667}, + .num_clk_values = 2, + }, + .phyclk_d18 = { + .clk_values_khz = {625000, 625000}, + .num_clk_values = 2, + }, + .phyclk_d32 = { + .clk_values_khz = {625000, 625000}, + .num_clk_values = 2, + }, + .dram_config = { + .channel_width_bytes = 2, + .channel_count = 16, + .transactions_per_clock = 16, + }, + }, + + .qos_parameters = { + .derate_table = { + .system_active_urgent = { + .dram_derate_percent_pixel = 22, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 76, + .dcfclk_derate_percent = 100, + }, + .system_active_average = { + .dram_derate_percent_pixel = 15, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 57, + .dcfclk_derate_percent = 75, + }, + .dcn_mall_prefetch_urgent = { + .dram_derate_percent_pixel = 40, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 83, + .dcfclk_derate_percent = 100, + }, + .dcn_mall_prefetch_average = { + .dram_derate_percent_pixel = 30, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 62, + .dcfclk_derate_percent = 83, + }, + .system_idle_average = { + .dram_derate_percent_pixel = 70, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 83, + .dcfclk_derate_percent = 100, + }, + }, + .writeback = { + .base_latency_us = 0, + .scaling_factor_us = 0, + .scaling_factor_mhz = 0, + }, + .qos_params = { + .dcn4x = { + .df_qos_response_time_fclk_cycles = 300, + .max_round_trip_to_furthest_cs_fclk_cycles = 350, + .mall_overhead_fclk_cycles = 50, + .meta_trip_adder_fclk_cycles = 36, + .average_transport_distance_fclk_cycles = 260, + .umc_urgent_ramp_latency_margin = 50, + .umc_max_latency_margin = 30, + .umc_average_latency_margin = 20, + .fabric_max_transport_latency_margin = 20, + .fabric_average_transport_latency_margin = 10, + + .per_uclk_dpm_params = { + { + // State 1 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 472, + .trip_to_memory_uclk_cycles = 827, + .meta_trip_to_memory_uclk_cycles = 827, + .maximum_latency_when_urgent_uclk_cycles = 72, + .average_latency_when_urgent_uclk_cycles = 72, + .maximum_latency_when_non_urgent_uclk_cycles = 827, + .average_latency_when_non_urgent_uclk_cycles = 117, + }, + { + // State 2 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 546, + .trip_to_memory_uclk_cycles = 848, + .meta_trip_to_memory_uclk_cycles = 848, + .maximum_latency_when_urgent_uclk_cycles = 146, + 
.average_latency_when_urgent_uclk_cycles = 146, + .maximum_latency_when_non_urgent_uclk_cycles = 848, + .average_latency_when_non_urgent_uclk_cycles = 133, + }, + { + // State 3 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 564, + .trip_to_memory_uclk_cycles = 853, + .meta_trip_to_memory_uclk_cycles = 853, + .maximum_latency_when_urgent_uclk_cycles = 164, + .average_latency_when_urgent_uclk_cycles = 164, + .maximum_latency_when_non_urgent_uclk_cycles = 853, + .average_latency_when_non_urgent_uclk_cycles = 136, + }, + { + // State 4 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 613, + .trip_to_memory_uclk_cycles = 869, + .meta_trip_to_memory_uclk_cycles = 869, + .maximum_latency_when_urgent_uclk_cycles = 213, + .average_latency_when_urgent_uclk_cycles = 213, + .maximum_latency_when_non_urgent_uclk_cycles = 869, + .average_latency_when_non_urgent_uclk_cycles = 149, + }, + { + // State 5 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 632, + .trip_to_memory_uclk_cycles = 874, + .meta_trip_to_memory_uclk_cycles = 874, + .maximum_latency_when_urgent_uclk_cycles = 232, + .average_latency_when_urgent_uclk_cycles = 232, + .maximum_latency_when_non_urgent_uclk_cycles = 874, + .average_latency_when_non_urgent_uclk_cycles = 153, + }, + { + // State 6 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 665, + .trip_to_memory_uclk_cycles = 885, + .meta_trip_to_memory_uclk_cycles = 885, + .maximum_latency_when_urgent_uclk_cycles = 265, + .average_latency_when_urgent_uclk_cycles = 265, + .maximum_latency_when_non_urgent_uclk_cycles = 885, + .average_latency_when_non_urgent_uclk_cycles = 161, + }, + { + // State 7 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 689, + .trip_to_memory_uclk_cycles = 895, + .meta_trip_to_memory_uclk_cycles = 895, + .maximum_latency_when_urgent_uclk_cycles = 289, + .average_latency_when_urgent_uclk_cycles = 289, + .maximum_latency_when_non_urgent_uclk_cycles = 895, + .average_latency_when_non_urgent_uclk_cycles = 167, + }, + { + // State 8 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 716, + .trip_to_memory_uclk_cycles = 902, + .meta_trip_to_memory_uclk_cycles = 902, + .maximum_latency_when_urgent_uclk_cycles = 316, + .average_latency_when_urgent_uclk_cycles = 316, + .maximum_latency_when_non_urgent_uclk_cycles = 902, + .average_latency_when_non_urgent_uclk_cycles = 174, + }, + }, + }, + }, + .qos_type = dml2_qos_param_type_dcn4x, + }, + + .power_management_parameters = { + .dram_clk_change_blackout_us = 400, + .fclk_change_blackout_us = 0, + .g7_ppt_blackout_us = 0, + .stutter_enter_plus_exit_latency_us = 54, + .stutter_exit_latency_us = 41, + .z8_stutter_enter_plus_exit_latency_us = 0, + .z8_stutter_exit_latency_us = 0, + /* + .g6_temp_read_blackout_us = { + 23.00, + 10.00, + 10.00, + 8.00, + 8.00, + 5.00, + 5.00, + 5.00, + }, + */ + }, + + .vmin_limit = { + .dispclk_khz = 600 * 1000, + }, + + .dprefclk_mhz = 720, + .xtalclk_mhz = 100, + .pcie_refclk_mhz = 100, + .dchub_refclk_mhz = 50, + .mall_allocated_for_dcn_mbytes = 64, + .max_outstanding_reqs = 512, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .return_bus_width_bytes = 64, + .hostvm_min_page_size_kbytes = 0, + .gpuvm_min_page_size_kbytes = 256, + .phy_downspread_percent = 0.38, + .dcn_downspread_percent = 0.38, + .dispclk_dppclk_vco_speed_mhz = 4500, + .do_urgent_latency_adjustment = 0, + .mem_word_bytes = 32, + .num_dcc_mcaches = 8, + .mcache_size_bytes = 2048, + .mcache_line_size_bytes = 32, + .max_fclk_for_uclk_dpm_khz = 1250 * 1000, +}; + +static const struct 
dml2_ip_capabilities dml2_dcn401_max_ip_caps = {
+	.pipe_count = 4,
+	.otg_count = 4,
+	.num_dsc = 4,
+	.max_num_dp2p0_streams = 4,
+	.max_num_hdmi_frl_outputs = 1,
+	.max_num_dp2p0_outputs = 4,
+	.rob_buffer_size_kbytes = 192,
+	.config_return_buffer_size_in_kbytes = 1344,
+	.config_return_buffer_segment_size_in_kbytes = 64,
+	.meta_fifo_size_in_kentries = 22,
+	.compressed_buffer_segment_size_in_kbytes = 64,
+	.cursor_buffer_size = 24,
+	.max_flip_time_us = 80,
+	.max_flip_time_lines = 32,
+	.hostvm_mode = 0,
+	.subvp_drr_scheduling_margin_us = 100,
+	.subvp_prefetch_end_to_mall_start_us = 15,
+	.subvp_fw_processing_delay = 15,
+	.max_vactive_det_fill_delay_us = 400,
+
+	.fams2 = {
+		.max_allow_delay_us = 100 * 1000,
+		.scheduling_delay_us = 550,
+		.vertical_interrupt_ack_delay_us = 40,
+		.allow_programming_delay_us = 18,
+		.min_allow_width_us = 20,
+		.subvp_df_throttle_delay_us = 100,
+		.subvp_programming_delay_us = 200,
+		.subvp_prefetch_to_mall_delay_us = 18,
+		.drr_programming_delay_us = 35,
+
+		.lock_timeout_us = 5000,
+		.recovery_timeout_us = 5000,
+		.flip_programming_delay_us = 300,
+	},
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h
new file mode 100644
index 000000000000..281d7ad230d8
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_EXTERNAL_LIB_DEPS__
+#define __DML2_EXTERNAL_LIB_DEPS__
+
+#include "os_types.h"
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h
new file mode 100644
index 000000000000..a64ec4dcf11a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_TOP_H__
+#define __DML_TOP_H__
+
+#include "dml_top_types.h"
+
+/*
+ * Top Level Interface for DML2
+ */
+
+/*
+ * Returns the size of the DML instance for the caller to allocate
+ */
+unsigned int dml2_get_instance_size_bytes(void);
+
+/*
+ * Initializes the DML instance (i.e. with configuration, soc BB, IP params, etc...)
+ */
+bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out);
+
+/*
+ * Determines if the input mode is supported (boolean) on the SoC at all. Does not return
+ * information on how the mode should be programmed.
+ */
+bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out);
+
+/*
+ * Determines the full (optimized) programming for the input mode. Returns minimum
+ * clocks as well as dchub register programming values for all pipes, and additional
+ * meta such as ODM or MPCC combine factors.
+ */
+bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out);
+
+/*
+ * Determines the correct per pipe mcache register programming for a valid mode.
+ * The mcache allocation must have been calculated (successfully) in a previous
+ * call to dml2_build_mode_programming.
+ * The actual hubp viewport dimensions must be what the actual registers will be
+ * programmed to (i.e. based on scaler setup).
+ */ +bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h new file mode 100644 index 000000000000..8e5a30287220 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __dml2_TOP_DCHUB_REGISTERS_H__ +#define __dml2_TOP_DCHUB_REGISTERS_H__ + +#include "dml2_external_lib_deps.h" +// These types are uint32_t as they represent actual calculated register values for HW + +struct dml2_display_dlg_regs { + uint32_t refcyc_h_blank_end; + uint32_t dlg_vblank_end; + uint32_t min_dst_y_next_start; + uint32_t refcyc_per_htotal; + uint32_t refcyc_x_after_scaler; + uint32_t dst_y_after_scaler; + uint32_t dst_y_prefetch; + uint32_t dst_y_per_vm_vblank; + uint32_t dst_y_per_row_vblank; + uint32_t dst_y_per_vm_flip; + uint32_t dst_y_per_row_flip; + uint32_t ref_freq_to_pix_freq; + uint32_t vratio_prefetch; + uint32_t vratio_prefetch_c; + uint32_t refcyc_per_tdlut_group; + uint32_t refcyc_per_pte_group_vblank_l; + uint32_t refcyc_per_pte_group_vblank_c; + uint32_t refcyc_per_pte_group_flip_l; + uint32_t refcyc_per_pte_group_flip_c; + uint32_t dst_y_per_pte_row_nom_l; + uint32_t dst_y_per_pte_row_nom_c; + uint32_t refcyc_per_pte_group_nom_l; + uint32_t refcyc_per_pte_group_nom_c; + uint32_t refcyc_per_line_delivery_pre_l; + uint32_t refcyc_per_line_delivery_pre_c; + uint32_t refcyc_per_line_delivery_l; + uint32_t refcyc_per_line_delivery_c; + uint32_t refcyc_per_vm_group_vblank; + uint32_t refcyc_per_vm_group_flip; + uint32_t refcyc_per_vm_req_vblank; + uint32_t refcyc_per_vm_req_flip; + uint32_t dst_y_offset_cur0; + uint32_t chunk_hdl_adjust_cur0; + uint32_t vready_after_vcount0; + uint32_t dst_y_delta_drq_limit; + uint32_t refcyc_per_vm_dmdata; + uint32_t dmdata_dl_delta; + + // MRQ + uint32_t refcyc_per_meta_chunk_vblank_l; + uint32_t refcyc_per_meta_chunk_vblank_c; + uint32_t refcyc_per_meta_chunk_flip_l; + uint32_t refcyc_per_meta_chunk_flip_c; + uint32_t dst_y_per_meta_row_nom_l; + uint32_t dst_y_per_meta_row_nom_c; + uint32_t refcyc_per_meta_chunk_nom_l; + uint32_t refcyc_per_meta_chunk_nom_c; +}; + +struct dml2_display_ttu_regs { + uint32_t qos_level_low_wm; + uint32_t qos_level_high_wm; + uint32_t min_ttu_vblank; + uint32_t qos_level_flip; + uint32_t refcyc_per_req_delivery_l; + uint32_t refcyc_per_req_delivery_c; + uint32_t refcyc_per_req_delivery_cur0; + uint32_t refcyc_per_req_delivery_pre_l; + uint32_t refcyc_per_req_delivery_pre_c; + uint32_t refcyc_per_req_delivery_pre_cur0; + uint32_t qos_level_fixed_l; + uint32_t qos_level_fixed_c; + uint32_t qos_level_fixed_cur0; + uint32_t qos_ramp_disable_l; + uint32_t qos_ramp_disable_c; + uint32_t qos_ramp_disable_cur0; +}; + +struct dml2_display_arb_regs { + uint32_t max_req_outstanding; + uint32_t min_req_outstanding; + uint32_t sat_level_us; + uint32_t hvm_max_qos_commit_threshold; + uint32_t hvm_min_req_outstand_commit_threshold; + uint32_t compbuf_reserved_space_kbytes; + uint32_t compbuf_size; + uint32_t sdpif_request_rate_limit; + uint32_t allow_sdpif_rate_limit_when_cstate_req; + uint32_t dcfclk_deep_sleep_hysteresis; + uint32_t pstate_stall_threshold; +}; + +struct dml2_cursor_dlg_regs{ + uint32_t dst_x_offset; // CURSOR0_DST_X_OFFSET + uint32_t dst_y_offset; // CURSOR0_DST_Y_OFFSET + 
uint32_t chunk_hdl_adjust; // CURSOR0_CHUNK_HDL_ADJUST + + uint32_t qos_level_fixed; + uint32_t qos_ramp_disable; +}; + +struct dml2_display_plane_rq_regs { + uint32_t chunk_size; + uint32_t min_chunk_size; + uint32_t dpte_group_size; + uint32_t mpte_group_size; + uint32_t swath_height; + uint32_t pte_row_height_linear; + + // MRQ + uint32_t meta_chunk_size; + uint32_t min_meta_chunk_size; +}; + +struct dml2_display_rq_regs { + struct dml2_display_plane_rq_regs rq_regs_l; + struct dml2_display_plane_rq_regs rq_regs_c; + uint32_t drq_expansion_mode; + uint32_t prq_expansion_mode; + uint32_t crq_expansion_mode; + uint32_t plane1_base_address; + uint32_t unbounded_request_enabled; + + // MRQ + uint32_t mrq_expansion_mode; +}; + +struct dml2_display_mcache_regs { + uint32_t mcache_id_first; + uint32_t mcache_id_second; + uint32_t split_location; +}; + +struct dml2_hubp_pipe_mcache_regs { + struct { + struct dml2_display_mcache_regs p0; + struct dml2_display_mcache_regs p1; + } main; + struct { + struct dml2_display_mcache_regs p0; + struct dml2_display_mcache_regs p1; + } mall; +}; + +struct dml2_dchub_per_pipe_register_set { + struct dml2_display_rq_regs rq_regs; + struct dml2_display_ttu_regs ttu_regs; + struct dml2_display_dlg_regs dlg_regs; + + uint32_t det_size; +}; + +struct dml2_dchub_watermark_regs { + /* watermarks */ + uint32_t urgent; + uint32_t sr_enter; + uint32_t sr_exit; + uint32_t sr_enter_z8; + uint32_t sr_exit_z8; + uint32_t sr_enter_low_power; + uint32_t sr_exit_low_power; + uint32_t uclk_pstate; + uint32_t fclk_pstate; + uint32_t temp_read_or_ppt; + uint32_t usr; + /* qos */ + uint32_t refcyc_per_trip_to_mem; + uint32_t refcyc_per_meta_trip_to_mem; + uint32_t frac_urg_bw_flip; + uint32_t frac_urg_bw_nom; + uint32_t frac_urg_bw_mall; +}; + +enum dml2_dchub_watermark_reg_set_index { + DML2_DCHUB_WATERMARK_SET_A = 0, + DML2_DCHUB_WATERMARK_SET_B = 1, + DML2_DCHUB_WATERMARK_SET_C = 2, + DML2_DCHUB_WATERMARK_SET_D = 3, + DML2_DCHUB_WATERMARK_SET_NUM = 4, +}; + +struct dml2_dchub_global_register_set { + struct dml2_display_arb_regs arb_regs; + struct dml2_dchub_watermark_regs wm_regs[DML2_DCHUB_WATERMARK_SET_NUM]; + unsigned int num_watermark_sets; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h new file mode 100644 index 000000000000..13749c9fcf18 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
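+
+/*
+ * Population sketch (illustrative values only): a single 1080p stream with
+ * one 32bpp plane would fill the descriptors declared in this header
+ * roughly as
+ *
+ *	struct dml2_display_cfg cfg = {0};
+ *
+ *	cfg.num_streams = 1;
+ *	cfg.num_planes = 1;
+ *	cfg.stream_descriptors[0].timing.h_active = 1920;
+ *	cfg.stream_descriptors[0].timing.v_active = 1080;
+ *	cfg.stream_descriptors[0].timing.pixel_clock_khz = 148500;
+ *	cfg.plane_descriptors[0].stream_index = 0;
+ *	cfg.plane_descriptors[0].pixel_format = dml2_444_32;
+ */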
+ +#ifndef __DML_TOP_DISPLAY_CFG_TYPES_H__ +#define __DML_TOP_DISPLAY_CFG_TYPES_H__ + +#include "dml2_external_lib_deps.h" + +#define DML2_MAX_PLANES 8 +#define DML2_MAX_DCN_PIPES 8 +#define DML2_MAX_MCACHES 8 // assume plane is going to be supported by a max of 8 mcaches +#define DML2_MAX_WRITEBACK 3 + +enum dml2_swizzle_mode { + dml2_sw_linear, // SW_LINEAR accepts 256 byte aligned pitch and also 128 byte aligned pitch if DCC is not enabled + dml2_sw_256b_2d, + dml2_sw_4kb_2d, + dml2_sw_64kb_2d, + dml2_sw_256kb_2d, + + dml2_gfx11_sw_linear, + dml2_gfx11_sw_64kb_d, + dml2_gfx11_sw_64kb_d_t, + dml2_gfx11_sw_64kb_d_x, + dml2_gfx11_sw_64kb_r_x, + dml2_gfx11_sw_256kb_d_x, + dml2_gfx11_sw_256kb_r_x, + +}; + +enum dml2_source_format_class { + dml2_444_8 = 0, + dml2_444_16 = 1, + dml2_444_32 = 2, + dml2_444_64 = 3, + dml2_420_8 = 4, + dml2_420_10 = 5, + dml2_420_12 = 6, + dml2_rgbe_alpha = 9, + dml2_rgbe = 10, + dml2_mono_8 = 11, + dml2_mono_16 = 12, + dml2_422_planar_8 = 13, + dml2_422_planar_10 = 14, + dml2_422_planar_12 = 15, + dml2_422_packed_8 = 16, + dml2_422_packed_10 = 17, + dml2_422_packed_12 = 18 +}; + +enum dml2_sample_positioning { + dml2_interstitial = 0, + dml2_cosited = 1 +}; + +enum dml2_rotation_angle { + dml2_rotation_0 = 0, + dml2_rotation_90 = 1, + dml2_rotation_180 = 2, + dml2_rotation_270 = 3 +}; + +enum dml2_output_format_class { + dml2_444 = 0, + dml2_s422 = 1, + dml2_n422 = 2, + dml2_420 = 3 +}; + +enum dml2_output_encoder_class { + dml2_dp = 0, + dml2_edp = 1, + dml2_dp2p0 = 2, + dml2_hdmi = 3, + dml2_hdmifrl = 4, + dml2_none = 5 +}; + +enum dml2_output_link_dp_rate { + dml2_dp_rate_na = 0, + dml2_dp_rate_hbr = 1, + dml2_dp_rate_hbr2 = 2, + dml2_dp_rate_hbr3 = 3, + dml2_dp_rate_uhbr10 = 4, + dml2_dp_rate_uhbr13p5 = 5, + dml2_dp_rate_uhbr20 = 6 +}; + +enum dml2_uclk_pstate_change_strategy { + dml2_uclk_pstate_change_strategy_auto = 0, + dml2_uclk_pstate_change_strategy_force_vactive = 1, + dml2_uclk_pstate_change_strategy_force_vblank = 2, + dml2_uclk_pstate_change_strategy_force_drr = 3, + dml2_uclk_pstate_change_strategy_force_mall_svp = 4, + dml2_uclk_pstate_change_strategy_force_mall_full_frame = 5, +}; + +enum dml2_svp_mode_override { + dml2_svp_mode_override_auto = 0, + dml2_svp_mode_override_main_pipe = 1, + dml2_svp_mode_override_phantom_pipe = 2, //does not need to be defined explicitly, main overrides result in implicit phantom additions + dml2_svp_mode_override_phantom_pipe_no_data_return = 3, + dml2_svp_mode_override_imall = 4 +}; + +enum dml2_refresh_from_mall_mode_override { + dml2_refresh_from_mall_mode_override_auto = 0, + dml2_refresh_from_mall_mode_override_force_disable = 1, + dml2_refresh_from_mall_mode_override_force_enable = 2 +}; + +enum dml2_odm_mode { + dml2_odm_mode_auto = 0, + dml2_odm_mode_bypass, + dml2_odm_mode_combine_2to1, + dml2_odm_mode_combine_3to1, + dml2_odm_mode_combine_4to1, + dml2_odm_mode_split_1to2, + dml2_odm_mode_mso_1to2, + dml2_odm_mode_mso_1to4 +}; + +enum dml2_scaling_transform { + dml2_scaling_transform_explicit = 0, + dml2_scaling_transform_fullscreen, + dml2_scaling_transform_aspect_ratio, + dml2_scaling_transform_centered +}; + +enum dml2_dsc_enable_option { + dml2_dsc_disable = 0, + dml2_dsc_enable = 1, + dml2_dsc_enable_if_necessary = 2 +}; + +enum dml2_tdlut_addressing_mode { + dml2_tdlut_sw_linear = 0, + dml2_tdlut_simple_linear = 1 +}; + +enum dml2_tdlut_width_mode { + dml2_tdlut_width_17_cube = 0, + dml2_tdlut_width_33_cube = 1 +}; + +enum dml2_twait_budgeting_setting { + dml2_twait_budgeting_setting_ignore = 0,// 
Ignore this budget in twait + + dml2_twait_budgeting_setting_if_needed, // Budget for it only if needed + //(i.e. UCLK/FCLK DPM cannot be supported in active) + + dml2_twait_budgeting_setting_try, // Budget for it as long as there is an SoC state that + // can support it +}; + +struct dml2_get_cursor_dlg_reg{ + unsigned int cursor_x_position; + unsigned int cursor_hotspot_x; + unsigned int cursor_primary_offset; + unsigned int cursor_secondary_offset; + bool cursor_stereo_en; + bool cursor_2x_magnify; + double hratio; + double pixel_rate_mhz; + double dlg_refclk_mhz; +}; + +/// @brief Surface Parameters +struct dml2_surface_cfg { + enum dml2_swizzle_mode tiling; + + struct { + unsigned long pitch; // In elements, two pixels per element in 422 packed format + unsigned long width; + unsigned long height; + } plane0; + + + struct { + unsigned long pitch; + unsigned long width; + unsigned long height; + } plane1; + + struct { + bool enable; + struct { + unsigned long pitch; + } plane0; + struct { + unsigned long pitch; + } plane1; + + struct { + double dcc_rate_plane0; + double dcc_rate_plane1; + double fraction_of_zero_size_request_plane0; + double fraction_of_zero_size_request_plane1; + } informative; + } dcc; +}; + + +struct dml2_composition_cfg { + enum dml2_rotation_angle rotation_angle; + bool mirrored; + enum dml2_scaling_transform scaling_transform; + bool rect_out_height_spans_vactive; + + struct { + bool stationary; + struct { + unsigned long width; + unsigned long height; + unsigned long x_start; + unsigned long y_start; + } plane0; + + struct { + unsigned long width; + unsigned long height; + unsigned long x_start; + unsigned long y_start; + } plane1; + } viewport; + + struct { + bool enabled; + bool easf_enabled; + bool isharp_enabled; + bool upsp_enabled; + enum dml2_sample_positioning upsp_sample_positioning; + unsigned int upsp_vtaps; + struct { + double h_ratio; + double v_ratio; + unsigned int h_taps; + unsigned int v_taps; + } plane0; + + struct { + double h_ratio; + double v_ratio; + unsigned int h_taps; + unsigned int v_taps; + } plane1; + + unsigned long rect_out_width; + } scaler_info; +}; + +struct dml2_timing_cfg { + unsigned long h_total; + unsigned long v_total; + unsigned long h_blank_end; + unsigned long v_blank_end; + unsigned long h_front_porch; + unsigned long v_front_porch; + unsigned long h_sync_width; + unsigned long pixel_clock_khz; + unsigned long h_active; + unsigned long v_active; + unsigned int bpc; //FIXME: review with Jun + struct { + enum dml2_dsc_enable_option enable; + unsigned int dsc_compressed_bpp_x16; + struct { + // for dv to specify num dsc slices to use + unsigned int num_slices; + } overrides; + } dsc; + bool interlaced; + struct { + /* static */ + bool enabled; + unsigned long min_refresh_uhz; + unsigned int max_instant_vtotal_delta; + /* dynamic */ + bool disallowed; + bool drr_active_variable; + bool drr_active_fixed; + } drr_config; + unsigned long vblank_nom; +}; + +struct dml2_link_output_cfg { + enum dml2_output_format_class output_format; + enum dml2_output_encoder_class output_encoder; + unsigned int output_dp_lane_count; + enum dml2_output_link_dp_rate output_dp_link_rate; + unsigned long audio_sample_rate; + unsigned long audio_sample_layout; + bool output_disabled; // The stream does not go to a backend for output to a physical + //connector (e.g. writeback only, phantom pipe) goes to writeback + bool validate_output; // Do not validate the link configuration for this display stream. 
+};
+
+struct dml2_writeback_info {
+	enum dml2_source_format_class pixel_format;
+	unsigned long input_width;
+	unsigned long input_height;
+	unsigned long output_width;
+	unsigned long output_height;
+	unsigned long v_taps;
+	unsigned long h_taps;
+	unsigned long v_taps_chroma;
+	unsigned long h_taps_chroma;
+	double h_ratio;
+	double v_ratio;
+};
+
+struct dml2_writeback_cfg {
+	unsigned int active_writebacks_per_stream;
+	struct dml2_writeback_info writeback_stream[DML2_MAX_WRITEBACK];
+};
+
+struct dml2_plane_parameters {
+	unsigned int stream_index; // Identifies the stream this plane is composed into
+
+	enum dml2_source_format_class pixel_format;
+	/*
+	 * The surface and composition structures use
+	 * the terms plane0 and plane1. These planes
+	 * are expected to hold the following data based
+	 * on the pixel format.
+	 *
+	 * RGB or YUV Non-Planar Types:
+	 * dml2_444_8
+	 * dml2_444_16
+	 * dml2_444_32
+	 * dml2_444_64
+	 * dml2_rgbe
+	 *
+	 * plane0 = argb or rgbe
+	 * plane1 = not used
+	 *
+	 * YUV Planar-Types:
+	 * dml2_420_8
+	 * dml2_420_10
+	 * dml2_420_12
+	 *
+	 * plane0 = luma
+	 * plane1 = chroma
+	 *
+	 * RGB Planar Types:
+	 * dml2_rgbe_alpha
+	 *
+	 * plane0 = rgbe
+	 * plane1 = alpha
+	 *
+	 * Mono Non-Planar Types:
+	 * dml2_mono_8
+	 * dml2_mono_16
+	 *
+	 * plane0 = luma
+	 * plane1 = not used
+	 */
+
+	struct dml2_surface_cfg surface;
+	struct dml2_composition_cfg composition;
+
+	struct {
+		bool enable;
+		unsigned long lines_before_active_required;
+		unsigned long transmitted_bytes;
+	} dynamic_meta_data;
+
+	struct {
+		unsigned int num_cursors;
+		unsigned long cursor_width;
+		unsigned long cursor_bpp;
+	} cursor;
+
+	// For TDLUT, SW would assume TDLUT is set up and enabled all the time and
+	// budget for the worst-case addressing/width mode
+	struct {
+		bool setup_for_tdlut;
+		enum dml2_tdlut_addressing_mode tdlut_addressing_mode;
+		enum dml2_tdlut_width_mode tdlut_width_mode;
+		bool tdlut_mpc_width_flag;
+	} tdlut;
+
+	bool immediate_flip;
+
+	struct {
+		// Logical overrides to power management policies (usually)
+		enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy;
+		enum dml2_refresh_from_mall_mode_override refresh_from_mall;
+		unsigned int det_size_override_kb;
+		unsigned int mpcc_combine_factor;
+
+		// reserved_vblank_time_ns is the minimum time to reserve in vblank for Twait
+		// The actual reserved vblank time used for the corresponding stream in mode_programming would be at least as much as this per-plane override.
+ long reserved_vblank_time_ns; + unsigned int max_vactive_det_fill_delay_us; // 0 = no reserved time, +ve = explicit max delay + unsigned int vactive_latency_to_hide_for_pstate_admissibility_us; + unsigned int gpuvm_min_page_size_kbytes; + unsigned int hostvm_min_page_size_kbytes; + + enum dml2_svp_mode_override legacy_svp_config; //TODO remove in favor of svp_config + + struct { + // HW specific overrides, there's almost no reason to mess with these + // generally used for debugging or simulation + bool force_one_row_for_frame; + struct { + bool enable; + bool value; + } force_pte_buffer_mode; + double dppclk_mhz; + } hw; + } overrides; +}; + +struct dml2_stream_parameters { + struct dml2_timing_cfg timing; + struct dml2_link_output_cfg output; + struct dml2_writeback_cfg writeback; + + struct { + enum dml2_odm_mode odm_mode; + bool disable_dynamic_odm; + bool disable_subvp; + int minimum_vblank_idle_requirement_us; + bool minimize_active_latency_hiding; + + struct { + struct { + enum dml2_twait_budgeting_setting uclk_pstate; + enum dml2_twait_budgeting_setting fclk_pstate; + enum dml2_twait_budgeting_setting stutter_enter_exit; + } twait_budgeting; + } hw; + } overrides; +}; + +struct dml2_display_cfg { + bool gpuvm_enable; + bool ffbm_enable; + bool hostvm_enable; + + // Allocate DET proportionally between streams based on pixel rate + // and then allocate proportionally between planes. + bool minimize_det_reallocation; + + unsigned int gpuvm_max_page_table_levels; + unsigned int hostvm_max_non_cached_page_table_levels; + + struct dml2_plane_parameters plane_descriptors[DML2_MAX_PLANES]; + struct dml2_stream_parameters stream_descriptors[DML2_MAX_PLANES]; + + unsigned int num_planes; + unsigned int num_streams; + + struct { + struct { + // HW specific overrides, there's almost no reason to mess with these + // generally used for debugging or simulation + struct { + bool enable; + bool value; + } force_unbounded_requesting; + + struct { + bool enable; + bool value; + } force_nom_det_size_kbytes; + + bool mode_support_check_disable; + bool mcache_admissibility_check_disable; + bool surface_viewport_size_check_disable; + double dlg_ref_clk_mhz; + double dispclk_mhz; + double dcfclk_mhz; + bool optimize_tdlut_scheduling; // TBD: for DV, will set this to 1, to ensure tdlut schedule is calculated based on address/width mode + } hw; + + struct { + bool uclk_pstate_change_disable; + bool fclk_pstate_change_disable; + bool g6_temp_read_pstate_disable; + bool g7_ppt_pstate_disable; + } power_management; + + bool enhanced_prefetch_schedule_acceleration; + bool dcc_programming_assumes_scan_direction_unknown; + bool synchronize_timings; + bool synchronize_ddr_displays_for_uclk_pstate_change; + bool max_outstanding_when_urgent_expected_disable; + bool enable_subvp_implicit_pmo; //enables PMO to switch pipe uclk strategy to subvp, and generate phantom programming + unsigned int best_effort_min_active_latency_hiding_us; + bool all_streams_blanked; + } overrides; +}; + +struct dml2_pipe_configuration_descriptor { + struct { + unsigned int viewport_x_start; + unsigned int viewport_width; + } plane0; + + struct { + unsigned int viewport_x_start; + unsigned int viewport_width; + } plane1; + + bool plane1_enabled; + bool imall_enabled; +}; + +struct dml2_plane_mcache_configuration_descriptor { + const struct dml2_plane_parameters *plane_descriptor; + const struct dml2_mcache_surface_allocation *mcache_allocation; + + struct dml2_pipe_configuration_descriptor pipe_configurations[DML2_MAX_DCN_PIPES]; + char 
num_pipes; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h new file mode 100644 index 000000000000..8f624a912e78 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML_TOP_POLICY_TYPES_H__ +#define __DML_TOP_POLICY_TYPES_H__ + +struct dml2_policy_parameters { + unsigned long odm_combine_dispclk_threshold_khz; + unsigned int max_immediate_flip_latency; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h new file mode 100644 index 000000000000..4a9a0d5a09b7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML_TOP_SOC_PARAMETER_TYPES_H__ +#define __DML_TOP_SOC_PARAMETER_TYPES_H__ + +#include "dml2_external_lib_deps.h" + +#define DML_MAX_CLK_TABLE_SIZE 20 + +struct dml2_soc_derate_values { + unsigned int dram_derate_percent_pixel; + unsigned int dram_derate_percent_vm; + unsigned int dram_derate_percent_pixel_and_vm; + + unsigned int fclk_derate_percent; + unsigned int dcfclk_derate_percent; +}; + +struct dml2_soc_derates { + struct dml2_soc_derate_values system_active_urgent; + struct dml2_soc_derate_values system_active_average; + struct dml2_soc_derate_values dcn_mall_prefetch_urgent; + struct dml2_soc_derate_values dcn_mall_prefetch_average; + struct dml2_soc_derate_values system_idle_average; +}; + +struct dml2_dcn32x_soc_qos_params { + struct { + unsigned int base_latency_us; + unsigned int base_latency_pixel_vm_us; + unsigned int base_latency_vm_us; + unsigned int scaling_factor_fclk_us; + unsigned int scaling_factor_mhz; + } urgent_latency_us; + + unsigned int loaded_round_trip_latency_fclk_cycles; + unsigned int urgent_out_of_order_return_per_channel_pixel_only_bytes; + unsigned int urgent_out_of_order_return_per_channel_pixel_and_vm_bytes; + unsigned int urgent_out_of_order_return_per_channel_vm_only_bytes; +}; + +struct dml2_dcn4_uclk_dpm_dependent_qos_params { + unsigned long minimum_uclk_khz; + unsigned int urgent_ramp_uclk_cycles; + unsigned int trip_to_memory_uclk_cycles; + unsigned int meta_trip_to_memory_uclk_cycles; + unsigned int maximum_latency_when_urgent_uclk_cycles; + unsigned int average_latency_when_urgent_uclk_cycles; + unsigned int maximum_latency_when_non_urgent_uclk_cycles; + unsigned int average_latency_when_non_urgent_uclk_cycles; +}; + +struct dml2_dcn4x_soc_qos_params { + unsigned int df_qos_response_time_fclk_cycles; + unsigned int max_round_trip_to_furthest_cs_fclk_cycles; + unsigned int mall_overhead_fclk_cycles; + unsigned int meta_trip_adder_fclk_cycles; + unsigned int average_transport_distance_fclk_cycles; + double umc_urgent_ramp_latency_margin; + double umc_max_latency_margin; + double umc_average_latency_margin; + double fabric_max_transport_latency_margin; + double fabric_average_transport_latency_margin; + struct dml2_dcn4_uclk_dpm_dependent_qos_params per_uclk_dpm_params[DML_MAX_CLK_TABLE_SIZE]; +}; + +enum dml2_qos_param_type { + dml2_qos_param_type_dcn3, + dml2_qos_param_type_dcn4x +}; + +struct dml2_soc_qos_parameters { + struct dml2_soc_derates derate_table; + struct { + unsigned 
int base_latency_us; + unsigned int scaling_factor_us; + unsigned int scaling_factor_mhz; + } writeback; + + union { + struct dml2_dcn32x_soc_qos_params dcn32x; + struct dml2_dcn4x_soc_qos_params dcn4x; + } qos_params; + + enum dml2_qos_param_type qos_type; +}; + +struct dml2_soc_power_management_parameters { + double dram_clk_change_blackout_us; + double dram_clk_change_read_only_us; + double dram_clk_change_write_only_us; + double fclk_change_blackout_us; + double g7_ppt_blackout_us; + double g7_temperature_read_blackout_us; + double stutter_enter_plus_exit_latency_us; + double stutter_exit_latency_us; + double low_power_stutter_enter_plus_exit_latency_us; + double low_power_stutter_exit_latency_us; + double z8_stutter_enter_plus_exit_latency_us; + double z8_stutter_exit_latency_us; + double z8_min_idle_time; + double g6_temp_read_blackout_us[DML_MAX_CLK_TABLE_SIZE]; + double type_b_dram_clk_change_blackout_us; + double type_b_ppt_blackout_us; +}; + +struct dml2_clk_table { + unsigned long clk_values_khz[DML_MAX_CLK_TABLE_SIZE]; + unsigned char num_clk_values; +}; + +struct dml2_dram_params { + unsigned int channel_width_bytes; + unsigned int channel_count; + unsigned int transactions_per_clock; +}; + +struct dml2_soc_state_table { + struct dml2_clk_table uclk; + struct dml2_clk_table fclk; + struct dml2_clk_table dcfclk; + struct dml2_clk_table dispclk; + struct dml2_clk_table dppclk; + struct dml2_clk_table dtbclk; + struct dml2_clk_table phyclk; + struct dml2_clk_table socclk; + struct dml2_clk_table dscclk; + struct dml2_clk_table phyclk_d18; + struct dml2_clk_table phyclk_d32; + + struct dml2_dram_params dram_config; +}; + +struct dml2_soc_vmin_clock_limits { + unsigned long dispclk_khz; + unsigned long dcfclk_khz; +}; + +struct dml2_soc_bb { + struct dml2_soc_state_table clk_table; + struct dml2_soc_qos_parameters qos_parameters; + struct dml2_soc_power_management_parameters power_management_parameters; + struct dml2_soc_vmin_clock_limits vmin_limit; + + double lower_bound_bandwidth_dchub; + double fraction_of_urgent_bandwidth_nominal_target; + double fraction_of_urgent_bandwidth_flip_target; + unsigned int dprefclk_mhz; + unsigned int xtalclk_mhz; + unsigned int pcie_refclk_mhz; + unsigned int dchub_refclk_mhz; + unsigned int mall_allocated_for_dcn_mbytes; + unsigned int max_outstanding_reqs; + unsigned long fabric_datapath_to_dcn_data_return_bytes; + unsigned long return_bus_width_bytes; + unsigned long hostvm_min_page_size_kbytes; + unsigned long gpuvm_min_page_size_kbytes; + double phy_downspread_percent; + double dcn_downspread_percent; + double dispclk_dppclk_vco_speed_mhz; + bool no_dfs; + bool do_urgent_latency_adjustment; + unsigned int mem_word_bytes; + unsigned int num_dcc_mcaches; + unsigned int mcache_size_bytes; + unsigned int mcache_line_size_bytes; + unsigned long max_fclk_for_uclk_dpm_khz; +}; + +struct dml2_ip_capabilities { + unsigned int pipe_count; + unsigned int otg_count; + unsigned int TDLUT_33cube_count; + unsigned int num_dsc; + unsigned int max_num_dp2p0_streams; + unsigned int max_num_hdmi_frl_outputs; + unsigned int max_num_dp2p0_outputs; + unsigned int max_num_wb; + unsigned int rob_buffer_size_kbytes; + unsigned int config_return_buffer_size_in_kbytes; + unsigned int config_return_buffer_segment_size_in_kbytes; + unsigned int meta_fifo_size_in_kentries; + unsigned int compressed_buffer_segment_size_in_kbytes; + unsigned int cursor_buffer_size; + unsigned int max_flip_time_us; + unsigned int max_flip_time_lines; + unsigned int hostvm_mode; + unsigned 
int subvp_drr_scheduling_margin_us;
+	unsigned int subvp_prefetch_end_to_mall_start_us;
+	unsigned int subvp_fw_processing_delay;
+	unsigned int max_vactive_det_fill_delay_us;
+	unsigned int ppt_max_allow_delay_ns;
+	unsigned int temp_read_max_allow_delay_us;
+	unsigned int dummy_pstate_max_allow_delay_us;
+	/* FAMS2 delays */
+	struct {
+		unsigned int max_allow_delay_us;
+		unsigned int scheduling_delay_us;
+		unsigned int vertical_interrupt_ack_delay_us; // delay to acknowledge the vline interrupt
+		unsigned int allow_programming_delay_us; // time required to program allow
+		unsigned int min_allow_width_us;
+		unsigned int subvp_df_throttle_delay_us;
+		unsigned int subvp_programming_delay_us;
+		unsigned int subvp_prefetch_to_mall_delay_us;
+		unsigned int drr_programming_delay_us;
+
+		unsigned int lock_timeout_us;
+		unsigned int recovery_timeout_us;
+		unsigned int flip_programming_delay_us;
+	} fams2;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h
new file mode 100644
index 000000000000..8646ce5f1c01
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h
@@ -0,0 +1,750 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_TOP_TYPES_H__
+#define __DML_TOP_TYPES_H__
+
+#include "dml_top_display_cfg_types.h"
+#include "dml_top_soc_parameter_types.h"
+#include "dml_top_policy_types.h"
+#include "dml_top_dchub_registers.h"
+
+#include "dmub_cmd.h"
+
+struct dml2_instance;
+
+enum dml2_project_id {
+	dml2_project_invalid = 0,
+	dml2_project_dcn4x_stage1,
+	dml2_project_dcn4x_stage2,
+	dml2_project_dcn4x_stage2_auto_drr_svp,
+};
+
+enum dml2_pstate_change_support {
+	dml2_pstate_change_vactive = 0,
+	dml2_pstate_change_vblank = 1,
+	dml2_pstate_change_vblank_and_vactive = 2,
+	dml2_pstate_change_drr = 3,
+	dml2_pstate_change_mall_svp = 4,
+	dml2_pstate_change_mall_full_frame = 6,
+	dml2_pstate_change_unsupported = 7
+};
+
+enum dml2_output_type_and_rate__type {
+	dml2_output_type_unknown = 0,
+	dml2_output_type_dp = 1,
+	dml2_output_type_edp = 2,
+	dml2_output_type_dp2p0 = 3,
+	dml2_output_type_hdmi = 4,
+	dml2_output_type_hdmifrl = 5
+};
+
+enum dml2_output_type_and_rate__rate {
+	dml2_output_rate_unknown = 0,
+	dml2_output_rate_dp_rate_hbr = 1,
+	dml2_output_rate_dp_rate_hbr2 = 2,
+	dml2_output_rate_dp_rate_hbr3 = 3,
+	dml2_output_rate_dp_rate_uhbr10 = 4,
+	dml2_output_rate_dp_rate_uhbr13p5 = 5,
+	dml2_output_rate_dp_rate_uhbr20 = 6,
+	dml2_output_rate_hdmi_rate_3x3 = 7,
+	dml2_output_rate_hdmi_rate_6x3 = 8,
+	dml2_output_rate_hdmi_rate_6x4 = 9,
+	dml2_output_rate_hdmi_rate_8x4 = 10,
+	dml2_output_rate_hdmi_rate_10x4 = 11,
+	dml2_output_rate_hdmi_rate_12x4 = 12,
+	dml2_output_rate_hdmi_rate_16x4 = 13,
+	dml2_output_rate_hdmi_rate_20x4 = 14
+};
+
+struct dml2_pmo_options {
+	bool disable_vblank;
+	bool disable_svp;
+	bool disable_drr_var;
+	bool disable_drr_clamped;
+	bool disable_drr_var_when_var_active;
+	bool disable_drr_clamped_when_var_active;
+	bool disable_fams2;
+	bool disable_vactive_det_fill_bw_pad; /* dml2_project_dcn4x_stage2_auto_drr_svp and above only */
+	bool disable_dyn_odm;
+	bool disable_dyn_odm_for_multi_stream;
+	bool disable_dyn_odm_for_stream_with_svp;
+	struct dml2_pmo_pstate_strategy *override_strategy_lists[DML2_MAX_PLANES];
+	unsigned int num_override_strategies_per_list[DML2_MAX_PLANES];
+};
+
+struct dml2_options {
+	enum dml2_project_id project_id;
+	struct dml2_pmo_options pmo_options;
+};
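For orientation, a minimal sketch (editorial, not part of the patch) of how a caller might fill one of the dml2_clk_table entries of the dml2_soc_state_table introduced in dml_top_soc_parameter_types.h above. The helper name fill_clk_table() is hypothetical; the struct fields and DML_MAX_CLK_TABLE_SIZE come from that header, and memset() stands in for whatever helper the surrounding code actually uses:

	/* Hypothetical helper: illustrates the dml2_clk_table layout only. */
	static void fill_clk_table(struct dml2_clk_table *table,
				   const unsigned long *values_khz,
				   unsigned int count)
	{
		unsigned int i;

		memset(table, 0, sizeof(*table));

		/* num_clk_values is an unsigned char and the array is fixed-size, so clamp. */
		if (count > DML_MAX_CLK_TABLE_SIZE)
			count = DML_MAX_CLK_TABLE_SIZE;

		for (i = 0; i < count; i++)
			table->clk_values_khz[i] = values_khz[i];

		table->num_clk_values = (unsigned char)count;
	}

Note that lookup_uclk_dpm_index_by_freq() later in this patch resolves a DPM index by exact equality against clk_values_khz, so the table is expected to hold the exact DPM frequencies in khz.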
+
+struct dml2_initialize_instance_in_out {
+	struct dml2_instance *dml2_instance;
+	struct dml2_options options;
+	struct dml2_soc_bb soc_bb;
+	struct dml2_ip_capabilities ip_caps;
+
+	struct {
+		void *explicit_ip_bb;
+		unsigned int explicit_ip_bb_size;
+	} overrides;
+};
+
+struct dml2_reset_instance_in_out {
+	struct dml2_instance *dml2_instance;
+};
+
+struct dml2_check_mode_supported_in_out {
+	/*
+	 * Inputs
+	 */
+	struct dml2_instance *dml2_instance;
+	const struct dml2_display_cfg *display_config;
+
+	/*
+	 * Outputs
+	 */
+	bool is_supported;
+};
+
+struct dml2_mcache_surface_allocation {
+	bool valid;
+	/*
+	 * For iMALL, dedicated mall mcaches are required (sharing of last
+	 * slice possible), for legacy phantom or phantom without return
+	 * only the mall mcaches need to be valid.
+	 */
+	bool requires_dedicated_mall_mcache;
+
+	unsigned int num_mcaches_plane0;
+	unsigned int num_mcaches_plane1;
+	/*
+	 * A plane is divided into vertical slices of mcaches,
+	 * which wrap on the surface width.
+	 *
+	 * For example, if the surface width is 7680, and split into
+	 * three slices of equal width, the boundary array would contain
+	 * [2560, 5120, 7680]
+	 *
+	 * The assignments are
+	 * 0 = [0 .. 2559]
+	 * 1 = [2560 .. 5119]
+	 * 2 = [5120 .. 7679]
+	 * 0 = [7680 .. INF]
+	 * The final element implicitly is the same as the first, and
+	 * at first seems invalid since it is never referenced (it lies
+	 * outside the surface). However, it's useful when shifting
+	 * (see below).
+	 *
+	 * For any given valid mcache assignment, a shifted version,
+	 * wrapped on the surface width boundary, is also assumed to be
+	 * valid.
+	 *
+	 * For example, shifting [2560, 5120, 7680] by -50 results in
+	 * [2510, 5070, 7630].
+	 *
+	 * The assignments are now:
+	 * 0 = [0 .. 2509]
+	 * 1 = [2510 .. 5069]
+	 * 2 = [5070 .. 7629]
+	 * 0 = [7630 .. INF]
+	 */
+	int mcache_x_offsets_plane0[DML2_MAX_MCACHES + 1];
+	int mcache_x_offsets_plane1[DML2_MAX_MCACHES + 1];
+
+	/*
+	 * Shift granularity is not necessarily 1
+	 */
+	struct {
+		int p0;
+		int p1;
+	} shift_granularity;
+
+	/*
+	 * MCacheIDs have global scope in the SoC, and they are stored here.
+	 * These IDs are generally not valid until all planes in a display
+	 * configuration have had their mcache requirements calculated.
+	 */
+	int global_mcache_ids_plane0[DML2_MAX_MCACHES + 1];
+	int global_mcache_ids_plane1[DML2_MAX_MCACHES + 1];
+	int global_mcache_ids_mall_plane0[DML2_MAX_MCACHES + 1];
+	int global_mcache_ids_mall_plane1[DML2_MAX_MCACHES + 1];
+
+	/*
+	 * Generally, plane0/1 slices must use a disjoint set of caches
+	 * but in some cases the final segment of the two planes can
+	 * use the same cache. If plane0_plane1 is set, then this is
+	 * allowed.
+	 *
+	 * Similarly, the caches allocated to MALL prefetcher are generally
+	 * disjoint, but if mall_prefetch is set, then the final segment
+	 * between the main and the mall pixel requestor can use the same
+	 * cache.
+	 *
+	 * Note that both bits may be set at the same time.
+ */ + struct { + bool mall_comb_mcache_p0; + bool mall_comb_mcache_p1; + bool plane0_plane1; + } last_slice_sharing; + + struct { + int meta_row_bytes_plane0; + int meta_row_bytes_plane1; + } informative; +}; + +enum dml2_pstate_type { + dml2_pstate_type_uclk, + dml2_pstate_type_ppt, + dml2_pstate_type_temp_read, + dml2_pstate_type_dummy_pstate, + dml2_pstate_type_count +}; + +enum dml2_pstate_method { + dml2_pstate_method_na = 0, + /* hw exclusive modes */ + dml2_pstate_method_vactive = 1, + dml2_pstate_method_vblank = 2, + dml2_pstate_method_reserved_hw = 5, + /* fw assisted exclusive modes */ + dml2_pstate_method_fw_svp = 6, + dml2_pstate_method_reserved_fw = 10, + /* fw assisted modes requiring drr modulation */ + dml2_pstate_method_fw_vactive_drr = 11, + dml2_pstate_method_fw_vblank_drr = 12, + dml2_pstate_method_fw_svp_drr = 13, + dml2_pstate_method_reserved_fw_drr_clamped = 20, + dml2_pstate_method_fw_drr = 21, + dml2_pstate_method_reserved_fw_drr_var = 22, + dml2_pstate_method_count +}; + +struct dml2_per_plane_programming { + const struct dml2_plane_parameters *plane_descriptor; + + union { + struct { + unsigned long dppclk_khz; + } dcn4x; + } min_clocks; + + struct dml2_mcache_surface_allocation mcache_allocation; + + // If a stream is using automatic or forced odm combine + // and the stream for this plane has num_odms_required > 1 + // num_dpps_required is always equal to num_odms_required for + // ALL planes of the stream + + // If a stream is using odm split, then this value is always 1 + unsigned int num_dpps_required; + + enum dml2_pstate_method uclk_pstate_support_method; + + // MALL size requirements for MALL SS and SubVP + unsigned int surface_size_mall_bytes; + unsigned int svp_size_mall_bytes; + + struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES]; + + struct { + bool valid; + struct dml2_plane_parameters descriptor; + struct dml2_mcache_surface_allocation mcache_allocation; + struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES]; + } phantom_plane; +}; + +union dml2_global_sync_programming { + struct { + unsigned int vstartup_lines; + unsigned int vupdate_offset_pixels; + unsigned int vupdate_vupdate_width_pixels; + unsigned int vready_offset_pixels; + unsigned int pstate_keepout_start_lines; + } dcn4x; +}; + +struct dml2_per_stream_programming { + const struct dml2_stream_parameters *stream_descriptor; + + union { + struct { + unsigned long dscclk_khz; + unsigned long dtbclk_khz; + unsigned long phyclk_khz; + } dcn4x; + } min_clocks; + + union dml2_global_sync_programming global_sync; + + unsigned int num_odms_required; + + enum dml2_pstate_method uclk_pstate_method; + + struct { + bool enabled; + struct dml2_stream_parameters descriptor; + union dml2_global_sync_programming global_sync; + } phantom_stream; + + union dmub_cmd_fams2_config fams2_base_params; + union { + union dmub_cmd_fams2_config fams2_sub_params; + union dmub_fams2_stream_static_sub_state_v2 fams2_sub_params_v2; + }; +}; + +//----------------- +// Mode Support Information +//----------------- + +struct dml2_mode_support_info { + bool ModeIsSupported; //pipe_count = ip_params->max_num_dpp; + ip_caps->otg_count = ip_params->max_num_otg; + ip_caps->num_dsc = ip_params->num_dsc; + ip_caps->max_num_dp2p0_streams = ip_params->max_num_dp2p0_streams; + ip_caps->max_num_dp2p0_outputs = ip_params->max_num_dp2p0_outputs; + ip_caps->max_num_hdmi_frl_outputs = ip_params->max_num_hdmi_frl_outputs; + ip_caps->rob_buffer_size_kbytes = ip_params->rob_buffer_size_kbytes; + 
ip_caps->config_return_buffer_size_in_kbytes = ip_params->config_return_buffer_size_in_kbytes;
+	ip_caps->config_return_buffer_segment_size_in_kbytes = ip_params->config_return_buffer_segment_size_in_kbytes;
+	ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries;
+	ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes;
+	ip_caps->cursor_buffer_size = ip_params->cursor_buffer_size;
+	ip_caps->max_flip_time_us = ip_params->max_flip_time_us;
+	ip_caps->max_flip_time_lines = ip_params->max_flip_time_lines;
+	ip_caps->hostvm_mode = ip_params->hostvm_mode;
+
+	// FIXME_STAGE2: cleanup after adding all dv overrides to ip_caps
+	ip_caps->subvp_drr_scheduling_margin_us = 100;
+	ip_caps->subvp_prefetch_end_to_mall_start_us = 15;
+	ip_caps->subvp_fw_processing_delay = 16;
+
+}
+
+static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, const struct dml2_ip_capabilities *ip_caps)
+{
+	ip_params->max_num_dpp = ip_caps->pipe_count;
+	ip_params->max_num_otg = ip_caps->otg_count;
+	ip_params->num_dsc = ip_caps->num_dsc;
+	ip_params->max_num_dp2p0_streams = ip_caps->max_num_dp2p0_streams;
+	ip_params->max_num_dp2p0_outputs = ip_caps->max_num_dp2p0_outputs;
+	ip_params->max_num_hdmi_frl_outputs = ip_caps->max_num_hdmi_frl_outputs;
+	ip_params->rob_buffer_size_kbytes = ip_caps->rob_buffer_size_kbytes;
+	ip_params->config_return_buffer_size_in_kbytes = ip_caps->config_return_buffer_size_in_kbytes;
+	ip_params->config_return_buffer_segment_size_in_kbytes = ip_caps->config_return_buffer_segment_size_in_kbytes;
+	ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries;
+	ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes;
+	ip_params->cursor_buffer_size = ip_caps->cursor_buffer_size;
+	ip_params->max_flip_time_us = ip_caps->max_flip_time_us;
+	ip_params->max_flip_time_lines = ip_caps->max_flip_time_lines;
+	ip_params->hostvm_mode = ip_caps->hostvm_mode;
+}
+
+bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out)
+{
+	struct dml2_core_instance *core = in_out->instance;
+
+	if (!in_out->minimum_clock_table)
+		return false;
+	else
+		core->minimum_clock_table = in_out->minimum_clock_table;
+
+	if (in_out->explicit_ip_bb && in_out->explicit_ip_bb_size > 0) {
+		memcpy(&core->clean_me_up.mode_lib.ip, in_out->explicit_ip_bb, in_out->explicit_ip_bb_size);
+
+		// FIXME_STAGE2:
+		// DV still uses stage1 ip_param_st for each variant, need to patch the ip_caps with ip_param info
+		// Should move DV to use ip_caps but need to move more overrides to ip_caps
+		patch_ip_caps_with_explicit_ip_params(in_out->ip_caps, in_out->explicit_ip_bb);
+		core->clean_me_up.mode_lib.ip.subvp_pstate_allow_width_us = core_dcn4_ip_caps_base.subvp_pstate_allow_width_us;
+		core->clean_me_up.mode_lib.ip.subvp_fw_processing_delay_us = core_dcn4_ip_caps_base.subvp_fw_processing_delay_us;
+		core->clean_me_up.mode_lib.ip.subvp_swath_height_margin_lines = core_dcn4_ip_caps_base.subvp_swath_height_margin_lines;
+	} else {
+		memcpy(&core->clean_me_up.mode_lib.ip, &core_dcn4_ip_caps_base, sizeof(struct dml2_core_ip_params));
+		patch_ip_params_with_ip_caps(&core->clean_me_up.mode_lib.ip, in_out->ip_caps);
+		core->clean_me_up.mode_lib.ip.imall_supported = false;
+	}
+
+	memcpy(&core->clean_me_up.mode_lib.soc, in_out->soc_bb, sizeof(struct dml2_soc_bb));
+	memcpy(&core->clean_me_up.mode_lib.ip_caps, in_out->ip_caps, sizeof(struct dml2_ip_capabilities));
+
+	return true;
+}
+
+static
void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, + const struct dml2_implicit_svp_meta *meta) +{ + memcpy(phantom, main, sizeof(struct dml2_stream_parameters)); + + phantom->timing.v_total = meta->v_total; + phantom->timing.v_active = meta->v_active; + phantom->timing.v_front_porch = meta->v_front_porch; + phantom->timing.v_blank_end = phantom->timing.v_total - phantom->timing.v_front_porch - phantom->timing.v_active; + phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; + phantom->timing.drr_config.enabled = false; +} + +static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, + const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) +{ + memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); + + phantom->stream_index = phantom_stream_index; + phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; + phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); + phantom->immediate_flip = false; + phantom->dynamic_meta_data.enable = false; + phantom->cursor.num_cursors = 0; + phantom->cursor.cursor_width = 0; + phantom->tdlut.setup_for_tdlut = false; +} + +static void expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index; + const struct dml2_plane_parameters *main_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); + + if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) + return; + + /* disable unbounded requesting for all planes until stage 3 has been performed */ + if (!display_cfg->stage3.performed) { + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true; + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false; + } + // Create the phantom streams + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + main_stream = &display_cfg->display_config.stream_descriptors[stream_index]; + scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index; + + if (display_cfg->stage3.stream_svp_meta[stream_index].valid) { + // Create the phantom stream + 
create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], + main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]); + + // Associate this phantom stream to the main stream + scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams; + + // Increment num streams + svp_expanded_display_cfg->num_streams++; + } + } + + // Create the phantom planes + for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { + main_plane = &display_cfg->display_config.plane_descriptors[plane_index]; + + if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { + main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]]; + create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], + main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream); + + // Associate this phantom plane to the main plane + scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index; + scratch->main_plane_index_to_phantom_plane_index[plane_index] = svp_expanded_display_cfg->num_planes; + + // Increment num planes + svp_expanded_display_cfg->num_planes++; + + // Adjust the main plane settings + svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe; + } + } +} + +static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_instance *core, const struct display_configuation_with_meta *display_cfg, + const struct dml2_display_cfg *svp_expanded_display_cfg, struct dml2_display_cfg_programming *programming, struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index, pipe_offset, stream_already_populated_mask, main_plane_index, mcache_index; + unsigned int total_main_mcaches_required = 0; + int total_pipe_regs_copied = 0; + int dml_internal_pipe_index = 0; + const struct dml2_plane_parameters *main_plane; + const struct dml2_plane_parameters *phantom_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + // Copy the unexpanded display config to output + memcpy(&programming->display_config, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + + // Set the global register values + dml2_core_calcs_get_arb_params(&display_cfg->display_config, &core->clean_me_up.mode_lib, &programming->global_regs.arb_regs); + // Get watermarks uses display config for ref clock override, so it doesn't matter whether we pass the pre or post expansion + // display config + dml2_core_calcs_get_watermarks(&display_cfg->display_config, &core->clean_me_up.mode_lib, &programming->global_regs.wm_regs[0]); + + // Check if FAMS2 is required + if (display_cfg->stage3.performed && display_cfg->stage3.success) { + programming->fams2_required = display_cfg->stage3.fams2_required; + + dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config); + } + + // Only loop over all the main streams (the implicit svp streams will be packed as part of the 
main stream) + for (stream_index = 0; stream_index < programming->display_config.num_streams; stream_index++) { + main_stream = &svp_expanded_display_cfg->stream_descriptors[stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[stream_index]]; + + // Set the descriptor + programming->stream_programming[stream_index].stream_descriptor = &programming->display_config.stream_descriptors[stream_index]; + + // Set the odm combine factor + programming->stream_programming[stream_index].num_odms_required = display_cfg->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used; + + // Check if the stream has implicit SVP enabled + if (main_stream != phantom_stream) { + // If so, copy the phantom stream descriptor + programming->stream_programming[stream_index].phantom_stream.enabled = true; + memcpy(&programming->stream_programming[stream_index].phantom_stream.descriptor, phantom_stream, sizeof(struct dml2_stream_parameters)); + } else { + programming->stream_programming[stream_index].phantom_stream.enabled = false; + } + + // Due to the way DML indexes data internally, it's easier to populate the rest of the display + // stream programming in the next stage + } + + dml_internal_pipe_index = 0; + total_pipe_regs_copied = 0; + stream_already_populated_mask = 0x0; + + // Loop over all main planes + for (plane_index = 0; plane_index < programming->display_config.num_planes; plane_index++) { + main_plane = &svp_expanded_display_cfg->plane_descriptors[plane_index]; + + // Set the descriptor + programming->plane_programming[plane_index].plane_descriptor = &programming->display_config.plane_descriptors[plane_index]; + + // Set the mpc combine factor + programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index]; + + // Setup the appropriate p-state strategy + if (display_cfg->stage3.performed && display_cfg->stage3.success) { + programming->plane_programming[plane_index].uclk_pstate_support_method = display_cfg->stage3.pstate_switch_modes[plane_index]; + } else { + programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na; + } + + dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); + + memcpy(&programming->plane_programming[plane_index].mcache_allocation, + &display_cfg->stage2.mcache_allocations[plane_index], + sizeof(struct dml2_mcache_surface_allocation)); + total_main_mcaches_required += programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane0 + + programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane1 - + (programming->plane_programming[plane_index].mcache_allocation.last_slice_sharing.plane0_plane1 ? 
1 : 0); + + for (pipe_offset = 0; pipe_offset < programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) { + // Assign storage for this pipe's register values + programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied]; + memset(programming->plane_programming[plane_index].pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set)); + total_pipe_regs_copied++; + + // Populate the main plane regs + dml2_core_calcs_get_pipe_regs(svp_expanded_display_cfg, &core->clean_me_up.mode_lib, programming->plane_programming[plane_index].pipe_regs[pipe_offset], dml_internal_pipe_index); + + // Multiple planes can refer to the same stream index, so it's only necessary to populate it once + if (!(stream_already_populated_mask & (0x1 << main_plane->stream_index))) { + dml2_core_calcs_get_stream_programming(&core->clean_me_up.mode_lib, &programming->stream_programming[main_plane->stream_index], dml_internal_pipe_index); + + programming->stream_programming[main_plane->stream_index].uclk_pstate_method = programming->plane_programming[plane_index].uclk_pstate_support_method; + + /* unconditionally populate fams2 params */ + dml2_core_calcs_get_stream_fams2_programming(&core->clean_me_up.mode_lib, + display_cfg, + &programming->stream_programming[main_plane->stream_index].fams2_base_params, + &programming->stream_programming[main_plane->stream_index].fams2_sub_params, + programming->stream_programming[main_plane->stream_index].uclk_pstate_method, + plane_index); + + stream_already_populated_mask |= (0x1 << main_plane->stream_index); + } + dml_internal_pipe_index++; + } + } + + for (plane_index = programming->display_config.num_planes; plane_index < svp_expanded_display_cfg->num_planes; plane_index++) { + phantom_plane = &svp_expanded_display_cfg->plane_descriptors[plane_index]; + main_plane_index = scratch->phantom_plane_index_to_main_plane_index[plane_index]; + main_plane = &svp_expanded_display_cfg->plane_descriptors[main_plane_index]; + + programming->plane_programming[main_plane_index].phantom_plane.valid = true; + memcpy(&programming->plane_programming[main_plane_index].phantom_plane.descriptor, phantom_plane, sizeof(struct dml2_plane_parameters)); + + dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[main_plane_index].svp_size_mall_bytes, dml_internal_pipe_index); + + /* generate mcache allocation, phantoms use identical mcache configuration, but in the MALL set and unique mcache ID's beginning after all main ID's */ + memcpy(&programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation, + &programming->plane_programming[main_plane_index].mcache_allocation, + sizeof(struct dml2_mcache_surface_allocation)); + for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane0; mcache_index++) { + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index] += total_main_mcaches_required; + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane0[mcache_index] = + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index]; + } + for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane1; mcache_index++) { + 
programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index] += total_main_mcaches_required; + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane1[mcache_index] = + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index]; + } + + for (pipe_offset = 0; pipe_offset < programming->plane_programming[main_plane_index].num_dpps_required; pipe_offset++) { + // Assign storage for this pipe's register values + programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied]; + memset(programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set)); + total_pipe_regs_copied++; + + // Populate the phantom plane regs + dml2_core_calcs_get_pipe_regs(svp_expanded_display_cfg, &core->clean_me_up.mode_lib, programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset], dml_internal_pipe_index); + // Populate the phantom stream specific programming + if (!(stream_already_populated_mask & (0x1 << phantom_plane->stream_index))) { + dml2_core_calcs_get_global_sync_programming(&core->clean_me_up.mode_lib, &programming->stream_programming[main_plane->stream_index].phantom_stream.global_sync, dml_internal_pipe_index); + + stream_already_populated_mask |= (0x1 << phantom_plane->stream_index); + } + + dml_internal_pipe_index++; + } + } +} + +bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out) +{ + struct dml2_core_instance *core = (struct dml2_core_instance *)in_out->instance; + struct dml2_core_mode_support_locals *l = &core->scratch.mode_support_locals; + + bool result; + unsigned int i, stream_index, stream_bitmask; + int unsigned odm_count, num_odm_output_segments, dpp_count; + + expand_implict_subvp(in_out->display_cfg, &l->svp_expanded_display_cfg, &core->scratch); + + l->mode_support_ex_params.mode_lib = &core->clean_me_up.mode_lib; + l->mode_support_ex_params.in_display_cfg = &l->svp_expanded_display_cfg; + l->mode_support_ex_params.min_clk_table = in_out->min_clk_table; + l->mode_support_ex_params.min_clk_index = in_out->min_clk_index; + l->mode_support_ex_params.out_evaluation_info = &in_out->mode_support_result.cfg_support_info.clean_me_up.support_info; + + result = dml2_core_calcs_mode_support_ex(&l->mode_support_ex_params); + + in_out->mode_support_result.cfg_support_info.is_supported = result; + + if (result) { + in_out->mode_support_result.global.dispclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDISPCLK * 1000); + in_out->mode_support_result.global.dcfclk_deepsleep_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.dcfclk_deepsleep * 1000); + in_out->mode_support_result.global.socclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.SOCCLK * 1000); + + in_out->mode_support_result.global.fclk_pstate_supported = l->mode_support_ex_params.out_evaluation_info->global_fclk_change_supported; + in_out->mode_support_result.global.uclk_pstate_supported = l->mode_support_ex_params.out_evaluation_info->global_dram_clock_change_supported; + + in_out->mode_support_result.global.active.fclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.FabricClock * 1000); + in_out->mode_support_result.global.active.dcfclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.DCFCLK * 1000); + + + 
in_out->mode_support_result.global.svp_prefetch.fclk_khz = (unsigned long)core->clean_me_up.mode_lib.ms.FabricClock * 1000; + in_out->mode_support_result.global.svp_prefetch.dcfclk_khz = (unsigned long)core->clean_me_up.mode_lib.ms.DCFCLK * 1000; + + in_out->mode_support_result.global.active.average_bw_sdp_kbps = 0; + in_out->mode_support_result.global.active.urgent_bw_dram_kbps = 0; + in_out->mode_support_result.global.svp_prefetch.average_bw_sdp_kbps = 0; + in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = 0; + + in_out->mode_support_result.global.active.average_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] * 1000), 1.0); + in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] * 1000), 1.0); + in_out->mode_support_result.global.svp_prefetch.average_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] * 1000), 1.0); + in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] * 1000), 1.0); + + in_out->mode_support_result.global.active.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0); + in_out->mode_support_result.global.active.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0); + in_out->mode_support_result.global.svp_prefetch.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); + in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_sdp_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_dram_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps); + + for (i = 0; i < l->svp_expanded_display_cfg.num_planes; i++) { + in_out->mode_support_result.per_plane[i].dppclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDPPCLK[i] * 1000); + } + + stream_bitmask = 0; + for (i = 0; i < 
l->svp_expanded_display_cfg.num_planes; i++) { + odm_count = 1; + dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i]; + num_odm_output_segments = 1; + + switch (l->mode_support_ex_params.out_evaluation_info->ODMMode[i]) { + case dml2_odm_mode_bypass: + odm_count = 1; + dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i]; + break; + case dml2_odm_mode_combine_2to1: + odm_count = 2; + dpp_count = 2; + break; + case dml2_odm_mode_combine_3to1: + odm_count = 3; + dpp_count = 3; + break; + case dml2_odm_mode_combine_4to1: + odm_count = 4; + dpp_count = 4; + break; + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + num_odm_output_segments = 2; + break; + case dml2_odm_mode_mso_1to4: + num_odm_output_segments = 4; + break; + case dml2_odm_mode_auto: + default: + odm_count = 1; + dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i]; + break; + } + + in_out->mode_support_result.cfg_support_info.plane_support_info[i].dpps_used = dpp_count; + + dml2_core_calcs_get_plane_support_info(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->mode_support_result.cfg_support_info.plane_support_info[i], i); + + stream_index = l->svp_expanded_display_cfg.plane_descriptors[i].stream_index; + + in_out->mode_support_result.per_stream[stream_index].dscclk_khz = (unsigned int)core->clean_me_up.mode_lib.ms.required_dscclk_freq_mhz[i] * 1000; + DML_LOG_VERBOSE("CORE_DCN4::%s: i=%d stream_index=%d, in_out->mode_support_result.per_stream[stream_index].dscclk_khz = %u\n", __func__, i, stream_index, in_out->mode_support_result.per_stream[stream_index].dscclk_khz); + + if (!((stream_bitmask >> stream_index) & 0x1)) { + in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used = odm_count; + in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].num_odm_output_segments = num_odm_output_segments; + in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].dsc_enable = l->mode_support_ex_params.out_evaluation_info->DSCEnabled[i]; + in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].num_dsc_slices = l->mode_support_ex_params.out_evaluation_info->NumberOfDSCSlices[i]; + dml2_core_calcs_get_stream_support_info(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index], i); + in_out->mode_support_result.per_stream[stream_index].dtbclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDTBCLK[i] * 1000); + stream_bitmask |= 0x1 << stream_index; + } + } + } + + return result; +} + +static int lookup_uclk_dpm_index_by_freq(unsigned long uclk_freq_khz, struct dml2_soc_bb *soc_bb) +{ + int i; + + for (i = 0; i < soc_bb->clk_table.uclk.num_clk_values; i++) { + if (uclk_freq_khz == soc_bb->clk_table.uclk.clk_values_khz[i]) + return i; + } + return 0; +} + +bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out) +{ + struct dml2_core_instance *core = (struct dml2_core_instance *)in_out->instance; + struct dml2_core_mode_programming_locals *l = &core->scratch.mode_programming_locals; + + bool result = false; + unsigned int pipe_offset; + int dml_internal_pipe_index; + int total_pipe_regs_copied = 0; + int stream_already_populated_mask = 0; + + int main_stream_index; + unsigned int plane_index; + + expand_implict_subvp(in_out->display_cfg, &l->svp_expanded_display_cfg, &core->scratch); + + 
l->mode_programming_ex_params.mode_lib = &core->clean_me_up.mode_lib;
+	l->mode_programming_ex_params.in_display_cfg = &l->svp_expanded_display_cfg;
+	l->mode_programming_ex_params.min_clk_table = in_out->instance->minimum_clock_table;
+	l->mode_programming_ex_params.cfg_support_info = in_out->cfg_support_info;
+	l->mode_programming_ex_params.programming = in_out->programming;
+	l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4x.active.uclk_khz,
+		&core->clean_me_up.mode_lib.soc);
+
+	result = dml2_core_calcs_mode_programming_ex(&l->mode_programming_ex_params);
+
+	if (result) {
+		// If the input display configuration contains implicit SVP, we need to use a special packer
+		if (in_out->display_cfg->display_config.overrides.enable_subvp_implicit_pmo) {
+			pack_mode_programming_params_with_implicit_subvp(core, in_out->display_cfg, &l->svp_expanded_display_cfg, in_out->programming, &core->scratch);
+		} else {
+			memcpy(&in_out->programming->display_config, in_out->display_cfg, sizeof(struct dml2_display_cfg));
+
+			dml2_core_calcs_get_arb_params(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->programming->global_regs.arb_regs);
+			dml2_core_calcs_get_watermarks(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->programming->global_regs.wm_regs[0]);
+
+			dml_internal_pipe_index = 0;
+
+			for (plane_index = 0; plane_index < in_out->programming->display_config.num_planes; plane_index++) {
+				in_out->programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index];
+
+				if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe)
+					in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp;
+				else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe)
+					in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp;
+				else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return)
+					in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp;
+				else {
+					if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us)
+						in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vactive;
+					else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us)
+						in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vblank;
+					else
+						in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na;
+				}
+
+				dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index);
+
+				memcpy(&in_out->programming->plane_programming[plane_index].mcache_allocation,
+					&in_out->display_cfg->stage2.mcache_allocations[plane_index],
+					sizeof(struct dml2_mcache_surface_allocation));
+
+				for (pipe_offset = 0; pipe_offset <
in_out->programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) { + in_out->programming->plane_programming[plane_index].plane_descriptor = &in_out->programming->display_config.plane_descriptors[plane_index]; + + // Assign storage for this pipe's register values + in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &in_out->programming->pipe_regs[total_pipe_regs_copied]; + memset(in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set)); + total_pipe_regs_copied++; + + // Populate + dml2_core_calcs_get_pipe_regs(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset], dml_internal_pipe_index); + + main_stream_index = in_out->programming->display_config.plane_descriptors[plane_index].stream_index; + + // Multiple planes can refer to the same stream index, so it's only necessary to populate it once + if (!(stream_already_populated_mask & (0x1 << main_stream_index))) { + in_out->programming->stream_programming[main_stream_index].stream_descriptor = &in_out->programming->display_config.stream_descriptors[main_stream_index]; + in_out->programming->stream_programming[main_stream_index].num_odms_required = in_out->cfg_support_info->stream_support_info[main_stream_index].odms_used; + dml2_core_calcs_get_stream_programming(&core->clean_me_up.mode_lib, &in_out->programming->stream_programming[main_stream_index], dml_internal_pipe_index); + + stream_already_populated_mask |= (0x1 << main_stream_index); + } + dml_internal_pipe_index++; + } + } + } + } + + return result; +} + +bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out) +{ + struct dml2_core_internal_display_mode_lib *mode_lib = &in_out->instance->clean_me_up.mode_lib; + + if (in_out->mode_is_supported) + in_out->programming->informative.voltage_level = in_out->instance->scratch.mode_programming_locals.mode_programming_ex_params.min_clk_index; + else + in_out->programming->informative.voltage_level = in_out->instance->scratch.mode_support_locals.mode_support_ex_params.min_clk_index; + + dml2_core_calcs_get_informative(mode_lib, in_out->programming); + return true; +} + +bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out) +{ + memset(in_out->mcache_allocation, 0, sizeof(struct dml2_mcache_surface_allocation)); + + dml2_core_calcs_get_mcache_allocation(&in_out->instance->clean_me_up.mode_lib, in_out->mcache_allocation, in_out->plane_index); + + if (in_out->mcache_allocation->num_mcaches_plane0 > 0) + in_out->mcache_allocation->mcache_x_offsets_plane0[in_out->mcache_allocation->num_mcaches_plane0 - 1] = in_out->plane_descriptor->surface.plane0.width; + + if (in_out->mcache_allocation->num_mcaches_plane1 > 0) + in_out->mcache_allocation->mcache_x_offsets_plane1[in_out->mcache_allocation->num_mcaches_plane1 - 1] = in_out->plane_descriptor->surface.plane1.width; + + in_out->mcache_allocation->requires_dedicated_mall_mcache = false; + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h new file mode 100644 index 000000000000..a68bb001a346 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#ifndef __DML2_CORE_DCN4_H__ +#define __DML2_CORE_DCN4_H__ +bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out); +bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out); +bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out); +bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out); +bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out); +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c new file mode 100644 index 000000000000..f809c4073b43 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -0,0 +1,13341 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_internal_shared_types.h" +#include "dml2_core_dcn4_calcs.h" +#include "dml2_debug.h" +#include "lib_float_math.h" +#include "dml_top_types.h" + +#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 +#define DML_MAX_NUM_OF_SLICES_PER_DSC 4 +#define DML_MAX_COMPRESSION_RATIO 4 +//#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW +//#define DML_GLOBAL_PREFETCH_CHECK +#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE +#define DML_MAX_VSTARTUP_START 1023 + +const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + +static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder) +{ + *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); + return dividend / divisor; +} + +static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +{ + DML_LOG_VERBOSE("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->ViewportSizeSupport == 0) + DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || 
support->LinkRateForMultistreamNotIndicated == 1) + DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + if (!fail_only || support->MultistreamWithHDMIOreDP == 1) + DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + if (!fail_only || support->NotEnoughLanesForMSO == 1) + DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->P2IWith420 == 1) + DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->NotEnoughDSCUnits == 1) + DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + if (!fail_only || support->ROBSupport == 0) + DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); + if (!fail_only || support->OutstandingRequestsSupport == 0) + DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) + DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + 
DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + if (!fail_only || support->PrefetchSupported == 0) + DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->DynamicMetadataSupported == 0) + DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->PTEBufferSizeNotExceeded == 0) + DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) + DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + + if (!fail_only || support->ModeSupport == 0) + DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport); + DML_LOG_VERBOSE("DML: ===================================== 
\n"); +} + +static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) +{ + for (unsigned int k = 0; k < display_cfg->num_planes; k++) { + double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { + switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { + case dml2_444: + out_bpp[k] = bpc * 3; + break; + case dml2_s422: + out_bpp[k] = bpc * 2; + break; + case dml2_n422: + out_bpp[k] = bpc * 2; + break; + case dml2_420: + default: + out_bpp[k] = bpc * 1.5; + break; + } + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { + out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; + } else { + out_bpp[k] = 0; + } + DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); + } +} + +static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up) +{ + unsigned int remainder; + + if (multiple == 0) + return num; + + remainder = num % multiple; + if (remainder == 0) + return num; + + if (up) + return (num + multiple - remainder); + else + return (num - remainder); +} + +static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) +{ + unsigned int num_active_pipes = 0; + + for (unsigned int k = 0; k < num_planes; k++) { + num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; + } + + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); + return num_active_pipes; +} + +static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) +{ + unsigned int pipe_idx = 0; + + for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { + pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; + } + + for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { + for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + +static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) +{ + bool is_phantom = false; + + if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || + plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { + is_phantom = true; + } + + return is_phantom; +} + +static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) +{ + unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; + + bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]); + DML_LOG_VERBOSE("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom); + return is_phantom; +} + +#define 
dml_get_per_pipe_var_func(variable, type, interval_var) static type dml_get_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) \ +{ \ +unsigned int plane_idx; \ +plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; \ +return (type) interval_var[plane_idx]; \ +} + +dml_get_per_pipe_var_func(dpte_group_size_in_bytes, unsigned int, mode_lib->mp.dpte_group_bytes); +dml_get_per_pipe_var_func(vm_group_size_in_bytes, unsigned int, mode_lib->mp.vm_group_bytes); +dml_get_per_pipe_var_func(swath_height_l, unsigned int, mode_lib->mp.SwathHeightY); +dml_get_per_pipe_var_func(swath_height_c, unsigned int, mode_lib->mp.SwathHeightC); +dml_get_per_pipe_var_func(dpte_row_height_linear_l, unsigned int, mode_lib->mp.dpte_row_height_linear); +dml_get_per_pipe_var_func(dpte_row_height_linear_c, unsigned int, mode_lib->mp.dpte_row_height_linear_chroma); + +dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStartup); +dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix); +dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix); +dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix); +dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines); +dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY); +dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC); +dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte); +dml_get_per_pipe_var_func(surface_size_in_mall_bytes, unsigned int, mode_lib->mp.SurfaceSizeInTheMALL); + +#define dml_get_per_plane_var_func(variable, type, interval_var) static type dml_get_plane_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx) \ +{ \ +return (type) interval_var[plane_idx]; \ +} + +dml_get_per_plane_var_func(num_mcaches_plane0, unsigned int, mode_lib->ms.num_mcaches_l); +dml_get_per_plane_var_func(mcache_row_bytes_plane0, unsigned int, mode_lib->ms.mcache_row_bytes_l); +dml_get_per_plane_var_func(mcache_shift_granularity_plane0, unsigned int, mode_lib->ms.mcache_shift_granularity_l); +dml_get_per_plane_var_func(num_mcaches_plane1, unsigned int, mode_lib->ms.num_mcaches_c); +dml_get_per_plane_var_func(mcache_row_bytes_plane1, unsigned int, mode_lib->ms.mcache_row_bytes_c); +dml_get_per_plane_var_func(mcache_shift_granularity_plane1, unsigned int, mode_lib->ms.mcache_shift_granularity_c); +dml_get_per_plane_var_func(mall_comb_mcache_l, unsigned int, mode_lib->ms.mall_comb_mcache_l); +dml_get_per_plane_var_func(mall_comb_mcache_c, unsigned int, mode_lib->ms.mall_comb_mcache_c); +dml_get_per_plane_var_func(lc_comb_mcache, unsigned int, mode_lib->ms.lc_comb_mcache); +dml_get_per_plane_var_func(subviewport_lines_needed_in_mall, unsigned int, mode_lib->ms.SubViewportLinesNeededInMALL); +dml_get_per_plane_var_func(max_vstartup_lines, unsigned int, mode_lib->ms.MaxVStartupLines); + +#define dml_get_per_plane_array_var_func(variable, type, interval_var) static type dml_get_plane_array_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx, unsigned int array_idx) \ +{ \ +return (type) interval_var[plane_idx][array_idx]; \ +} + +dml_get_per_plane_array_var_func(mcache_offsets_plane0, unsigned int, mode_lib->ms.mcache_offsets_l); 
+dml_get_per_plane_array_var_func(mcache_offsets_plane1, unsigned int, mode_lib->ms.mcache_offsets_c); + +#define dml_get_var_func(var, type, internal_var) static type dml_get_##var(const struct dml2_core_internal_display_mode_lib *mode_lib) \ +{ \ +return (type) internal_var; \ +} + +dml_get_var_func(wm_urgent, double, mode_lib->mp.Watermark.UrgentWatermark); +dml_get_var_func(wm_stutter_exit, double, mode_lib->mp.Watermark.StutterExitWatermark); +dml_get_var_func(wm_stutter_enter_exit, double, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark); +dml_get_var_func(wm_z8_stutter_exit, double, mode_lib->mp.Watermark.Z8StutterExitWatermark); +dml_get_var_func(wm_z8_stutter_enter_exit, double, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark); +dml_get_var_func(wm_memory_trip, double, mode_lib->mp.UrgentLatency); +dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory); + +dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark); +dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark); +dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us); +dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark); +dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth); +dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip); +dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL); +dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); +dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark); +dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency); +dml_get_var_func(stutter_efficiency_no_vblank, double, mode_lib->mp.StutterEfficiencyNotIncludingVBlank); +dml_get_var_func(stutter_num_bursts, double, mode_lib->mp.NumberOfStutterBurstsPerFrame); +dml_get_var_func(stutter_efficiency_z8, double, mode_lib->mp.Z8StutterEfficiency); +dml_get_var_func(stutter_num_bursts_z8, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame); +dml_get_var_func(stutter_period, double, mode_lib->mp.StutterPeriod); +dml_get_var_func(stutter_efficiency_z8_bestcase, double, mode_lib->mp.Z8StutterEfficiencyBestCase); +dml_get_var_func(stutter_num_bursts_z8_bestcase, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase); +dml_get_var_func(stutter_period_bestcase, double, mode_lib->mp.StutterPeriodBestCase); +dml_get_var_func(fclk_change_latency, double, mode_lib->mp.MaxActiveFCLKChangeLatencySupported); +dml_get_var_func(global_dppclk_khz, double, mode_lib->mp.GlobalDPPCLK * 1000.0); + +dml_get_var_func(sys_active_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); + +dml_get_var_func(svp_prefetch_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + 
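+// For reference, each dml_get_var_func() line above expands to a trivial typed
+// accessor; e.g. the wm_urgent instance becomes (expansion shown for illustration):
+//
+//	static double dml_get_wm_urgent(const struct dml2_core_internal_display_mode_lib *mode_lib)
+//	{
+//		return (double)mode_lib->mp.Watermark.UrgentWatermark;
+//	}
+//
+// The dml_get_per_pipe_var_func() variants additionally resolve the owning plane
+// via mode_lib->mp.pipe_plane[pipe_idx] before indexing the per-plane array.
+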
+dml_get_var_func(sys_active_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); + +dml_get_var_func(svp_prefetch_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(sys_active_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(sys_active_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); + +dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]); + +dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency); +dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us); +dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us); +dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us); +dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us); + +dml_get_var_func(sys_active_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(sys_active_non_urg_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_non_urg_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_non_urg_bw_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + 
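+// The bandwidth accessors above and below roughly follow the naming scheme
+// <soc_state>_<avg|urg|non_urg>_bw_<required|available>_<sdp|dram>[_flip],
+// mirroring the underlying tables indexed by
+// [dml2_core_internal_soc_state_*][dml2_core_internal_bw_*].
+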
+dml_get_var_func(sys_active_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(sys_active_non_urg_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_non_urg_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_non_urg_bw_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(comp_buffer_size_kbytes, unsigned int, mode_lib->mp.CompressedBufferSizeInkByte); + +dml_get_var_func(unbounded_request_enabled, bool, mode_lib->mp.UnboundedRequestEnabled); +dml_get_var_func(wm_writeback_urgent, double, mode_lib->mp.Watermark.WritebackUrgentWatermark); +dml_get_var_func(cstate_max_cap_mode, bool, mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); +dml_get_var_func(compbuf_reserved_space_64b, unsigned int, mode_lib->mp.compbuf_reserved_space_64b); +dml_get_var_func(hw_debug5, bool, mode_lib->mp.hw_debug5); +dml_get_var_func(dcfclk_deep_sleep_hysteresis, unsigned int, mode_lib->mp.dcfclk_deep_sleep_hysteresis); + +static void CalculateMaxDETAndMinCompressedBufferSize( + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int ConfigReturnBufferSegmentSizeInKByte, + unsigned int ROBBufferSizeInKByte, + unsigned int MaxNumDPP, + unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size + unsigned int nomDETInKByteOverrideValue, // VBA_DELTA + bool is_mrq_present, + + // Output + unsigned int *MaxTotalDETInKByte, + unsigned int *nomDETInKByte, + unsigned int *MinCompressedBufferSizeInKByte) +{ + if (is_mrq_present) + *MaxTotalDETInKByte = (unsigned int) math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte)*4/5, 64); + else + *MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte; + + *nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte)); + *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; + + DML_LOG_VERBOSE("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); + DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); + 
DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); + + if (nomDETInKByteOverrideEnable) { + *nomDETInKByte = nomDETInKByteOverrideValue; + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); + } +} + +static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd) +{ + //unsigned int num_active_planes = display_cfg->num_planes; + + //Progressive To Interlace Unit Effect + for (unsigned int k = 0; k < display_cfg->num_planes; ++k) { + PixelClockBackEnd[k] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && ptoi_supported == true) { + // FIXME_STAGE2... can sw pass the pixel rate for interlaced directly + //display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz = 2 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz; + } + } +} + +static bool dml_is_420(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 1; + break; + case dml2_420_10: + val = 1; + break; + case dml2_420_12: + val = 1; + break; + case dml2_422_planar_8: + val = 0; + break; + case dml2_422_planar_10: + val = 0; + break; + case dml2_422_planar_12: + val = 0; + break; + case dml2_422_packed_8: + val = 0; + break; + case dml2_422_packed_10: + val = 0; + break; + case dml2_422_packed_12: + val = 0; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + default: + DML_ASSERT(0); + break; + } + return val; +} + +static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) +{ + if (sw_mode == dml2_sw_linear) + return 256; + else if (sw_mode == dml2_sw_256b_2d) + return 256; + else if (sw_mode == dml2_sw_4kb_2d) + return 4096; + else if (sw_mode == dml2_sw_64kb_2d) + return 65536; + else if (sw_mode == dml2_sw_256kb_2d) + return 262144; + else if (sw_mode == dml2_gfx11_sw_linear) + return 256; + else if (sw_mode == dml2_gfx11_sw_64kb_d) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_t) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_r_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_256kb_d_x) + return 262144; + else if (sw_mode == dml2_gfx11_sw_256kb_r_x) + return 262144; + else { + DML_ASSERT(0); + return 256; + } +} + +static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan) +{ + bool is_vert = false; + if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { + is_vert = true; + } else { + is_vert = false; + } + return is_vert; +} + +static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode) +{ + int unsigned version = 0; + + if (sw_mode == dml2_sw_linear || + sw_mode == dml2_sw_256b_2d || + sw_mode == dml2_sw_4kb_2d || + sw_mode == dml2_sw_64kb_2d || + sw_mode == dml2_sw_256kb_2d) { + version = 12; + } else if 
(sw_mode == dml2_gfx11_sw_linear || + sw_mode == dml2_gfx11_sw_64kb_d || + sw_mode == dml2_gfx11_sw_64kb_d_t || + sw_mode == dml2_gfx11_sw_64kb_d_x || + sw_mode == dml2_gfx11_sw_64kb_r_x || + sw_mode == dml2_gfx11_sw_256kb_d_x || + sw_mode == dml2_gfx11_sw_256kb_r_x) { + version = 11; + } else { + DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML_ASSERT(0); + } + + return version; +} + +static void CalculateBytePerPixelAndBlockSizes( + enum dml2_source_format_class SourcePixelFormat, + enum dml2_swizzle_mode SurfaceTiling, + unsigned int pitch_y, + unsigned int pitch_c, + + // Output + unsigned int *BytePerPixelY, + unsigned int *BytePerPixelC, + double *BytePerPixelDETY, + double *BytePerPixelDETC, + unsigned int *BlockHeight256BytesY, + unsigned int *BlockHeight256BytesC, + unsigned int *BlockWidth256BytesY, + unsigned int *BlockWidth256BytesC, + unsigned int *MacroTileHeightY, + unsigned int *MacroTileHeightC, + unsigned int *MacroTileWidthY, + unsigned int *MacroTileWidthC, + bool *surf_linear128_l, + bool *surf_linear128_c) +{ + *BytePerPixelDETY = 0; + *BytePerPixelDETC = 0; + *BytePerPixelY = 1; + *BytePerPixelC = 1; + + if (SourcePixelFormat == dml2_444_64) { + *BytePerPixelDETY = 8; + *BytePerPixelDETC = 0; + *BytePerPixelY = 8; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 0; + *BytePerPixelY = 4; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 0; + *BytePerPixelY = 2; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 0; + *BytePerPixelY = 1; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_rgbe_alpha) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 1; + *BytePerPixelY = 4; + *BytePerPixelC = 1; + } else if (SourcePixelFormat == dml2_420_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 2; + *BytePerPixelY = 1; + *BytePerPixelC = 2; + } else if (SourcePixelFormat == dml2_420_12) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 4; + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } else if (SourcePixelFormat == dml2_420_10) { + *BytePerPixelDETY = (double)(4.0 / 3); + *BytePerPixelDETC = (double)(8.0 / 3); + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } else { + DML_LOG_VERBOSE("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat); + DML_ASSERT(0); + } + + DML_LOG_VERBOSE("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); + DML_LOG_VERBOSE("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); + DML_LOG_VERBOSE("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); + DML_LOG_VERBOSE("DML::%s: pitch_y = %u\n", __func__, pitch_y); + DML_LOG_VERBOSE("DML::%s: pitch_c = %u\n", __func__, pitch_c); + DML_LOG_VERBOSE("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l); + DML_LOG_VERBOSE("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c); + + if (dml_get_gfx_version(SurfaceTiling) == 11) { + *surf_linear128_l = 0; + *surf_linear128_c = 0; + } else { + if (SurfaceTiling == dml2_sw_linear) { + *surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0); + + if (dml_is_420(SourcePixelFormat) 
|| SourcePixelFormat == dml2_rgbe_alpha) + *surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0); + } + } + + if (!(dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) { + if (SurfaceTiling == dml2_sw_linear) { + *BlockHeight256BytesY = 1; + } else if (SourcePixelFormat == dml2_444_64) { + *BlockHeight256BytesY = 4; + } else if (SourcePixelFormat == dml2_444_8) { + *BlockHeight256BytesY = 16; + } else { + *BlockHeight256BytesY = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockHeight256BytesC = 0; + *BlockWidth256BytesC = 0; + } else { // dual plane + if (SurfaceTiling == dml2_sw_linear) { + *BlockHeight256BytesY = 1; + *BlockHeight256BytesC = 1; + } else if (SourcePixelFormat == dml2_rgbe_alpha) { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 16; + } else if (SourcePixelFormat == dml2_420_8) { + *BlockHeight256BytesY = 16; + *BlockHeight256BytesC = 8; + } else { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; + } + DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); + DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); + DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); + DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); + + if (dml_get_gfx_version(SurfaceTiling) == 11) { + if (SurfaceTiling == dml2_gfx11_sw_linear) { + *MacroTileHeightY = *BlockHeight256BytesY; + *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; + } + } else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) { + *MacroTileHeightY = 16 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 16 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; + } + } else { + *MacroTileHeightY = 32 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 32 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; + } + } + } else { + unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling); + unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling + + if (SurfaceTiling == dml2_sw_linear) { + macro_tile_scale = 1; + } else if (SurfaceTiling == dml2_sw_4kb_2d) { + macro_tile_scale = 4; + } else if (SurfaceTiling == dml2_sw_64kb_2d) { + macro_tile_scale = 16; + } else if (SurfaceTiling == dml2_sw_256kb_2d) { + macro_tile_scale = 32; + } else { + DML_LOG_VERBOSE("ERROR: Invalid SurfaceTiling setting! 
val=%u\n", SurfaceTiling); + DML_ASSERT(0); + } + + *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY; + *MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC; + } + } + + DML_LOG_VERBOSE("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); + DML_LOG_VERBOSE("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); + DML_LOG_VERBOSE("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); + DML_LOG_VERBOSE("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); +} + +static void CalculateSinglePipeDPPCLKAndSCLThroughput( + double HRatio, + double HRatioChroma, + double VRatio, + double VRatioChroma, + double MaxDCHUBToPSCLThroughput, + double MaxPSCLToLBThroughput, + double PixelClock, + enum dml2_source_format_class SourcePixelFormat, + unsigned int HTaps, + unsigned int HTapsChroma, + unsigned int VTaps, + unsigned int VTapsChroma, + + // Output + double *PSCL_THROUGHPUT, + double *PSCL_THROUGHPUT_CHROMA, + double *DPPCLKUsingSingleDPP) +{ + double DPPCLKUsingSingleDPPLuma; + double DPPCLKUsingSingleDPPChroma; + + if (HRatio > 1) { + *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0)); + } else { + *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); + } + + DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1); + + if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) + DPPCLKUsingSingleDPPLuma = 2 * PixelClock; + + if (!dml_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) { + *PSCL_THROUGHPUT_CHROMA = 0; + *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; + } else { + if (HRatioChroma > 1) { + *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0)); + } else { + *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); + } + DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma), + HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); + if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) + DPPCLKUsingSingleDPPChroma = 2 * PixelClock; + *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); + } +} + +static void CalculateSwathWidth( + const struct dml2_display_cfg *display_cfg, + bool ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + enum dml2_odm_mode ODMMode[], + unsigned int BytePerPixY[], + unsigned int BytePerPixC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + bool surf_linear128_l[], + bool surf_linear128_c[], + unsigned int DPPPerSurface[], + + // Output + unsigned int req_per_swath_ub_l[], + unsigned int req_per_swath_ub_c[], + unsigned int SwathWidthSingleDPPY[], // post-rotated plane width + unsigned int SwathWidthSingleDPPC[], + unsigned int SwathWidthY[], // per-pipe + unsigned int SwathWidthC[], // per-pipe + unsigned int MaximumSwathHeightY[], + unsigned int MaximumSwathHeightC[], + unsigned int swath_width_luma_ub[], // 
per-pipe + unsigned int swath_width_chroma_ub[]) // per-pipe +{ + enum dml2_odm_mode MainSurfaceODMMode; + double odm_hactive_factor = 1.0; + unsigned int req_width_horz_y; + unsigned int req_width_horz_c; + unsigned int surface_width_ub_l; + unsigned int surface_height_ub_l; + unsigned int surface_width_ub_c; + unsigned int surface_height_ub_c; + + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width; + } else { + SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + } + + DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); + + MainSurfaceODMMode = ODMMode[k]; + + if (ForceSingleDPP) { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + } else { + if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1) + odm_hactive_factor = 4.0; + else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1) + odm_hactive_factor = 3.0; + else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1) + odm_hactive_factor = 2.0; + + if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) { + SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio))); + } else if (DPPPerSurface[k] == 2) { + SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; + } else { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + } + } + + DML_LOG_VERBOSE("DML::%s: k=%u HActive=%lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active); + DML_LOG_VERBOSE("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); + + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { + SwathWidthC[k] = SwathWidthY[k] / 2; + SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; + } else { + SwathWidthC[k] = SwathWidthY[k]; + SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; + } + + if (ForceSingleDPP == true) { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + SwathWidthC[k] = SwathWidthSingleDPPC[k]; + } + + req_width_horz_y = Read256BytesBlockWidthY[k]; + req_width_horz_c = Read256BytesBlockWidthC[k]; + + if (surf_linear128_l[k]) + req_width_horz_y = req_width_horz_y / 2; + + if (surf_linear128_c[k]) + req_width_horz_c = req_width_horz_c / 2; + + surface_width_ub_l = (unsigned 
int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y); + surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]); + surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c); + surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]); + + DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); + DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l); + DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); + DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); + + req_per_swath_ub_l[k] = 0; + req_per_swath_ub_c[k] = 0; + if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y))); + } else { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y)); + } + req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y; + + if (BytePerPixC[k] > 0) { + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, req_width_horz_c))); + } else { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c)); + } + req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c; + } else { + 
swath_width_chroma_ub[k] = 0; + } + } else { + MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; + + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k]))); + } else { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k])); + } + req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k]; + if (BytePerPixC[k] > 0) { + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k]))); + } else { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k])); + } + req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k]; + } else { + swath_width_chroma_ub[k] = 0; + } + } + + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]); + } +} + +static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear) +{ + bool unb_req_ok = false; + bool unb_req_en = false; + + unb_req_ok = (TotalNumberOfActiveDPP == 1 && NoChromaOrLinear); + unb_req_en = unb_req_ok; + + if (unb_req_force_en) { + unb_req_en = unb_req_force_val && unb_req_ok; + } + DML_LOG_VERBOSE("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en); + DML_LOG_VERBOSE("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val); + DML_LOG_VERBOSE("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok); + DML_LOG_VERBOSE("DML::%s: unb_req_en = %u\n", __func__, unb_req_en); + return unb_req_en; +} + +static void CalculateDETBufferSize( + struct dml2_core_shared_CalculateDETBufferSize_locals *l, + const struct dml2_display_cfg *display_cfg, + bool ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + bool UnboundedRequestEnabled, + unsigned int nomDETInKByte, + unsigned int MaxTotalDETInKByte, + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int MinCompressedBufferSizeInKByte, + unsigned int ConfigReturnBufferSegmentSizeInkByte, + unsigned int CompressedBufferSegmentSizeInkByte, + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + 
unsigned int full_swath_bytes_l[], + unsigned int full_swath_bytes_c[], + unsigned int DPPPerSurface[], + // Output + unsigned int DETBufferSizeInKByte[], + unsigned int *CompressedBufferSizeInkByte) +{ + memset(l, 0, sizeof(struct dml2_core_shared_CalculateDETBufferSize_locals)); + + bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES]; + bool NextPotentialSurfaceToAssignDETPieceFound; + bool MinimizeReallocationSuccess = false; + + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte); + + // Note: Will use default det size if that fits 2 swaths + if (UnboundedRequestEnabled) { + if (display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) { + DETBufferSizeInKByte[0] = display_cfg->plane_descriptors[0].overrides.det_size_override_kb; + } else { + DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)full_swath_bytes_l[0] + (double)full_swath_bytes_c[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)); + } + *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; + } else { + l->DETBufferSizePoolInKByte = MaxTotalDETInKByte; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + DETBufferSizeInKByte[k] = 0; + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { + l->max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte; + } else { + l->max_minDET = nomDETInKByte; + } + l->minDET = 128; + l->minDET_pipe = 0; + + // add DET resource until can hold 2 full swaths + while (l->minDET <= l->max_minDET && l->minDET_pipe == 0) { + if (2.0 * ((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0 <= l->minDET) + l->minDET_pipe = l->minDET; + l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte; + } + + DML_LOG_VERBOSE("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET); + DML_LOG_VERBOSE("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET); + DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]); + + if (l->minDET_pipe == 0) { + l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte))); + DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe); + } + + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + DETBufferSizeInKByte[k] = 0; + } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) { + DETBufferSizeInKByte[k] = display_cfg->plane_descriptors[k].overrides.det_size_override_kb; + l->DETBufferSizePoolInKByte = 
l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * display_cfg->plane_descriptors[k].overrides.det_size_override_kb; + } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe <= l->DETBufferSizePoolInKByte) { + DETBufferSizeInKByte[k] = l->minDET_pipe; + l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe; + } + + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte); + } + + if (display_cfg->minimize_det_reallocation) { + MinimizeReallocationSuccess = true; + // To minimize det reallocation, we don't distribute based on each surfaces bandwidth proportional to the global + // but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on + // bandwidth between the planes on the same stream. This ensures that large scale re-distribution only on a + // stream count and/or pixel rate change, which is must less likely then general bandwidth changes per plane. + + // Calculate total pixel rate + for (unsigned int k = 0; k < display_cfg->num_streams; ++k) { + l->TotalPixelRate += display_cfg->stream_descriptors[k].timing.pixel_clock_khz; + } + + // Calculate per stream DET budget + for (unsigned int k = 0; k < display_cfg->num_streams; ++k) { + l->DETBudgetPerStream[k] = (unsigned int)((double) display_cfg->stream_descriptors[k].timing.pixel_clock_khz * MaxTotalDETInKByte / l->TotalPixelRate); + l->RemainingDETBudgetPerStream[k] = l->DETBudgetPerStream[k]; + } + + // Calculate the per stream total bandwidth + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + + // Check the minimum can be satisfied by budget + if (l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] >= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { + l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 
1 : DPPPerSurface[k]);
+					} else {
+						MinimizeReallocationSuccess = false;
+						break;
+					}
+				}
+			}
+
+			if (MinimizeReallocationSuccess) {
+				// Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each stream's
+				// budget proportionally across its planes
+				l->ResidualDETAfterRounding = MaxTotalDETInKByte;
+
+				for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+					if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+						l->IdealDETBudget = (unsigned int)(((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index])
+							* l->DETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]);
+
+						if (l->IdealDETBudget > DETBufferSizeInKByte[k]) {
+							l->DeltaDETBudget = l->IdealDETBudget - DETBufferSizeInKByte[k];
+							if (l->DeltaDETBudget > l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index])
+								l->DeltaDETBudget = l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index];
+
+							/* split the additional budgeted DET among the pipes per plane */
+							DETBufferSizeInKByte[k] += (unsigned int)((double)l->DeltaDETBudget / (ForceSingleDPP ? 1 : DPPPerSurface[k]));
+							l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= l->DeltaDETBudget;
+						}
+
+						// Round down to segment size
+						DETBufferSizeInKByte[k] = (DETBufferSizeInKByte[k] / ConfigReturnBufferSegmentSizeInkByte) * ConfigReturnBufferSegmentSizeInkByte;
+
+						l->ResidualDETAfterRounding -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
+					}
+				}
+			}
+		}
+
+		if (!MinimizeReallocationSuccess) {
+			l->TotalBandwidth = 0;
+			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+				if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+					l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+				}
+			}
+			DML_LOG_VERBOSE("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
+			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+				DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
+			}
+			DML_LOG_VERBOSE("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
+			DML_LOG_VERBOSE("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth);
+			l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth;
+			for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+				if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+					DETPieceAssignedToThisSurfaceAlready[k] = true;
+				} else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(ForceSingleDPP ? 
1 : DPPPerSurface[k]) * (double)DETBufferSizeInKByte[k] / (double)MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidth))) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; + } else { + DETPieceAssignedToThisSurfaceAlready[k] = false; + } + DML_LOG_VERBOSE("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece); + } + + for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) { + NextPotentialSurfaceToAssignDETPieceFound = false; + l->NextSurfaceToAssignDETPiece = 0; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece); + if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound || + ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) { + l->NextSurfaceToAssignDETPiece = k; + NextPotentialSurfaceToAssignDETPieceFound = true; + } + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); + } + + if (NextPotentialSurfaceToAssignDETPieceFound) { + l->NextDETBufferPieceInKByte = (unsigned int)(math_min2( + math_round((double)l->DETBufferSizePoolInKByte * (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]) / l->BandwidthOfSurfacesNotAssignedDETPiece / + ((ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)) + * (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte, + math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 
1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))); + + DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte); + DML_LOG_VERBOSE("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece); + DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece); + DML_LOG_VERBOSE("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte); + DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); + + DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); + + l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte; + DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true; + l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + } + } + } + *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; + } + *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte; + + DML_LOG_VERBOSE("DML::%s: --- After bandwidth adjustment ---\n", __func__); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte); + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + } +} + +static double CalculateRequiredDispclk( + enum dml2_odm_mode ODMMode, + double PixelClock, + bool isTMDS420) +{ + double DispClk; + + if (ODMMode == dml2_odm_mode_combine_4to1) { + DispClk = PixelClock / 4.0; + } else if (ODMMode == dml2_odm_mode_combine_3to1) { + DispClk = PixelClock / 3.0; + } else if (ODMMode == dml2_odm_mode_combine_2to1) { + DispClk = PixelClock / 2.0; + } else { + DispClk = PixelClock; + } + + if (isTMDS420) { + double TMDS420MinPixClock = PixelClock / 2.0; + DispClk = math_max2(DispClk, TMDS420MinPixClock); + } + + return DispClk; +} + +static double TruncToValidBPP( + struct dml2_core_shared_TruncToValidBPP_locals *l, + double LinkBitRate, + unsigned int Lanes, + unsigned int HTotal, + unsigned int HActive, + double PixelClock, + double DesiredBPP, + bool DSCEnable, + enum dml2_output_encoder_class Output, + enum dml2_output_format_class Format, + unsigned int DSCInputBitPerComponent, + unsigned int DSCSlices, + unsigned int AudioRate, + unsigned int AudioLayout, + enum dml2_odm_mode ODMModeNoDSC, + enum dml2_odm_mode ODMModeDSC, + + // Output + unsigned int *RequiredSlots) +{ + double MaxLinkBPP; + unsigned 
int MinDSCBPP;
+	double MaxDSCBPP;
+	unsigned int NonDSCBPP0;
+	unsigned int NonDSCBPP1;
+	unsigned int NonDSCBPP2;
+	enum dml2_odm_mode ODMMode;
+
+	if (Format == dml2_420) {
+		NonDSCBPP0 = 12;
+		NonDSCBPP1 = 15;
+		NonDSCBPP2 = 18;
+		MinDSCBPP = 6;
+		MaxDSCBPP = 16;
+	} else if (Format == dml2_444) {
+		NonDSCBPP0 = 24;
+		NonDSCBPP1 = 30;
+		NonDSCBPP2 = 36;
+		MinDSCBPP = 8;
+		MaxDSCBPP = 16;
+	} else {
+
+		if (Output == dml2_hdmi || Output == dml2_hdmifrl) {
+			NonDSCBPP0 = 24;
+			NonDSCBPP1 = 24;
+			NonDSCBPP2 = 24;
+		} else {
+			NonDSCBPP0 = 16;
+			NonDSCBPP1 = 20;
+			NonDSCBPP2 = 24;
+		}
+		if (Format == dml2_n422 || Output == dml2_hdmifrl) {
+			MinDSCBPP = 7;
+			MaxDSCBPP = 16;
+		} else {
+			MinDSCBPP = 8;
+			MaxDSCBPP = 16;
+		}
+	}
+
+	if (Output == dml2_dp2p0) {
+		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
+	} else if (DSCEnable && Output == dml2_dp) {
+		MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
+	} else {
+		MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
+	}
+
+	ODMMode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;
+
+	if (ODMMode == dml2_odm_mode_split_1to2) {
+		MaxLinkBPP = 2 * MaxLinkBPP;
+	}
+
+	if (DesiredBPP == 0) {
+		if (DSCEnable) {
+			if (MaxLinkBPP < MinDSCBPP) {
+				return __DML2_CALCS_DPP_INVALID__;
+			} else if (MaxLinkBPP >= MaxDSCBPP) {
+				return MaxDSCBPP;
+			} else {
+				return math_floor2(16.0 * MaxLinkBPP, 1.0) / 16.0;
+			}
+		} else {
+			if (MaxLinkBPP >= NonDSCBPP2) {
+				return NonDSCBPP2;
+			} else if (MaxLinkBPP >= NonDSCBPP1) {
+				return NonDSCBPP1;
+			} else if (MaxLinkBPP >= NonDSCBPP0) {
+				return NonDSCBPP0;
+			} else {
+				return __DML2_CALCS_DPP_INVALID__;
+			}
+		}
+	} else {
+		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) ||
+			(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
+			return __DML2_CALCS_DPP_INVALID__;
+		} else {
+			return DesiredBPP;
+		}
+	}
+}
+
+// updated for dcn4
+static unsigned int dscceComputeDelay(
+	unsigned int bpc,
+	double BPP,
+	unsigned int sliceWidth,
+	unsigned int numSlices,
+	enum dml2_output_format_class pixelFormat,
+	enum dml2_output_encoder_class Output)
+{
+	// valid bpc = source bits per component in the set of {8, 10, 12}
+	// valid bpp = increments of 1/16 of a bit
+	// min = 6/7/8 in N420/N422/444, respectively
+	// max = such that compression is 1:1
+	//valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
+	//valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
+	//valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
+
+	// fixed value
+	unsigned int rcModelSize = 8192;
+
+	// N422/N420 operate at 2 pixels per clock
+	unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified;
+
+	if (pixelFormat == dml2_420)
+		pixelsPerClock = 2;
+	// #all other modes operate at 1 pixel per clock
+	else if (pixelFormat == dml2_444)
+		pixelsPerClock = 1;
+	else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
+		pixelsPerClock = 2;
+	else
+		pixelsPerClock = 1;
+
+	//initial transmit delay as 
per PPS
+	initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock));
+
+	//slice width as seen by dscc_bcl in pixels or pixel pairs (depending on number of pixels per pixel container based on pixel format)
+	slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth;
+
+	padding_pixels = ((slice_width_modified % 3) != 0) ? (3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0;
+
+	if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) {
+		if ((initial_xmit_delay + padding_pixels) % 3 == 1) {
+			initial_xmit_delay++;
+		}
+	}
+
+	//sub-stream multiplexer balance fifo priming delay in groups as per dsc standard
+	if (bpc == 8)
+		ssm_group_priming_delay = 83;
+	else if (bpc == 10)
+		ssm_group_priming_delay = 91;
+	else if (bpc == 12)
+		ssm_group_priming_delay = 115;
+	else if (bpc == 14)
+		ssm_group_priming_delay = 123;
+	else
+		ssm_group_priming_delay = 128;
+
+	//slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice
+	slice_width_groups = (slice_width_modified + 2) / 3;
+
+	//determine number of padded pixels in the last group of a slice line, computed as follows
+	slice_padded_pixels = 3 * slice_width_groups - slice_width_modified;
+
+	//determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered
+	number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified;
+
+	//increase initial transmit delay by the number of padded pixels added to a slice line multiplied by the integer number of complete lines to reach initial transmit delay
+	//this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay
+	ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd;
+
+	//convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels
+	ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3;
+
+	//number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay
+	groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay;
+
+	//number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice
+	//needs to be rounded up as complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice
+	lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to the next integer
+
+	//determine if there are a non-zero number of pixels reached in the group where initial transmit delay is reached
+	//an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay
+	additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 
1 : 0;
+
+	//number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block)
+	ssm_pipeline_delay = 2;
+
+	//number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block)
+	obsm_pipeline_delay = 1;
+
+	//a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes
+	if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
+		cycles_per_group = 6;
+	else
+		cycles_per_group = 3;
+	//delay of the bit stream construction layer in pixels is the sum of:
+	//1. number of pixel containers in a slice line multiplied by the number of lines required to reach initial transmit delay multiplied by number of slices to the left of the last horizontal slice
+	//2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice)
+	//3. additional group of delay if initial transmit delay is reached exactly in a group
+	//4. ssm and obsm pipeline delay (i.e., clock cycles of delay)
+	group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay;
+	pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay;
+
+	//pixel delay is group_delay (converted to pixels) + pipeline; however, the first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format)
+	pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay;
+
+	DML_LOG_VERBOSE("DML::%s: bpc: %u\n", __func__, bpc);
+	DML_LOG_VERBOSE("DML::%s: BPP: %f\n", __func__, BPP);
+	DML_LOG_VERBOSE("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
+	DML_LOG_VERBOSE("DML::%s: numSlices: %u\n", __func__, numSlices);
+	DML_LOG_VERBOSE("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
+	DML_LOG_VERBOSE("DML::%s: Output: %u\n", __func__, Output);
+	DML_LOG_VERBOSE("DML::%s: pixels: %u\n", __func__, pixels);
+	return pixels;
+}
+
+//updated in dcn4
+static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output)
+{
+	unsigned int Delay = 0;
+	unsigned int dispclk_per_dscclk = 3;
+
+	// sfr
+	Delay = Delay + 2;
+
+	if (pixelFormat == dml2_420 || pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
+		dispclk_per_dscclk = 3 * 2;
+	}
+
+	if (pixelFormat == dml2_420) {
+		//dscc top delay for pixel compression layer
+		Delay = Delay + 16 * dispclk_per_dscclk;
+
+		// dscc - input deserializer
+		Delay = Delay + 5;
+
+		// dscc - input cdc fifo
+		Delay = Delay + 1 + 4 * dispclk_per_dscclk;
+
+		// dscc - output cdc fifo
+		Delay = Delay + 3 + 1 * dispclk_per_dscclk;
+
+		// dscc - cdc uncertainty
+		Delay = Delay + 3 + 3 * dispclk_per_dscclk;
+	} else if (pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
+		//dscc top delay for pixel compression layer
+		Delay = Delay + 16 * dispclk_per_dscclk;
+		// dsccif
+		Delay = Delay + 1;
+		// dscc - input deserializer
+		Delay = Delay + 5;
+		// dscc - input cdc fifo
+		Delay = Delay + 1 + 4 * dispclk_per_dscclk;
+
+
+		// dscc - output cdc fifo
+		Delay = Delay + 3 + 1 * dispclk_per_dscclk;
+		// dscc - cdc uncertainty
+		Delay = Delay + 3 + 3 * dispclk_per_dscclk;
+	} else if (pixelFormat == dml2_s422) {
+		//dscc top delay for pixel compression layer
+		Delay = Delay + 17 * dispclk_per_dscclk;
+
+		// dscc - input deserializer
+		Delay = Delay + 3;
+		// dscc - input cdc fifo
+		Delay = 
Delay + 1 + 4 * dispclk_per_dscclk; + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } else { + //dscc top delay for pixel compression layer + Delay = Delay + 16 * dispclk_per_dscclk; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 1 + 4 * dispclk_per_dscclk; + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } + + // sft + Delay = Delay + 1; + DML_LOG_VERBOSE("DML::%s: pixelFormat = %u\n", __func__, pixelFormat); + DML_LOG_VERBOSE("DML::%s: Delay = %u\n", __func__, Delay); + + return Delay; +} + +static unsigned int CalculateHostVMDynamicLevels( + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMinPageSize, + unsigned int HostVMMaxNonCachedPageTableLevels) +{ + unsigned int HostVMDynamicLevels = 0; + + if (GPUVMEnable && HostVMEnable) { + if (HostVMMinPageSize < 2048) + HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; + else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) + HostVMDynamicLevels = (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - 1); + else + HostVMDynamicLevels = (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - 2); + } else { + HostVMDynamicLevels = 0; + } + return HostVMDynamicLevels; +} + +static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p) +{ + unsigned int extra_dpde_bytes; + unsigned int extra_mpde_bytes; + unsigned int MacroTileSizeBytes; + unsigned int vp_height_dpte_ub; + + unsigned int meta_surface_bytes; + unsigned int vm_bytes; + unsigned int vp_height_meta_ub; + unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this + + *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes; + *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes; + if (p->SurfaceTiling == dml2_sw_linear) { + *p->meta_row_height = 32; + *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth)); + *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways? 
+ } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + *p->meta_row_height = *p->MetaRequestHeight; + if (p->ViewportStationary && p->NumberOfDPPs == 1) { + *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth)); + } else { + *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth); + } + *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); + } else { + *p->meta_row_height = *p->MetaRequestWidth; + if (p->ViewportStationary && p->NumberOfDPPs == 1) { + *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight)); + } else { + *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight); + } + *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0); + } + + if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) { + vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes)); + } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes); + } else { + vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes); + } + + meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0); + DML_LOG_VERBOSE("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch); + DML_LOG_VERBOSE("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes); + if (p->GPUVMEnable == true) { + double meta_vmpg_bytes = 4.0 * 1024.0; + *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64); + extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1); + } else { + *p->meta_pte_bytes_per_frame_ub = 0; + extra_mpde_bytes = 0; + } + + if (!p->DCCEnable || !p->mrq_present) { + *p->meta_pte_bytes_per_frame_ub = 0; + extra_mpde_bytes = 0; + *p->meta_row_bytes = 0; + } + + if (!p->GPUVMEnable) { + *p->PixelPTEBytesPerRow = 0; + *p->PixelPTEBytesPerRowStorage = 0; + *p->dpte_row_width_ub = 0; + *p->dpte_row_height = 0; + *p->dpte_row_height_linear = 0; + *p->PixelPTEBytesPerRow_one_row_per_frame = 0; + *p->dpte_row_width_ub_one_row_per_frame = 0; + *p->dpte_row_height_one_row_per_frame = 0; + *p->vmpg_width = 0; + *p->vmpg_height = 0; + *p->PixelPTEReqWidth = 0; + *p->PixelPTEReqHeight = 0; + *p->PTERequestSize = 0; + *p->dpde0_bytes_per_frame_ub = 0; + return 0; + } + + MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight; + + if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) { + vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight)); + } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + 
vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight);
+	} else {
+		vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight);
+	}
+
+	if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) {
+		*p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1));
+		extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2);
+	} else {
+		*p->dpde0_bytes_per_frame_ub = 0;
+		extra_dpde_bytes = 0;
+	}
+
+	vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes;
+
+	DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable);
+	DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
+	DML_LOG_VERBOSE("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear);
+	DML_LOG_VERBOSE("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel);
+	DML_LOG_VERBOSE("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels);
+	DML_LOG_VERBOSE("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes);
+	DML_LOG_VERBOSE("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes);
+	DML_LOG_VERBOSE("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight);
+	DML_LOG_VERBOSE("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth);
+	DML_LOG_VERBOSE("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub);
+	DML_LOG_VERBOSE("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub);
+	DML_LOG_VERBOSE("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes);
+	DML_LOG_VERBOSE("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes);
+	DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
+	DML_LOG_VERBOSE("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight);
+	DML_LOG_VERBOSE("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth);
+	DML_LOG_VERBOSE("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
+
+	if (p->SurfaceTiling == dml2_sw_linear) {
+		*p->PixelPTEReqHeight = 1;
+		*p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
+		PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
+		*p->PTERequestSize = 64;
+
+		*p->vmpg_height = 1;
+		*p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel;
+	} else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE
+		*p->PixelPTEReqHeight = p->MacroTileHeight;
+		*p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
+		*p->PTERequestSize = 64;
+
+		*p->vmpg_height = p->MacroTileHeight;
+		*p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
+
+	} else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile
+		// one 64KB tile is 16x16x256B reqs
+		*p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes;
+		*p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes;
+		*p->PTERequestSize = 128;
+
+		*p->vmpg_height = *p->PixelPTEReqHeight;
+		*p->vmpg_width = *p->PixelPTEReqWidth;
+	} else {
+		// default for rest of calculation to go through; when vm is disabled, the calculated pte-related values 
shouldn't be used anyway
+		*p->PixelPTEReqHeight = p->MacroTileHeight;
+		*p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
+		*p->PTERequestSize = 64;
+
+		*p->vmpg_height = p->MacroTileHeight;
+		*p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
+
+		if (p->GPUVMEnable == true) {
+			DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n",
+				__func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling));
+			DML_ASSERT(0);
+		}
+	}
+
+	DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
+	DML_LOG_VERBOSE("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight);
+	DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth);
+	DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
+	DML_LOG_VERBOSE("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize);
+	DML_LOG_VERBOSE("DML::%s: Pitch = %u\n", __func__, p->Pitch);
+	DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width);
+	DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height);
+
+	*p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
+	*p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth);
+	*p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
+	*p->dpte_row_height_linear = 0;
+
+	if (p->SurfaceTiling == dml2_sw_linear) {
+		*p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1))));
+		*p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth);
+		*p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
+
+		// VBA_DELTA, VBA doesn't have a programming value for pte row height linear.
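+		// Descriptive gloss of the statement below (editorial note, no behavior change): this picks the
+		// largest power-of-two number of scan lines whose linear PTEs still fit the PTE request buffer,
+		// i.e. 2^floor(log2(PTEBufferSizeInRequests * PixelPTEReqWidth_linear / Pitch)); the check that
+		// follows then clamps the result to 128 lines.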
+ *p->dpte_row_height_linear = (unsigned int)1 << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1); + if (*p->dpte_row_height_linear > 128) + *p->dpte_row_height_linear = 128; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub); +#endif + + } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + *p->dpte_row_height = *p->PixelPTEReqHeight; + + if (p->GPUVMMinPageSizeKBytes > 64) { + *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth); + } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) { + *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth)); + } else { + *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth); + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub); +#endif + + *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize; + } else { + *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth)); + + if (p->ViewportStationary && (p->NumberOfDPPs == 1)) { + *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight)); + } else { + *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight); + } + + *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub); +#endif + } + + if (p->GPUVMEnable != true) { + *p->PixelPTEBytesPerRow = 0; + *p->PixelPTEBytesPerRow_one_row_per_frame = 0; + } + + *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height); + DML_LOG_VERBOSE("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height); + DML_LOG_VERBOSE("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage); + DML_LOG_VERBOSE("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests); + DML_LOG_VERBOSE("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, 
*p->PixelPTEBytesPerRow_one_row_per_frame); +#endif + + return vm_bytes; +} // CalculateVMAndRowBytes + +static unsigned int CalculatePrefetchSourceLines( + double VRatio, + unsigned int VTaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + enum dml2_rotation_angle RotationAngle, + bool mirrored, + bool ViewportStationary, + unsigned int SwathWidth, + unsigned int ViewportHeight, + unsigned int ViewportXStart, + unsigned int ViewportYStart, + + // Output + unsigned int *VInitPreFill, + unsigned int *MaxNumSwath) +{ + + unsigned int vp_start_rot = 0; + unsigned int sw0_tmp = 0; + unsigned int MaxPartialSwath = 0; + double numLines = 0; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); + DML_LOG_VERBOSE("DML::%s: VTaps = %u\n", __func__, VTaps); + DML_LOG_VERBOSE("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); + DML_LOG_VERBOSE("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); + DML_LOG_VERBOSE("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); + DML_LOG_VERBOSE("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); +#endif + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1)); + else + *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 1 : 0) * 0.5 * VRatio) / 2.0, 1)); + + if (ViewportStationary) { + if (RotationAngle == dml2_rotation_180) { + vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); + } else if ((RotationAngle == dml2_rotation_270 && !mirrored) || (RotationAngle == dml2_rotation_90 && mirrored)) { + vp_start_rot = ViewportXStart; + } else if ((RotationAngle == dml2_rotation_90 && !mirrored) || (RotationAngle == dml2_rotation_270 && mirrored)) { + vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); + } else { + vp_start_rot = ViewportYStart; + } + sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); + if (sw0_tmp < *VInitPreFill) { + *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - sw0_tmp) / (double)SwathHeight, 1) + 1); + } else { + *MaxNumSwath = 1; + } + MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(vp_start_rot + *VInitPreFill - 1) % SwathHeight)); + } else { + *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1); + if (*VInitPreFill > 1) { + MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight)); + } else { + MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight)); + } + } + numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); + DML_LOG_VERBOSE("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); + DML_LOG_VERBOSE("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); + DML_LOG_VERBOSE("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); + DML_LOG_VERBOSE("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); +#endif + return (unsigned int)(numLines); + +} + +static void CalculateRowBandwidth( + bool GPUVMEnable, + bool use_one_row_for_frame, + enum dml2_source_format_class SourcePixelFormat, + double VRatio, + double VRatioChroma, + bool DCCEnable, + double LineTime, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned 
int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + + bool mrq_present, + unsigned int meta_row_bytes_per_row_ub_l, + unsigned int meta_row_bytes_per_row_ub_c, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + + // Output + double *dpte_row_bw, + double *meta_row_bw) +{ + if (!DCCEnable || !mrq_present) { + *meta_row_bw = 0; + } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { + *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime) + + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } +} + +static void CalculateMALLUseForStaticScreen( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCN, + unsigned int SurfaceSizeInMALL[], + bool one_row_per_frame_fits_in_buffer[], + + // Output + bool is_using_mall_for_ss[]) +{ + + unsigned int SurfaceToAddToMALL; + bool CanAddAnotherSurfaceToMALL; + unsigned int TotalSurfaceSizeInMALL; + + TotalSurfaceSizeInMALL = 0; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable); + if (is_using_mall_for_ss[k]) + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); +#endif + } + + SurfaceToAddToMALL = 0; + CanAddAnotherSurfaceToMALL = true; + while (CanAddAnotherSurfaceToMALL) { + CanAddAnotherSurfaceToMALL = false; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCN * 1024 * 1024 && + !is_using_mall_for_ss[k] && display_cfg->plane_descriptors[k].overrides.refresh_from_mall != dml2_refresh_from_mall_mode_override_force_disable && one_row_per_frame_fits_in_buffer[k] && + (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { + CanAddAnotherSurfaceToMALL = true; + SurfaceToAddToMALL = k; + DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall); + } + } + if (CanAddAnotherSurfaceToMALL) { + is_using_mall_for_ss[SurfaceToAddToMALL] = true; + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); +#endif + } + } +} + +static void CalculateDCCConfiguration( + bool DCCEnabled, + bool DCCProgrammingAssumesScanDirectionUnknown, + enum dml2_source_format_class SourcePixelFormat, + unsigned int 
SurfaceWidthLuma, + unsigned int SurfaceWidthChroma, + unsigned int SurfaceHeightLuma, + unsigned int SurfaceHeightChroma, + unsigned int nomDETInKByte, + unsigned int RequestHeight256ByteLuma, + unsigned int RequestHeight256ByteChroma, + enum dml2_swizzle_mode TilingFormat, + unsigned int BytePerPixelY, + unsigned int BytePerPixelC, + double BytePerPixelDETY, + double BytePerPixelDETC, + enum dml2_rotation_angle RotationAngle, + + // Output + enum dml2_core_internal_request_type *RequestLuma, + enum dml2_core_internal_request_type *RequestChroma, + unsigned int *MaxUncompressedBlockLuma, + unsigned int *MaxUncompressedBlockChroma, + unsigned int *MaxCompressedBlockLuma, + unsigned int *MaxCompressedBlockChroma, + unsigned int *IndependentBlockLuma, + unsigned int *IndependentBlockChroma) +{ + unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; + + unsigned int segment_order_horz_contiguous_luma; + unsigned int segment_order_horz_contiguous_chroma; + unsigned int segment_order_vert_contiguous_luma; + unsigned int segment_order_vert_contiguous_chroma; + + unsigned int req128_horz_wc_l; + unsigned int req128_horz_wc_c; + unsigned int req128_vert_wc_l; + unsigned int req128_vert_wc_c; + + unsigned int yuv420; + unsigned int horz_div_l; + unsigned int horz_div_c; + unsigned int vert_div_l; + unsigned int vert_div_c; + + unsigned int swath_buf_size; + double detile_buf_vp_horz_limit; + double detile_buf_vp_vert_limit; + + unsigned int MAS_vp_horz_limit; + unsigned int MAS_vp_vert_limit; + unsigned int max_vp_horz_width; + unsigned int max_vp_vert_height; + unsigned int eff_surf_width_l; + unsigned int eff_surf_width_c; + unsigned int eff_surf_height_l; + unsigned int eff_surf_height_c; + + unsigned int full_swath_bytes_horz_wc_l; + unsigned int full_swath_bytes_horz_wc_c; + unsigned int full_swath_bytes_vert_wc_l; + unsigned int full_swath_bytes_vert_wc_c; + + if (dml_is_420(SourcePixelFormat)) + yuv420 = 1; + else + yuv420 = 0; + horz_div_l = 1; + horz_div_c = 1; + vert_div_l = 1; + vert_div_c = 1; + + if (BytePerPixelY == 1) + vert_div_l = 0; + if (BytePerPixelC == 1) + vert_div_c = 0; + + if (BytePerPixelC == 0) { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; + detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); + detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); + } else { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; + detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); + detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); + } + + if (SourcePixelFormat == dml2_420_10) { + detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; + detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; + } + + detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16); + detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16); + + MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144; + MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 
3072 : 6144); + max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit)); + max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit)); + eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); + eff_surf_width_c = eff_surf_width_l / (1 + yuv420); + eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); + eff_surf_height_c = eff_surf_height_l / (1 + yuv420); + + full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; + full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; + if (BytePerPixelC > 0) { + full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; + full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; + } else { + full_swath_bytes_horz_wc_c = 0; + full_swath_bytes_vert_wc_c = 0; + } + + if (SourcePixelFormat == dml2_420_10) { + full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0)); + full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0)); + full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0)); + full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0)); + } + + if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 0; + } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 1; + } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 1; + req128_horz_wc_c = 0; + } else { + req128_horz_wc_l = 1; + req128_horz_wc_c = 1; + } + + if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 0; + } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 1; + } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 1; + req128_vert_wc_c = 0; + } else { + req128_vert_wc_l = 1; + req128_vert_wc_c = 1; + } + + if (BytePerPixelY == 2) { + segment_order_horz_contiguous_luma = 0; + segment_order_vert_contiguous_luma = 1; + } else { + segment_order_horz_contiguous_luma = 1; + segment_order_vert_contiguous_luma = 0; + } + + if (BytePerPixelC == 2) { + segment_order_horz_contiguous_chroma = 0; + segment_order_vert_contiguous_chroma = 1; + } else { + segment_order_horz_contiguous_chroma = 1; + segment_order_vert_contiguous_chroma = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); + DML_LOG_VERBOSE("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); + 
DML_LOG_VERBOSE("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); + DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); + DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma); +#endif + if (DCCProgrammingAssumesScanDirectionUnknown == true) { + if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { + *RequestLuma = dml2_core_internal_request_type_256_bytes; + } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { + *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; + } + if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { + *RequestChroma = dml2_core_internal_request_type_256_bytes; + } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { + *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; + } + } else if (!dml_is_vertical_rotation(RotationAngle)) { + if (req128_horz_wc_l == 0) { + *RequestLuma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_horz_contiguous_luma == 0) { + *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; + } + if (req128_horz_wc_c == 0) { + *RequestChroma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_horz_contiguous_chroma == 0) { + *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; + } + } else { + if (req128_vert_wc_l == 0) { + *RequestLuma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_vert_contiguous_luma == 0) { + *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; + } + if (req128_vert_wc_c == 0) { + *RequestChroma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_vert_contiguous_chroma == 0) { + *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; + } + } + + if (*RequestLuma == dml2_core_internal_request_type_256_bytes) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 256; + *IndependentBlockLuma = 0; + } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 128; + *IndependentBlockLuma = 128; + } else { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 64; + *IndependentBlockLuma = 64; + } + + if (*RequestChroma == dml2_core_internal_request_type_256_bytes) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 256; + *IndependentBlockChroma = 0; + } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) { + *MaxUncompressedBlockChroma = 256; + 
*MaxCompressedBlockChroma = 128;
+		*IndependentBlockChroma = 128;
+	} else {
+		*MaxUncompressedBlockChroma = 256;
+		*MaxCompressedBlockChroma = 64;
+		*IndependentBlockChroma = 64;
+	}
+
+	if (DCCEnabled != true || BytePerPixelC == 0) {
+		*MaxUncompressedBlockChroma = 0;
+		*MaxCompressedBlockChroma = 0;
+		*IndependentBlockChroma = 0;
+	}
+
+	if (DCCEnabled != true) {
+		*MaxUncompressedBlockLuma = 0;
+		*MaxCompressedBlockLuma = 0;
+		*IndependentBlockLuma = 0;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
+	DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
+	DML_LOG_VERBOSE("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
+	DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
+	DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
+	DML_LOG_VERBOSE("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
+#endif
+
+}
+
+static void calculate_mcache_row_bytes(
+	struct dml2_core_internal_scratch *scratch,
+	struct dml2_core_calcs_calculate_mcache_row_bytes_params *p)
+{
+	unsigned int vmpg_bytes = 0;
+	unsigned int blk_bytes = 0;
+	float meta_per_mvmpg_per_channel = 0;
+	unsigned int est_blk_per_vmpg = 2;
+	unsigned int mvmpg_per_row_ub = 0;
+	unsigned int full_vp_width_mvmpg_aligned = 0;
+	unsigned int full_vp_height_mvmpg_aligned = 0;
+	unsigned int meta_per_mvmpg_per_channel_ub = 0;
+	unsigned int mvmpg_per_mcache;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: num_chans = %u\n", __func__, p->num_chans);
+	DML_LOG_VERBOSE("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes);
+	DML_LOG_VERBOSE("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes);
+	DML_LOG_VERBOSE("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes);
+	DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
+	DML_LOG_VERBOSE("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes);
+	DML_LOG_VERBOSE("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary);
+	DML_LOG_VERBOSE("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode);
+	DML_LOG_VERBOSE("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x);
+	DML_LOG_VERBOSE("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y);
+	DML_LOG_VERBOSE("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width);
+	DML_LOG_VERBOSE("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height);
+	DML_LOG_VERBOSE("DML::%s: blk_width = %u\n", __func__, p->blk_width);
+	DML_LOG_VERBOSE("DML::%s: blk_height = %u\n", __func__, p->blk_height);
+	DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width);
+	DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height);
+	DML_LOG_VERBOSE("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes);
+#endif
+	DML_ASSERT(p->mcache_line_size_bytes != 0);
+	DML_ASSERT(p->mcache_size_bytes != 0);
+
+	*p->mvmpg_width = 0;
+	*p->mvmpg_height = 0;
+
+	if (p->full_vp_height == 0 && p->full_vp_width == 0) {
+		*p->num_mcaches = 0;
+		*p->mcache_row_bytes = 0;
+		*p->mcache_row_bytes_per_channel = 0;
+	} else {
+		blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode);
+
+		// if gpuvm is not enabled, the alignment boundary should be in terms of tiling block size
+		vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
+
+		//With vmpg_bytes >= tile blk_bytes, the 
meta_row_width alignment equations are relative to the vmpg_width/height.
+		// But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block.
+		// Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end.
+		*p->mvmpg_width = p->blk_width;
+		*p->mvmpg_height = p->blk_height;
+		if (p->gpuvm_enable) {
+			if (vmpg_bytes >= blk_bytes) {
+				*p->mvmpg_width = p->vmpg_width;
+				*p->mvmpg_height = p->vmpg_height;
+			} else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) {
+				DML_LOG_VERBOSE("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__);
+				DML_ASSERT(0);
+			}
+		}
+
+		//For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c
+		full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width));
+		full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height));
+
+		*p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned;
+
+		//Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes.
+		if (!p->surf_vert) { //horizontal access
+			if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes)
+				*p->meta_row_width_ub = full_vp_width_mvmpg_aligned;
+			else
+				*p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width;
+			mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width;
+		} else { //vertical access
+			if (p->vp_stationary == 1)
+				*p->meta_row_width_ub = full_vp_height_mvmpg_aligned;
+			else
+				*p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height;
+			mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height;
+		}
+
+		if (p->gpuvm_enable) {
+			meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans;
+
+			//but use est_blk_per_vmpg between 2 and 4, to be less pessimistic
+			if (p->surf_vert && vmpg_bytes > blk_bytes) {
+				meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans;
+			}
+
+			*p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom
+		} else {
+			meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans;
+
+			if (!p->surf_vert)
+				*p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0;
+			else
+				*p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel));
+		}
+
+		meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes);
+
+		//but for 4KB vmpg with 64KB tile blk
+		if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096))
+			meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub;
+
+		// If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes,
+		// then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes.
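+		// Worked example with assumed, purely illustrative numbers: mvmpg_per_row_ub = 60,
+		// meta_per_mvmpg_per_channel_ub = 128, num_chans = 8 and mcache_size_bytes = 4096 give, on the
+		// gpuvm/vertical path below, mcache_row_bytes_per_channel = 60 * 128 = 7680,
+		// mcache_row_bytes = 7680 * 8 = 61440, and num_mcaches = ceil(7680 / 4096) = 2.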
+ if (p->gpuvm_enable || p->surf_vert) {
+ *p->mcache_row_bytes_per_channel = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub;
+ *p->mcache_row_bytes = *p->mcache_row_bytes_per_channel * p->num_chans;
+ } else { // horizontal access and gpuvm disabled
+ *p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256;
+ if (p->mcache_line_size_bytes != 0)
+ *p->mcache_row_bytes_per_channel = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes);
+ }
+
+ *p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref
+ if (p->mcache_size_bytes != 0)
+ *p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes_per_channel / p->mcache_size_bytes, 1);
+
+ mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub;
+ *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
+ DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes);
+ DML_LOG_VERBOSE("DML::%s: blk_bytes = %u\n", __func__, blk_bytes);
+ DML_LOG_VERBOSE("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub);
+ DML_LOG_VERBOSE("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height);
+ DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor);
+ DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor);
+#endif
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes);
+ DML_LOG_VERBOSE("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel);
+ DML_LOG_VERBOSE("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches);
+#endif
+ DML_ASSERT(*p->num_mcaches > 0);
+}
+
+static void calculate_mcache_setting(
+ struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_calculate_mcache_setting_params *p)
+{
+ unsigned int n;
+
+ struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals;
+ memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals));
+
+ *p->num_mcaches_l = 0;
+ *p->mcache_row_bytes_l = 0;
+ *p->mcache_row_bytes_per_channel_l = 0;
+ *p->dcc_dram_bw_nom_overhead_factor_l = 1.0;
+ *p->dcc_dram_bw_pref_overhead_factor_l = 1.0;
+
+ *p->num_mcaches_c = 0;
+ *p->mcache_row_bytes_c = 0;
+ *p->mcache_row_bytes_per_channel_c = 0;
+ *p->dcc_dram_bw_nom_overhead_factor_c = 1.0;
+ *p->dcc_dram_bw_pref_overhead_factor_c = 1.0;
+
+ *p->mall_comb_mcache_l = 0;
+ *p->mall_comb_mcache_c = 0;
+ *p->lc_comb_mcache = 0;
+
+ if (!p->dcc_enable)
+ return;
+
+ l->is_dual_plane = dml_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha;
+
+ l->l_p.num_chans = p->num_chans;
+ l->l_p.mem_word_bytes = p->mem_word_bytes;
+ l->l_p.mcache_size_bytes = p->mcache_size_bytes;
+ l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
+ l->l_p.gpuvm_enable = p->gpuvm_enable;
+ l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
+ l->l_p.surf_vert = p->surf_vert;
+ l->l_p.vp_stationary = p->vp_stationary;
+ l->l_p.tiling_mode = p->tiling_mode; + l->l_p.vp_start_x = p->vp_start_x_l; + l->l_p.vp_start_y = p->vp_start_y_l; + l->l_p.full_vp_width = p->full_vp_width_l; + l->l_p.full_vp_height = p->full_vp_height_l; + l->l_p.blk_width = p->blk_width_l; + l->l_p.blk_height = p->blk_height_l; + l->l_p.vmpg_width = p->vmpg_width_l; + l->l_p.vmpg_height = p->vmpg_height_l; + l->l_p.full_swath_bytes = p->full_swath_bytes_l; + l->l_p.bytes_per_pixel = p->bytes_per_pixel_l; + + // output + l->l_p.num_mcaches = p->num_mcaches_l; + l->l_p.mcache_row_bytes = p->mcache_row_bytes_l; + l->l_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_l; + l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l; + l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l; + l->l_p.mvmpg_width = &l->mvmpg_width_l; + l->l_p.mvmpg_height = &l->mvmpg_height_l; + l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l; + l->l_p.meta_row_width_ub = &l->meta_row_width_l; + l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l; + + calculate_mcache_row_bytes(scratch, &l->l_p); + DML_ASSERT(*p->num_mcaches_l > 0); + + if (l->is_dual_plane) { + l->c_p.num_chans = p->num_chans; + l->c_p.mem_word_bytes = p->mem_word_bytes; + l->c_p.mcache_size_bytes = p->mcache_size_bytes; + l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes; + l->c_p.gpuvm_enable = p->gpuvm_enable; + l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes; + l->c_p.surf_vert = p->surf_vert; + l->c_p.vp_stationary = p->vp_stationary; + l->c_p.tiling_mode = p->tiling_mode; + l->c_p.vp_start_x = p->vp_start_x_c; + l->c_p.vp_start_y = p->vp_start_y_c; + l->c_p.full_vp_width = p->full_vp_width_c; + l->c_p.full_vp_height = p->full_vp_height_c; + l->c_p.blk_width = p->blk_width_c; + l->c_p.blk_height = p->blk_height_c; + l->c_p.vmpg_width = p->vmpg_width_c; + l->c_p.vmpg_height = p->vmpg_height_c; + l->c_p.full_swath_bytes = p->full_swath_bytes_c; + l->c_p.bytes_per_pixel = p->bytes_per_pixel_c; + + // output + l->c_p.num_mcaches = p->num_mcaches_c; + l->c_p.mcache_row_bytes = p->mcache_row_bytes_c; + l->c_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_c; + l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c; + l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c; + l->c_p.mvmpg_width = &l->mvmpg_width_c; + l->c_p.mvmpg_height = &l->mvmpg_height_c; + l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c; + l->c_p.meta_row_width_ub = &l->meta_row_width_c; + l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c; + + calculate_mcache_row_bytes(scratch, &l->c_p); + DML_ASSERT(*p->num_mcaches_c > 0); + } + + // Sharing for iMALL access + l->mcache_remainder_l = *p->mcache_row_bytes_per_channel_l % p->mcache_size_bytes; + l->mcache_remainder_c = *p->mcache_row_bytes_per_channel_c % p->mcache_size_bytes; + l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l; + l->mvmpg_access_width_c = p->surf_vert ? 
l->mvmpg_height_c : l->mvmpg_width_c;
+
+ if (p->imall_enable) {
+ *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes);
+
+ if (l->is_dual_plane)
+ *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes);
+ }
+
+ if (!p->surf_vert) // horizontal access
+ l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2;
+ else // vertical access
+ l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2;
+
+ // The algorithm starts with computing a non-integer avg_mcache_element_size_l/c:
+ if (*p->num_mcaches_l) {
+ l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l;
+ }
+ if (l->is_dual_plane) {
+ l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c;
+
+ /* if either remainder is 0, then mcache sharing is not needed or not possible due to full utilization */
+ if (l->mcache_remainder_l && l->mcache_remainder_c) {
+ if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
+ l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) +
+ (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1)));
+ }
+ *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c);
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: imall_enable = %u\n", __func__, p->imall_enable);
+ DML_LOG_VERBOSE("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane);
+ DML_LOG_VERBOSE("DML::%s: surf_vert = %u\n", __func__, p->surf_vert);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l);
+ DML_LOG_VERBOSE("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l);
+ DML_LOG_VERBOSE("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l);
+ DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l);
+ DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l);
+
+ if (l->is_dual_plane) {
+ DML_LOG_VERBOSE("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c);
+ DML_LOG_VERBOSE("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c);
+ DML_LOG_VERBOSE("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor);
+ DML_LOG_VERBOSE("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c);
+ DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c);
+ DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c);
+ DML_LOG_VERBOSE("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size);
+ DML_LOG_VERBOSE("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache);
+ }
+#endif
+ // calculate split_coordinate
+ l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l;
+ l->full_vp_access_width_c = p->surf_vert ? p->full_vp_height_c : p->full_vp_width_c;
+
+ for (n = 0; n < *p->num_mcaches_l - 1; n++) {
+ p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l;
+ }
+ p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
+
+ if (l->is_dual_plane) {
+ for (n = 0; n < *p->num_mcaches_c - 1; n++) {
+ p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c;
+ }
+ p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
+ }
+#ifdef __DML_VBA_DEBUG__
+ for (n = 0; n < *p->num_mcaches_l; n++)
+ DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
+
+ if (l->is_dual_plane) {
+ for (n = 0; n < *p->num_mcaches_c; n++)
+ DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
+ }
+#endif
+
+ // Luma/Chroma combine in the last mcache
+ // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, while staying aligned to the mvmpg boundary
+ if (*p->lc_comb_mcache && l->is_dual_plane) {
+ for (n = 0; n < *p->num_mcaches_l - 1; n++)
+ p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l;
+ p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
+
+ for (n = 0; n < *p->num_mcaches_c - 1; n++)
+ p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c;
+ p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
+
+#ifdef __DML_VBA_DEBUG__
+ for (n = 0; n < *p->num_mcaches_l; n++)
+ DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
+
+ for (n = 0; n < *p->num_mcaches_c; n++)
+ DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
+#endif
+ }
+
+ *p->mcache_shift_granularity_l = l->mvmpg_access_width_l;
+ *p->mcache_shift_granularity_c = l->mvmpg_access_width_c;
+}
+
+static void calculate_mall_bw_overhead_factor(
+ double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref
+ double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref
+
+ // input
+ const struct dml2_display_cfg *display_cfg,
+ unsigned int num_active_planes)
+{
+ for (unsigned int k = 0; k < num_active_planes; ++k) {
+ mall_prefetch_sdp_overhead_factor[k] = 1.0;
+ mall_prefetch_dram_overhead_factor[k] = 1.0;
+
+ // SDP - on the return side
+ if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) // always no data return
+ mall_prefetch_sdp_overhead_factor[k] = 1.25;
+ else if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return)
+ mall_prefetch_sdp_overhead_factor[k] = 0.25;
+
+ // DRAM
+ if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
+ mall_prefetch_dram_overhead_factor[k] = 2.0;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]);
+#endif
+ }
+}
+
+static double dml_get_return_bandwidth_available(
+ const struct dml2_soc_bb *soc,
+ enum dml2_core_internal_soc_state_type state_type,
+ enum
dml2_core_internal_bw_type bw_type, + bool is_avg_bw, + bool is_hvm_en, + bool is_hvm_only, + double dcfclk_mhz, + double fclk_mhz, + double dram_bw_mbps) +{ + double return_bw_mbps = 0.; + double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz; + double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes; + double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes; + + double derate_sdp_factor; + double derate_fabric_factor; + double derate_dram_factor; + + double derate_sdp_bandwidth; + double derate_fabric_bandwidth; + double derate_dram_bandwidth; + + if (is_avg_bw) { + if (state_type == dml2_core_internal_soc_state_svp_prefetch) { + derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0; + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0; + } else { // just assume sys_active + derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0; + derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0; + } + } else { // urgent bw + if (state_type == dml2_core_internal_soc_state_svp_prefetch) { + derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0; + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0; + + if (is_hvm_en) { + if (is_hvm_only) + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0; + else + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0; + } else { + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0; + } + } else { // just assume sys_active + derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0; + + if (is_hvm_en) { + if (is_hvm_only) + derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0; + else + derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0; + } else { + derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0; + } + } + } + + derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor; + derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor; + derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor; + + if (bw_type == dml2_core_internal_bw_sdp) + return_bw_mbps = math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth); + else // dml2_core_internal_bw_dram + return_bw_mbps = derate_dram_bandwidth; + + DML_LOG_VERBOSE("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw); + DML_LOG_VERBOSE("DML::%s: is_hvm_en = %u\n", 
__func__, is_hvm_en);
+ DML_LOG_VERBOSE("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only);
+ DML_LOG_VERBOSE("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type));
+ DML_LOG_VERBOSE("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type));
+ DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
+ DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
+ DML_LOG_VERBOSE("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth);
+ DML_LOG_VERBOSE("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth);
+ DML_LOG_VERBOSE("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth);
+ DML_LOG_VERBOSE("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor);
+ DML_LOG_VERBOSE("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor);
+ DML_LOG_VERBOSE("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor);
+ DML_LOG_VERBOSE("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps);
+ return return_bw_mbps;
+}
+
+static noinline_for_stack void calculate_bandwidth_available(
+ double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
+ double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+ double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
+ double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+ double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],
+ double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],
+
+ const struct dml2_soc_bb *soc,
+ bool HostVMEnable,
+ double dcfclk_mhz,
+ double fclk_mhz,
+ double dram_bw_mbps)
+{
+ unsigned int n, m;
+
+ DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
+ DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
+ DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps);
+
+ // Calculate all the bandwidth available
+ for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+ for (n = 0; n < dml2_core_internal_bw_max; n++) {
+ avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc,
+ m, // soc_state
+ n, // bw_type
+ 1, // avg_bw
+ HostVMEnable,
+ 0, // hvm_only
+ dcfclk_mhz,
+ fclk_mhz,
+ dram_bw_mbps);
+
+ urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
+
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]);
+ DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]);
+#endif
+
+ // urg_bandwidth_available_vm_only is indexed by soc_state
+ if (n == dml2_core_internal_bw_dram) {
+ urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
+ urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
+ }
+ }
+
+ avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
+ urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]);
+ DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]);
+ DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[m]);
+#endif
+ }
+}
+
+static void calculate_avg_bandwidth_required(
+ double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+
+ // input
+ const struct dml2_display_cfg *display_cfg,
+ unsigned int num_active_planes,
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double cursor_bw[],
+ double dcc_dram_bw_nom_overhead_factor_p0[],
+ double dcc_dram_bw_nom_overhead_factor_p1[],
+ double mall_prefetch_dram_overhead_factor[],
+ double mall_prefetch_sdp_overhead_factor[])
+{
+ unsigned int n, m, k;
+ double sdp_overhead_factor;
+ double dram_overhead_factor_p0;
+ double dram_overhead_factor_p1;
+
+ // Average BW support check
+ for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+ for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram
+ avg_bandwidth_required[m][n] = 0;
+ }
+ }
+
+ // SysActive and SVP Prefetch AVG bandwidth Check
+ for (k = 0; k < num_active_planes; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: plane %0d\n", __func__, k);
+ DML_LOG_VERBOSE("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]);
+ DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]);
+ DML_LOG_VERBOSE("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]);
+ DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]);
+#endif
+
+ sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k];
+ dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k];
+ dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k];
+
+ // FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation?
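
To make the derate plumbing above concrete, here is a minimal numeric sketch of what dml_get_return_bandwidth_available() computes for the SDP path; all values are hypothetical, not taken from any real SoC bounding box:

    /* Hypothetical inputs, for illustration only. */
    static double example_sdp_bandwidth_mbps(void)
    {
            double dcfclk_mhz = 1000.0;
            double return_bus_width_bytes = 64.0;   /* soc->return_bus_width_bytes */
            double dcfclk_derate_percent = 90.0;    /* a derate_table entry */

            double ideal_sdp = return_bus_width_bytes * dcfclk_mhz; /* 64000 MB/s */

            /* the reported SDP figure is then min()'d against the
             * similarly derated fabric bandwidth */
            return ideal_sdp * dcfclk_derate_percent / 100.0;       /* 57600 MB/s */
    }
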
+ // active avg bw does not include phantom, but svp_prefetch avg bw should include phantom pipes
+ if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+ avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
+ avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
+ }
+ avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
+ avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+#endif
+ }
+}
+
+static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_CalculateVMRowAndSwath_params *p)
+{
+ struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals;
+
+ s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels);
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->display_cfg->gpuvm_enable == true) {
+ p->vm_group_bytes[k] = 512;
+ p->dpte_group_bytes[k] = 512;
+ } else {
+ p->vm_group_bytes[k] = 0;
+ p->dpte_group_bytes[k] = 0;
+ }
+
+ if (dml_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) {
+ if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) {
+ s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
+ s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
+ } else {
+ s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
+ s->PTEBufferSizeInRequestsForChroma[k] =
p->PTEBufferSizeInRequestsChroma; + } + + scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary; + scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable; + scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface; + scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC; + scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC; + scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat; + scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling; + scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC; + scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle; + scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k]; + scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC; + scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC; + scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC; + scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable; + scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels; + scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k]; + scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC; + scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC; + scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC; + scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]); + scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC; + scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present; + + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_c[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k]; + scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k]; + 
scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k]; + + scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k]; + + s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params); + + p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( + p->myPipe[k].VRatioChroma, + p->myPipe[k].VTapsChroma, + p->myPipe[k].InterlaceEnable, + p->myPipe[k].ProgressiveToInterlaceUnitInOPP, + p->myPipe[k].SwathHeightC, + p->myPipe[k].RotationAngle, + p->myPipe[k].mirrored, + p->myPipe[k].ViewportStationary, + p->SwathWidthC[k], + p->myPipe[k].ViewportHeightC, + p->myPipe[k].ViewportXStartC, + p->myPipe[k].ViewportYStartC, + + // Output + &p->VInitPreFillC[k], + &p->MaxNumSwathC[k]); + } else { + s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma; + s->PTEBufferSizeInRequestsForChroma[k] = 0; + s->PixelPTEBytesPerRowC[k] = 0; + s->PixelPTEBytesPerRowStorageC[k] = 0; + s->vm_bytes_c = 0; + p->MaxNumSwathC[k] = 0; + p->PrefetchSourceLinesC[k] = 0; + s->dpte_row_height_chroma_one_row_per_frame[k] = 0; + s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; + s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; + } + + scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary; + scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable; + scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface; + scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY; + scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY; + scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat; + scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling; + scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY; + scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle; + scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k]; + scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight; + scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart; + scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart; + scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable; + scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels; + scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k]; + scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY; + scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = 
p->myPipe[k].BlockWidthY; + scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY; + scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]); + scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY; + scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present; + + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k]; + scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k]; + scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k]; + + scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k]; + + s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params); + + p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( + p->myPipe[k].VRatio, + p->myPipe[k].VTaps, + p->myPipe[k].InterlaceEnable, + p->myPipe[k].ProgressiveToInterlaceUnitInOPP, + p->myPipe[k].SwathHeightY, + p->myPipe[k].RotationAngle, + p->myPipe[k].mirrored, + p->myPipe[k].ViewportStationary, + p->SwathWidthY[k], + p->myPipe[k].ViewportHeight, + p->myPipe[k].ViewportXStart, + p->myPipe[k].ViewportYStart, + + // Output + &p->VInitPreFillY[k], + &p->MaxNumSwathY[k]); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]); +#endif + p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels); + 
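
The (1 + 8 * HostVMDynamicLevels) multiplier applied to vm_bytes just above scales the GPU page-table fetch to account for host VM translation; a worked example with assumed sizes:

    /* Worked example (assumed sizes): with vm_bytes_l == 4096,
     * vm_bytes_c == 2048 and HostVMDynamicLevels == 2, the multiplier is
     * (1 + 8 * 2) == 17, so vm_bytes == 6144 * 17 == 104448 bytes. */
    static unsigned int example_vm_bytes(void)
    {
            unsigned int vm_bytes_l = 4096, vm_bytes_c = 2048, hostvm_levels = 2;

            return (vm_bytes_l + vm_bytes_c) * (1 + 8 * hostvm_levels);
    }
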
p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k]; + p->meta_row_bytes_per_row_ub_l[k] = s->meta_row_bytes_per_row_ub_l[k]; + p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k]; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]); +#endif + if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) { + p->PTEBufferSizeNotExceeded[k] = true; + } else { + p->PTEBufferSizeNotExceeded[k] = false; + } + + s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] && + s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]); +#ifdef __DML_VBA_DEBUG__ + if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) { + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + + DML_LOG_VERBOSE("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); + } +#endif + } + + CalculateMALLUseForStaticScreen( + p->display_cfg, + p->NumberOfActiveSurfaces, + p->MALLAllocatedForDCN, + p->SurfaceSizeInMALL, + s->one_row_per_frame_fits_in_buffer, + // Output + p->is_using_mall_for_ss); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->gpuvm_enable) { + if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) { + p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value; + } + p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || + dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64); + p->BIGK_FRAGMENT_SIZE[k] = (unsigned int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12); + } else { + 
p->PTE_BUFFER_MODE[k] = 0;
+ p->BIGK_FRAGMENT_SIZE[k] = 0;
+ }
+ }
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ p->DCCMetaBufferSizeNotExceeded[k] = true;
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]);
+#endif
+ p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
+ (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle));
+
+ p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame);
+
+ if (p->use_one_row_for_frame[k]) {
+ p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
+ p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
+ s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
+ p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
+ p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
+ s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
+ p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
+ }
+
+ if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) {
+ p->DCCMetaBufferSizeNotExceeded[k] = true;
+ } else {
+ p->DCCMetaBufferSizeNotExceeded[k] = false;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes);
+ DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
+#endif
+ }
+
+ s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
+ s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
+ p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
+ p->dpte_row_bytes_per_row_l[k] = s->PixelPTEBytesPerRowY[k];
+ p->dpte_row_bytes_per_row_c[k] = s->PixelPTEBytesPerRowC[k];
+
+ // if one row of dPTEs is meant to span the entire frame, then for these calculations, we will pretend that one big row is fetched in two halves
+ if (p->use_one_row_for_frame[k])
+ p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
+
+ CalculateRowBandwidth(
+ p->display_cfg->gpuvm_enable,
+ p->use_one_row_for_frame[k],
+ p->myPipe[k].SourcePixelFormat,
+ p->myPipe[k].VRatio,
+ p->myPipe[k].VRatioChroma,
+ p->myPipe[k].DCCEnable,
+ p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
+ s->PixelPTEBytesPerRowY[k],
+ s->PixelPTEBytesPerRowC[k],
+ p->dpte_row_height_luma[k],
+ p->dpte_row_height_chroma[k],
+
+ p->mrq_present,
+ p->meta_row_bytes_per_row_ub_l[k],
+ p->meta_row_bytes_per_row_ub_c[k],
+ p->meta_row_height_luma[k],
+ p->meta_row_height_chroma[k],
+
+ // Output
+ &p->dpte_row_bw[k],
+ &p->meta_row_bw[k]);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
+
DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); +#endif + } +} + +static double CalculateUrgentLatency( + double UrgentLatencyPixelDataOnly, + double UrgentLatencyPixelMixedWithVMData, + double UrgentLatencyVMDataOnly, + bool DoUrgentLatencyAdjustment, + double UrgentLatencyAdjustmentFabricClockComponent, + double UrgentLatencyAdjustmentFabricClockReference, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int urgent_ramp_uclk_cycles, + unsigned int df_qos_response_time_fclk_cycles, + unsigned int max_round_trip_to_furthest_cs_fclk_cycles, + unsigned int mall_overhead_fclk_cycles, + double umc_urgent_ramp_latency_margin, + double fabric_max_transport_latency_margin) +{ + double urgent_latency = 0; + if (qos_type == dml2_qos_param_type_dcn4x) { + urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock + + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) + + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); + } else { + urgent_latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); + if (DoUrgentLatencyAdjustment == true) { + urgent_latency = urgent_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); + } + } +#ifdef __DML_VBA_DEBUG__ + if (qos_type == dml2_qos_param_type_dcn4x) { + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin); + } else { + DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyVMDataOnly = 
%f\n", __func__, UrgentLatencyVMDataOnly); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference); + } + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency); +#endif + return urgent_latency; +} + +static double CalculateTripToMemory( + double UrgLatency, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int trip_to_memory_uclk_cycles, + unsigned int max_round_trip_to_furthest_cs_fclk_cycles, + unsigned int mall_overhead_fclk_cycles, + double umc_max_latency_margin, + double fabric_max_transport_latency_margin) +{ + double trip_to_memory_us; + if (qos_type == dml2_qos_param_type_dcn4x) { + trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock + + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); + } else { + trip_to_memory_us = UrgLatency; + } + +#ifdef __DML_VBA_DEBUG__ + if (qos_type == dml2_qos_param_type_dcn4x) { + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin); + DML_LOG_VERBOSE("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin); + } else { + DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + } + DML_LOG_VERBOSE("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us); +#endif + + + return trip_to_memory_us; +} + +static double CalculateMetaTripToMemory( + double UrgLatency, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int meta_trip_to_memory_uclk_cycles, + unsigned int meta_trip_to_memory_fclk_cycles, + double umc_max_latency_margin, + double fabric_max_transport_latency_margin) +{ + double meta_trip_to_memory_us; + if (qos_type == dml2_qos_param_type_dcn4x) { + meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); + } else { + meta_trip_to_memory_us = UrgLatency; + } + +#ifdef __DML_VBA_DEBUG__ + if (qos_type == dml2_qos_param_type_dcn4x) { + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + } else { + DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + } + 
DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us); +#endif + + + return meta_trip_to_memory_us; +} + +static void calculate_cursor_req_attributes( + unsigned int cursor_width, + unsigned int cursor_bpp, + + // output + unsigned int *cursor_lines_per_chunk, + unsigned int *cursor_bytes_per_line, + unsigned int *cursor_bytes_per_chunk, + unsigned int *cursor_bytes) +{ + unsigned int cursor_bytes_per_req = 0; + unsigned int cursor_width_bytes = 0; + unsigned int cursor_height = 0; + + //SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply. + //- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B + //- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width + + //The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows. + + cursor_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1); + if (cursor_width_bytes <= 64) + cursor_bytes_per_req = 64; + else if (cursor_width_bytes <= 128) + cursor_bytes_per_req = 128; + else + cursor_bytes_per_req = 256; + + //If cursor_width_bytes is greater than 256B, then multiple 256B requests are issued to fetch the entire cursor line. + *cursor_bytes_per_line = (unsigned int)math_ceil2((double)cursor_width_bytes, cursor_bytes_per_req); + + //Nominally, the cursor chunk is 1KB or 2KB but it is restricted to a power of 2 number of lines with a maximum of 16 lines. + if (cursor_bpp == 2) { + *cursor_lines_per_chunk = 16; + } else if (cursor_bpp == 32) { + if (cursor_width <= 32) + *cursor_lines_per_chunk = 16; + else if (cursor_width <= 64) + *cursor_lines_per_chunk = 8; + else if (cursor_width <= 128) + *cursor_lines_per_chunk = 4; + else + *cursor_lines_per_chunk = 2; + } else if (cursor_bpp == 64) { + if (cursor_width <= 16) + *cursor_lines_per_chunk = 16; + else if (cursor_width <= 32) + *cursor_lines_per_chunk = 8; + else if (cursor_width <= 64) + *cursor_lines_per_chunk = 4; + else if (cursor_width <= 128) + *cursor_lines_per_chunk = 2; + else + *cursor_lines_per_chunk = 1; + } else { + if (cursor_width > 0) { + DML_LOG_VERBOSE("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp); + DML_ASSERT(0); + } + } + + *cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk; + + // For the cursor implementation, all requested data is stored in the return buffer. Given this fact, the cursor_bytes can be directly compared with the CursorBufferSize. 
+ // Only cursor_width is provided for worst case sizing so assume that the cursor is square + cursor_height = cursor_width; + *cursor_bytes = *cursor_bytes_per_line * cursor_height; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp); + DML_LOG_VERBOSE("DML::%s: cursor_width = %d\n", __func__, cursor_width); + DML_LOG_VERBOSE("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req); + DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes); + DML_LOG_VERBOSE("DML::%s: cursor_pitch = %d\n", __func__, cursor_bpp == 2 ? 256 : (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1)); +#endif +} + +static void calculate_cursor_urgent_burst_factor( + unsigned int CursorBufferSize, + unsigned int CursorWidth, + unsigned int cursor_bytes_per_chunk, + unsigned int cursor_lines_per_chunk, + double LineTime, + double UrgentLatency, + + double *UrgentBurstFactorCursor, + bool *NotEnoughUrgentLatencyHiding) +{ + unsigned int LinesInCursorBuffer = 0; + double CursorBufferSizeInTime = 0; + + if (CursorWidth > 0) { + LinesInCursorBuffer = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1) * cursor_lines_per_chunk; + + CursorBufferSizeInTime = LinesInCursorBuffer * LineTime; + if (CursorBufferSizeInTime - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorCursor = 1; + } else { + *NotEnoughUrgentLatencyHiding = 0; + *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: LinesInCursorBuffer = %u\n", __func__, LinesInCursorBuffer); + DML_LOG_VERBOSE("DML::%s: CursorBufferSizeInTime = %f\n", __func__, CursorBufferSizeInTime); + DML_LOG_VERBOSE("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor); + DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); +#endif + + } +} + +static void CalculateUrgentBurstFactor( + const struct dml2_plane_parameters *plane_cfg, + unsigned int swath_width_luma_ub, + unsigned int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double LineTime, + double UrgentLatency, + double VRatio, + double VRatioC, + double BytePerPixelInDETY, + double BytePerPixelInDETC, + unsigned int DETBufferSizeY, + unsigned int DETBufferSizeC, + // Output + double *UrgentBurstFactorLuma, + double *UrgentBurstFactorChroma, + bool *NotEnoughUrgentLatencyHiding) +{ + double LinesInDETLuma; + double LinesInDETChroma; + double DETBufferSizeInTimeLuma; + double DETBufferSizeInTimeChroma; + + *NotEnoughUrgentLatencyHiding = 0; + *UrgentBurstFactorLuma = 0; + *UrgentBurstFactorChroma = 0; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); + 
DML_LOG_VERBOSE("DML::%s: VRatioC = %f\n", __func__, VRatioC); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC); + DML_LOG_VERBOSE("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); +#endif + DML_ASSERT(VRatio > 0); + + LinesInDETLuma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; + + DETBufferSizeInTimeLuma = math_floor2(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; + if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorLuma = 1; + } else { + *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); + } + + if (BytePerPixelInDETC > 0) { + LinesInDETChroma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub; + + DETBufferSizeInTimeChroma = math_floor2(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC; + if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorChroma = 1; + } else { + *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); + } + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: LinesInDETLuma = %f\n", __func__, LinesInDETLuma); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, DETBufferSizeInTimeLuma); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); + DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); +#endif +} + +static void CalculateDCFCLKDeepSleepTdlut( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + unsigned int DPPPerSurface[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int ReturnBusWidth, + + double dispclk, + unsigned int tdlut_bytes_to_deliver[], + double prefetch_swath_time_us[], + + // Output + double *DCFClkDeepSleep) +{ + double DisplayPipeLineDeliveryTimeLuma; + double DisplayPipeLineDeliveryTimeChroma; + double DCFClkDeepSleepPerSurface[DML2_MAX_PLANES]; + double ReadBandwidth = 0.0; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz; + } else { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma = 0; + } else { + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) 
{ + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz; + } else { + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } + + if (BytePerPixelC[k] > 0) { + DCFClkDeepSleepPerSurface[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, + __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); + } else { + DCFClkDeepSleepPerSurface[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; + } + DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16); + + // adjust for 3dlut delivery time + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) { + double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k]; + + DML_LOG_VERBOSE("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk); + + // increase the deepsleep dcfclk to match the original dispclk throughput rate + if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) { + DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], tdlut_required_deepsleep_dcfclk); + DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], dispclk / 4.0); + } + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); + DML_LOG_VERBOSE("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); +#endif + } + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + } + + *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__); + DML_LOG_VERBOSE("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); + DML_LOG_VERBOSE("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); +#endif + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); + } + + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); +} + +static noinline_for_stack void CalculateDCFCLKDeepSleep( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + unsigned int DPPPerSurface[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int ReturnBusWidth, + + // Output + double *DCFClkDeepSleep) +{ + double 
zero_double[DML2_MAX_PLANES]; + unsigned int zero_integer[DML2_MAX_PLANES]; + + memset(zero_double, 0, DML2_MAX_PLANES * sizeof(double)); + memset(zero_integer, 0, DML2_MAX_PLANES * sizeof(unsigned int)); + + CalculateDCFCLKDeepSleepTdlut( + display_cfg, + NumberOfActiveSurfaces, + BytePerPixelY, + BytePerPixelC, + SwathWidthY, + SwathWidthC, + DPPPerSurface, + PSCL_THROUGHPUT, + PSCL_THROUGHPUT_CHROMA, + Dppclk, + ReadBandwidthLuma, + ReadBandwidthChroma, + ReturnBusWidth, + 0, + zero_integer, //tdlut_bytes_to_deliver, + zero_double, //prefetch_swath_time_us, + + // Output + DCFClkDeepSleep); +} + +static double CalculateWriteBackDelay( + enum dml2_source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackVTaps, + unsigned int WritebackDestinationWidth, + unsigned int WritebackDestinationHeight, + unsigned int WritebackSourceHeight, + unsigned int HTotal) +{ + double CalculateWriteBackDelay; + double Line_length; + double Output_lines_last_notclamped; + double WritebackVInit; + + WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; + Line_length = math_max2((double)WritebackDestinationWidth, math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); + Output_lines_last_notclamped = WritebackDestinationHeight - 1 - math_ceil2(((double)WritebackSourceHeight - (double)WritebackVInit) / (double)WritebackVRatio, 1.0); + if (Output_lines_last_notclamped < 0) { + CalculateWriteBackDelay = 0; + } else { + CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; + } + return CalculateWriteBackDelay; +} + +static unsigned int CalculateMaxVStartup( + bool ptoi_supported, + unsigned int vblank_nom_default_us, + const struct dml2_timing_cfg *timing, + double write_back_delay_us) +{ + unsigned int vblank_size = 0; + unsigned int max_vstartup_lines = 0; + + double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000); + unsigned int vblank_actual = timing->v_total - timing->v_active; + unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0); + unsigned int vblank_avail = (timing->vblank_nom == 0) ? 
vblank_nom_default_in_line : (unsigned int)timing->vblank_nom; + + vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail); + + if (timing->interlaced && !ptoi_supported) + max_vstartup_lines = (unsigned int)(math_floor2((vblank_size - 1) / 2.0, 1.0)); + else + max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0)); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VBlankNom = %lu\n", __func__, timing->vblank_nom); + DML_LOG_VERBOSE("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); + DML_LOG_VERBOSE("DML::%s: line_time_us = %f\n", __func__, line_time_us); + DML_LOG_VERBOSE("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); + DML_LOG_VERBOSE("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); + DML_LOG_VERBOSE("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); +#endif + max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START); + return max_vstartup_lines; +} + +static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p) +{ + unsigned int MaximumSwathHeightY[DML2_MAX_PLANES] = { 0 }; + unsigned int MaximumSwathHeightC[DML2_MAX_PLANES] = { 0 }; + unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES] = { 0 }; + unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES] = { 0 }; + unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES] = { 0 }; + unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES] = { 0 }; + + unsigned int TotalActiveDPP = 0; + bool NoChromaOrLinear = true; + unsigned int SurfaceDoingUnboundedRequest = 0; + unsigned int DETBufferSizeInKByteForSwathCalculation; + + const long TTUFIFODEPTH = 8; + const long MAXIMUMCOMPRESSION = 4; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + DML_LOG_VERBOSE("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); + } +#endif + CalculateSwathWidth( + p->display_cfg, + p->ForceSingleDPP, + p->NumberOfActiveSurfaces, + p->ODMMode, + p->BytePerPixY, + p->BytePerPixC, + p->Read256BytesBlockHeightY, + p->Read256BytesBlockHeightC, + p->Read256BytesBlockWidthY, + p->Read256BytesBlockWidthC, + p->surf_linear128_l, + p->surf_linear128_c, + p->DPPPerSurface, + + // Output + p->req_per_swath_ub_l, + p->req_per_swath_ub_c, + SwathWidthSingleDPP, + SwathWidthSingleDPPChroma, + p->SwathWidth, + p->SwathWidthChroma, + MaximumSwathHeightY, + MaximumSwathHeightC, + p->swath_width_luma_ub, + p->swath_width_chroma_ub); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]); + p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u 
swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); +#endif + if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) { + p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256)); + p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256)); + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]); + if (p->DPPPerSurface[k] > 0) + SurfaceDoingUnboundedRequest = k; + if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha + || p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { + NoChromaOrLinear = false; + } + } + + *p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear); + + CalculateDETBufferSize( + &scratch->CalculateDETBufferSize_locals, + p->display_cfg, + p->ForceSingleDPP, + p->NumberOfActiveSurfaces, + *p->UnboundedRequestEnabled, + p->nomDETInKByte, + p->MaxTotalDETInKByte, + p->ConfigReturnBufferSizeInKByte, + p->MinCompressedBufferSizeInKByte, + p->ConfigReturnBufferSegmentSizeInkByte, + p->CompressedBufferSegmentSizeInkByte, + p->ReadBandwidthLuma, + p->ReadBandwidthChroma, + p->full_swath_bytes_l, + p->full_swath_bytes_c, + p->DPPPerSurface, + + // Output + p->DETBufferSizeInKByte, // per hubp pipe + p->CompressedBufferSizeInkByte); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); +#endif + + *p->ViewportSizeSupport = true; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + + DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 
1024 : p->DETBufferSizeInKByte[k]); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); +#endif + if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { + p->SwathHeightY[k] = MaximumSwathHeightY[k]; + p->SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; + + if (p->surf_linear128_l[k]) + p->request_size_bytes_luma[k] = 128; + else + p->request_size_bytes_luma[k] = 256; + + if (p->surf_linear128_c[k]) + p->request_size_bytes_chroma[k] = 128; + else + p->request_size_bytes_chroma[k] = 256; + + } else if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = MaximumSwathHeightY[k]; + p->SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; + p->request_size_bytes_luma[k] = 256; + p->request_size_bytes_chroma[k] = 256; + + } else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; + p->SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = 256; + + } else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = MaximumSwathHeightY[k]; + p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; + p->request_size_bytes_luma[k] = 256; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + + } else { + p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; + p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 
128 : 64; + } + + if (p->SwathHeightC[k] == 0) + p->request_size_bytes_chroma[k] = 0; + + if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || + p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) { + *p->ViewportSizeSupport = false; + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]); + p->ViewportSizeSupportPerSurface[k] = false; + } else { + p->ViewportSizeSupportPerSurface[k] = true; + } + + if (p->SwathHeightC[k] == 0) { +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k); +#endif + p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024; + p->DETBufferSizeC[k] = 0; + } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) { +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k); +#endif + p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; + p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; + } else { +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k); +#endif + p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024)); + p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k]; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); +#endif + + } + + *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64; + if (*p->UnboundedRequestEnabled) { + *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, + (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH 
/ (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); + DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); +#endif + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b); +#endif + + *p->hw_debug5 = false; +#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE + if (p->NumberOfActiveSurfaces > 1) + *p->hw_debug5 = true; +#else + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1) + && p->display_cfg->plane_descriptors[k].surface.dcc.enable + && ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1) + + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) + *p->hw_debug5 = true; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); + DML_LOG_VERBOSE("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); + DML_LOG_VERBOSE("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); +#endif + } +#endif +} + +static enum dml2_odm_mode DecideODMMode(unsigned int HActive, + double MaxDispclk, + unsigned int MaximumPixelsPerLinePerDSCUnit, + enum dml2_output_format_class OutFormat, + bool UseDSC, + unsigned int NumberOfDSCSlices, + double SurfaceRequiredDISPCLKWithoutODMCombine, + double SurfaceRequiredDISPCLKWithODMCombineTwoToOne, + double SurfaceRequiredDISPCLKWithODMCombineThreeToOne, + double SurfaceRequiredDISPCLKWithODMCombineFourToOne) +{ + enum dml2_odm_mode MinimumRequiredODMModeForMaxDispClock; + enum dml2_odm_mode MinimumRequiredODMModeForMaxDSCHActive; + enum dml2_odm_mode MinimumRequiredODMModeForMax420HActive; + enum dml2_odm_mode ODMMode = dml2_odm_mode_bypass; + + MinimumRequiredODMModeForMaxDispClock = + (SurfaceRequiredDISPCLKWithoutODMCombine <= MaxDispclk) ? dml2_odm_mode_bypass : + (SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk) ? dml2_odm_mode_combine_2to1 : + (SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1; + if (ODMMode < MinimumRequiredODMModeForMaxDispClock) + ODMMode = MinimumRequiredODMModeForMaxDispClock; + + if (UseDSC) { + MinimumRequiredODMModeForMaxDSCHActive = + (HActive <= 1 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_bypass : + (HActive <= 2 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_2to1 : + (HActive <= 3 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1; + if (ODMMode < MinimumRequiredODMModeForMaxDSCHActive) + ODMMode = MinimumRequiredODMModeForMaxDSCHActive; + } + + if (OutFormat == dml2_420) { + MinimumRequiredODMModeForMax420HActive = + (HActive <= 1 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_bypass : + (HActive <= 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? 
dml2_odm_mode_combine_2to1 :
+			(HActive <= 3 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
+		if (ODMMode < MinimumRequiredODMModeForMax420HActive)
+			ODMMode = MinimumRequiredODMModeForMax420HActive;
+	}
+
+	if (UseDSC) {
+		if (ODMMode == dml2_odm_mode_bypass && NumberOfDSCSlices > 4)
+			ODMMode = dml2_odm_mode_combine_2to1;
+		if (ODMMode == dml2_odm_mode_combine_2to1 && NumberOfDSCSlices > 8)
+			ODMMode = dml2_odm_mode_combine_3to1;
+		if (ODMMode == dml2_odm_mode_combine_3to1 && NumberOfDSCSlices != 12)
+			ODMMode = dml2_odm_mode_combine_4to1;
+	}
+
+	return ODMMode;
+}
+
+static void CalculateODMConstraints(
+	enum dml2_odm_mode ODMUse,
+	double SurfaceRequiredDISPCLKWithoutODMCombine,
+	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
+	double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
+	double SurfaceRequiredDISPCLKWithODMCombineFourToOne,
+	unsigned int MaximumPixelsPerLinePerDSCUnit,
+	/* Output */
+	double *DISPCLKRequired,
+	unsigned int *NumberOfDPPRequired,
+	unsigned int *MaxHActiveForDSC,
+	unsigned int *MaxDSCSlices,
+	unsigned int *MaxHActiveFor420)
+{
+	switch (ODMUse) {
+	case dml2_odm_mode_combine_2to1:
+		*DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+		*NumberOfDPPRequired = 2;
+		break;
+	case dml2_odm_mode_combine_3to1:
+		*DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
+		*NumberOfDPPRequired = 3;
+		break;
+	case dml2_odm_mode_combine_4to1:
+		*DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+		*NumberOfDPPRequired = 4;
+		break;
+	case dml2_odm_mode_auto:
+	case dml2_odm_mode_split_1to2:
+	case dml2_odm_mode_mso_1to2:
+	case dml2_odm_mode_mso_1to4:
+	case dml2_odm_mode_bypass:
+	default:
+		*DISPCLKRequired = SurfaceRequiredDISPCLKWithoutODMCombine;
+		*NumberOfDPPRequired = 1;
+		break;
+	}
+	*MaxHActiveForDSC = *NumberOfDPPRequired * MaximumPixelsPerLinePerDSCUnit;
+	*MaxDSCSlices = *NumberOfDPPRequired * DML_MAX_NUM_OF_SLICES_PER_DSC;
+	*MaxHActiveFor420 = *NumberOfDPPRequired * DML2_MAX_FMT_420_BUFFER_WIDTH;
+}
+
+static bool ValidateODMMode(enum dml2_odm_mode ODMMode,
+	double MaxDispclk,
+	unsigned int HActive,
+	enum dml2_output_format_class OutFormat,
+	bool UseDSC,
+	unsigned int NumberOfDSCSlices,
+	unsigned int TotalNumberOfActiveDPP,
+	unsigned int TotalNumberOfActiveOPP,
+	unsigned int MaxNumDPP,
+	unsigned int MaxNumOPP,
+	double DISPCLKRequired,
+	unsigned int NumberOfDPPRequired,
+	unsigned int MaxHActiveForDSC,
+	unsigned int MaxDSCSlices,
+	unsigned int MaxHActiveFor420)
+{
+	bool are_odm_segments_symmetrical = (ODMMode == dml2_odm_mode_combine_3to1) ? UseDSC : true;
+	bool is_max_dsc_slice_required = (ODMMode == dml2_odm_mode_combine_3to1);
+	unsigned int pixels_per_clock_cycle = (OutFormat == dml2_420 || OutFormat == dml2_n422) ? 2 : 1;
+	unsigned int h_timing_div_mode =
+		(ODMMode == dml2_odm_mode_combine_4to1 || ODMMode == dml2_odm_mode_combine_3to1) ? 4 :
+		(ODMMode == dml2_odm_mode_combine_2to1) ? 2 : pixels_per_clock_cycle;
+
+	if (DISPCLKRequired > MaxDispclk)
+		return false;
+	if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP || (TotalNumberOfActiveOPP + NumberOfDPPRequired) > MaxNumOPP)
+		return false;
+	if (are_odm_segments_symmetrical) {
+		if (HActive % (NumberOfDPPRequired * pixels_per_clock_cycle))
+			return false;
+	}
+	if (HActive % h_timing_div_mode)
+		/*
+		 * TODO - OTG_H_TOTAL, OTG_H_BLANK_START/END and
+		 * OTG_H_SYNC_A_START/END all need to be divisible by h timing div
+		 * mode. This logic only checks H active.
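+		 * For example, dml2_odm_mode_combine_2to1 with a 444 output gives
+		 * h_timing_div_mode = 2, so an odd HActive fails this check.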
+ */ + return false; + + if (UseDSC) { + if (HActive > MaxHActiveForDSC) + return false; + if (NumberOfDSCSlices > MaxDSCSlices) + return false; + if (HActive % NumberOfDSCSlices) + return false; + if (NumberOfDSCSlices % NumberOfDPPRequired) + return false; + if (is_max_dsc_slice_required) { + if (NumberOfDSCSlices != MaxDSCSlices) + return false; + } + } + + if (OutFormat == dml2_420) { + if (HActive > MaxHActiveFor420) + return false; + } + + return true; +} + +static noinline_for_stack void CalculateODMMode( + unsigned int MaximumPixelsPerLinePerDSCUnit, + unsigned int HActive, + enum dml2_output_format_class OutFormat, + enum dml2_output_encoder_class Output, + enum dml2_odm_mode ODMUse, + double MaxDispclk, + bool DSCEnable, + unsigned int TotalNumberOfActiveDPP, + unsigned int TotalNumberOfActiveOPP, + unsigned int MaxNumDPP, + unsigned int MaxNumOPP, + double PixelClock, + unsigned int NumberOfDSCSlices, + + // Output + bool *TotalAvailablePipesSupport, + unsigned int *NumberOfDPP, + enum dml2_odm_mode *ODMMode, + double *RequiredDISPCLKPerSurface) +{ + double SurfaceRequiredDISPCLKWithoutODMCombine; + double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + double SurfaceRequiredDISPCLKWithODMCombineThreeToOne; + double SurfaceRequiredDISPCLKWithODMCombineFourToOne; + double DISPCLKRequired; + unsigned int NumberOfDPPRequired; + unsigned int MaxHActiveForDSC; + unsigned int MaxDSCSlices; + unsigned int MaxHActiveFor420; + bool success; + bool UseDSC = DSCEnable && (NumberOfDSCSlices > 0); + enum dml2_odm_mode DecidedODMMode; + bool isTMDS420 = (OutFormat == dml2_420 && Output == dml2_hdmi); + + SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock, isTMDS420); + SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock, isTMDS420); + SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock, isTMDS420); + SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock, isTMDS420); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse); + DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: DSCEnable = %d\n", __func__, DSCEnable); + DML_LOG_VERBOSE("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk); + DML_LOG_VERBOSE("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne); +#endif + if (ODMUse == dml2_odm_mode_auto) + DecidedODMMode = DecideODMMode(HActive, + MaxDispclk, + MaximumPixelsPerLinePerDSCUnit, + OutFormat, + UseDSC, + NumberOfDSCSlices, + SurfaceRequiredDISPCLKWithoutODMCombine, + SurfaceRequiredDISPCLKWithODMCombineTwoToOne, + SurfaceRequiredDISPCLKWithODMCombineThreeToOne, + SurfaceRequiredDISPCLKWithODMCombineFourToOne); + else + DecidedODMMode = ODMUse; + CalculateODMConstraints(DecidedODMMode, + 
SurfaceRequiredDISPCLKWithoutODMCombine, + SurfaceRequiredDISPCLKWithODMCombineTwoToOne, + SurfaceRequiredDISPCLKWithODMCombineThreeToOne, + SurfaceRequiredDISPCLKWithODMCombineFourToOne, + MaximumPixelsPerLinePerDSCUnit, + &DISPCLKRequired, + &NumberOfDPPRequired, + &MaxHActiveForDSC, + &MaxDSCSlices, + &MaxHActiveFor420); + success = ValidateODMMode(DecidedODMMode, + MaxDispclk, + HActive, + OutFormat, + UseDSC, + NumberOfDSCSlices, + TotalNumberOfActiveDPP, + TotalNumberOfActiveOPP, + MaxNumDPP, + MaxNumOPP, + DISPCLKRequired, + NumberOfDPPRequired, + MaxHActiveForDSC, + MaxDSCSlices, + MaxHActiveFor420); + + *ODMMode = DecidedODMMode; + *TotalAvailablePipesSupport = success; + *NumberOfDPP = NumberOfDPPRequired; + *RequiredDISPCLKPerSurface = success ? DISPCLKRequired : 0; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: ODMMode = %d\n", __func__, *ODMMode); + DML_LOG_VERBOSE("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP); + DML_LOG_VERBOSE("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport); + DML_LOG_VERBOSE("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface); +#endif +} + +static noinline_for_stack void CalculateOutputLink( + struct dml2_core_internal_scratch *s, + double PHYCLK, + double PHYCLKD18, + double PHYCLKD32, + double Downspreading, + enum dml2_output_encoder_class Output, + enum dml2_output_format_class OutputFormat, + unsigned int HTotal, + unsigned int HActive, + double PixelClockBackEnd, + double ForcedOutputLinkBPP, + unsigned int DSCInputBitPerComponent, + unsigned int NumberOfDSCSlices, + double AudioSampleRate, + unsigned int AudioSampleLayout, + enum dml2_odm_mode ODMModeNoDSC, + enum dml2_odm_mode ODMModeDSC, + enum dml2_dsc_enable_option DSCEnable, + unsigned int OutputLinkDPLanes, + enum dml2_output_link_dp_rate OutputLinkDPRate, + + // Output + bool *RequiresDSC, + bool *RequiresFEC, + double *OutBpp, + enum dml2_core_internal_output_type *OutputType, + enum dml2_core_internal_output_type_rate *OutputRate, + unsigned int *RequiredSlots) +{ + bool LinkDSCEnable; + unsigned int dummy; + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = 0; + + *OutputType = dml2_core_internal_output_type_unknown; + *OutputRate = dml2_core_internal_output_rate_unknown; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable); + DML_LOG_VERBOSE("DML::%s: PHYCLK = %f\n", __func__, PHYCLK); + DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + DML_LOG_VERBOSE("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate); + DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC); + DML_LOG_VERBOSE("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC); + DML_LOG_VERBOSE("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP); + DML_LOG_VERBOSE("DML::%s: Output (encoder) = %u\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate); +#endif + { + if (Output == dml2_hdmi) { + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, 
ODMModeDSC, &dummy); + //OutputTypeAndRate = "HDMI"; + *OutputType = dml2_core_internal_output_type_hdmi; + } else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) { + if (DSCEnable == dml2_dsc_enable) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp || Output == dml2_dp2p0) { + *RequiresFEC = true; + } else { + *RequiresFEC = false; + } + } else { + *RequiresDSC = false; + LinkDSCEnable = false; + if (Output == dml2_dp2p0) { + *RequiresFEC = true; + } else { + *RequiresFEC = false; + } + } + if (Output == dml2_dp2p0) { + *OutBpp = 0; + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000.0 / 32) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLKD32 < 13500.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR10"; + *OutputType = dml2_core_internal_output_type_dp2p0; + *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500.0 / 32) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLKD32 < 20000.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR13p5"; + *OutputType = dml2_core_internal_output_type_dp2p0; + *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000.0 / 32) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = 
TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR20"; + *OutputType = dml2_core_internal_output_type_dp2p0; + *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20; + } + } else { // output is dp or edp + *OutBpp = 0; + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR"; + *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; + *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR2"; + *OutputType = (Output == dml2_dp) ? 
dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; + *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR3"; + *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; + *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3; + } + } + } else if (Output == dml2_hdmifrl) { + if (DSCEnable == dml2_dsc_enable) { + *RequiresDSC = true; + LinkDSCEnable = true; + *RequiresFEC = true; + } else { + *RequiresDSC = false; + LinkDSCEnable = false; + *RequiresFEC = false; + } + *OutBpp = 0; + if (PHYCLKD18 >= 3000.0 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "3x3"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3; + } + if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "6x3"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3; + } + if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "6x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4; + } + if (*OutBpp == 0 && PHYCLKD18 >= 8000.0 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "8x4"; + *OutputType = 
dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4; + } + if (*OutBpp == 0 && PHYCLKD18 >= 10000.0 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000.0 / 18) { + *RequiresDSC = true; + LinkDSCEnable = true; + *RequiresFEC = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + } + //OutputTypeAndRate = Output & "10x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4; + } + if (*OutBpp == 0 && PHYCLKD18 >= 12000.0 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *RequiresFEC = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + } + //OutputTypeAndRate = Output & "12x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4; + } + } + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC); + DML_LOG_VERBOSE("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC); + DML_LOG_VERBOSE("DML::%s: OutBpp = %f\n", __func__, *OutBpp); +#endif +} + +static double CalculateWriteBackDISPCLK( + enum dml2_source_format_class WritebackPixelFormat, + double PixelClock, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackHTaps, + unsigned int WritebackVTaps, + unsigned int WritebackSourceWidth, + unsigned int WritebackDestinationWidth, + unsigned int HTotal, + unsigned int WritebackLineBufferSize) +{ + double DISPCLK_H, DISPCLK_V, DISPCLK_HB; + + DISPCLK_H = PixelClock * math_ceil2((double)WritebackHTaps / 8.0, 1) / WritebackHRatio; + DISPCLK_V = PixelClock * (WritebackVTaps * math_ceil2((double)WritebackDestinationWidth / 6.0, 1) + 8.0) / (double)HTotal; + DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (double)WritebackSourceWidth; + return math_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); +} + +static double RequiredDTBCLK( + bool DSCEnable, + double PixelClock, + enum dml2_output_format_class OutputFormat, + double OutputBpp, + unsigned int DSCSlices, + unsigned int HTotal, + unsigned int HActive, + unsigned int AudioRate, + unsigned int AudioLayout) +{ + if (DSCEnable != true) { + return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); + } else { + double 
PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2); + double HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * math_ceil2(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); + double HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); + double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; + double HActiveTribyteRate = PixelWordRate * HCActive / HActive; + return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; + } +} + +static unsigned int DSCDelayRequirement( + bool DSCEnabled, + enum dml2_odm_mode ODMMode, + unsigned int DSCInputBitPerComponent, + double OutputBpp, + unsigned int HActive, + unsigned int HTotal, + unsigned int NumberOfDSCSlices, + enum dml2_output_format_class OutputFormat, + enum dml2_output_encoder_class Output, + double PixelClock, + double PixelClockBackEnd) +{ + unsigned int DSCDelayRequirement_val = 0; + unsigned int NumberOfDSCSlicesFactor = 1; + + if (DSCEnabled == true && OutputBpp != 0) { + + if (ODMMode == dml2_odm_mode_combine_4to1) + NumberOfDSCSlicesFactor = 4; + else if (ODMMode == dml2_odm_mode_combine_3to1) + NumberOfDSCSlicesFactor = 3; + else if (ODMMode == dml2_odm_mode_combine_2to1) + NumberOfDSCSlicesFactor = 2; + + DSCDelayRequirement_val = NumberOfDSCSlicesFactor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0)), + (NumberOfDSCSlices / NumberOfDSCSlicesFactor), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output)); + + DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val + (HTotal - HActive) * math_ceil2((double)DSCDelayRequirement_val / (double)HActive, 1.0)); + DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd); + + } else { + DSCDelayRequirement_val = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled); + DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, ODMMode); + DML_LOG_VERBOSE("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); + DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); + DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + DML_LOG_VERBOSE("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); + DML_LOG_VERBOSE("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); + DML_LOG_VERBOSE("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); + DML_LOG_VERBOSE("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); +#endif + + return DSCDelayRequirement_val; +} + +static void CalculateSurfaceSizeInMall( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCN, + unsigned int BytesPerPixelY[], + unsigned int BytesPerPixelC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int ReadBlockWidthY[], + unsigned int ReadBlockWidthC[], + unsigned int ReadBlockHeightY[], + unsigned int ReadBlockHeightC[], + + // Output + unsigned int SurfaceSizeInMALL[], + bool *ExceededMALLSize) +{ + unsigned int TotalSurfaceSizeInMALLForSS = 0; + unsigned int TotalSurfaceSizeInMALLForSubVP = 0; 
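+	/* MALLAllocatedForDCN is in Mbytes; the per-surface totals below are in bytes. */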
+ unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition; + const struct dml2_surface_cfg *surface = &display_cfg->plane_descriptors[k].surface; + + if (composition->viewport.stationary) { + SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]), + math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - + math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) * + math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]), + math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - + math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]); + + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]), + math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - + math_floor2((double)composition->viewport.plane1.y_start, ReadBlockWidthC[k])) * + math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]), + math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - + math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]); + } + } else { + SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * + math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]); + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * + math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]); + } + } + } + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + /* SS and Subvp counted separate as they are never used at the same time */ + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) + TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k]; + else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable) + TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k]; + } + + *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) || + (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS); + DML_LOG_VERBOSE("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize); +#endif +} + +static void 
calculate_tdlut_setting( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_calculate_tdlut_setting_params *p) +{ + // locals + unsigned int tdlut_bpe = 8; + unsigned int tdlut_width; + unsigned int tdlut_pitch_bytes; + unsigned int tdlut_footprint_bytes; + unsigned int vmpg_bytes; + unsigned int tdlut_vmpg_per_frame; + unsigned int tdlut_pte_req_per_frame; + unsigned int tdlut_bytes_per_line; + double tdlut_drain_rate; + unsigned int tdlut_mpc_width; + unsigned int tdlut_bytes_per_group_simple; + + if (!p->setup_for_tdlut) { + *p->tdlut_groups_per_2row_ub = 0; + *p->tdlut_opt_time = 0; + *p->tdlut_drain_time = 0; + *p->tdlut_bytes_to_deliver = 0; + *p->tdlut_bytes_per_group = 0; + *p->tdlut_pte_bytes_per_frame = 0; + *p->tdlut_bytes_per_frame = 0; + return; + } + + if (p->tdlut_mpc_width_flag) { + tdlut_mpc_width = 33; + tdlut_bytes_per_group_simple = 39*256; + } else { + tdlut_mpc_width = 17; + tdlut_bytes_per_group_simple = 10*256; + } + + vmpg_bytes = p->gpuvm_page_size_kbytes * 1024; + + if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear) { + if (p->tdlut_width_mode == dml2_tdlut_width_17_cube) + tdlut_width = 4916; + else + tdlut_width = 35940; + } else { + if (p->tdlut_width_mode == dml2_tdlut_width_17_cube) + tdlut_width = 17; + else // dml2_tdlut_width_33_cube + tdlut_width = 33; + } + + if (p->is_gfx11) + tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); //256B alignment + else + tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 128); //128B alignment + + if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) + tdlut_footprint_bytes = tdlut_pitch_bytes * tdlut_width * tdlut_width; + else + tdlut_footprint_bytes = tdlut_pitch_bytes; + + if (!p->gpuvm_enable) { + tdlut_vmpg_per_frame = 0; + tdlut_pte_req_per_frame = 0; + } else { + tdlut_vmpg_per_frame = (unsigned int)math_ceil2(tdlut_footprint_bytes - 1, vmpg_bytes) / vmpg_bytes + 1; + tdlut_pte_req_per_frame = (unsigned int)math_ceil2(tdlut_vmpg_per_frame - 1, 8) / 8 + 1; + } + tdlut_bytes_per_line = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 64); //64b request + *p->tdlut_pte_bytes_per_frame = tdlut_pte_req_per_frame * 64; + + if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) { + //the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice + *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width; + *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; + //the delivery cycles is DispClk cycles per line * number of lines * number of slices + //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; + tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); + } else { + //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements + *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); + *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple; + //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1); + tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz; + } + + //the tdlut is fetched during the 2 row times of prefetch. 
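+	// For example, sw_linear addressing of a 17x17x17 LUT (tdlut_mpc_width = 17)
+	// gives tdlut_bytes_per_line = ceil(17 * 8, 64) = 192 bytes, so
+	// tdlut_bytes_per_frame = 192 * 17 * 17 = 55488, tdlut_bytes_per_group = 3264
+	// and tdlut_groups_per_2row_ub = 55488 / 3264 = 17.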
+ if (p->setup_for_tdlut) { + *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); + if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024) + *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + else + *p->tdlut_opt_time = 0; + *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; + *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0); + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame); + + DML_LOG_VERBOSE("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); + DML_LOG_VERBOSE("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); + DML_LOG_VERBOSE("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); + DML_LOG_VERBOSE("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate); + DML_LOG_VERBOSE("DML::%s: tdlut_delivery_cycles = %u\n", __func__, p->tdlut_addressing_mode == dml2_tdlut_sw_linear ? 
(unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width : (unsigned int)math_ceil2(tdlut_width/2.0, 1)); + DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver); + DML_LOG_VERBOSE("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); +#endif +} + +static void CalculateTarb( + const struct dml2_display_cfg *display_cfg, + unsigned int PixelChunkSizeInKByte, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + unsigned int tdlut_bytes_per_group[], + double HostVMInefficiencyFactor, + double HostVMInefficiencyFactorPrefetch, + unsigned int HostVMMinPageSize, + double ReturnBW, + unsigned int MetaChunkSize, + + // output + double *Tarb, + double *Tarb_prefetch) +{ + double extra_bytes = 0; + double extra_bytes_prefetch = 0; + double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_non_cached_page_table_levels); + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + extra_bytes = extra_bytes + (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024); + + if (display_cfg->plane_descriptors[k].surface.dcc.enable) + extra_bytes = extra_bytes + (MetaChunkSize * 1024); + + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + extra_bytes = extra_bytes + tdlut_bytes_per_group[k]; + } + + extra_bytes_prefetch = extra_bytes; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (display_cfg->gpuvm_enable == true) { + extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; + extra_bytes_prefetch = extra_bytes_prefetch + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactorPrefetch; + } + } + *Tarb = extra_bytes / ReturnBW; + *Tarb_prefetch = extra_bytes_prefetch / ReturnBW; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize); + DML_LOG_VERBOSE("DML::%s: extra_bytes = %f\n", __func__, extra_bytes); + DML_LOG_VERBOSE("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch); +#endif +} + +static double CalculateTWait( + long reserved_vblank_time_ns, + double UrgentLatency, + double Ttrip, + double g6_temp_read_blackout_us) +{ + double TWait; + double t_urg_trip = math_max2(UrgentLatency, Ttrip); + TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, Ttrip); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, TWait); +#endif + return TWait; +} + + +static void CalculateVUpdateAndDynamicMetadataParameters( + unsigned int MaxInterDCNTileRepeaters, + double Dppclk, + double Dispclk, + double DCFClkDeepSleep, + double PixelClock, + unsigned int HTotal, + unsigned int VBlank, + unsigned int DynamicMetadataTransmittedBytes, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int InterlaceEnable, + bool 
ProgressiveToInterlaceUnitInOPP, + + // Output + double *TSetup, + double *Tdmbf, + double *Tdmec, + double *Tdmsks, + unsigned int *VUpdateOffsetPix, + unsigned int *VUpdateWidthPix, + unsigned int *VReadyOffsetPix) +{ + double TotalRepeaterDelayTime; + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); + *VUpdateWidthPix = (unsigned int)(math_ceil2((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0)); + *VReadyOffsetPix = (unsigned int)(math_ceil2(math_max2(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0)); + *VUpdateOffsetPix = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0)); + *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; + *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; + *Tdmec = HTotal / PixelClock; + + if (DynamicMetadataLinesBeforeActiveRequired == 0) { + *Tdmsks = VBlank * HTotal / PixelClock / 2.0; + } else { + *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; + } + if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { + *Tdmsks = *Tdmsks / 2; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); + DML_LOG_VERBOSE("DML::%s: VBlank = %u\n", __func__, VBlank); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); + DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, Dppclk); + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); + DML_LOG_VERBOSE("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); + DML_LOG_VERBOSE("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); + + DML_LOG_VERBOSE("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); + DML_LOG_VERBOSE("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); + DML_LOG_VERBOSE("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); + + DML_LOG_VERBOSE("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); +#endif +} + +static double get_urgent_bandwidth_required( + struct dml2_core_shared_get_urgent_bandwidth_required_locals *l, + const struct dml2_display_cfg *display_cfg, + enum dml2_core_internal_soc_state_type state_type, + enum dml2_core_internal_bw_type bw_type, + bool inc_flip_bw, // including flip bw + bool use_qual_row_bw, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + double dcc_dram_bw_nom_overhead_factor_p0[], + double dcc_dram_bw_nom_overhead_factor_p1[], + double dcc_dram_bw_pref_overhead_factor_p0[], + double dcc_dram_bw_pref_overhead_factor_p1[], + double mall_prefetch_sdp_overhead_factor[], + double mall_prefetch_dram_overhead_factor[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double PrefetchBandwidthMax[], + double excess_vactive_fill_bw_l[], + double excess_vactive_fill_bw_c[], + double cursor_bw[], + double dpte_row_bw[], + double meta_row_bw[], + double prefetch_cursor_bw[], + double prefetch_vmrow_bw[], + double flip_bw[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[], + /* outputs */ + double surface_required_bw[], + double 
surface_peak_required_bw[])
+{
+ // set inc_flip_bw = 0 for total_dchub_urgent_read_bw_noflip calculation, 1 for total_dchub_urgent_read_bw as described in the MAS
+ // set use_qual_row_bw = 1 to calculate using qualified row bandwidth, used for total_flip_bw calculation
+
+ memset(l, 0, sizeof(struct dml2_core_shared_get_urgent_bandwidth_required_locals));
+
+ for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+ l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0;
+ l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
+ l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
+ l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
+ l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
+
+ l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0;
+ l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1;
+ l->adj_factor_cur = UrgentBurstFactorCursor[k];
+ l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0;
+ l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1;
+ l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k];
+
+ bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]);
+ bool exclude_this_plane = false;
+
+ // Exclude phantom pipes from the bw calculation for non-svp-prefetch states
+ if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom)
+ exclude_this_plane = true;
+
+ // The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip.
+ // qual_row_bw is zero when HostVM is enabled; otherwise it is non-zero and equal to row_bw(i) only when immediate flip is not allowed for that pipe.
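+ // Summary of the per_plane_flip_bw[k] selection below (derived from the
+ // branches that follow; per_plane_flip_bw[k] starts at 0 from the memset
+ // above, and DPP stands for NumberOfDPP[k]):
+ //   use_qual_row_bw && hostvm_enable                     -> 0
+ //   use_qual_row_bw && !hostvm_enable && !immediate_flip -> DPP * (dpte_row_bw + meta_row_bw)
+ //   use_qual_row_bw && !hostvm_enable && immediate_flip  -> 0 (left at memset value)
+ //   !use_qual_row_bw && ((!immediate_flip && !hostvm_enable) || !inc_flip_bw)
+ //                                                        -> DPP * (dpte_row_bw + meta_row_bw)
+ //   !use_qual_row_bw otherwise (flip bw included)        -> DPP * flip_bw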
+ if (use_qual_row_bw) { + if (display_cfg->hostvm_enable) + l->per_plane_flip_bw[k] = 0; // qual_row_bw + else if (!display_cfg->plane_descriptors[k].immediate_flip) + l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]); + } else { + // the final_flip_bw includes the regular row_bw when immediate flip is disallowed (and no HostVM) + if ((!display_cfg->plane_descriptors[k].immediate_flip && !display_cfg->hostvm_enable) || !inc_flip_bw) + l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]); + else + l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k]; + } + + if (!exclude_this_plane) { + l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k]; + l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur; + l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; + l->flip_and_prefetch_bw_max = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthMax[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; + l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]; + surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_max); + + /* export peak required bandwidth for the surface */ + surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); +#endif + } else { + surface_required_bw[k] = 0.0; + } + + l->required_bandwidth_mbps += surface_required_bw[k]; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); + DML_LOG_VERBOSE("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur); + + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur_pre=%f\n", 
__func__, k, l->adj_factor_cur_pre);
+
+ DML_LOG_VERBOSE("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]);
+
+ DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane);
+#endif
+ }
+
+ return l->required_bandwidth_mbps;
+}
+
+static void CalculateExtraLatency(
+ const struct dml2_display_cfg *display_cfg,
+ unsigned int ROBBufferSizeInKByte,
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ double DCFCLK,
+ double FabricClock,
+ unsigned int PixelChunkSizeInKByte,
+ double ReturnBW,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ unsigned int tdlut_bytes_per_group[],
+ double HostVMInefficiencyFactor,
+ double HostVMInefficiencyFactorPrefetch,
+ unsigned int HostVMMinPageSize,
+ enum dml2_qos_param_type qos_type,
+ bool max_outstanding_when_urgent_expected,
+ unsigned int max_outstanding_requests,
+ unsigned int request_size_bytes_luma[],
+ unsigned int request_size_bytes_chroma[],
+ unsigned int MetaChunkSize,
+ unsigned int dchub_arb_to_ret_delay,
+ double Ttrip,
+ unsigned int hostvm_mode,
+
+ // output
+ double *ExtraLatency, // Tex
+ double *ExtraLatency_sr, // Tex_sr
+ double *ExtraLatencyPrefetch)
+{
+ double Tarb;
+ double Tarb_prefetch;
+ double Tex_trips;
+ unsigned int max_request_size_bytes = 0;
+
+ CalculateTarb(
+ display_cfg,
+ PixelChunkSizeInKByte,
+ NumberOfActiveSurfaces,
+ NumberOfDPP,
+ dpte_group_bytes,
+ tdlut_bytes_per_group,
+ HostVMInefficiencyFactor,
+ HostVMInefficiencyFactorPrefetch,
+ HostVMMinPageSize,
+ ReturnBW,
+ MetaChunkSize,
+ // output
+ &Tarb,
+ &Tarb_prefetch);
+
+ Tex_trips = (display_cfg->hostvm_enable && hostvm_mode == 1) ? 
(2.0 * Ttrip) : 0.0; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (request_size_bytes_luma[k] > max_request_size_bytes) + max_request_size_bytes = request_size_bytes_luma[k]; + if (request_size_bytes_chroma[k] > max_request_size_bytes) + max_request_size_bytes = request_size_bytes_chroma[k]; + } + + if (qos_type == dml2_qos_param_type_dcn4x) { + *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; + *ExtraLatency = *ExtraLatency_sr; + if (max_outstanding_when_urgent_expected) + *ExtraLatency = *ExtraLatency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * max_request_size_bytes) / ReturnBW; + } else { + *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW; + *ExtraLatency = *ExtraLatency_sr; + } + *ExtraLatency = *ExtraLatency + Tex_trips; + *ExtraLatencyPrefetch = *ExtraLatency + Tarb_prefetch; + *ExtraLatency = *ExtraLatency + Tarb; + *ExtraLatency_sr = *ExtraLatency_sr + Tarb; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: qos_type=%u\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + DML_LOG_VERBOSE("DML::%s: Tex_trips=%f\n", __func__, Tex_trips); + DML_LOG_VERBOSE("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected); + DML_LOG_VERBOSE("DML::%s: FabricClock=%f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); + DML_LOG_VERBOSE("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); + DML_LOG_VERBOSE("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + DML_LOG_VERBOSE("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); + DML_LOG_VERBOSE("DML::%s: Tarb=%f\n", __func__, Tarb); + DML_LOG_VERBOSE("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); + DML_LOG_VERBOSE("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch); +#endif +} + +static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p) +{ + struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals; + bool dcc_mrq_enable; + + unsigned int vm_bytes; + unsigned int extra_tdpe_bytes; + unsigned int tdlut_row_bytes; + unsigned int Lo; + + s->NoTimeToPrefetch = false; + s->DPPCycles = 0; + s->DISPCLKCycles = 0; + s->DSTTotalPixelsAfterScaler = 0.0; + s->LineTime = 0.0; + s->dst_y_prefetch_equ = 0.0; + s->prefetch_bw_oto = 0.0; + s->Tvm_oto = 0.0; + s->Tr0_oto = 0.0; + s->Tvm_oto_lines = 0.0; + s->Tr0_oto_lines = 0.0; + s->dst_y_prefetch_oto = 0.0; + s->TimeForFetchingVM = 0.0; + s->TimeForFetchingRowInVBlank = 0.0; + s->LinesToRequestPrefetchPixelData = 0.0; + s->HostVMDynamicLevelsTrips = 0; + s->trip_to_mem = 0.0; + *p->Tvm_trips = 0.0; + *p->Tr0_trips = 0.0; + s->Tvm_trips_rounded = 0.0; + s->Tr0_trips_rounded = 0.0; + s->max_Tsw = 0.0; + s->Lsw_oto = 0.0; + *p->Tpre_rounded = 0.0; + s->prefetch_bw_equ = 0.0; + s->Tvm_equ = 0.0; + s->Tr0_equ = 0.0; + s->Tdmbf = 0.0; + s->Tdmec = 0.0; + s->Tdmsks = 0.0; + *p->prefetch_sw_bytes = 0.0; + s->prefetch_bw_pr = 0.0; + s->bytes_pp = 0.0; + s->dep_bytes = 0.0; + s->min_Lsw_oto = 0.0; + s->min_Lsw_equ = 0.0; + s->Tsw_est1 = 0.0; + s->Tsw_est2 = 0.0; + s->Tsw_est3 = 0.0; + s->cursor_prefetch_bytes = 0; + *p->prefetch_cursor_bw = 0; + *p->RequiredPrefetchBWMax = 0.0; + + dcc_mrq_enable = 
(p->dcc_enable && p->mrq_present); + + s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) and Ttrip here is already max(Turg, Ttrip) + + if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) { + s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_non_cached_page_table_levels; + } else { + s->HostVMDynamicLevelsTrips = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable); + DML_LOG_VERBOSE("DML::%s: mrq_present = %u\n", __func__, p->mrq_present); + DML_LOG_VERBOSE("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); + DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); + DML_LOG_VERBOSE("DML::%s: VStartup = %u\n", __func__, p->VStartup); + DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); + DML_LOG_VERBOSE("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk); +#endif + CalculateVUpdateAndDynamicMetadataParameters( + p->MaxInterDCNTileRepeaters, + p->myPipe->Dppclk, + p->myPipe->Dispclk, + p->myPipe->DCFClkDeepSleep, + p->myPipe->PixelClock, + p->myPipe->HTotal, + p->myPipe->VBlank, + p->DynamicMetadataTransmittedBytes, + p->DynamicMetadataLinesBeforeActiveRequired, + p->myPipe->InterlaceEnable, + p->myPipe->ProgressiveToInterlaceUnitInOPP, + p->TSetup, + + // Output + &s->Tdmbf, + &s->Tdmec, + &s->Tdmsks, + p->VUpdateOffsetPix, + p->VUpdateWidthPix, + p->VReadyOffsetPix); + + s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; + s->trip_to_mem = p->Ttrip; + *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg); + if (dcc_mrq_enable) + *p->Tvm_trips_flip = *p->Tvm_trips; + else + *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; + + *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); + *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2); + + if (p->DynamicMetadataVMEnabled == true) { + *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; + *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; + } else { + *p->Tdmdl_vm = 0; + *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex + } + + if (p->DynamicMetadataEnable == true) { + if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { + *p->NotEnoughTimeForDynamicMetadata = true; + DML_LOG_VERBOSE("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); + DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + } 
else { + *p->NotEnoughTimeForDynamicMetadata = false; + } + } else { + *p->NotEnoughTimeForDynamicMetadata = false; + } + + if (p->myPipe->ScalerEnabled) + s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL); + else + s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly); + + s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor); + + s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal; + + if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0) + return true; + + *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay); + *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH + + ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) + + ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0)); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled); + DML_LOG_VERBOSE("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); + DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); + DML_LOG_VERBOSE("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); + DML_LOG_VERBOSE("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); + DML_LOG_VERBOSE("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); + DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); + DML_LOG_VERBOSE("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); + + DML_LOG_VERBOSE("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); + DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); + DML_LOG_VERBOSE("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time); +#endif + + if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) + *p->DSTYAfterScaler = 1; + else + *p->DSTYAfterScaler = 0; + + s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler; + *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1)); + *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal))); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); +#endif + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, 
p->display_cfg->gpuvm_max_page_table_levels);
+ DML_LOG_VERBOSE("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips);
+#endif
+ if (p->display_cfg->gpuvm_enable) {
+ s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ } else {
+ if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut)
+ s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0);
+ else
+ s->Tvm_trips_rounded = s->LineTime / 4.0;
+ *p->Tvm_trips_flip_rounded = s->LineTime / 4.0;
+ }
+
+ s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0);
+ *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0);
+
+ if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) {
+ s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ } else {
+ s->Tr0_trips_rounded = s->LineTime / 4.0;
+ *p->Tr0_trips_flip_rounded = s->LineTime / 4.0;
+ }
+ s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0);
+ *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0);
+
+ if (p->display_cfg->gpuvm_enable == true) {
+ if (p->display_cfg->gpuvm_max_page_table_levels >= 3) {
+ *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1));
+ } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) {
+ *p->Tno_bw = p->ExtraLatencyPrefetch;
+ } else {
+ *p->Tno_bw = 0;
+ }
+ } else {
+ *p->Tno_bw = 0;
+ }
+
+ if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3)
+ *p->Tno_bw_flip = *p->Tno_bw;
+ else
+ *p->Tno_bw_flip = 0; // because there is no 3DLUT for iFlip
+
+ if (dml_is_420(p->myPipe->SourcePixelFormat)) {
+ s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0;
+ } else {
+ s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
+ }
+
+ *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
+ *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor;
+
+ vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes;
+ extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128);
+
+ if (p->setup_for_tdlut)
+ vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0);
+
+ tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0);
+
+ s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__;
+ s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime);
+ s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0);
+
+ // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto
+ // Note: in prefetch calculation, accounting is done mostly per-pipe.
+ // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time
+ s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface;
+
+ // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1)
+ s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime;
+
+ if (p->myPipe->BytePerPixelC > 0) {
+ s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface;
+ s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime;
+ }
+
+ /* oto prefetch bw should always be less than total vactive bw */
+ //DML_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface);
+
+ s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor;
+
+ s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime));
+
+ s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0;
+
+ s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto,
+ p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
+
+ /* oto bw needs to be output even if the oto schedule isn't being used, to avoid ms/mp mismatch.
+ * mp will fail if ms decides to use the equ schedule and mp decides to use the oto schedule
+ * and the required bandwidth increases when going from ms to mp
+ */
+ *p->RequiredPrefetchBWMax = s->prefetch_bw_oto;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l);
+ DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c);
+ DML_LOG_VERBOSE("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw);
+#endif
+
+ if (p->display_cfg->gpuvm_enable == true) {
+ s->Tvm_oto = math_max3(
+ *p->Tvm_trips,
+ *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
+ s->LineTime / 4.0);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips);
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto);
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0);
+#endif
+ } else {
+ s->Tvm_oto = s->Tvm_trips_rounded;
+ }
+
+ if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) {
+ s->Tr0_oto = math_max3(
+ *p->Tr0_trips,
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto,
+ s->LineTime / 4.0);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips);
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto);
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4);
+#endif
+ } else
+ s->Tr0_oto = s->LineTime / 4.0;
+
+ s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
+ s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
+ s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
+
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ DML_LOG_VERBOSE("DML::%s: impacted_Tpre = %f\n", __func__, 
p->impacted_dst_y_pre); + if (p->impacted_dst_y_pre > 0) { + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto); + } +#endif + *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime; + + //To (time for delay after scaler) in line time + Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal); + + s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); + //Tpre_equ in line time + if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable) + s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo; + else + s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo; + +#ifdef DML_GLOBAL_PREFETCH_CHECK + s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ); + + s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH + + if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ) + s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ; +#endif + + s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); + DML_LOG_VERBOSE("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); + DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + DML_LOG_VERBOSE("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); + DML_LOG_VERBOSE("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes); + DML_LOG_VERBOSE("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); + DML_LOG_VERBOSE("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", 
__func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); + DML_LOG_VERBOSE("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); + DML_LOG_VERBOSE("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes); +#endif + s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; + *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: LineTime: %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: VStartup: %u\n", __func__, p->VStartup); + DML_LOG_VERBOSE("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); + DML_LOG_VERBOSE("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); + DML_LOG_VERBOSE("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); + DML_LOG_VERBOSE("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm); + DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + DML_LOG_VERBOSE("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); + DML_LOG_VERBOSE("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, 
p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); + DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, (s->dst_y_prefetch_equ * s->LineTime), *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime))); + DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); +#endif + + *p->dst_y_per_vm_vblank = 0; + *p->dst_y_per_row_vblank = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + + // Derive bandwidth by finding how much data to move within the time constraint + // Tpre_rounded is Tpre rounding to 2-bit fraction + // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time + // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time + // So that means prefetch bw calculated can be higher since the total time available for prefetch is less + bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; + bool tpre_gt_req_latency = true; +#if 0 + // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained. + // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages. + // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary. + tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch); +#endif + + if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) { + s->prefetch_bw1 = 0.; + s->prefetch_bw2 = 0.; + s->prefetch_bw3 = 0.; + s->prefetch_bw4 = 0.; + + // prefetch_bw1: VM + 2*R0 + SW + if (*p->Tpre_rounded - *p->Tno_bw > 0) { + s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + + *p->prefetch_sw_bytes) + / (*p->Tpre_rounded - *p->Tno_bw); + s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1; + } else + s->prefetch_bw1 = 0; + + DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); + if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { + s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / + (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); + DML_LOG_VERBOSE("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded); + DML_LOG_VERBOSE("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime 
- *p->Tno_bw));
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1);
+#endif
+ }
+
+ // prefetch_bw2: VM + SW
+ if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) {
+ s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) /
+ (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded);
+ s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2;
+ } else
+ s->prefetch_bw2 = 0;
+
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2);
+ if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) {
+ s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2);
+ }
+
+ // prefetch_bw3: 2*R0 + SW
+ if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) {
+ s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) /
+ (*p->Tpre_rounded - s->Tvm_trips_rounded);
+ s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3;
+ } else
+ s->prefetch_bw3 = 0;
+
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
+ if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
+ s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
+ }
+
+ // prefetch_bw4: SW
+ if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
+ s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
+ else
+ s->prefetch_bw4 = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
+ DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, s->dst_y_prefetch_equ * s->LineTime, *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime)));
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips));
+ DML_LOG_VERBOSE("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
+ DML_LOG_VERBOSE("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2);
+ DML_LOG_VERBOSE("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4);
+#endif
+ {
+ bool Case1OK = false;
+ bool Case2OK = false;
+ bool Case3OK = false;
+
+ // get "equalized" bw among all stages (vm, r0, sw), so that all 3 stages are just above the latency-based requirement
+ // and the schedule does not disproportionately favor a
particular stage; the fallbacks make either r0 or vm more aggressive, and the worst case makes all stages aggressive
+ // relative to the latency-based numbers
+
+ // prefetch_bw1: VM + 2*R0 + SW
+ // prefetch_bw1 will have enough bw to transfer the necessary data within Tpre_rounded - Tno_bw (Tpre is the worst-case latency-based time to fetch the data)
+ // this is to make sure the equ bw won't be more aggressive than the latency-based requirement.
+ // check vm time >= vm_trips
+ // check r0 time >= r0_trips
+
+ double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes);
+
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded);
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded);
+
+ if (s->prefetch_bw1 > 0) {
+ double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1;
+ double row_transfer_time = total_row_bytes / s->prefetch_bw1;
+ DML_LOG_VERBOSE("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time);
+ DML_LOG_VERBOSE("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time);
+ if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
+ Case1OK = true;
+ }
+ }
+
+ // prefetch_bw2: VM + SW
+ // prefetch_bw2 will be enough bw to transfer the VM and SW data within (Tpre_rounded - 2 * Tr0_trips_rounded - Tno_bw)
+ // check vm time >= vm_trips
+ // check r0 time < r0_trips
+ if (s->prefetch_bw2 > 0) {
+ double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2;
+ double row_transfer_time = total_row_bytes / s->prefetch_bw2;
+ DML_LOG_VERBOSE("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time);
+ DML_LOG_VERBOSE("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time);
+ if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) {
+ Case2OK = true;
+ }
+ }
+
+ // prefetch_bw3: 2*R0 + SW
+ // check vm time < vm_trips
+ // check r0 time >= r0_trips
+ if (s->prefetch_bw3 > 0) {
+ double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3;
+ double row_transfer_time = total_row_bytes / s->prefetch_bw3;
+ DML_LOG_VERBOSE("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time);
+ DML_LOG_VERBOSE("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time);
+ if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
+ Case3OK = true;
+ }
+ }
+
+ if (Case1OK) {
+ s->prefetch_bw_equ = s->prefetch_bw1;
+ } else if (Case2OK) {
+ s->prefetch_bw_equ = s->prefetch_bw2;
+ } else if (Case3OK) {
+ s->prefetch_bw_equ = s->prefetch_bw3;
+ } else {
+ s->prefetch_bw_equ = s->prefetch_bw4;
+ }
+
+ s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ,
+ p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Case1OK: %u\n", __func__, Case1OK);
+ DML_LOG_VERBOSE("DML::%s: Case2OK: %u\n", __func__, Case2OK);
+ DML_LOG_VERBOSE("DML::%s: Case3OK: %u\n", __func__, Case3OK);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
+#endif
+
+ if (s->prefetch_bw_equ > 0) {
+ if (p->display_cfg->gpuvm_enable == true) {
+ s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / 
s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4); + } else { + s->Tvm_equ = s->LineTime / 4; + } + + if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) { + s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes + *p->Tr0_trips, + s->LineTime / 4); + } else { + s->Tr0_equ = s->LineTime / 4; + } + } else { + s->Tvm_equ = 0; + s->Tr0_equ = 0; + DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ equals 0!\n", __func__); + } + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); + DML_LOG_VERBOSE("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); +#endif + // Use the more stressful prefetch schedule + if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { + *p->dst_y_prefetch = s->dst_y_prefetch_oto; + s->TimeForFetchingVM = s->Tvm_oto; + s->TimeForFetchingRowInVBlank = s->Tr0_oto; + + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Using oto scheduling for prefetch\n", __func__); +#endif + + } else { + *p->dst_y_prefetch = s->dst_y_prefetch_equ; + + if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted) + *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted; + + s->TimeForFetchingVM = s->Tvm_equ; + s->TimeForFetchingRowInVBlank = s->Tr0_equ; + + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + + /* equ bw should be propagated so a ceiling of the equ bw is accounted for prior to mode programming. + * Overall bandwidth may be lower when going from mode support to mode programming but final pixel data + * bandwidth may end up higher than what was calculated in mode support. 
+ */ + *p->RequiredPrefetchBWMax = math_max2(s->prefetch_bw_equ, *p->RequiredPrefetchBWMax); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Using equ bw scheduling for prefetch\n", __func__); +#endif + } + + // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) + s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw + + s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line); + *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime); + *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us); + + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes); + DML_LOG_VERBOSE("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw); +#endif + DML_ASSERT(*p->dst_y_prefetch < 64); + + unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime); + if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) { + *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData; + *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + DML_LOG_VERBOSE("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); + DML_LOG_VERBOSE("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); +#endif + if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) { + if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) { + *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, + (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); + } else { + s->NoTimeToPrefetch = true; + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + *p->VRatioPrefetchY = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); +#endif + } + + *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData; + *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + DML_LOG_VERBOSE("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); + DML_LOG_VERBOSE("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); +#endif + if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) { + if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) { + *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); + } else { + s->NoTimeToPrefetch = true; + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + *p->VRatioPrefetchC = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + DML_LOG_VERBOSE("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); +#endif + } + + *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime; + *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); +#endif + } else { + s->NoTimeToPrefetch = true; + DML_LOG_VERBOSE("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + *p->RequiredPrefetchPixelDataBWChroma = 0; + } + DML_LOG_VERBOSE("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); + DML_LOG_VERBOSE("DML: Tsw: %fus = time to fetch enough pixel data 
and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime); + DML_LOG_VERBOSE("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime); + DML_LOG_VERBOSE("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n"); + DML_LOG_VERBOSE("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); + DML_LOG_VERBOSE("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); + + } else { + DML_LOG_VERBOSE("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", + __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); + s->NoTimeToPrefetch = true; + s->TimeForFetchingVM = 0; + s->TimeForFetchingRowInVBlank = 0; + *p->dst_y_per_vm_vblank = 0; + *p->dst_y_per_row_vblank = 0; + s->LinesToRequestPrefetchPixelData = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + *p->RequiredPrefetchPixelDataBWChroma = 0; + } + + { + double prefetch_vm_bw; + double prefetch_row_bw; + + if (vm_bytes == 0) { + prefetch_vm_bw = 0; + } else if (*p->dst_y_per_vm_vblank > 0) { +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); +#endif + prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); +#endif + } else { + prefetch_vm_bw = 0; + s->NoTimeToPrefetch = true; + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); + } + + if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { + prefetch_row_bw = 0; + } else if (*p->dst_y_per_row_vblank > 0) { + prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); +#endif + } else { + prefetch_row_bw = 0; + s->NoTimeToPrefetch = true; + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); + } + + *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); + } + + if (s->NoTimeToPrefetch) { + s->TimeForFetchingVM = 0; + s->TimeForFetchingRowInVBlank = 0; + *p->dst_y_per_vm_vblank = 0; + *p->dst_y_per_row_vblank = 0; + *p->dst_y_prefetch = 0; + s->LinesToRequestPrefetchPixelData = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + *p->RequiredPrefetchPixelDataBWChroma = 0; + *p->prefetch_vmrow_bw = 0; + } + + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); + DML_LOG_VERBOSE("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); + + return s->NoTimeToPrefetch; +} + +static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines, + unsigned int line_buffer_size_bits, + unsigned int num_pipes, + unsigned int vp_width, + unsigned int vp_height, + double h_ratio, + enum dml2_rotation_angle rotation_angle) +{ + unsigned int num_lb_source_lines = 0; + double lb_bit_per_pixel = 57.0; + unsigned recin_width = vp_width/num_pipes; + + if (dml_is_vertical_rotation(rotation_angle)) + recin_width = vp_height/num_pipes; + + num_lb_source_lines = (unsigned int) math_min2((double) max_line_buffer_lines, + math_floor2(line_buffer_size_bits / lb_bit_per_pixel / (recin_width / math_max2(h_ratio, 1.0)), 1.0)); + + return num_lb_source_lines; +} + +static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[]) +{ + int max_value = -1; + int max_idx = -1; + for (unsigned int i = 0; i < num_planes; i++) { + if (i != this_plane_idx && (int) Trpd_dcfclk_cycles[i] > max_value) { + max_value = Trpd_dcfclk_cycles[i]; + max_idx = i; + } + } + if (max_idx <= 0) { + DML_ASSERT(max_idx >= 0); + max_idx = this_plane_idx; + } + + return max_idx; +} + +static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps) +{ + double sum = 0.; + for (unsigned int i = 0; i < num_planes; i++) { + if (i != exclude_plane_idx) { + sum += prefetch_swath_bytes[i]; + } + } + return sum / bw_mbps; +} + +// a global check against the aggregate effect of the per plane prefetch schedule +static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p) +{ + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals; + unsigned int i, k; + + memset(s, 0, sizeof(struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals)); + + *p->recalc_prefetch_schedule = 0; + s->prefetch_global_check_passed = 1; + // worst case if the rob and cdb is fully hogged + s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0); +#ifdef __DML_VBA_DEBUG__ + 
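/* debug: dump the ROB/CDB sizing inputs behind the worst-case return-path bound */ 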
DML_LOG_VERBOSE("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes); + DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps); + DML_LOG_VERBOSE("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles); +#endif + + // calculate the return impact from each plane, request is 256B per dcfclk + for (i = 0; i < p->num_active_planes; i++) { + s->src_detile_buf_size_bytes_l[i] = p->detile_buffer_size_bytes_l[i]; + s->src_detile_buf_size_bytes_c[i] = p->detile_buffer_size_bytes_c[i]; + s->src_swath_bytes_l[i] = p->full_swath_bytes_l[i]; + s->src_swath_bytes_c[i] = p->full_swath_bytes_c[i]; + + if (p->pixel_format[i] == dml2_420_10) { + s->src_detile_buf_size_bytes_l[i] = (unsigned int) (s->src_detile_buf_size_bytes_l[i] * 1.5); + s->src_detile_buf_size_bytes_c[i] = (unsigned int) (s->src_detile_buf_size_bytes_c[i] * 1.5); + s->src_swath_bytes_l[i] = (unsigned int) (s->src_swath_bytes_l[i] * 1.5); + s->src_swath_bytes_c[i] = (unsigned int) (s->src_swath_bytes_c[i] * 1.5); + } + + s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[i] / p->chunk_bytes_l, 1) * p->chunk_bytes_l); + s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]); + DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l); + DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det); +#endif + + if (s->src_swath_bytes_c[i] > 0) { // dual_plane + s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[i] / p->chunk_bytes_c, 1) * p->chunk_bytes_c); + + if (p->pixel_format[i] == dml2_422_planar_8 || p->pixel_format[i] == dml2_422_planar_10 || p->pixel_format[i] == dml2_422_planar_12) { + s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[i] / p->swath_height_c[i], 1) * s->src_swath_bytes_c[i]); + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c); + DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]); +#endif + } + + s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz); // fill time assume full burst at request rate + s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 
64B per DCFClk
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det);
+ DML_LOG_VERBOSE("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us);
+ DML_LOG_VERBOSE("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]);
+#endif
+ // clamping to the worst case delay, which is the one that occupies the full rob+cdb
+ if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles)
+ s->accumulated_return_path_dcfclk_cycles[i] = s->max_Trpd_dcfclk_cycles;
+ }
+
+ // Figure out the impacted prefetch time for each plane
+ // if impacted_Tpre is > equ bw Tpre, we need to fail the prefetch schedule as we need a higher state to support the bw
+ for (i = 0; i < p->num_active_planes; i++) {
+ k = find_max_impact_plane(i, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles); // plane k causes most impact to plane i
+ // the rest of the planes (except for k) compete for bw
+ p->impacted_dst_y_pre[i] = s->accumulated_return_path_dcfclk_cycles[k]/p->estimated_dcfclk_mhz;
+ p->impacted_dst_y_pre[i] += calculate_impacted_Tsw(k, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps);
+ p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k);
+#endif
+ }
+
+ if (p->Tpre_rounded != NULL && p->Tpre_oto != NULL) {
+ for (i = 0; i < p->num_active_planes; i++) {
+ if (p->impacted_dst_y_pre[i] > p->dst_y_prefetch[i]) {
+ s->prefetch_global_check_passed = 0;
+ *p->recalc_prefetch_schedule = 1;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]);
+ DML_LOG_VERBOSE("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]);
+#endif
+ }
+ } else {
+ // likely a mode programming call; assume support and no recalc - not used anyway
+ s->prefetch_global_check_passed = 1;
+ *p->recalc_prefetch_schedule = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed);
+ DML_LOG_VERBOSE("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule);
+#endif
+
+ return s->prefetch_global_check_passed;
+}
+
+static void calculate_peak_bandwidth_required(
+ struct dml2_core_internal_scratch *s,
+ struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p)
+{
+ unsigned int n;
+ unsigned int m;
+
+ struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals;
+
+ memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw);
+ DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes);
+#endif
+
+ for (unsigned int k = 0; k < p->num_active_planes; ++k) {
+ l->unity_array[k] = 1.0;
+ l->zero_array[k] = 0.0;
+ }
+
+ for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+ for (n = 0; n < dml2_core_internal_bw_max; n++) {
+ get_urgent_bandwidth_required(
+ &s->get_urgent_bandwidth_required_locals,
+ p->display_cfg,
+ m,
+ n,
+ 0, //inc_flip_bw,
+ 0, //use_qual_row_bw
+ p->num_active_planes,
+ p->num_of_dpp,
+ p->dcc_dram_bw_nom_overhead_factor_p0,
+ 
p->dcc_dram_bw_nom_overhead_factor_p1, + p->dcc_dram_bw_pref_overhead_factor_p0, + p->dcc_dram_bw_pref_overhead_factor_p1, + p->mall_prefetch_sdp_overhead_factor, + p->mall_prefetch_dram_overhead_factor, + p->surface_read_bandwidth_l, + p->surface_read_bandwidth_c, + l->zero_array, //PrefetchBandwidthLuma, + l->zero_array, //PrefetchBandwidthChroma, + l->zero_array, //PrefetchBWMax + l->zero_array, + l->zero_array, + l->zero_array, + p->dpte_row_bw, + p->meta_row_bw, + l->zero_array, //prefetch_cursor_bw, + l->zero_array, //prefetch_vmrow_bw, + l->zero_array, //flip_bw, + l->zero_array, + l->zero_array, + l->zero_array, + l->zero_array, + l->zero_array, + l->zero_array, + p->surface_avg_vactive_required_bw[m][n], + p->surface_peak_required_bw[m][n]); + + p->urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + p->display_cfg, + m, + n, + 0, //inc_flip_bw, + 0, //use_qual_row_bw + p->num_active_planes, + p->num_of_dpp, + p->dcc_dram_bw_nom_overhead_factor_p0, + p->dcc_dram_bw_nom_overhead_factor_p1, + p->dcc_dram_bw_pref_overhead_factor_p0, + p->dcc_dram_bw_pref_overhead_factor_p1, + p->mall_prefetch_sdp_overhead_factor, + p->mall_prefetch_dram_overhead_factor, + p->surface_read_bandwidth_l, + p->surface_read_bandwidth_c, + l->zero_array, //PrefetchBandwidthLuma, + l->zero_array, //PrefetchBandwidthChroma, + l->zero_array, //PrefetchBWMax + p->excess_vactive_fill_bw_l, + p->excess_vactive_fill_bw_c, + p->cursor_bw, + p->dpte_row_bw, + p->meta_row_bw, + l->zero_array, //prefetch_cursor_bw, + l->zero_array, //prefetch_vmrow_bw, + l->zero_array, //flip_bw, + p->urgent_burst_factor_l, + p->urgent_burst_factor_c, + p->urgent_burst_factor_cursor, + p->urgent_burst_factor_prefetch_l, + p->urgent_burst_factor_prefetch_c, + p->urgent_burst_factor_prefetch_cursor, + l->surface_dummy_bw, + p->surface_peak_required_bw[m][n]); + + p->urg_bandwidth_required[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + p->display_cfg, + m, + n, + p->inc_flip_bw, + 0, //use_qual_row_bw + p->num_active_planes, + p->num_of_dpp, + p->dcc_dram_bw_nom_overhead_factor_p0, + p->dcc_dram_bw_nom_overhead_factor_p1, + p->dcc_dram_bw_pref_overhead_factor_p0, + p->dcc_dram_bw_pref_overhead_factor_p1, + p->mall_prefetch_sdp_overhead_factor, + p->mall_prefetch_dram_overhead_factor, + p->surface_read_bandwidth_l, + p->surface_read_bandwidth_c, + p->prefetch_bandwidth_l, + p->prefetch_bandwidth_c, + p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw + p->excess_vactive_fill_bw_l, + p->excess_vactive_fill_bw_c, + p->cursor_bw, + p->dpte_row_bw, + p->meta_row_bw, + p->prefetch_cursor_bw, + p->prefetch_vmrow_bw, + p->flip_bw, + p->urgent_burst_factor_l, + p->urgent_burst_factor_c, + p->urgent_burst_factor_cursor, + p->urgent_burst_factor_prefetch_l, + p->urgent_burst_factor_prefetch_c, + p->urgent_burst_factor_prefetch_cursor, + l->surface_dummy_bw, + p->surface_peak_required_bw[m][n]); + + p->urg_bandwidth_required_qual[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + p->display_cfg, + m, + n, + 0, //inc_flip_bw + 1, //use_qual_row_bw + p->num_active_planes, + p->num_of_dpp, + p->dcc_dram_bw_nom_overhead_factor_p0, + p->dcc_dram_bw_nom_overhead_factor_p1, + p->dcc_dram_bw_pref_overhead_factor_p0, + p->dcc_dram_bw_pref_overhead_factor_p1, + p->mall_prefetch_sdp_overhead_factor, + p->mall_prefetch_dram_overhead_factor, + p->surface_read_bandwidth_l, + 
p->surface_read_bandwidth_c,
+ p->prefetch_bandwidth_l,
+ p->prefetch_bandwidth_c,
+ p->prefetch_bandwidth_max, // to prevent ms/mp mismatch where mp prefetch bw > ms prefetch bw
+ p->excess_vactive_fill_bw_l,
+ p->excess_vactive_fill_bw_c,
+ p->cursor_bw,
+ p->dpte_row_bw,
+ p->meta_row_bw,
+ p->prefetch_cursor_bw,
+ p->prefetch_vmrow_bw,
+ p->flip_bw,
+ p->urgent_burst_factor_l,
+ p->urgent_burst_factor_c,
+ p->urgent_burst_factor_cursor,
+ p->urgent_burst_factor_prefetch_l,
+ p->urgent_burst_factor_prefetch_c,
+ p->urgent_burst_factor_prefetch_cursor,
+ l->surface_dummy_bw,
+ p->surface_peak_required_bw[m][n]);
+
+ p->non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
+ &s->get_urgent_bandwidth_required_locals,
+ p->display_cfg,
+ m,
+ n,
+ p->inc_flip_bw,
+ 0, //use_qual_row_bw
+ p->num_active_planes,
+ p->num_of_dpp,
+ p->dcc_dram_bw_nom_overhead_factor_p0,
+ p->dcc_dram_bw_nom_overhead_factor_p1,
+ p->dcc_dram_bw_pref_overhead_factor_p0,
+ p->dcc_dram_bw_pref_overhead_factor_p1,
+ p->mall_prefetch_sdp_overhead_factor,
+ p->mall_prefetch_dram_overhead_factor,
+ p->surface_read_bandwidth_l,
+ p->surface_read_bandwidth_c,
+ p->prefetch_bandwidth_l,
+ p->prefetch_bandwidth_c,
+ p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw
+ p->excess_vactive_fill_bw_l,
+ p->excess_vactive_fill_bw_c,
+ p->cursor_bw,
+ p->dpte_row_bw,
+ p->meta_row_bw,
+ p->prefetch_cursor_bw,
+ p->prefetch_vmrow_bw,
+ p->flip_bw,
+ l->unity_array,
+ l->unity_array,
+ l->unity_array,
+ l->unity_array,
+ l->unity_array,
+ l->unity_array,
+ l->surface_dummy_bw,
+ p->surface_peak_required_bw[m][n]);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]);
+ DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
+ DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required_qual[m][n]);
+ DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? 
"_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]); +#endif + DML_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]); + } + } +} + +static void check_urgent_bandwidth_support( + double *frac_urg_bandwidth_nom, + double *frac_urg_bandwidth_mall, + bool *vactive_bandwidth_support_ok, // vactive ok + bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok + + unsigned int mall_allocated_for_dcn_mbytes, + double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) +{ + double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + double frac_urg_bandwidth_nom_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + double frac_urg_bandwidth_mall_sdp; + double frac_urg_bandwidth_mall_dram; + if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] > 0) + frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + else + frac_urg_bandwidth_mall_sdp = 0.0; + if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] > 0) + frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + else + frac_urg_bandwidth_mall_dram = 0.0; + + *bandwidth_support_ok = 1; + *vactive_bandwidth_support_ok = 1; + + // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth + // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram + // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL + // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch + + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + + if (mall_allocated_for_dcn_mbytes > 0) { + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= 
urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + } + + *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram); + *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram); + + *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0); + + if (mall_allocated_for_dcn_mbytes > 0) + *bandwidth_support_ok &= (*frac_urg_bandwidth_mall <= 1.0); + + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + if (mall_allocated_for_dcn_mbytes > 0) { + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom); + + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall); + DML_LOG_VERBOSE("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok); + + for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { + DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", + __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), + urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]); + } + } +#endif +} + +static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state, + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) +{ + double flip_bw_available_mbps; + double flip_bw_available_sdp_mbps; + double flip_bw_available_dram_mbps; + + flip_bw_available_sdp_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]; + flip_bw_available_dram_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]; + flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? 
flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps); +#endif + + return flip_bw_available_mbps; +} + +static void calculate_immediate_flip_bandwidth_support( + // Output + double *frac_urg_bandwidth_flip, + bool *flip_bandwidth_support_ok, + + // Input + enum dml2_core_internal_soc_state_type eval_state, + double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) +{ + double frac_urg_bw_flip_sdp = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_sdp] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]; + double frac_urg_bw_flip_dram = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_dram] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]; + + *flip_bandwidth_support_ok = true; + for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram + *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n]; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n)); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); +#endif + DML_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]); + } + + *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? 
frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram; + *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip); + DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); + + for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { + DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", + __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), + urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]); + } + } +#endif +} + +static void CalculateFlipSchedule( + struct dml2_core_internal_scratch *s, + bool iflip_enable, + bool use_lb_flip_bw, + double HostVMInefficiencyFactor, + double Tvm_trips_flip, + double Tr0_trips_flip, + double Tvm_trips_flip_rounded, + double Tr0_trips_flip_rounded, + bool GPUVMEnable, + double vm_bytes, // vm_bytes + double DPTEBytesPerRow, // dpte_row_bytes + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum dml2_source_format_class SourcePixelFormat, + double LineTime, + double VRatio, + double VRatioChroma, + double Tno_bw_flip, + unsigned int dpte_row_height, + unsigned int dpte_row_height_chroma, + bool use_one_row_for_frame_flip, + unsigned int max_flip_time_us, + unsigned int max_flip_time_lines, + unsigned int per_pipe_flip_bytes, + unsigned int meta_row_bytes, + unsigned int meta_row_height, + unsigned int meta_row_height_chroma, + bool dcc_mrq_enable, + + // Output + double *dst_y_per_vm_flip, + double *dst_y_per_row_flip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe) +{ + struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals; + + l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; + l->dpte_row_bytes = DPTEBytesPerRow; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); + DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); + DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); + DML_LOG_VERBOSE("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); + DML_LOG_VERBOSE("DML::%s: iflip_enable = %u\n", __func__, iflip_enable); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); + DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, 
Tvm_trips_flip_rounded);
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded);
+ DML_LOG_VERBOSE("DML::%s: vm_bytes = %f\n", __func__, vm_bytes);
+ DML_LOG_VERBOSE("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
+ DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes);
+ DML_LOG_VERBOSE("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes);
+ DML_LOG_VERBOSE("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height);
+ DML_LOG_VERBOSE("DML::%s: meta_row_height = %d\n", __func__, meta_row_height);
+ DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
+#endif
+
+ if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) {
+ if (l->dual_plane) {
+ if (dcc_mrq_enable && GPUVMEnable) {
+ l->min_row_height = math_min2(dpte_row_height, meta_row_height);
+ l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma);
+ } else if (GPUVMEnable) {
+ l->min_row_height = dpte_row_height;
+ l->min_row_height_chroma = dpte_row_height_chroma;
+ } else {
+ l->min_row_height = meta_row_height;
+ l->min_row_height_chroma = meta_row_height_chroma;
+ }
+ l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma);
+ } else {
+ if (dcc_mrq_enable && GPUVMEnable)
+ l->min_row_height = math_min2(dpte_row_height, meta_row_height);
+ else if (GPUVMEnable)
+ l->min_row_height = dpte_row_height;
+ else
+ l->min_row_height = meta_row_height;
+
+ l->min_row_time = l->min_row_height * LineTime / VRatio;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: min_row_time = %f\n", __func__, l->min_row_time);
+#endif
+ DML_ASSERT(l->min_row_time > 0);
+
+ if (use_lb_flip_bw) {
+ // For mode check, calculate the flip bw requirement with the worst case flip time
+ l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio),
+ math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us));
+
+ // The lower bound on flip bandwidth
+ // Note: get_urgent_bandwidth_required already considers dpte_row_bw and meta_row_bw in the bandwidth calculation, so leave final_flip_bw = 0 if iflip is not required
+ l->lb_flip_bw = 0;
+
+ if (iflip_enable) {
+ l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor;
+ l->num_rows = 2;
+ l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes);
+ l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes;
+ l->lb_flip_bw = math_max3(
+ l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip),
+ l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded),
+ l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time);
+ DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes);
+ DML_LOG_VERBOSE("DML::%s: total row bytes (%f row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes);
+ DML_LOG_VERBOSE("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes);
+ DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip));
+ DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 
* Tr0_trips_flip_rounded)); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); + + if (l->lb_flip_bw > 0) { + DML_LOG_VERBOSE("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw); + DML_LOG_VERBOSE("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); + DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); + DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); + } +#endif + l->lb_flip_bw = math_max3(l->lb_flip_bw, + l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip, + (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); +#endif + } + + *final_flip_bw = l->lb_flip_bw; + + *dst_y_per_vm_flip = 1; // not used + *dst_y_per_row_flip = 1; // not used + *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded); + } else { + if (iflip_enable) { + l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes); + DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW); + DML_LOG_VERBOSE("DML::%s: portion of flip bw = %f\n", __func__, (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes); +#endif + if (l->ImmediateFlipBW == 0) { + l->Tvm_flip = 0; + l->Tr0_flip = 0; + } else { + l->Tvm_flip = math_max3(Tvm_trips_flip, + Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, + LineTime / 4.0); + + l->Tr0_flip = math_max3(Tr0_trips_flip, + (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, + LineTime / 4.0); + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes)); + + DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip); +#endif + *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0; + *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 
1.0) / 4.0; + + *final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime), + (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime)); + + if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) { + *ImmediateFlipSupportedForPipe = false; + } else { + *ImmediateFlipSupportedForPipe = iflip_enable; + } + } else { + l->Tvm_flip = 0; + l->Tr0_flip = 0; + *dst_y_per_vm_flip = 0; + *dst_y_per_row_flip = 0; + *final_flip_bw = 0; + *ImmediateFlipSupportedForPipe = iflip_enable; + } + } + } else { + l->Tvm_flip = 0; + l->Tr0_flip = 0; + *dst_y_per_vm_flip = 0; + *dst_y_per_row_flip = 0; + *final_flip_bw = 0; + *ImmediateFlipSupportedForPipe = iflip_enable; + } + +#ifdef __DML_VBA_DEBUG__ + if (!use_lb_flip_bw) { + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); + } + DML_LOG_VERBOSE("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); +#endif +} + +static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p) +{ + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; + + enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy; + double reserved_vblank_time_us; + bool FoundCriticalSurface = false; + + s->TotalActiveWriteback = 0; + p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); +#endif + + p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency; + p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark; + p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark; + p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) { + p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + 
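/* dcn4x QoS: each stutter watermark also absorbs the max urgent latency and the DF response time */ 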
p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + } + p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); + DML_LOG_VERBOSE("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); + DML_LOG_VERBOSE("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time); + DML_LOG_VERBOSE("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime); + DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); + DML_LOG_VERBOSE("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); + DML_LOG_VERBOSE("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); + DML_LOG_VERBOSE("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); + DML_LOG_VERBOSE("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); + DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us); +#endif + + s->TotalActiveWriteback = 0; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + s->TotalActiveWriteback = s->TotalActiveWriteback + 1; + } + } + + if (s->TotalActiveWriteback <= 1) { + p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency; + } else { + p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; + } + if (p->USRRetrainingRequired) + p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency; + + if (s->TotalActiveWriteback <= 1) { + p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency; + p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency; + } else { + p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; + p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK; + } + + if (p->USRRetrainingRequired) + 
p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; + + if (p->USRRetrainingRequired) + p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); + DML_LOG_VERBOSE("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); + DML_LOG_VERBOSE("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); + DML_LOG_VERBOSE("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired); + DML_LOG_VERBOSE("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); +#endif + + s->TotalPixelBW = 0.0; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; + double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0; + double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k] + * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz); + } + + *p->global_fclk_change_supported = true; + *p->global_dram_clock_change_supported = true; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; + double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0; + double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio; + double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio; + double LBBitPerPixel = 57; + + s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1)); + s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1)); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); + DML_LOG_VERBOSE("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); + DML_LOG_VERBOSE("DML::%s: k=%u, LBBitPerPixel = %f\n", __func__, k, LBBitPerPixel); + DML_LOG_VERBOSE("DML::%s: k=%u, HRatio = %f\n", __func__, k, 
h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps); +#endif + + s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz); + s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz); + + s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k]; + if (p->UnboundedRequestEnabled) { + s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW; + } + + s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; + s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k])); + s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio; + + s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz; + + if (p->NumberOfActiveSurfaces > 1) { + s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio; + } + + if (p->BytePerPixelDETC[k] > 0) { + s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k]; + s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k])); + s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c; + s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz; + if (p->NumberOfActiveSurfaces > 1) { + s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c; + } + s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC); + } else { + s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY; + } + + s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark; + s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark; + s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark; + s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us; + + if (p->VActiveLatencyHidingMargin) + p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k]; + + if (p->VActiveLatencyHidingUs) + p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding; + + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 + / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height + * 
+            s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
+
+            s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
+
+            s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
+            s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
+        }
+        p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
+
+        uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy;
+        reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000;
+
+        p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported;
+        if (s->ActiveFCLKChangeLatencyMargin[k] > 0)
+            p->FCLKChangeSupport[k] = dml2_pstate_change_vactive;
+        else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency)
+            p->FCLKChangeSupport[k] = dml2_pstate_change_vblank;
+
+        if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported)
+            *p->global_fclk_change_supported = false;
+
+        p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported;
+        if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) {
+            if (p->display_cfg->overrides.all_streams_blanked ||
+                    (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency))
+                p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive;
+            else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
+                p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
+            else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
+                p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
+        } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
+            p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
+        else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
+            p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
+        else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr)
+            p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr;
+        else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp)
+            p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp;
+        else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
+            p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame;
+
+        if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported)
+            *p->global_dram_clock_change_supported = false;
+
+        s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1));
+        s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k]));
+        s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
+        s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k];
+
+#ifdef __DML_VBA_DEBUG__
+        DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+        DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
+        DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
+        DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
+        DML_LOG_VERBOSE("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
+        DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
+        DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
+        DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
+        DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]);
+        DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
+#endif
+        p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
+
+        if (p->BytePerPixelDETC[k] > 0) {
+            s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k]));
+            s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
+            s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k];
+
+            if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format))
+                p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c));
+            else
+                p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c));
+
+#ifdef __DML_VBA_DEBUG__
+            DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]);
+            DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
+            DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
+            DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
+#endif
+        }
+    }
+
+    *p->g6_temp_read_support = true;
+    for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+        if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) &&
+                (s->g6_temp_read_latency_margin[k] < 0)) {
+            *p->g6_temp_read_support = false;
+        }
+    }
+
+    for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+        if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface)
+                || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
+            FoundCriticalSurface = true;
+            *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
+        }
+    }
+
+#ifdef __DML_VBA_DEBUG__
+    DML_LOG_VERBOSE("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported);
+    DML_LOG_VERBOSE("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported);
+    DML_LOG_VERBOSE("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
+    DML_LOG_VERBOSE("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
+#endif
+}
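Stripped of the DML bookkeeping, the per-plane p-state classification computed above is a two-input decision ladder: a positive vactive latency-hiding margin allows switching while the plane is actively scanning, and enough reserved vblank time allows switching during blanking. A minimal standalone sketch of that ladder for the auto strategy (the enum and plain-double inputs below are simplified stand-ins, not the dml2_pstate_change_* types):

#include <stdio.h>

enum pstate_support { PSTATE_UNSUPPORTED, PSTATE_VACTIVE, PSTATE_VBLANK, PSTATE_VBLANK_AND_VACTIVE };

/* Simplified model of the auto-strategy ladder: vactive works when the
 * latency-hiding margin is positive; vblank works when the reserved
 * vblank time covers the blackout latency; both together when both hold. */
static enum pstate_support classify(double margin_us, double reserved_vblank_us, double blackout_us)
{
	if (margin_us > 0 && reserved_vblank_us >= blackout_us)
		return PSTATE_VBLANK_AND_VACTIVE;
	if (margin_us > 0)
		return PSTATE_VACTIVE;
	if (reserved_vblank_us >= blackout_us)
		return PSTATE_VBLANK;
	return PSTATE_UNSUPPORTED;
}

int main(void)
{
	printf("%d\n", classify(3.5, 0.0, 35.0));   /* 1: vactive only */
	printf("%d\n", classify(-1.0, 40.0, 35.0)); /* 2: vblank only */
	return 0;
}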
+
+static void calculate_bytes_to_fetch_required_to_hide_latency(
+        struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *p)
+{
+    unsigned int dst_lines_to_hide;
+    unsigned int src_lines_to_hide_l;
+    unsigned int src_lines_to_hide_c;
+    unsigned int plane_index;
+    unsigned int stream_index;
+
+    for (plane_index = 0; plane_index < p->num_active_planes; plane_index++) {
+        if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[plane_index]))
+            continue;
+
+        stream_index = p->display_cfg->plane_descriptors[plane_index].stream_index;
+
+        dst_lines_to_hide = (unsigned int)math_ceil(p->latency_to_hide_us /
+                ((double)p->display_cfg->stream_descriptors[stream_index].timing.h_total /
+                (double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz * 1000.0));
+
+        src_lines_to_hide_l = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio * dst_lines_to_hide,
+                p->swath_height_l[plane_index]);
+        p->bytes_required_l[plane_index] = src_lines_to_hide_l * p->num_of_dpp[plane_index] * p->swath_width_l[plane_index] * p->byte_per_pix_l[plane_index];
+
+        src_lines_to_hide_c = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane1.v_ratio * dst_lines_to_hide,
+                p->swath_height_c[plane_index]);
+        p->bytes_required_c[plane_index] = src_lines_to_hide_c * p->num_of_dpp[plane_index] * p->swath_width_c[plane_index] * p->byte_per_pix_c[plane_index];
+
+        if (p->display_cfg->plane_descriptors[plane_index].surface.dcc.enable && p->mrq_present) {
+            p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->meta_row_height_l[plane_index]) * p->meta_row_bytes_per_row_ub_l[plane_index];
+            if (p->meta_row_height_c[plane_index]) {
+                p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->meta_row_height_c[plane_index]) * p->meta_row_bytes_per_row_ub_c[plane_index];
+            }
+        }
+
+        if (p->display_cfg->gpuvm_enable == true) {
+            p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->dpte_row_height_l[plane_index]) * p->dpte_bytes_per_row_l[plane_index];
+            if (p->dpte_row_height_c[plane_index]) {
+                p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->dpte_row_height_c[plane_index]) * p->dpte_bytes_per_row_c[plane_index];
+            }
+        }
+    }
+}
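The core of the helper above is a three-step conversion: latency budget to destination lines (latency divided by line time), destination lines to source lines (scaled by v_ratio and rounded up to a whole swath), and source lines to bytes (times swath width, bytes per pixel, and DPP count). A worked toy example with made-up timing and swath numbers, not values from any real configuration:

#include <math.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical timing: 4400-pixel h_total at 594 MHz -> ~7.4 us per line */
	double line_time_us = 4400.0 / 594.0;
	double latency_us = 60.0, v_ratio = 1.5;
	unsigned int swath_height = 4, swath_width = 1920, bytes_per_pixel = 4, dpp = 2;

	unsigned int dst_lines = (unsigned int)ceil(latency_us / line_time_us);
	/* round source lines up to a whole number of swaths */
	unsigned int src_lines = (unsigned int)(ceil(v_ratio * dst_lines / swath_height) * swath_height);
	unsigned long bytes_required = (unsigned long)src_lines * dpp * swath_width * bytes_per_pixel;

	/* prints dst=9 src=16 bytes=245760 for these inputs */
	printf("dst=%u src=%u bytes=%lu\n", dst_lines, src_lines, bytes_required);
	return 0;
}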
+
+static noinline_for_stack void calculate_vactive_det_fill_latency(
+        const struct dml2_display_cfg *display_cfg,
+        unsigned int num_active_planes,
+        unsigned int bytes_required_l[],
+        unsigned int bytes_required_c[],
+        double dcc_dram_bw_nom_overhead_factor_p0[],
+        double dcc_dram_bw_nom_overhead_factor_p1[],
+        double surface_read_bw_l[],
+        double surface_read_bw_c[],
+        double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
+        double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
+        /* output */
+        double vactive_det_fill_delay_us[])
+{
+    double effective_excess_bandwidth;
+    double effective_excess_bandwidth_l;
+    double effective_excess_bandwidth_c;
+    double adj_factor;
+    unsigned int plane_index;
+    unsigned int soc_state;
+    unsigned int bw_type;
+
+    for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
+        if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
+            continue;
+
+        vactive_det_fill_delay_us[plane_index] = 0.0;
+        for (soc_state = 0; soc_state < dml2_core_internal_soc_state_max; soc_state++) {
+            for (bw_type = 0; bw_type < dml2_core_internal_bw_max; bw_type++) {
+                effective_excess_bandwidth = (surface_peak_required_bw[soc_state][bw_type][plane_index] - surface_avg_vactive_required_bw[soc_state][bw_type][plane_index]);
+
+                /* luma */
+                adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[plane_index] : 1.0;
+
+                effective_excess_bandwidth_l = effective_excess_bandwidth * surface_read_bw_l[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
+                if (effective_excess_bandwidth_l > 0.0) {
+                    vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_l[plane_index] / effective_excess_bandwidth_l);
+                }
+
+                /* chroma */
+                adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[plane_index] : 1.0;
+
+                effective_excess_bandwidth_c = effective_excess_bandwidth * surface_read_bw_c[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
+                if (effective_excess_bandwidth_c > 0.0) {
+                    vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_c[plane_index] / effective_excess_bandwidth_c);
+                }
+            }
+        }
+    }
+}
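The fill delay above is the per-component bytes-to-hide divided by the bandwidth headroom (peak minus average vactive bandwidth), apportioned by each component's share of the surface reads; the worst case over states and bandwidth types wins. A minimal sketch of one such term, with hypothetical bandwidth figures:

#include <stdio.h>

/* Fill delay for one plane component: bytes to hide divided by the
 * bandwidth headroom apportioned to that component. MB/s == bytes/us. */
static double fill_delay_us(double bytes, double peak_bw_mbps, double avg_bw_mbps, double share)
{
	double excess = (peak_bw_mbps - avg_bw_mbps) * share;

	return excess > 0.0 ? bytes / excess : 0.0;
}

int main(void)
{
	/* assumed: 245760 bytes to hide, 800 vs 600 MB/s, luma takes 80% of reads */
	printf("%f us\n", fill_delay_us(245760.0, 800.0, 600.0, 0.8)); /* 1536 us */
	return 0;
}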
+
+static void calculate_excess_vactive_bandwidth_required(
+        const struct dml2_display_cfg *display_cfg,
+        unsigned int num_active_planes,
+        unsigned int bytes_required_l[],
+        unsigned int bytes_required_c[],
+        /* outputs */
+        double excess_vactive_fill_bw_l[],
+        double excess_vactive_fill_bw_c[])
+{
+    unsigned int plane_index;
+
+    for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
+        if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
+            continue;
+
+        excess_vactive_fill_bw_l[plane_index] = 0.0;
+        excess_vactive_fill_bw_c[plane_index] = 0.0;
+
+        if (display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us > 0) {
+            excess_vactive_fill_bw_l[plane_index] = (double)bytes_required_l[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us;
+            excess_vactive_fill_bw_c[plane_index] = (double)bytes_required_c[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us;
+        }
+    }
+}
+
+static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config)
+{
+    double bw_mbps = 0;
+    bw_mbps = ((double)uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
+
+    return bw_mbps;
+}
+
+static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config)
+{
+    double uclk_mhz = 0;
+
+    uclk_mhz = (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
+
+    return uclk_mhz;
+}
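The two converters above are exact inverses built from the same DRAM factor (channel count x channel width in bytes x transactions per clock), so a frequency-to-bandwidth-to-frequency round trip returns the input. A self-contained check under an assumed memory configuration (the 16x2x2 numbers are illustrative only):

#include <stdio.h>

struct dram_params { unsigned int channel_count, channel_width_bytes, transactions_per_clock; };

static double uclk_khz_to_bw_mbps(unsigned long uclk_khz, const struct dram_params *c)
{
	return (double)uclk_khz * c->channel_count * c->channel_width_bytes * c->transactions_per_clock / 1000.0;
}

static double bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dram_params *c)
{
	return (double)bw_kbps / (c->channel_count * c->channel_width_bytes * c->transactions_per_clock) / 1000.0;
}

int main(void)
{
	/* assumed GDDR-like config: 16 channels x 2 bytes, 2 transactions/clock */
	struct dram_params cfg = { 16, 2, 2 };
	double bw = uclk_khz_to_bw_mbps(1187000, &cfg); /* -> 75968 MB/s */

	printf("%.0f MB/s -> %.0f MHz\n", bw, bw_kbps_to_uclk_mhz((unsigned long long)(bw * 1000.0), &cfg));
	return 0;
}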
+
+static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params)
+{
+    unsigned int i;
+    unsigned int index = 0;
+
+    for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
+        DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz);
+
+        if (i == 0)
+            index = 0;
+        else
+            index = i - 1;
+
+        if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz ||
+                per_uclk_dpm_params[i].minimum_uclk_khz == 0) {
+            break;
+        }
+    }
+    DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
+    DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index);
+    return index;
+}
+
+static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table)
+{
+    unsigned int i;
+    bool clk_entry_found = false;
+
+    for (i = 0; i < clk_table->uclk.num_clk_values; i++) {
+        DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]);
+
+        if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) {
+            clk_entry_found = true;
+            break;
+        }
+    }
+
+    if (!clk_entry_found)
+        DML_ASSERT(clk_entry_found);
+#if defined(__DML_VBA_DEBUG__)
+    DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
+    DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i);
+#endif
+    return i;
+}
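The QoS-index scan above selects the last DPM entry whose minimum UCLK does not exceed the query frequency; a zero floor marks the end of a partially filled table. The same scan, lifted out of the DML types and run on an assumed floor table:

#include <stdio.h>

/* Return the index of the last entry whose floor is <= uclk_khz.
 * A zero floor terminates a partially filled table. */
static unsigned int qos_index(unsigned long uclk_khz, const unsigned long *floors_khz, unsigned int n)
{
	unsigned int i, index = 0;

	for (i = 0; i < n; i++) {
		index = i ? i - 1 : 0;
		if (uclk_khz < floors_khz[i] || floors_khz[i] == 0)
			break;
	}
	return index;
}

int main(void)
{
	/* hypothetical DPM floors; trailing zeros are unused slots */
	unsigned long floors[8] = { 97000, 435000, 731000, 1187000, 0, 0, 0, 0 };

	printf("%u\n", qos_index(500000, floors, 8));  /* -> 1 (435 MHz bucket) */
	printf("%u\n", qos_index(2000000, floors, 8)); /* -> 3 (last valid entry) */
	return 0;
}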
+
+static unsigned int get_pipe_flip_bytes(
+        double hostvm_inefficiency_factor,
+        unsigned int vm_bytes,
+        unsigned int dpte_row_bytes,
+        unsigned int meta_row_bytes)
+{
+    unsigned int flip_bytes = 0;
+
+    flip_bytes += (unsigned int) ((vm_bytes * hostvm_inefficiency_factor) + 2*meta_row_bytes);
+    flip_bytes += (unsigned int) (2*dpte_row_bytes * hostvm_inefficiency_factor);
+
+    return flip_bytes;
+}
+
+static void calculate_hostvm_inefficiency_factor(
+        double *HostVMInefficiencyFactor,
+        double *HostVMInefficiencyFactorPrefetch,
+
+        bool gpuvm_enable,
+        bool hostvm_enable,
+        unsigned int remote_iommu_outstanding_translations,
+        unsigned int max_outstanding_reqs,
+        double urg_bandwidth_avail_active_pixel_and_vm,
+        double urg_bandwidth_avail_active_vm_only)
+{
+    *HostVMInefficiencyFactor = 1;
+    *HostVMInefficiencyFactorPrefetch = 1;
+
+    if (gpuvm_enable && hostvm_enable) {
+        *HostVMInefficiencyFactor = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only;
+        *HostVMInefficiencyFactorPrefetch = *HostVMInefficiencyFactor;
+
+        if ((*HostVMInefficiencyFactorPrefetch < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs))
+            *HostVMInefficiencyFactorPrefetch = 4;
+#ifdef __DML_VBA_DEBUG__
+        DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm);
+        DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only);
+        DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor);
+        DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch);
+#endif
+    }
+}
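Per-pipe flip bytes reduce to: VM descriptor bytes scaled by the host-VM inefficiency factor, plus two meta rows, plus two PTE rows also scaled by the factor. A standalone instance of the same arithmetic, with illustrative byte counts only:

#include <stdio.h>

static unsigned int pipe_flip_bytes(double hvm_factor, unsigned int vm_bytes,
				    unsigned int dpte_row_bytes, unsigned int meta_row_bytes)
{
	unsigned int flip_bytes = 0;

	flip_bytes += (unsigned int)((vm_bytes * hvm_factor) + 2 * meta_row_bytes);
	flip_bytes += (unsigned int)(2 * dpte_row_bytes * hvm_factor);
	return flip_bytes;
}

int main(void)
{
	/* hypothetical: 4 KiB of VM descriptors, 2 KiB PTE rows, 512 B meta rows */
	printf("%u bytes\n", pipe_flip_bytes(1.0, 4096, 2048, 512)); /* 5120 + 4096 = 9216 */
	return 0;
}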
+
+struct dml2_core_internal_g6_temp_read_blackouts_table {
+    struct {
+        unsigned int uclk_khz;
+        unsigned int blackout_us;
+    } entries[DML_MAX_CLK_TABLE_SIZE];
+};
+
+struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = {
+    .entries = {
+        {
+            .uclk_khz = 96000,
+            .blackout_us = 23,
+        },
+        {
+            .uclk_khz = 435000,
+            .blackout_us = 10,
+        },
+        {
+            .uclk_khz = 521000,
+            .blackout_us = 10,
+        },
+        {
+            .uclk_khz = 731000,
+            .blackout_us = 8,
+        },
+        {
+            .uclk_khz = 822000,
+            .blackout_us = 8,
+        },
+        {
+            .uclk_khz = 962000,
+            .blackout_us = 5,
+        },
+        {
+            .uclk_khz = 1069000,
+            .blackout_us = 5,
+        },
+        {
+            .uclk_khz = 1187000,
+            .blackout_us = 5,
+        },
+    },
+};
+
+static double get_g6_temp_read_blackout_us(
+        struct dml2_soc_bb *soc,
+        unsigned int uclk_freq_khz,
+        unsigned int min_clk_index)
+{
+    unsigned int i;
+    unsigned int blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
+
+    if (soc->power_management_parameters.g6_temp_read_blackout_us[0] > 0.0) {
+        /* overrides are present in the SoC BB */
+        return soc->power_management_parameters.g6_temp_read_blackout_us[min_clk_index];
+    }
+
+    /* use internal table */
+    blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
+
+    for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
+        if (uclk_freq_khz < core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz ||
+                core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz == 0) {
+            break;
+        }
+
+        blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[i].blackout_us;
+    }
+
+    return (double)blackout_us;
+}
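Absent a SoC BB override, the lookup walks the frequency-ascending table and keeps the blackout value of the last entry at or below the query frequency, so faster memory yields shorter blackouts. The same scan against a trimmed copy of the table values:

#include <stdio.h>

struct blackout_entry { unsigned int uclk_khz, blackout_us; };

/* Keep the blackout of the last entry whose uclk floor is <= the query. */
static unsigned int blackout_for(unsigned long uclk_khz, const struct blackout_entry *t, unsigned int n)
{
	unsigned int i, us = t[0].blackout_us;

	for (i = 0; i < n; i++) {
		if (uclk_khz < t[i].uclk_khz || t[i].uclk_khz == 0)
			break;
		us = t[i].blackout_us;
	}
	return us;
}

int main(void)
{
	const struct blackout_entry tbl[] = {
		{ 96000, 23 }, { 435000, 10 }, { 521000, 10 }, { 731000, 8 },
	};

	printf("%u us\n", blackout_for(600000, tbl, 4)); /* 521 MHz bucket -> 10 */
	return 0;
}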
+
+static double get_max_urgent_latency_us(
+        struct dml2_dcn4x_soc_qos_params *dcn4x,
+        double uclk_freq_mhz,
+        double FabricClock,
+        unsigned int min_clk_index)
+{
+    double latency;
+    latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz
+        * (1 + dcn4x->umc_max_latency_margin / 100.0)
+        + dcn4x->mall_overhead_fclk_cycles / FabricClock
+        + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock
+        * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0);
+    return latency;
+}
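Worst-case urgent latency is built from three cycle counts converted to microseconds: UMC cycles at the current UCLK padded by the UMC margin, MALL overhead cycles at FCLK, and round-trip fabric cycles at FCLK padded by the fabric margin. The same formula evaluated on assumed QoS numbers (none of these values come from a real SoC BB):

#include <stdio.h>

int main(void)
{
	/* assumed QoS parameters for illustration */
	double uclk_mhz = 1187.0, fclk_mhz = 1200.0;
	double umc_cycles = 1000.0, umc_margin_pct = 10.0;
	double mall_fclk_cycles = 50.0, round_trip_fclk_cycles = 800.0, fabric_margin_pct = 12.5;

	double latency_us = umc_cycles / uclk_mhz * (1 + umc_margin_pct / 100.0)
			+ mall_fclk_cycles / fclk_mhz
			+ round_trip_fclk_cycles / fclk_mhz * (1 + fabric_margin_pct / 100.0);

	printf("max urgent latency = %f us\n", latency_us); /* ~1.72 us here */
	return 0;
}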
+
+static void calculate_pstate_keepout_dst_lines(
+        const struct dml2_display_cfg *display_cfg,
+        const struct dml2_core_internal_watermarks *watermarks,
+        unsigned int pstate_keepout_dst_lines[])
+{
+    const struct dml2_stream_parameters *stream_descriptor;
+    unsigned int i;
+
+    for (i = 0; i < display_cfg->num_planes; i++) {
+        if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) {
+            stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index];
+
+            pstate_keepout_dst_lines[i] =
+                (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz));
+
+            if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) {
+                pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1;
+            }
+        }
+    }
+}
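The keepout conversion is watermark divided by line time (h_total x 1000 / pixel_clock_khz, in microseconds), rounded up and clamped to v_total - 1. Worked numbers on a hypothetical 1080p timing:

#include <math.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical timing: 2200 x 1125 @ 148.5 MHz -> ~14.81 us per line */
	unsigned int h_total = 2200, v_total = 1125;
	double pixel_clock_khz = 148500.0, watermark_us = 50.0;

	double line_time_us = (double)h_total * 1000.0 / pixel_clock_khz;
	unsigned int keepout = (unsigned int)ceil(watermark_us / line_time_us);

	if (keepout > v_total - 1)
		keepout = v_total - 1;
	printf("keepout = %u lines\n", keepout); /* ceil(50 / 14.81) = 4 */
	return 0;
}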
+
+static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib *mode_lib,
+        const struct dml2_display_cfg *display_cfg)
+{
+    struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
+    struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
+    struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
+    struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+    struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
+#endif
+    struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
+
+    double min_return_bw_for_latency;
+    unsigned int k;
+
+    mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep;
+
+    calculate_hostvm_inefficiency_factor(
+            &s->HostVMInefficiencyFactor,
+            &s->HostVMInefficiencyFactorPrefetch,
+
+            display_cfg->gpuvm_enable,
+            display_cfg->hostvm_enable,
+            mode_lib->ip.remote_iommu_outstanding_translations,
+            mode_lib->soc.max_outstanding_reqs,
+            mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
+            mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
+
+    mode_lib->ms.Total3dlutActive = 0;
+    for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+        if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
+            mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1;
+
+        // Calculate tdlut schedule related terms
+        calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK;
+        calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+        calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
+        calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
+        calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
+        calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+        calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+        calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag;
+        calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling);
+
+        // output
+        calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
+        calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
+        calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
+        calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
+        calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
+        calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
+        calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
+
+        calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
+    }
+
+    min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active];
+
+    if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
+        s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
+                mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
+                mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
+
+    CalculateExtraLatency(
+            display_cfg,
+            mode_lib->ip.rob_buffer_size_kbytes,
+            mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
+            s->ReorderingBytes,
+            mode_lib->ms.DCFCLK,
+            mode_lib->ms.FabricClock,
+            mode_lib->ip.pixel_chunk_size_kbytes,
+            min_return_bw_for_latency,
+            mode_lib->ms.num_active_planes,
+            mode_lib->ms.NoOfDPP,
+            mode_lib->ms.dpte_group_bytes,
+            s->tdlut_bytes_per_group,
+            s->HostVMInefficiencyFactor,
+            s->HostVMInefficiencyFactorPrefetch,
+            mode_lib->soc.hostvm_min_page_size_kbytes,
+            mode_lib->soc.qos_parameters.qos_type,
+            !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
+            mode_lib->soc.max_outstanding_reqs,
+            mode_lib->ms.support.request_size_bytes_luma,
+            mode_lib->ms.support.request_size_bytes_chroma,
+            mode_lib->ip.meta_chunk_size_kbytes,
+            mode_lib->ip.dchub_arb_to_ret_delay,
+            mode_lib->ms.TripToMemory,
+            mode_lib->ip.hostvm_mode,
+
+            // output
+            &mode_lib->ms.ExtraLatency,
+            &mode_lib->ms.ExtraLatency_sr,
+            &mode_lib->ms.ExtraLatencyPrefetch);
+
+    for (k = 0; k < mode_lib->ms.num_active_planes; k++)
+        s->impacted_dst_y_pre[k] = 0;
+
+    s->recalc_prefetch_schedule = 0;
+    s->recalc_prefetch_done = 0;
+    do {
+        mode_lib->ms.support.PrefetchSupported = true;
+
+        for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+            s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+            s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
+
+            s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+                    mode_lib->ms.NoOfDPP[k],
+                    display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
+                    display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
+                    display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+                    display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+            s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+                    mode_lib->ms.NoOfDPP[k],
+                    display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
+                    display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
+                    display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+                    display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+            struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
+
+            mode_lib->ms.TWait[k] = CalculateTWait(
+                    display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
+                    mode_lib->ms.UrgLatency,
+                    mode_lib->ms.TripToMemory,
+                    !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
+                    get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx) : 0.0);
+
+            myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k];
+            myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK;
+            myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+            myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
+            myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k];
+            myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
+            myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+            myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+            myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+            myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+            myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
+            myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
+            myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
+            myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
+            myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
+            myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
+            myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
+            myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
+            myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
+            myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+            myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
+            myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+            myPipe->ODMMode = mode_lib->ms.ODMMode[k];
+            myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
+            myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
+            myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
+            myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
+
+#ifdef __DML_VBA_DEBUG__
+            DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
+            DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]);
+#endif
+            CalculatePrefetchSchedule_params->display_cfg = display_cfg;
+            CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
+            CalculatePrefetchSchedule_params->myPipe = myPipe;
+            CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k];
+            CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
+            CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
+            CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
+            CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
+            CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
+            CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
+            CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
+            CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
+            CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k];
+            CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
+            CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
+            CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
+            CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
+            CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
+            CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
+            CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch;
+            CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
+            CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k];
+            CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k];
+            CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k];
+            CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
+            CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k];
+            CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k];
+            CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
+            CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k];
+            CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k];
+            CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k];
+            CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k];
+            CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k];
+            CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k];
+            CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory;
+            CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency;
+            CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+            CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
+            CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
+            CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
+            CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
+            CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
+            CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
+            CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
+            CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+            CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+            CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k];
+            CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k];
+            CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
+            CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k];
+            CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k];
+
+            // output
+            CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
+            CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
+            CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k];
+            CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k];
+            CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k];
+            CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k];
+            CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k];
+            CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l
+            CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c
+            CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &mode_lib->ms.RequiredPrefetchBWMax[k];
+            CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k];
+            CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
+            CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k];
+            CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
+            CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
+            CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
+            CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
+            CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
+            CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
+            CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
+            CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
+            CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
+            CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
+            CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
+            CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
+            CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
+            CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k];
+            CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
+            CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
+            CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
+            CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k];
+
+            mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
+
+            mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k];
+            DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank);
+            DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank);
+        } // for k num_planes
+
+        CalculateDCFCLKDeepSleepTdlut(
+                display_cfg,
+                mode_lib->ms.num_active_planes,
+                mode_lib->ms.BytePerPixelY,
+                mode_lib->ms.BytePerPixelC,
+                mode_lib->ms.SwathWidthY,
+                mode_lib->ms.SwathWidthC,
+                mode_lib->ms.NoOfDPP,
+                mode_lib->ms.PSCL_FACTOR,
+                mode_lib->ms.PSCL_FACTOR_CHROMA,
+                mode_lib->ms.RequiredDPPCLK,
+                mode_lib->ms.vactive_sw_bw_l,
+                mode_lib->ms.vactive_sw_bw_c,
+                mode_lib->soc.return_bus_width_bytes,
+                mode_lib->ms.RequiredDISPCLK,
+                s->tdlut_bytes_to_deliver,
+                s->prefetch_swath_time_us,
+
+                /* Output */
+                &mode_lib->ms.dcfclk_deepsleep);
+
+        for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+            if (mode_lib->ms.dst_y_prefetch[k] < 2.0
+                    || mode_lib->ms.LinesForVM[k] >= 32.0
+                    || mode_lib->ms.LinesForDPTERow[k] >= 16.0
+                    || mode_lib->ms.NoTimeForPrefetch[k] == true
+                    || s->DSTYAfterScaler[k] > 8) {
+                mode_lib->ms.support.PrefetchSupported = false;
+                DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]);
+                DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]);
+                DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]);
+                DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]);
+                DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]);
+            }
+        }
+
+        mode_lib->ms.support.DynamicMetadataSupported = true;
+        for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+            if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) {
+                mode_lib->ms.support.DynamicMetadataSupported = false;
+            }
+        }
+
+        mode_lib->ms.support.VRatioInPrefetchSupported = true;
+        for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+            if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
+                    mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
+                mode_lib->ms.support.VRatioInPrefetchSupported = false;
+                DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
+                DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
+                DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported);
+            }
+        }
+
+        mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported;
+
+        // By default, do not recalc prefetch schedule
+        s->recalc_prefetch_schedule = 0;
+
+        // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok
+        if (mode_lib->ms.support.PrefetchSupported) {
+            for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+                // Calculate Urgent burst factor for prefetch
+#ifdef __DML_VBA_DEBUG__
+                DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k);
+                DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]);
+                DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]);
+#endif
+                CalculateUrgentBurstFactor(
+                        &display_cfg->plane_descriptors[k],
+                        mode_lib->ms.swath_width_luma_ub[k],
+                        mode_lib->ms.swath_width_chroma_ub[k],
+                        mode_lib->ms.SwathHeightY[k],
+                        mode_lib->ms.SwathHeightC[k],
+                        s->line_times[k],
+                        mode_lib->ms.UrgLatency,
+                        mode_lib->ms.VRatioPreY[k],
+                        mode_lib->ms.VRatioPreC[k],
+                        mode_lib->ms.BytePerPixelInDETY[k],
+                        mode_lib->ms.BytePerPixelInDETC[k],
+                        mode_lib->ms.DETBufferSizeY[k],
+                        mode_lib->ms.DETBufferSizeC[k],
+                        /* Output */
+                        &mode_lib->ms.UrgentBurstFactorLumaPre[k],
+                        &mode_lib->ms.UrgentBurstFactorChromaPre[k],
+                        &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
+            }
+
+            // Calculate urgent bandwidth required, both urg and non urg peak bandwidth
+            // assume flip bw is 0 at this point
+            for (k = 0; k < mode_lib->ms.num_active_planes; k++)
+                mode_lib->ms.final_flip_bw[k] = 0;
+
+            calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required;
+            calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required;
+            calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual;
+            calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required;
+            calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw;
+            calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
+
+            calculate_peak_bandwidth_params->display_cfg = display_cfg;
+            calculate_peak_bandwidth_params->inc_flip_bw = 0;
+            calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
+            calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
+            calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
+            calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
+            calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
+            calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
+            calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
+            calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
+
+            calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
+            calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
+            calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
+            calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
+            calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax;
+            calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
+            calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
+            calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
+            calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
+            calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
+            calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
+            calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
+            calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
+            calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
+            calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
+            calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
+            calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
+            calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
+            calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
+
+            calculate_peak_bandwidth_required(
+                    &mode_lib->scratch,
+                    calculate_peak_bandwidth_params);
+
+            // Check urg peak bandwidth against available urg bw
+            // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
+            check_urgent_bandwidth_support(
+                    &s->dummy_single[0], // double* frac_urg_bandwidth
+                    &s->dummy_single[1], // double* frac_urg_bandwidth_mall
+                    &mode_lib->ms.support.UrgVactiveBandwidthSupport,
+                    &mode_lib->ms.support.PrefetchBandwidthSupported,
+
+                    mode_lib->soc.mall_allocated_for_dcn_mbytes,
+                    mode_lib->ms.support.non_urg_bandwidth_required,
+                    mode_lib->ms.support.urg_vactive_bandwidth_required,
+                    mode_lib->ms.support.urg_bandwidth_required,
+                    mode_lib->ms.support.urg_bandwidth_available);
+
+            mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported;
+            DML_LOG_VERBOSE("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported);
+
+            for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+                if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) {
+                    mode_lib->ms.support.PrefetchSupported = false;
+                    DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
+                }
+            }
+
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+            if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) {
+                CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes;
+                CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
+                CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+                CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+                CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
+                CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
+                CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY;
+                CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC;
+                CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+                CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte;
+                CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY;
+                CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC;
+                CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
+                CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
+                CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
+                CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded;
+                CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto;
+                CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
+                CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
+                CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch;
+                if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024)
+                    CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024;
+
+                CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) /
+                        ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0);
+
+                // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
+                CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule;
+                CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
+                mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params);
+                s->recalc_prefetch_done = 1;
+                s->recalc_prefetch_schedule = 1;
+            }
+#endif
+        } // prefetch schedule ok, do urg bw and flip schedule
+    } while (s->recalc_prefetch_schedule);
+
+    // Flip Schedule
+    // Both prefetch schedule and BW okay
+    if (mode_lib->ms.support.PrefetchSupported == true) {
+        mode_lib->ms.BandwidthAvailableForImmediateFlip =
+                get_bandwidth_available_for_immediate_flip(
+                    dml2_core_internal_soc_state_sys_active,
+                    mode_lib->ms.support.urg_bandwidth_required_qual, // no flip
+                    mode_lib->ms.support.urg_bandwidth_available);
+
+        mode_lib->ms.TotImmediateFlipBytes = 0;
+        for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+            if (display_cfg->plane_descriptors[k].immediate_flip) {
+                s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
+                        s->HostVMInefficiencyFactor,
+                        mode_lib->ms.vm_bytes[k],
+                        mode_lib->ms.DPTEBytesPerRow[k],
+                        mode_lib->ms.meta_row_bytes[k]);
+            } else {
+                s->per_pipe_flip_bytes[k] = 0;
+            }
+            mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k];
+
+        }
+
+        for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+            CalculateFlipSchedule(
+                    &mode_lib->scratch,
+                    display_cfg->plane_descriptors[k].immediate_flip,
+                    1, // use_lb_flip_bw
+                    s->HostVMInefficiencyFactor,
+                    s->Tvm_trips_flip[k],
+                    s->Tr0_trips_flip[k],
+                    s->Tvm_trips_flip_rounded[k],
+                    s->Tr0_trips_flip_rounded[k],
+                    display_cfg->gpuvm_enable,
+                    mode_lib->ms.vm_bytes[k],
+                    mode_lib->ms.DPTEBytesPerRow[k],
+                    mode_lib->ms.BandwidthAvailableForImmediateFlip,
+                    mode_lib->ms.TotImmediateFlipBytes,
+                    display_cfg->plane_descriptors[k].pixel_format,
+                    (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)),
+                    display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+                    display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+                    mode_lib->ms.Tno_bw_flip[k],
+                    mode_lib->ms.dpte_row_height[k],
+                    mode_lib->ms.dpte_row_height_chroma[k],
+                    mode_lib->ms.use_one_row_for_frame_flip[k],
+                    mode_lib->ip.max_flip_time_us,
+                    mode_lib->ip.max_flip_time_lines,
+                    s->per_pipe_flip_bytes[k],
+                    mode_lib->ms.meta_row_bytes[k],
+                    s->meta_row_height_luma[k],
+                    s->meta_row_height_chroma[k],
+                    mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
+
+                    /* Output */
+                    &mode_lib->ms.dst_y_per_vm_flip[k],
+                    &mode_lib->ms.dst_y_per_row_flip[k],
+                    &mode_lib->ms.final_flip_bw[k],
+                    &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
+        }
+
+        calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
+        calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip;
+        calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
+        calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip;
+        calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
+        calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
+
+        calculate_peak_bandwidth_params->display_cfg = display_cfg;
+        calculate_peak_bandwidth_params->inc_flip_bw = 1;
+        calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
+        calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
+        calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
+        calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
+        calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
+        calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
+        calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
+        calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
+
+        calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
+        calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
+        calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
+        calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
+        calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax;
+        calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
+        calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
+        calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
+        calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
+        calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
+        calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
+        calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
+        calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
+        calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
+        calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
+        calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
+        calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
+        calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
+        calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
+
+        calculate_peak_bandwidth_required(
+                &mode_lib->scratch,
+                calculate_peak_bandwidth_params);
+
+        calculate_immediate_flip_bandwidth_support(
+                &s->dummy_single[0], // double* frac_urg_bandwidth_flip
+                &mode_lib->ms.support.ImmediateFlipSupport,
+
+                dml2_core_internal_soc_state_sys_active,
+                mode_lib->ms.support.urg_bandwidth_required_flip,
+                mode_lib->ms.support.non_urg_bandwidth_required_flip,
+                mode_lib->ms.support.urg_bandwidth_available);
+
+        for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+            if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)
+                mode_lib->ms.support.ImmediateFlipSupport = false;
+        }
+
+    } else { // if prefetch is not supported, assume iflip is not supported too
+        mode_lib->ms.support.ImmediateFlipSupport = false;
+    }
+
+    s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
+    s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
+    s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr;
+    s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
+    s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
+    s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
+    s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
+    s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
+    s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
+    s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
+    s->mSOCParameters.USRRetrainingLatency = 0;
+    s->mSOCParameters.SMNLatency = 0;
+    s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx);
+    s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, mode_lib->ms.state_idx);
+    s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock;
+    s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
+
+    CalculateWatermarks_params->display_cfg = display_cfg;
+    CalculateWatermarks_params->USRRetrainingRequired = false;
+    CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+    CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
+    CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
+    CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
+    CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK;
+    CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
+    CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
+    CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+    CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
+    CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
+    CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
+    CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
+    CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
+    CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
+    CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY;
+    CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC;
+    CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY;
+    CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC;
+    CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
+    CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
+    CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
+    CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
+    CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
+    CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled;
+    CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte;
+    CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma;
+    CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma;
+
+	CalculateWatermarks_params->display_cfg = display_cfg;
+	CalculateWatermarks_params->USRRetrainingRequired = false;
+	CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+	CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
+	CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
+	CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
+	CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK;
+	CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
+	CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
+	CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+	CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
+	CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
+	CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
+	CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
+	CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
+	CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
+	CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY;
+	CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC;
+	CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY;
+	CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC;
+	CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
+	CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
+	CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
+	CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
+	CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
+	CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled;
+	CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte;
+	CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma;
+	CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma;
+
+	// Output
+	CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark
+	CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport;
+	CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported;
+	CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[]
+	CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[]
+	CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport;
+	CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported;
+	CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported
+	CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport;
+	CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support;
+	CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin;
+	CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs;
+
+	CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
+
+	calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]);
+	DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", __func__);
+
+}
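+
+/*
+ * dml_core_mode_support() evaluates one candidate display configuration
+ * against the minimum-clock state selected by in_out_params->min_clk_index
+ * and records the outcome in the individual mode_lib->ms.support flags.
+ */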
+static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params)
+{
+	struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
+	const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
+	const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
+
+	double outstanding_latency_us = 0;
+
+	struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
+	struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
+	struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
+	struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
+	struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
+	unsigned int k, m, n;
+
+	memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
+	memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support));
+
+	mode_lib->ms.num_active_planes = display_cfg->num_planes;
+	get_stream_output_bpp(s->OutputBpp, display_cfg);
+
+	mode_lib->ms.state_idx = in_out_params->min_clk_index;
+	mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000);
+	mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000);
+	mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000);
+	mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000;
+	mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000;
+	mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dispclk / 1000;
+	mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000;
+	mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dppclk / 1000;
+	mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config);
+	mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000);
+	mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000);
+	mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
+	mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table);
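+
+	/*
+	 * All clocks above come from the DPM entry selected by min_clk_index,
+	 * converted from kHz to MHz; UCLK is recovered from the entry's
+	 * pre-derate DRAM bandwidth via dram_bw_kbps_to_uclk_mhz() so that the
+	 * QoS and DPM index lookups can be keyed on the same UCLK value.
+	 */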
DML_LOG_VERBOSE("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); + DML_LOG_VERBOSE("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); + DML_LOG_VERBOSE("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); +#endif + + CalculateMaxDETAndMinCompressedBufferSize( + mode_lib->ip.config_return_buffer_size_in_kbytes, + mode_lib->ip.config_return_buffer_segment_size_in_kbytes, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->ip.max_num_dpp, + display_cfg->overrides.hw.force_nom_det_size_kbytes.enable, + display_cfg->overrides.hw.force_nom_det_size_kbytes.value, + mode_lib->ip.dcn_mrq_present, + + /* Output */ + &mode_lib->ms.MaxTotalDETInKByte, + &mode_lib->ms.NomDETInKByte, + &mode_lib->ms.MinCompressedBufferSizeInKByte); + + PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd); + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + /*Scale Ratio, taps Support Check*/ + mode_lib->ms.support.ScaleRatioAndTapsSupport = true; + // Many core tests are still setting scaling parameters "incorrectly" + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false + && (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) { + mode_lib->ms.support.ScaleRatioAndTapsSupport = false; + } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0 + || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1) + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + || (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) + && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps 
+
+	/* Source Format, Pixel Format and Scan Support Check */
+	mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+			mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
+		}
+	}
+
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		CalculateBytePerPixelAndBlockSizes(
+			display_cfg->plane_descriptors[k].pixel_format,
+			display_cfg->plane_descriptors[k].surface.tiling,
+			display_cfg->plane_descriptors[k].surface.plane0.pitch,
+			display_cfg->plane_descriptors[k].surface.plane1.pitch,
+
+			/* Output */
+			&mode_lib->ms.BytePerPixelY[k],
+			&mode_lib->ms.BytePerPixelC[k],
+			&mode_lib->ms.BytePerPixelInDETY[k],
+			&mode_lib->ms.BytePerPixelInDETC[k],
+			&mode_lib->ms.Read256BlockHeightY[k],
+			&mode_lib->ms.Read256BlockHeightC[k],
+			&mode_lib->ms.Read256BlockWidthY[k],
+			&mode_lib->ms.Read256BlockWidthC[k],
+			&mode_lib->ms.MacroTileHeightY[k],
+			&mode_lib->ms.MacroTileHeightC[k],
+			&mode_lib->ms.MacroTileWidthY[k],
+			&mode_lib->ms.MacroTileWidthC[k],
+			&mode_lib->ms.surf_linear128_l[k],
+			&mode_lib->ms.surf_linear128_c[k]);
+	}
+
+	/* Bandwidth Support Check */
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+			mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
+			mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
+		} else {
+			mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+			mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+		}
+	}
+
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+		mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+
+		mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width *
+			display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0);
+		DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]);
+#endif
+	}
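+
+	/*
+	 * Illustrative numbers (not from the code): vactive_sw_bw_l is in
+	 * bytes per microsecond, i.e. swath width * bytes/pixel divided by the
+	 * line time (h_total / pixel_clock_mhz), scaled by v_ratio. A
+	 * 3840-wide 4 B/px plane on a 4400x2250 timing at 594 MHz with
+	 * v_ratio = 1.0 yields 3840 * 4 / (4400 / 594) ~= 2074 MB/s.
+	 */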
+
+	// Writeback bandwidth
+	for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
+			mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
+				* display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
+				/ (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
+					* display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
+					/ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0;
+		} else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+			mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
+				* display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
+				/ (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
+					* display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
+					/ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0;
+		} else {
+			mode_lib->ms.WriteBandwidth[k][0] = 0.0;
+		}
+	}
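+
+	/*
+	 * The trailing * 8.0 / * 4.0 factors are bytes per written pixel:
+	 * 8 B/px for the 64 bpp dml2_444_64 writeback format, 4 B/px for the
+	 * remaining (32 bpp) formats.
+	 */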
+
+	/* Writeback Latency support check */
+	mode_lib->ms.support.WritebackLatencySupport = true;
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 &&
+			(mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) {
+			mode_lib->ms.support.WritebackLatencySupport = false;
+		}
+	}
+
+	/* Writeback Scale Ratio and Taps Support Check */
+	mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+			if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio
+				|| display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio
+				|| display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < mode_lib->ip.writeback_min_hscl_ratio
+				|| display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio
+				|| display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int)mode_lib->ip.writeback_max_hscl_taps
+				|| display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int)mode_lib->ip.writeback_max_vscl_taps
+				|| display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps
+				|| display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps
+				|| (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) {
+				mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
+			}
+			if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) {
+				mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
+			}
+		}
+	}
+
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		CalculateSinglePipeDPPCLKAndSCLThroughput(
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+			mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
+			mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
+			((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+			display_cfg->plane_descriptors[k].pixel_format,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
+			/* Output */
+			&mode_lib->ms.PSCL_FACTOR[k],
+			&mode_lib->ms.PSCL_FACTOR_CHROMA[k],
+			&mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
+	}
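+
+	/*
+	 * MinDPPCLKUsingSingleDPP[k] computed above is the DPPCLK a single
+	 * pipe would need to push this plane through the scaler; it is
+	 * compared against max_dppclk_freq_mhz further down when deciding
+	 * whether to fall back to MPC combine.
+	 */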
+
+	// Max Viewport Size support
+	for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+		if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
+			s->MaximumSwathWidthSupportLuma = 15360;
+		} else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video
+			s->MaximumSwathWidthSupportLuma = 7680 + 16;
+		} else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video
+			s->MaximumSwathWidthSupportLuma = 4320 + 16;
+		} else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha
+			s->MaximumSwathWidthSupportLuma = 5120 + 16;
+		} else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp
+			s->MaximumSwathWidthSupportLuma = 3072 + 16;
+		} else {
+			s->MaximumSwathWidthSupportLuma = 6144 + 16;
+		}
+
+		if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
+			s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0);
+		} else {
+			s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
+		}
+
+		unsigned lb_buffer_size_bits_luma = mode_lib->ip.line_buffer_size_bits;
+		unsigned lb_buffer_size_bits_chroma = mode_lib->ip.line_buffer_size_bits;
+
+/*
+#if defined(DV_BUILD)
+		// Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming.
+		if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) {
+			lb_buffer_size_bits_luma = 34620 * 57;
+			lb_buffer_size_bits_chroma = 13560 * 57;
+		}
+#endif
+*/
+		mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /
+			(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0));
+		if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
+			mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
+		} else {
+			mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /
+				(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0));
+		}
+
+		mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
+		mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
+
+		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma);
+		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
+
+		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma);
+		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
+	}
+
+	/* Cursor Support Check */
+	mode_lib->ms.support.CursorSupport = true;
+	for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+		if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
+			if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false)
+				mode_lib->ms.support.CursorSupport = false;
+		}
+	}
+
+	/* Valid Pitch Check */
+	mode_lib->ms.support.PitchSupport = true;
+	for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+
+		// data pitch
+		unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k];
+
+		if (mode_lib->ms.surf_linear128_l[k])
+			alignment_l = alignment_l / 2;
+
+		mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l);
+		if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
+			unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k];
+
+			if (mode_lib->ms.surf_linear128_c[k])
+				alignment_c = alignment_c / 2;
+			mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c);
+		} else {
+			mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch;
+		}
+
+		if (mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch ||
+			mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) {
+			mode_lib->ms.support.PitchSupport = false;
+#if defined(__DML_VBA_DEBUG__)
+			DML_LOG_VERBOSE("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u PitchY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch);
+			DML_LOG_VERBOSE("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u PitchC = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch);
+			DML_LOG_VERBOSE("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport);
+#endif
+		}
+
+		// meta pitch
+		if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) {
+			mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch,
+				display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
+
+			if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch)
+				mode_lib->ms.support.PitchSupport = false;
+
+			if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
+				mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch,
+					display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
+
+				if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch)
+					mode_lib->ms.support.PitchSupport = false;
+			}
+		} else {
+			mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0;
+			mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0;
+		}
+	}
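+
+	/*
+	 * Pitch is considered valid when the caller-provided pitch is already
+	 * at least max(pitch, width) rounded up to the macro-tile width
+	 * (halved for linear-128 surfaces); a computed aligned pitch larger
+	 * than the provided one means the surface layout cannot be honored.
+	 */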
+
+	mode_lib->ms.support.ViewportExceedsSurface = false;
+	if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) {
+		for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+			if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width ||
+				display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) {
+				mode_lib->ms.support.ViewportExceedsSurface = true;
+#if defined(__DML_VBA_DEBUG__)
+				DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
+				DML_LOG_VERBOSE("DML::%s: k=%u SurfaceWidthY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width);
+				DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
+				DML_LOG_VERBOSE("DML::%s: k=%u SurfaceHeightY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height);
+				DML_LOG_VERBOSE("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface);
+#endif
+			}
+			if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
+				if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width ||
+					display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) {
+					mode_lib->ms.support.ViewportExceedsSurface = true;
+				}
+			}
+		}
+	}
+
+	CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
+	CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
+	CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
+	CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
+	CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+	CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
+	CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
+	CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+	CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
+	CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
+	CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
+	CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.vactive_sw_bw_l;
+	CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c;
+	CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
+	CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
+	CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
+	CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
+	CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
+	CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
+	CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l;
+	CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c;
+	CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
+	CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
+	CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
+	CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
+	CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
+	CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2];
+	CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+	// output
+	CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
+	CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
+	CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3];
+	CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[4];
+	CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5];
+	CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6];
+	CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7];
+	CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8];
+	CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26];
+	CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27];
+	CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9];
+	CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10];
+	CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11];
+	CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
+	CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
+	CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
+	CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1];
+	CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2];
+	CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
+	CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
+	CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
+
+	// This call is just to find out whether there is enough DET space to support the full viewport in one pipe.
+	CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
+
+	mode_lib->ms.TotalNumberOfActiveDPP = 0;
+	mode_lib->ms.TotalNumberOfActiveOPP = 0;
+	mode_lib->ms.support.TotalAvailablePipesSupport = true;
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		/* Number Of DSC Slices */
+		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable ||
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) {
+
+			if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0)
+				mode_lib->ms.support.NumberOfDSCSlices[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices;
+			else {
+				if (s->PixelClockBackEnd[k] > 4800) {
+					mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4));
+				} else if (s->PixelClockBackEnd[k] > 2400) {
+					mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
+				} else if (s->PixelClockBackEnd[k] > 1200) {
+					mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
+				} else if (s->PixelClockBackEnd[k] > 340) {
+					mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
+				} else {
+					mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
+				}
+			}
+		} else {
+			mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
+		}
+
+		CalculateODMMode(
+			mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
+			mode_lib->ms.max_dispclk_freq_mhz,
+			false, // DSCEnable
+			mode_lib->ms.TotalNumberOfActiveDPP,
+			mode_lib->ms.TotalNumberOfActiveOPP,
+			mode_lib->ip.max_num_dpp,
+			mode_lib->ip.max_num_opp,
+			((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+			mode_lib->ms.support.NumberOfDSCSlices[k],
+
+			/* Output */
+			&s->TotalAvailablePipesSupportNoDSC,
+			&s->NumberOfDPPNoDSC,
+			&s->ODMModeNoDSC,
+			&s->RequiredDISPCLKPerSurfaceNoDSC);
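+
+		/*
+		 * ODM is evaluated twice per plane, once with DSC disabled (above)
+		 * and once with DSC enabled (below); which result is kept is
+		 * decided later from mode_lib->ms.RequiresDSC[k].
+		 */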
+
+		CalculateODMMode(
+			mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
+			mode_lib->ms.max_dispclk_freq_mhz,
+			true, // DSCEnable
+			mode_lib->ms.TotalNumberOfActiveDPP,
+			mode_lib->ms.TotalNumberOfActiveOPP,
+			mode_lib->ip.max_num_dpp,
+			mode_lib->ip.max_num_opp,
+			((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+			mode_lib->ms.support.NumberOfDSCSlices[k],
+
+			/* Output */
+			&s->TotalAvailablePipesSupportDSC,
+			&s->NumberOfDPPDSC,
+			&s->ODMModeDSC,
+			&s->RequiredDISPCLKPerSurfaceDSC);
+
+		CalculateOutputLink(
+			&mode_lib->scratch,
+			((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000),
+			((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000),
+			((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000),
+			mode_lib->soc.phy_downspread_percent,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+			s->PixelClockBackEnd[k],
+			s->OutputBpp[k],
+			mode_lib->ip.maximum_dsc_bits_per_component,
+			mode_lib->ms.support.NumberOfDSCSlices[k],
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout,
+			s->ODMModeNoDSC,
+			s->ODMModeDSC,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate,
+
+			/* Output */
+			&mode_lib->ms.RequiresDSC[k],
+			&mode_lib->ms.RequiresFEC[k],
+			&mode_lib->ms.OutputBpp[k],
+			&mode_lib->ms.OutputType[k],
+			&mode_lib->ms.OutputRate[k],
+			&mode_lib->ms.RequiredSlots[k]);
+
+		if (s->OutputBpp[k] == 0.0) {
+			s->OutputBpp[k] = mode_lib->ms.OutputBpp[k];
+		}
+
+		if (mode_lib->ms.RequiresDSC[k] == false) {
+			mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC;
+			mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC;
+			if (!s->TotalAvailablePipesSupportNoDSC)
+				mode_lib->ms.support.TotalAvailablePipesSupport = false;
+			mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC;
+		} else {
+			mode_lib->ms.ODMMode[k] = s->ODMModeDSC;
+			mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC;
+			if (!s->TotalAvailablePipesSupportDSC)
+				mode_lib->ms.support.TotalAvailablePipesSupport = false;
+			mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC;
+		}
+#if defined(__DML_VBA_DEBUG__)
+		DML_LOG_VERBOSE("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
+#endif
+
+		// ensure the number of DSC slices is an integer multiple consistent with the ODM mode
+		mode_lib->ms.support.DSCSlicesODMModeSupported = true;
+		if (mode_lib->ms.RequiresDSC[k]) {
+			// fail a ms check if the override num_slices doesn't align with the odm mode setting
+			if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) {
+				if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
+					mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 2) == 0);
+				else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
+					mode_lib->ms.support.DSCSlicesODMModeSupported = (mode_lib->ms.support.NumberOfDSCSlices[k] == 12);
+				else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
+					mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0);
+#if defined(__DML_VBA_DEBUG__)
+				if (!mode_lib->ms.support.DSCSlicesODMModeSupported) {
+					DML_LOG_VERBOSE("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k);
+					DML_LOG_VERBOSE("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices);
+					DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
+				}
+#endif
+			} else {
+				// safeguard to ensure the DML-derived DSC slice count and ODM setting are compatible
+				if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
+					mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0);
+				else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
+					mode_lib->ms.support.NumberOfDSCSlices[k] = 12;
+				else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
+					mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0);
+			}
+
+		} else {
+			mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
+		}
+	}
+
+	mode_lib->ms.support.incorrect_imall_usage = 0;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
+			mode_lib->ms.support.incorrect_imall_usage = 1;
+	}
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		mode_lib->ms.MPCCombine[k] = false;
+		mode_lib->ms.NoOfDPP[k] = 1;
+		mode_lib->ms.NoOfOPP[k] = 1;
+
+		if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) {
+			mode_lib->ms.MPCCombine[k] = false;
+			mode_lib->ms.NoOfDPP[k] = 4;
+			mode_lib->ms.NoOfOPP[k] = 4;
+		} else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) {
+			mode_lib->ms.MPCCombine[k] = false;
+			mode_lib->ms.NoOfDPP[k] = 3;
+			mode_lib->ms.NoOfOPP[k] = 3;
+		} else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) {
+			mode_lib->ms.MPCCombine[k] = false;
+			mode_lib->ms.NoOfDPP[k] = 2;
+			mode_lib->ms.NoOfOPP[k] = 2;
+		} else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) {
+			mode_lib->ms.MPCCombine[k] = true;
+			mode_lib->ms.NoOfDPP[k] = 2;
+		} else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) {
+			mode_lib->ms.MPCCombine[k] = false;
+			mode_lib->ms.NoOfDPP[k] = 1;
+			if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
+				DML_LOG_VERBOSE("WARNING: DML::%s: MPCC is overridden to disabled, but the viewport is too large to be supported with a single pipe!\n", __func__);
+			}
+		} else {
+			if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
+				mode_lib->ms.MPCCombine[k] = true;
+				mode_lib->ms.NoOfDPP[k] = 2;
+			}
+		}
+#if defined(__DML_VBA_DEBUG__)
+		DML_LOG_VERBOSE("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]);
+#endif
+	}
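+
+	/*
+	 * Pipe-count policy above: ODM combine dictates DPP/OPP counts
+	 * outright; otherwise an explicit mpcc_combine_factor override wins,
+	 * and only in the absence of both does DML fall back to MPC combine
+	 * when a single DPP cannot meet the required DPPCLK or viewport width.
+	 */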
+
+	mode_lib->ms.TotalNumberOfActiveDPP = 0;
+	mode_lib->ms.TotalNumberOfActiveOPP = 0;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		mode_lib->ms.TotalNumberOfActiveDPP += mode_lib->ms.NoOfDPP[k];
+		mode_lib->ms.TotalNumberOfActiveOPP += mode_lib->ms.NoOfOPP[k];
+	}
+	if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp)
+		mode_lib->ms.support.TotalAvailablePipesSupport = false;
+	if (mode_lib->ms.TotalNumberOfActiveOPP > (unsigned int)mode_lib->ip.max_num_opp)
+		mode_lib->ms.support.TotalAvailablePipesSupport = false;
+
+	mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0;
+	for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) {
+		if (mode_lib->ms.NoOfDPP[k] == 1)
+			mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1;
+	}
+
+	// DISPCLK/DPPCLK
+	mode_lib->ms.WritebackRequiredDISPCLK = 0;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+			mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK,
+				CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
+					((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+					mode_lib->ip.writeback_line_buffer_buffer_size));
+		}
+	}
+
+	mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		mode_lib->ms.RequiredDISPCLK = math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]);
+	}
+
+	mode_lib->ms.GlobalDPPCLK = 0;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k];
+		mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]);
+	}
+
+	mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz));
+
+	/* Total Available OTG, Writeback, HDMIFRL, DP Support Check */
+	s->TotalNumberOfActiveOTG = 0;
+	s->TotalNumberOfActiveHDMIFRL = 0;
+	s->TotalNumberOfActiveDP2p0 = 0;
+	s->TotalNumberOfActiveDP2p0Outputs = 0;
+	s->TotalNumberOfActiveWriteback = 0;
+	memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+			if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
+				s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
+
+				if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0)
+					s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
+
+				s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
+				if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)
+					s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
+				if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) {
+					s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
+					// FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup
+					//if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) {
+					s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
+					//}
+				}
+			}
+		}
+	}
+
+	/* Writeback Mode Support Check */
+	mode_lib->ms.support.EnoughWritebackUnits = true;
+	if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) {
+		mode_lib->ms.support.EnoughWritebackUnits = false;
+	}
+	mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg);
+	mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs);
+	mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs);
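+
+	/*
+	 * The counts above are per stream, not per plane: stream_visited[]
+	 * de-duplicates planes that share a stream, and phantom pipes are
+	 * excluded entirely.
+	 */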
+
+	mode_lib->ms.support.ExceededMultistreamSlots = false;
+	mode_lib->ms.support.LinkCapacitySupport = true;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false &&
+			(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) {
+			mode_lib->ms.support.LinkCapacitySupport = false;
+		}
+	}
+
+	mode_lib->ms.support.P2IWith420 = false;
+	mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
+	mode_lib->ms.support.DSC422NativeNotSupported = false;
+	mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
+	mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
+	mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
+	mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
+	mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
+	mode_lib->ms.support.NotEnoughLanesForMSO = false;
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
+			if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true)
+				mode_lib->ms.support.P2IWith420 = true;
+
+			if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support)
+				mode_lib->ms.support.DSC422NativeNotSupported = true;
+
+			if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 ||
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) &&
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) ||
+				((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 ||
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) &&
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0))
+				mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
+
+			// FIXME_STAGE2
+			//if (display_cfg->output.OutputMultistreamEn[k] == 1) {
+			//	if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na)
+			//		mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
+			//	if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0)
+			//		mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
+			//	for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
+			//		if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0)
+			//			mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
+			//	}
+			//}
+
+			if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi ||
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) {
+				// FIXME_STAGE2
+				//if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k)
+				//	mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
+				//for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
+				//	if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n)
+				//		mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
+				//}
+			}
+			if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 ||
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4))
+				mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
+
+			if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) ||
+				(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4))
+				mode_lib->ms.support.NotEnoughLanesForMSO = true;
+		}
+	}
+
+	mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl &&
+			!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+			mode_lib->ms.RequiredDTBCLK[k] = RequiredDTBCLK(
+				mode_lib->ms.RequiresDSC[k],
+				s->PixelClockBackEnd[k],
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+				mode_lib->ms.OutputBpp[k],
+				mode_lib->ms.support.NumberOfDSCSlices[k],
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout);
+
+			if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_ss_clocks_khz.dtbclk / 1000)) {
+				mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
+			}
+		} else {
+			/* Phantom DTBCLK can be calculated different from main because phantom has no DSC and thus
+			 * will have a different output BPP. Ignore phantom DTBCLK requirement and only consider
+			 * non-phantom DTBCLK requirements. In map_mode_to_soc_dpm we choose the highest DTBCLK
+			 * required - by setting phantom dtbclk to 0 we ignore it.
+			 */
+			mode_lib->ms.RequiredDTBCLK[k] = 0;
+		}
+	}
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout);
+
+			if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_ss_clocks_khz.dtbclk / 1000)) {
+				mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
+			}
+		} else {
+			/* Phantom DTBCLK can be calculated differently from main because phantom has no DSC and thus
+			 * will have a different output BPP. Ignore phantom DTBCLK requirement and only consider
+			 * non-phantom DTBCLK requirements. In map_mode_to_soc_dpm we choose the highest DTBCLK
+			 * required - by setting phantom dtbclk to 0 we ignore it.
+			 */
+			mode_lib->ms.RequiredDTBCLK[k] = 0;
+		}
+	}
+
+	mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp ||
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 ||
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
+			if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) {
+				s->DSCFormatFactor = 2;
+			} else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) {
+				s->DSCFormatFactor = 1;
+			} else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
+				s->DSCFormatFactor = 2;
+			} else {
+				s->DSCFormatFactor = 1;
+			}
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
+#endif
+			if (mode_lib->ms.RequiresDSC[k] == true) {
+				s->PixelClockBackEndFactor = 3.0;
+
+				if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
+					s->PixelClockBackEndFactor = 12.0;
+				else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
+					s->PixelClockBackEndFactor = 9.0;
+				else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
+					s->PixelClockBackEndFactor = 6.0;
+
+				mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor;
+				if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) {
+					mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
+				}
+
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
+				DML_LOG_VERBOSE("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
+#endif
+			}
+		}
+	}
+
+	/* Check DSC Unit and Slices Support */
+	mode_lib->ms.support.NotEnoughDSCSlices = false;
+	s->TotalDSCUnitsRequired = 0;
+	mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
+	memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
+
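+	/*
+	 * One DSC engine is needed per ODM segment of a DSC stream, and each
+	 * engine drives at most four slices, so the slice count is checked
+	 * against 4 * NumDSCUnitRequired below.
+	 */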
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (mode_lib->ms.RequiresDSC[k] == true && !s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
+			s->NumDSCUnitRequired = 1;
+
+			if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
+				s->NumDSCUnitRequired = 4;
+			else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
+				s->NumDSCUnitRequired = 3;
+			else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
+				s->NumDSCUnitRequired = 2;
+
+			if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit)
+				mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
+			s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired;
+
+			if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired)
+				mode_lib->ms.support.NotEnoughDSCSlices = true;
+		}
+		s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
+	}
+
+	mode_lib->ms.support.NotEnoughDSCUnits = false;
+	if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) {
+		mode_lib->ms.support.NotEnoughDSCUnits = true;
+	}
+
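+	/*
+	 * DSC inserts a fixed pipeline delay per stream; DSCDelayRequirement()
+	 * below reports it (in pixels) per plane so the later prefetch and
+	 * vstartup math can account for it.
+	 */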
+	/*DSC Delay per state*/
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
+			mode_lib->ms.ODMMode[k],
+			mode_lib->ip.maximum_dsc_bits_per_component,
+			s->OutputBpp[k],
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+			mode_lib->ms.support.NumberOfDSCSlices[k],
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+			((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+			s->PixelClockBackEnd[k]);
+	}
+
+	// Figure out the swath and DET configuration after the num dpp per plane is figured out
+	CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
+	CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode;
+	CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
+
+	// output
+	CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
+	CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
+	CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub;
+	CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub;
+	CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY;
+	CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC;
+	CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY;
+	CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC;
+	CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma;
+	CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma;
+	CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in the plane will use that
+	CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
+	CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
+	CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled;
+	CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3];
+	CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1];
+	CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByte;
+	CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
+	CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport;
+
+	CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
+
+	if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
+		for (k = 0; k < mode_lib->ms.num_active_planes; k++)
+			mode_lib->ms.SurfaceSizeInMALL[k] = 0;
+		mode_lib->ms.support.ExceededMALLSize = 0;
+	} else {
+		CalculateSurfaceSizeInMall(
+			display_cfg,
+			mode_lib->ms.num_active_planes,
+			mode_lib->soc.mall_allocated_for_dcn_mbytes,
+
+			mode_lib->ms.BytePerPixelY,
+			mode_lib->ms.BytePerPixelC,
+			mode_lib->ms.Read256BlockWidthY,
+			mode_lib->ms.Read256BlockWidthC,
+			mode_lib->ms.Read256BlockHeightY,
+			mode_lib->ms.Read256BlockHeightC,
+			mode_lib->ms.MacroTileWidthY,
+			mode_lib->ms.MacroTileWidthC,
+			mode_lib->ms.MacroTileHeightY,
+			mode_lib->ms.MacroTileHeightC,
+
+			/* Output */
+			mode_lib->ms.SurfaceSizeInMALL,
+			&mode_lib->ms.support.ExceededMALLSize);
+	}
+
+	mode_lib->ms.TotalNumberOfDCCActiveDPP = 0;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
+			mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k];
+		}
+	}
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+		s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k];
+		s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
+		s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+		s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+		s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
+		s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
+		s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
+		s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
+		s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
+		s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
+		s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
+		s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
+		s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
+		s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+		s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+		s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
+		s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
+		s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
+		s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
+		s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
+		s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+		s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+		s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+		s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+		s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
+		s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
+		s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+		s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+		s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+		s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
+		s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+		s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
+		s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k];
+		s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k];
+
+		s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
+		s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
+	}
+
+	CalculateVMRowAndSwath_params->display_cfg = display_cfg;
+	CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+	CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
+	CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
+	CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
+	CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
+	CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
+	CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY;
+	CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC;
+	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
+	CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
+	CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+	// output
+	CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded;
+	CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12];
+	CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13];
+	CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
+	CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
+	CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA
+	CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA
+	CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16];
+	CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+	CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17];
+	CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18];
+	CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19];
+	CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20];
+	CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21];
+	CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22];
+	CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
+	CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
+	CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
+	CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
+	CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23];
+	CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24];
+	CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY;
+	CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC;
+	CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
+	CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
+	CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY;
+	CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC;
+	CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
+	CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
+	CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
+	CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
+	CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes;
+	CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame;
+	CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip;
+	CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0];
+	CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
+	CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25];
+	CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded;
+	CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw;
+	CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes;
+	CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
+	CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
+	CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26];
+	CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27];
+	CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28];
+	CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma;
+	CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29];
+	CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30];
+	CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31];
+	CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32];
+	CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma;
+	CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33];
+
+	CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
+
+	mode_lib->ms.support.PTEBufferSizeNotExceeded = true;
+	mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true;
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false)
+			mode_lib->ms.support.PTEBufferSizeNotExceeded = false;
+
+		if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false)
+			mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false;
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]);
+#endif
+	}
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded);
+	DML_LOG_VERBOSE("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded);
+#endif
+
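+	/*
+	 * "Bytes required" below is the data (pixel swaths plus the PTE/meta
+	 * row overheads passed in) that must already sit in the DET to ride
+	 * through the DRAM clock-change blackout without fetching.
+	 */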
+	/* VActive bytes to fetch for UCLK P-State */
+	calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+	calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = mode_lib->ms.num_active_planes;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->ms.NoOfDPP;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = s->meta_row_height_luma;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = s->meta_row_height_chroma;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->ms.dpte_row_height;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->ms.dpte_row_height_chroma;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->ms.BytePerPixelY;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->ms.BytePerPixelC;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->ms.SwathWidthY;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->ms.SwathWidthC;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->ms.SwathHeightY;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->ms.SwathHeightC;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
+
+	/* outputs */
+	calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c;
+
+	calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
+
+	/* Excess VActive bandwidth required to fill DET */
+	calculate_excess_vactive_bandwidth_required(
+		display_cfg,
+		mode_lib->ms.num_active_planes,
+		s->pstate_bytes_required_l,
+		s->pstate_bytes_required_c,
+		/* outputs */
+		mode_lib->ms.excess_vactive_fill_bw_l,
+		mode_lib->ms.excess_vactive_fill_bw_c);
+
+	mode_lib->ms.UrgLatency = CalculateUrgentLatency(
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
+		mode_lib->soc.do_urgent_latency_adjustment,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
+		mode_lib->ms.FabricClock,
+		mode_lib->ms.uclk_freq_mhz,
+		mode_lib->soc.qos_parameters.qos_type,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+	mode_lib->ms.TripToMemory = CalculateTripToMemory(
+		mode_lib->ms.UrgLatency,
+		mode_lib->ms.FabricClock,
+		mode_lib->ms.uclk_freq_mhz,
+		mode_lib->soc.qos_parameters.qos_type,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+	mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory);
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+		bool cursor_not_enough_urgent_latency_hiding = false;
+
+		if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
+			calculate_cursor_req_attributes(
+				display_cfg->plane_descriptors[k].cursor.cursor_width,
+				display_cfg->plane_descriptors[k].cursor.cursor_bpp,
+
+				// output
+				&s->cursor_lines_per_chunk[k],
+				&s->cursor_bytes_per_line[k],
+				&s->cursor_bytes_per_chunk[k],
+				&s->cursor_bytes[k]);
+
+			calculate_cursor_urgent_burst_factor(
+				mode_lib->ip.cursor_buffer_size,
+				display_cfg->plane_descriptors[k].cursor.cursor_width,
+				s->cursor_bytes_per_chunk[k],
+				s->cursor_lines_per_chunk[k],
+				line_time_us,
+				mode_lib->ms.UrgLatency,
+
+				// output
+				&mode_lib->ms.UrgentBurstFactorCursor[k],
+				&cursor_not_enough_urgent_latency_hiding);
+		}
+
+		mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k];
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k);
+		DML_LOG_VERBOSE("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
+#endif
+
+		CalculateUrgentBurstFactor(
+			&display_cfg->plane_descriptors[k],
+			mode_lib->ms.swath_width_luma_ub[k],
+			mode_lib->ms.swath_width_chroma_ub[k],
+			mode_lib->ms.SwathHeightY[k],
+			mode_lib->ms.SwathHeightC[k],
+			line_time_us,
+			mode_lib->ms.UrgLatency,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+			mode_lib->ms.BytePerPixelInDETY[k],
+			mode_lib->ms.BytePerPixelInDETC[k],
+			mode_lib->ms.DETBufferSizeY[k],
+			mode_lib->ms.DETBufferSizeC[k],
+
+			// Output
+			&mode_lib->ms.UrgentBurstFactorLuma[k],
+			&mode_lib->ms.UrgentBurstFactorChroma[k],
+			&mode_lib->ms.NotEnoughUrgentLatencyHiding[k]);
+
+		mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
+	}
+
+	CalculateDCFCLKDeepSleep(
+		display_cfg,
+		mode_lib->ms.num_active_planes,
+		mode_lib->ms.BytePerPixelY,
+		mode_lib->ms.BytePerPixelC,
+		mode_lib->ms.SwathWidthY,
+		mode_lib->ms.SwathWidthC,
+		mode_lib->ms.NoOfDPP,
+		mode_lib->ms.PSCL_FACTOR,
+		mode_lib->ms.PSCL_FACTOR_CHROMA,
+		mode_lib->ms.RequiredDPPCLK,
+		mode_lib->ms.vactive_sw_bw_l,
+		mode_lib->ms.vactive_sw_bw_c,
+		mode_lib->soc.return_bus_width_bytes,
+
+		/* Output */
+		&mode_lib->ms.dcfclk_deepsleep);
+
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+			mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay(
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
+				display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK;
+		} else {
+			mode_lib->ms.WritebackDelayTime[k] = 0.0;
+		}
+	}
+
+	// MaximumVStartup is actually Tvstartup_min in DCN4 programming guide
+	for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+		bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
+		s->MaximumVStartup[k] = CalculateMaxVStartup(
+			mode_lib->ip.ptoi_supported,
+			mode_lib->ip.vblank_nom_default_us,
+			&display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
+			mode_lib->ms.WritebackDelayTime[k]);
+		mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]);
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]);
+#endif
+
+	/* Immediate Flip and MALL parameters */
+	s->ImmediateFlipRequired = false;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip;
+	}
+
+	mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
+			mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
+			((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) &&
+			(display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])));
+	}
+
+	mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
+			((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) ||
+			((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame));
+	}
+
+	s->FullFrameMALLPStateMethod = false;
+	s->SubViewportMALLPStateMethod = false;
+	s->PhantomPipeMALLPStateMethod = false;
+	s->SubViewportMALLRefreshGreaterThan120Hz = false;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
+			s->FullFrameMALLPStateMethod = true;
+		if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) {
+			s->SubViewportMALLPStateMethod = true;
+			if (!display_cfg->overrides.enable_subvp_implicit_pmo) {
+				// For dv, small frame tests will have a very high refresh rate
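+				/* frame rate in Hz = pixel_clock_hz / (h_total * v_total) */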
+				unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 /
+					(double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+					(double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
+				if (refresh_rate > 120)
+					s->SubViewportMALLRefreshGreaterThan120Hz = true;
+			}
+		}
+		if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
+			s->PhantomPipeMALLPStateMethod = true;
+	}
+	mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) ||
+		(s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod);
+	DML_LOG_VERBOSE("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod);
+	DML_LOG_VERBOSE("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod);
+	DML_LOG_VERBOSE("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz);
+	DML_LOG_VERBOSE("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState);
+	DML_LOG_VERBOSE("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index);
+	DML_LOG_VERBOSE("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
+	DML_LOG_VERBOSE("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
+	DML_LOG_VERBOSE("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
+#endif
+
+	mode_lib->ms.support.OutstandingRequestsSupport = true;
+	mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true;
+
+	mode_lib->ms.support.avg_urgent_latency_us
+		= (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
+			* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
+			+ mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
+		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
+
+	mode_lib->ms.support.avg_non_urgent_latency_us
+		= (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
+			* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
+			+ mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
+		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
+
+	mode_lib->ms.support.max_non_urgent_latency_us
+		= mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles
+		/ mode_lib->ms.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
+		+ mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock
+		+ mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock
+		* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+
+		if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
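+			/*
+			 * outstanding_latency_us is how long max_outstanding_reqs worth
+			 * of data lasts at the DCF return rate (DCFCLK *
+			 * return_bus_width bytes per clock); it must cover the average
+			 * (non-)urgent latency for outstanding requests to hide it.
+			 */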
+			outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k]
+				/ (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
+
+			if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
+				mode_lib->ms.support.OutstandingRequestsSupport = false;
+			}
+
+			if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
+				mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
+			}
+
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us);
+			DML_LOG_VERBOSE("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us);
+			DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us);
+#endif
+		}
+
+		if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) {
+			outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k]
+				/ (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
+
+			if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
+				mode_lib->ms.support.OutstandingRequestsSupport = false;
+			}
+
+			if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
+				mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
+			}
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us);
+#endif
+		}
+	}
+
+	memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
+	if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) {
+		for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+			mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0;
+			mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0;
+			mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
+			mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
+			mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
+			mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
+		}
+	} else {
+		for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+			calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+			calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
+			calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
+			calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
+			calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
+			calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+			calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+
+			calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
+			calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
+			calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+			calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
+			calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
+
+			calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+			calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+			calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
+			calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+			calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k];
+			calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k];
+			calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
+			calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
+			calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
+			calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k];
+
+			calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
+			calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+			calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
+			calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+			calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k];
+			calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k];
+			calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
+			calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
+			calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
+			calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k];
+
+			// output
+			calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k];
+			calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k];
+			calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k];
+			calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k];
+
+			calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k];
+			calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k];
+			calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->ms.mcache_row_bytes_per_channel_l[k];
+			calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k];
+			calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k];
+
+			calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k];
+			calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k];
+			calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->ms.mcache_row_bytes_per_channel_c[k];
+			calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k];
+			calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k];
+
+			calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k];
+			calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k];
+			calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k];
+
+			calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
+		}
+
+		calculate_mall_bw_overhead_factor(
+			mode_lib->ms.mall_prefetch_sdp_overhead_factor,
+			mode_lib->ms.mall_prefetch_dram_overhead_factor,
+
+			// input
+			display_cfg,
+			mode_lib->ms.num_active_planes);
+	}
+
+	// Calculate all the bandwidth available
+	// Need another bw for latency evaluation
+	calculate_bandwidth_available(
+		mode_lib->ms.support.avg_bandwidth_available_min, // not used
+		mode_lib->ms.support.avg_bandwidth_available, // not used
+		mode_lib->ms.support.urg_bandwidth_available_min_latency,
+		mode_lib->ms.support.urg_bandwidth_available, // not used
+		mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used
+		mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used
+
+		&mode_lib->soc,
+		display_cfg->hostvm_enable,
+		mode_lib->ms.DCFCLK,
+		mode_lib->ms.FabricClock,
+		mode_lib->ms.dram_bw_mbps);
+
+	calculate_bandwidth_available(
+		mode_lib->ms.support.avg_bandwidth_available_min,
+		mode_lib->ms.support.avg_bandwidth_available,
+		mode_lib->ms.support.urg_bandwidth_available_min,
+		mode_lib->ms.support.urg_bandwidth_available,
+		mode_lib->ms.support.urg_bandwidth_available_vm_only,
+		mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm,
+
+		&mode_lib->soc,
+		display_cfg->hostvm_enable,
+		mode_lib->ms.MaxDCFCLK,
+		mode_lib->ms.MaxFabricClock,
+#ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW
+		mode_lib->ms.dram_bw_mbps);
+#else
+		mode_lib->ms.max_dram_bw_mbps);
+#endif
+
+	// Average BW support check
+	calculate_avg_bandwidth_required(
+		mode_lib->ms.support.avg_bandwidth_required,
+		// input
+		display_cfg,
+		mode_lib->ms.num_active_planes,
+		mode_lib->ms.vactive_sw_bw_l,
+		mode_lib->ms.vactive_sw_bw_c,
+		mode_lib->ms.cursor_bw,
+		mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
+		mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
+		mode_lib->ms.mall_prefetch_dram_overhead_factor,
+		mode_lib->ms.mall_prefetch_sdp_overhead_factor);
+
+	for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram
+		mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1;
+		mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]);
+		mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]);
+	}
+
+	mode_lib->ms.support.AvgBandwidthSupport = true;
+	mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true;
+	for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+		if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) {
+			mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false;
+			DML_LOG_VERBOSE("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k);
+
+		}
+	}
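+	/*
+	 * Average-bandwidth verdict: sys_idle is unconditionally OK (set
+	 * above), sys_active must always fit, and svp_prefetch only matters
+	 * when MALL is actually allocated for DCN.
+	 */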
+	for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+		for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
+			if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) {
+				mode_lib->ms.support.AvgBandwidthSupport = false;
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n));
+#endif
+			}
+		}
+	}
+
+	dml_core_ms_prefetch_check(mode_lib, display_cfg);
+
+	mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us;
+
+	// Re-ordering Buffer Support Check
+	if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
+		if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
+			/ mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) {
+			mode_lib->ms.support.ROBSupport = true;
+		} else {
+			mode_lib->ms.support.ROBSupport = false;
+		}
+	} else {
+		if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
+			mode_lib->ms.support.ROBSupport = true;
+		} else {
+			mode_lib->ms.support.ROBSupport = false;
+		}
+	}
+
+	/* VActive fill time calculations (informative) */
+	calculate_vactive_det_fill_latency(
+		display_cfg,
+		mode_lib->ms.num_active_planes,
+		s->pstate_bytes_required_l,
+		s->pstate_bytes_required_c,
+		mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
+		mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
+		mode_lib->ms.vactive_sw_bw_l,
+		mode_lib->ms.vactive_sw_bw_c,
+		mode_lib->ms.surface_avg_vactive_required_bw,
+		mode_lib->ms.surface_peak_required_bw,
+		/* outputs */
+		mode_lib->ms.dram_change_vactive_det_fill_delay_us);
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us);
+	DML_LOG_VERBOSE("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport);
+#endif
+
+	/* Mode Support, Voltage State and SOC Configuration */
+	{
+		if (mode_lib->ms.support.ScaleRatioAndTapsSupport
+			&& mode_lib->ms.support.SourceFormatPixelAndScanSupport
+			&& mode_lib->ms.support.ViewportSizeSupport
+			&& !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
+			&& !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
+			&& !mode_lib->ms.support.BPPForMultistreamNotIndicated
+			&& !mode_lib->ms.support.MultistreamWithHDMIOreDP
+			&& !mode_lib->ms.support.ExceededMultistreamSlots
+			&& !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
+			&& !mode_lib->ms.support.NotEnoughLanesForMSO
+			&& !mode_lib->ms.support.P2IWith420
+			&& !mode_lib->ms.support.DSC422NativeNotSupported
+			&& mode_lib->ms.support.DSCSlicesODMModeSupported
+			&& !mode_lib->ms.support.NotEnoughDSCUnits
+			&& !mode_lib->ms.support.NotEnoughDSCSlices
+			&& !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
+			&& !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
+			&& !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported
+			&& mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
+			&& !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported
+			&& !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
+			&& mode_lib->ms.support.ROBSupport
+			&& mode_lib->ms.support.OutstandingRequestsSupport
+			&& mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance
+			&& mode_lib->ms.support.DISPCLK_DPPCLK_Support
+			&& mode_lib->ms.support.TotalAvailablePipesSupport
+			&& mode_lib->ms.support.NumberOfOTGSupport
+			&& mode_lib->ms.support.NumberOfHDMIFRLSupport
+			&& mode_lib->ms.support.NumberOfDP2p0Support
+			&& mode_lib->ms.support.EnoughWritebackUnits
+			&& mode_lib->ms.support.WritebackLatencySupport
+			&& mode_lib->ms.support.WritebackScaleRatioAndTapsSupport
+			&& mode_lib->ms.support.CursorSupport
+			&& mode_lib->ms.support.PitchSupport
+			&& !mode_lib->ms.support.ViewportExceedsSurface
+			&& mode_lib->ms.support.PrefetchSupported
+			&& mode_lib->ms.support.EnoughUrgentLatencyHidingSupport
+			&& mode_lib->ms.support.AvgBandwidthSupport
+			&& mode_lib->ms.support.DynamicMetadataSupported
+			&& mode_lib->ms.support.VRatioInPrefetchSupported
+			&& mode_lib->ms.support.PTEBufferSizeNotExceeded
+			&& mode_lib->ms.support.DCCMetaBufferSizeNotExceeded
+			&& !mode_lib->ms.support.ExceededMALLSize
+			&& mode_lib->ms.support.g6_temp_read_support
+			&& ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) {
+			DML_LOG_VERBOSE("DML::%s: mode is supported\n", __func__);
+			mode_lib->ms.support.ModeSupport = true;
+		} else {
+			DML_LOG_VERBOSE("DML::%s: mode is NOT supported\n", __func__);
+			mode_lib->ms.support.ModeSupport = false;
+		}
+	}
+
+	// Since mode_support now works on one particular power state, there is only one state idx (index 0).
+	DML_LOG_VERBOSE("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport);
+	DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+		mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k];
+		mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k];
+	}
+
+	for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+		mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k];
+		mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
+		mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
+		mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k];
+		mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k];
+		mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k];
+
+#if defined(__DML_VBA_DEBUG__)
+		DML_LOG_VERBOSE("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]);
+#endif
+	}
+
+#if defined(__DML_VBA_DEBUG__)
+	if (!mode_lib->ms.support.ModeSupport)
+		dml2_print_mode_support_info(&mode_lib->ms.support, true);
+
+	DML_LOG_VERBOSE("DML::%s: --- DONE --- \n", __func__);
+#endif
+
+	return mode_lib->ms.support.ModeSupport;
+}
+
+unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params)
+{
+	unsigned int result;
+
+	DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__);
+	result = dml_core_mode_support(in_out_params);
+
+	if (result)
+		*in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
+
+	DML_LOG_VERBOSE("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index);
+
+	for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++)
+		DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k,
+			in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
+
+	DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__);
+
+	return result;
+}
+
+static void CalculatePixelDeliveryTimes(
+	const struct dml2_display_cfg *display_cfg,
+	const struct core_display_cfg_support_info *cfg_support_info,
+	unsigned int NumberOfActiveSurfaces,
+	double VRatioPrefetchY[],
+	double VRatioPrefetchC[],
+	unsigned int swath_width_luma_ub[],
+	unsigned int swath_width_chroma_ub[],
+	double PSCL_THROUGHPUT[],
+	double PSCL_THROUGHPUT_CHROMA[],
+	double Dppclk[],
+	unsigned int BytePerPixelC[],
+	unsigned int req_per_swath_ub_l[],
+	unsigned int req_per_swath_ub_c[],
+
+	// Output
+	double DisplayPipeLineDeliveryTimeLuma[],
+	double DisplayPipeLineDeliveryTimeChroma[],
+	double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+	double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+	double DisplayPipeRequestDeliveryTimeLuma[],
+	double DisplayPipeRequestDeliveryTimeChroma[],
+	double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+	double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
+{
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used);
+		DML_LOG_VERBOSE("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz);
+		DML_LOG_VERBOSE("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
+#endif
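+		/*
+		 * Line delivery time: with no vertical downscale (v_ratio <= 1)
+		 * the pipe only needs to keep pace with the display, i.e.
+		 * swath_width * DPPs / h_ratio / pixel_clock; when downscaling
+		 * it is bounded by scaler throughput at DPPCLK instead.
+		 */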
+		if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
+			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
+		} else {
+			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+		}
+
+		if (BytePerPixelC[k] == 0) {
+			DisplayPipeLineDeliveryTimeChroma[k] = 0;
+		} else {
+			if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
+				DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
+			} else {
+				DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+			}
+		}
+
+		if (VRatioPrefetchY[k] <= 1) {
+			DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
+		} else {
+			DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+		}
+
+		if (BytePerPixelC[k] == 0) {
+			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
+		} else {
+			if (VRatioPrefetchC[k] <= 1) {
+				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
+			} else {
+				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+			}
+		}
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
+#endif
+	}
+
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k];
+		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k];
+		if (BytePerPixelC[k] == 0) {
+			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
+		} else {
+			DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k];
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k];
+		}
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]);
+#endif
+	}
+}
+
+static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p)
+{
+	unsigned int meta_chunk_width;
+	unsigned int min_meta_chunk_width;
+	unsigned int meta_chunk_per_row_int;
+	unsigned int meta_row_remainder;
+	unsigned int meta_chunk_threshold;
+	unsigned int meta_chunks_per_row_ub;
+	unsigned int meta_chunk_width_chroma;
+	unsigned int min_meta_chunk_width_chroma;
meta_chunk_per_row_int_chroma; + unsigned int meta_row_remainder_chroma; + unsigned int meta_chunk_threshold_chroma; + unsigned int meta_chunks_per_row_ub_chroma; + unsigned int dpte_group_width_luma; + unsigned int dpte_groups_per_row_luma_ub; + unsigned int dpte_group_width_chroma; + unsigned int dpte_groups_per_row_chroma_ub; + double pixel_clock_mhz; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + if (p->BytePerPixelC[k] == 0) { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + } else { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + } + p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + if (p->BytePerPixelC[k] == 0) { + p->DST_Y_PER_META_ROW_NOM_C[k] = 0; + } else { + p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) { + meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k]; + min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k]; + meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width; + meta_row_remainder = p->meta_row_width[k] % meta_chunk_width; + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k]; + } else { + meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k]; + } + if (meta_row_remainder <= meta_chunk_threshold) { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + } else { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + } + p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio * + p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; + p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; + p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; + if (p->BytePerPixelC[k] == 0) { + p->TimePerChromaMetaChunkNominal[k] = 0; + p->TimePerChromaMetaChunkVBlank[k] = 0; + p->TimePerChromaMetaChunkFlip[k] = 0; + } else { + meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k]; + min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k]; + meta_chunk_per_row_int_chroma = (unsigned 
int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma); + meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma; + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k]; + } else { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k]; + } + if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; + } else { + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; + } + p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; + p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; + p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; + } + } else { + p->TimePerMetaChunkNominal[k] = 0; + p->TimePerMetaChunkVBlank[k] = 0; + p->TimePerMetaChunkFlip[k] = 0; + p->TimePerChromaMetaChunkNominal[k] = 0; + p->TimePerChromaMetaChunkVBlank[k] = 0; + p->TimePerChromaMetaChunkFlip[k] = 0; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]); +#endif + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + if (p->BytePerPixelC[k] == 0) { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + } else { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + pixel_clock_mhz = 
((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k]; + else + p->time_per_tdlut_group[k] = 0; + + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]); + + if (p->display_cfg->gpuvm_enable == true) { + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]); + } else { + dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]); + } + if (p->use_one_row_for_frame[k]) { + dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0)); + } else { + dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0)); + } + if (dpte_groups_per_row_luma_ub <= 2) { + dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; + } + DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); + + p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; + p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; + p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; + if (p->BytePerPixelC[k] == 0) { + p->time_per_pte_group_nom_chroma[k] = 0; + p->time_per_pte_group_vblank_chroma[k] = 0; + p->time_per_pte_group_flip_chroma[k] = 0; + } else { + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]); + } else { + dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]); + } + + if (p->use_one_row_for_frame[k]) { + dpte_groups_per_row_chroma_ub = 
(unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0)); + } else { + dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); + } + if (dpte_groups_per_row_chroma_ub <= 2) { + dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; + } + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); + + p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; + p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; + p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; + } + } else { + p->time_per_pte_group_nom_luma[k] = 0; + p->time_per_pte_group_vblank_luma[k] = 0; + p->time_per_pte_group_flip_luma[k] = 0; + p->time_per_pte_group_nom_chroma[k] = 0; + p->time_per_pte_group_vblank_chroma[k] = 0; + p->time_per_pte_group_flip_chroma[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]); + + DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]); +#endif + } +} // CalculateMetaAndPTETimes + +static void CalculateVMGroupAndRequestTimes( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelC[], + double dst_y_per_vm_vblank[], + double dst_y_per_vm_flip[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + unsigned int vm_group_bytes[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + unsigned int tdlut_pte_bytes_per_frame[], + unsigned int meta_pte_bytes_per_frame_ub_l[], + unsigned int meta_pte_bytes_per_frame_ub_c[], + bool mrq_present, + + // Output + double 
TimePerVMGroupVBlank[],
+	double TimePerVMGroupFlip[],
+	double TimePerVMRequestVBlank[],
+	double TimePerVMRequestFlip[])
+{
+	unsigned int num_group_per_lower_vm_stage = 0;
+	unsigned int num_req_per_lower_vm_stage = 0;
+	unsigned int num_group_per_lower_vm_stage_flip;
+	unsigned int num_group_per_lower_vm_stage_pref;
+	unsigned int num_req_per_lower_vm_stage_flip;
+	unsigned int num_req_per_lower_vm_stage_pref;
+	double line_time;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+#endif
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+		bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present;
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable);
+		DML_LOG_VERBOSE("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
+#endif
+
+		if (display_cfg->gpuvm_enable) {
+			if (display_cfg->gpuvm_max_page_table_levels >= 2) {
+				num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
+
+				if (BytePerPixelC[k] > 0)
+					num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
+			}
+
+			if (dcc_mrq_enable) {
+				if (BytePerPixelC[k] > 0) {
+					num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) +
+						math_ceil2((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1));
+				} else {
+					num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1));
+				}
+			}
+
+			num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage;
+			num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage;
+
+			if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
+				num_group_per_lower_vm_stage_pref += (unsigned int) math_ceil2(tdlut_pte_bytes_per_frame[k] / vm_group_bytes[k], 1);
+				if (display_cfg->gpuvm_max_page_table_levels >= 2)
+					num_group_per_lower_vm_stage_pref += 1; // tdpe0 group
+			}
+
+			if (display_cfg->gpuvm_max_page_table_levels >= 2) {
+				num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
+				if (BytePerPixelC[k] > 0)
+					num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k] / 64;
+			}
+
+			if (dcc_mrq_enable) {
+				num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
+				if (BytePerPixelC[k] > 0)
+					num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
+			}
+
+			num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage;
+			num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage;
+
+			if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && 
display_cfg->gpuvm_enable) { + num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64; + } + + line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; + + if (num_group_per_lower_vm_stage_pref > 0) + TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; + else + TimePerVMGroupVBlank[k] = 0; + + if (num_group_per_lower_vm_stage_flip > 0) + TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; + else + TimePerVMGroupFlip[k] = 0; + + if (num_req_per_lower_vm_stage_pref > 0) + TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; + else + TimePerVMRequestVBlank[k] = 0.0; + if (num_req_per_lower_vm_stage_flip > 0) + TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip; + else + TimePerVMRequestFlip[k] = 0.0; + + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time); + DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %d\n", __func__, k, num_group_per_lower_vm_stage_pref); + DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %d\n", __func__, k, num_group_per_lower_vm_stage_flip); + DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %d\n", __func__, k, num_req_per_lower_vm_stage_pref); + DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %d\n", __func__, k, num_req_per_lower_vm_stage_flip); + + if (display_cfg->gpuvm_max_page_table_levels > 2) { + TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; + TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; + TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; + TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; + } + + } else { + TimePerVMGroupVBlank[k] = 0; + TimePerVMGroupFlip[k] = 0; + TimePerVMRequestVBlank[k] = 0; + TimePerVMRequestFlip[k] = 0; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); +#endif + } +} + +static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateStutterEfficiency_params *p) +{ + struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals; + + unsigned int TotalNumberOfActiveOTG = 0; + double SinglePixelClock = 0; + unsigned int SingleHTotal = 0; + unsigned int SingleVTotal = 0; + bool SameTiming = true; + bool FoundCriticalSurface = false; + + memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) { + if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || 
(!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) { + l->MaximumEffectiveCompressionLuma = 2; + } else { + l->MaximumEffectiveCompressionLuma = 4; + } + l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma); +#endif + l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0; + l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma; + + if (p->ReadBandwidthSurfaceChroma[k] > 0) { + if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) { + l->MaximumEffectiveCompressionChroma = 2; + } else { + l->MaximumEffectiveCompressionChroma = 4; + } + l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma); +#endif + l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1; + l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma; + } + } else { + l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k]; + } + l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]); + } + } + + l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth; + l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth; + +#ifdef __DML_VBA_DEBUG__ + 
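/* Worked example with illustrative values (not from the patch): a surface compressing 4:1 that reads 1000 MB/s plus an uncompressed surface reading 500 MB/s give TotalCompressedReadBandwidth = 1000/4 + 500 = 750 MB/s, so AverageDCCCompressionRate = 1500/750 = 2.0 -- a bandwidth-weighted (harmonic-mean style) average of the per-surface rates. */ + 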
DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth); + DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma); + DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma); + DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction); + + DML_LOG_VERBOSE("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0); + DML_LOG_VERBOSE("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte); +#endif + if (l->AverageDCCZeroSizeFraction == 1) { + l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; + l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate; + + + } else if (l->AverageDCCZeroSizeFraction > 0) { + l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; + l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, + (double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) + + (p->rob_alloc_compressed ? 
math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate, + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)) + : ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); + + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)); + DML_LOG_VERBOSE("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); + DML_LOG_VERBOSE("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)); +#endif + } else { + l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, + (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) + + ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate); +#endif + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); + DML_LOG_VERBOSE("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries); + DML_LOG_VERBOSE("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate); + DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); +#endif + + *p->StutterPeriod = 0; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? 
l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; + l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]); + l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024); + DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY); + DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath); + DML_LOG_VERBOSE("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY); +#endif + + if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) { + bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP; + + FoundCriticalSurface = true; + *p->StutterPeriod = l->DETBufferingTimeY; + l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + l->VActiveTimeCriticalSurface = (isInterlaceTiming ? 
math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+				l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
+				l->SwathWidthYCriticalSurface = p->SwathWidthY[k];
+				l->SwathHeightYCriticalSurface = p->SwathHeightY[k];
+				l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
+				l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
+				l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
+				l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
+				l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
+
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
+				DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, FrameTimeCriticalSurface = %f\n", __func__, k, l->FrameTimeCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface);
+#endif
+			}
+		}
+	}
+
+	// For bounded requests, the stutter period is calculated from the DET size only,
+	// but during the burst some of the return can land in the ROB/compressed buffer:
+	//   if (cdb + rob >= det) the stutter burst is absorbed by the cdb + rob, which sit before decompression
+	//   else
+	//     the cdb + rob part is returned at the compressed rate with urgent bw (ideal bw)
+	//     the det part is returned at the uncompressed rate of 64B/dcfclk
+	//
+	// For unbounded requests, the stutter period should be calculated over the total of CDB+ROB+DET,
+	// so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer" should equal
+	// EffectiveCompressedBufferSize, which is returned at the compressed rate; the rest of the
+	// stutter period comes from the DET and is returned at the uncompressed rate of 64B/dcfclk.
+
+	l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
+	DML_LOG_VERBOSE("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * 
p->TotalDataReadBandwidth) / 1024.0); + DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); + DML_LOG_VERBOSE("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024); + DML_LOG_VERBOSE("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); + DML_LOG_VERBOSE("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth); + DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); +#endif + + l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer + / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + + (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) + / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + + *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); + DML_LOG_VERBOSE("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); + DML_LOG_VERBOSE("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW); + DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); +#endif + l->TotalActiveWriteback = 0; + memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) { + + if (p->display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) + l->TotalActiveWriteback = l->TotalActiveWriteback + 1; + + if (TotalNumberOfActiveOTG == 0) { // first otg + SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; + SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total; + } else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) || + SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total || + SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) { + SameTiming = false; + } + TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; + l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index] = 1; + } + } + } + + if (l->TotalActiveWriteback == 0) { +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); + DML_LOG_VERBOSE("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); + 
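/* Sketch of the formula applied just below (illustrative numbers, not from the patch): efficiency = max(0, 1 - (SRExitTime + StutterBurstTime) / StutterPeriod), scaled by 100; e.g. a 1000us stutter period with a 20us self-refresh exit latency and an 80us refill burst yields 1 - 100/1000 = 90%. */ + 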
DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); +#endif + *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100; + *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100; + *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); + *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); + } else { + *p->StutterEfficiencyNotIncludingVBlank = 0.; + *p->Z8StutterEfficiencyNotIncludingVBlank = 0.; + *p->NumberOfStutterBurstsPerFrame = 0; + *p->Z8NumberOfStutterBurstsPerFrame = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); + DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); +#endif + + if (*p->StutterEfficiencyNotIncludingVBlank > 0) { + if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { + *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank; + } else { + *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100; + } + } else { + *p->StutterEfficiency = 0; + *p->NumberOfStutterBurstsPerFrame = 0; + } + + if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) { + //LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; + if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { + *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank; + } else { + *p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100; + } + } else { + *p->Z8StutterEfficiency = 0.; + *p->Z8NumberOfStutterBurstsPerFrame = 0; + } + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG); + DML_LOG_VERBOSE("DML::%s: SameTiming = %u\n", __func__, SameTiming); + DML_LOG_VERBOSE("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings); + DML_LOG_VERBOSE("DML::%s: LastZ8StutterPeriod = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod : 0); + DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); + DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); + DML_LOG_VERBOSE("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); + DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); + DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); +#endif + + *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); + DML_LOG_VERBOSE("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); +#endif +} + +static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params) +{ + const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg; + const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; + const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info; + struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; + struct dml2_display_cfg_programming *programming = in_out_params->programming; + + struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals; + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; + struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params; + struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; + struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; + struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; + struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params; + struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; + struct 
dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params; + + unsigned int k; + bool must_support_iflip; + const long min_return_uclk_cycles = 83; + const long min_return_fclk_cycles = 75; + const double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0; + double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0; + double max_uclk_mhz = 0; + double min_return_latency_in_DCFCLK_cycles = 0; + + DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__); + + memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch)); + memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program)); + + s->num_active_planes = display_cfg->num_planes; + get_stream_output_bpp(s->OutputBpp, display_cfg); + + mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); + dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); + + mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0; + mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0; + mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); + mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0; + mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0; + s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000; + mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); + mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table); + + for (k = 0; k < s->num_active_planes; ++k) { + unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4); + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 || + cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 || + cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); + + if (cfg_support_info->stream_support_info[stream_index].odms_used > 1) + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); + + switch (cfg_support_info->stream_support_info[stream_index].odms_used) { + case (4): + mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1; + break; + case (3): + mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1; + break; + case (2): + mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1; + break; + default: + if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4) + mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4; + else if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2) + mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2; + else + mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass; + break; + } + } + + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; + mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; + DML_ASSERT(mode_lib->mp.Dppclk[k] 
> 0); + } + + for (k = 0; k < s->num_active_planes; ++k) { + unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; + mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; + DML_LOG_VERBOSE("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); + } + + mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; + mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; + + DML_ASSERT(mode_lib->mp.Dcfclk > 0); + DML_ASSERT(mode_lib->mp.FabricClock > 0); + DML_ASSERT(mode_lib->mp.dram_bw_mbps > 0); + DML_ASSERT(mode_lib->mp.uclk_freq_mhz > 0); + DML_ASSERT(mode_lib->mp.GlobalDPPCLK > 0); + DML_ASSERT(mode_lib->mp.Dispclk > 0); + DML_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0); + DML_ASSERT(s->SOCCLK > 0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); + DML_LOG_VERBOSE("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock); + DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk); + for (k = 0; k < s->num_active_planes; ++k) { + DML_LOG_VERBOSE("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]); + } + DML_LOG_VERBOSE("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK); + DML_LOG_VERBOSE("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep); + DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK); + DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: min_clk_table min_fclk_khz = %ld\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz); + DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config)); + for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) { + DML_LOG_VERBOSE("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]); + DML_LOG_VERBOSE("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + } + + for (k = 0; k < s->num_active_planes; k++) + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); +#endif + + CalculateMaxDETAndMinCompressedBufferSize( + mode_lib->ip.config_return_buffer_size_in_kbytes, + mode_lib->ip.config_return_buffer_segment_size_in_kbytes, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->ip.max_num_dpp, + display_cfg->overrides.hw.force_nom_det_size_kbytes.enable, + display_cfg->overrides.hw.force_nom_det_size_kbytes.value, + mode_lib->ip.dcn_mrq_present, + + /* Output */ + &s->MaxTotalDETInKByte, + &s->NomDETInKByte, + &s->MinCompressedBufferSizeInKByte); + + + PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd); + + for (k = 0; k < s->num_active_planes; ++k) { + 
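/* For each plane, derive the PSCL throughput (luma and chroma) and the DPPCLK a single DPP would need, from the plane0/plane1 scale ratios and tap counts; results land in PSCL_THROUGHPUT[], PSCL_THROUGHPUT_CHROMA[] and DPPCLKUsingSingleDPP[]. */ + 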
CalculateSinglePipeDPPCLKAndSCLThroughput( + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ip.max_dchub_pscl_bw_pix_per_clk, + mode_lib->ip.max_pscl_lb_bw_pix_per_clk, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps, + + /* Output */ + &mode_lib->mp.PSCL_THROUGHPUT[k], + &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k], + &mode_lib->mp.DPPCLKUsingSingleDPP[k]); + } + + for (k = 0; k < s->num_active_planes; ++k) { + CalculateBytePerPixelAndBlockSizes( + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].surface.tiling, + display_cfg->plane_descriptors[k].surface.plane0.pitch, + display_cfg->plane_descriptors[k].surface.plane1.pitch, + + // Output + &mode_lib->mp.BytePerPixelY[k], + &mode_lib->mp.BytePerPixelC[k], + &mode_lib->mp.BytePerPixelInDETY[k], + &mode_lib->mp.BytePerPixelInDETC[k], + &mode_lib->mp.Read256BlockHeightY[k], + &mode_lib->mp.Read256BlockHeightC[k], + &mode_lib->mp.Read256BlockWidthY[k], + &mode_lib->mp.Read256BlockWidthC[k], + &mode_lib->mp.MacroTileHeightY[k], + &mode_lib->mp.MacroTileHeightC[k], + &mode_lib->mp.MacroTileWidthY[k], + &mode_lib->mp.MacroTileWidthC[k], + &mode_lib->mp.surf_linear128_l[k], + &mode_lib->mp.surf_linear128_c[k]); + } + + CalculateSwathWidth( + display_cfg, + false, // ForceSingleDPP + s->num_active_planes, + mode_lib->mp.ODMMode, + mode_lib->mp.BytePerPixelY, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.Read256BlockHeightY, + mode_lib->mp.Read256BlockHeightC, + mode_lib->mp.Read256BlockWidthY, + mode_lib->mp.Read256BlockWidthC, + mode_lib->mp.surf_linear128_l, + mode_lib->mp.surf_linear128_c, + mode_lib->mp.NoOfDPP, + + /* Output */ + mode_lib->mp.req_per_swath_ub_l, + mode_lib->mp.req_per_swath_ub_c, + mode_lib->mp.SwathWidthSingleDPPY, + mode_lib->mp.SwathWidthSingleDPPC, + mode_lib->mp.SwathWidthY, + mode_lib->mp.SwathWidthC, + s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[] + s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[] + mode_lib->mp.swath_width_luma_ub, + mode_lib->mp.swath_width_chroma_ub); + + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); + mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 
display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+		mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+		DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
+		DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
+	}
+
+	CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
+	CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
+	CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte;
+	CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte;
+	CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+	CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
+	CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
+	CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes;
+	CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte;
+	CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
+	CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
+	CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l;
+	CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c;
+	CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
+	CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
+	CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY;
+	CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC;
+	CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY;
+	CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC;
+	CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l;
+	CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c;
+	CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode;
+	CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
+	CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY;
+	CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC;
+	CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY;
+	CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC;
+	
+	CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+	// output
+	CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l;
+	CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c;
+	CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
+	CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
+	CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
+	CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
+	CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY;
+	CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC;
+	CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma;
+	CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma;
+	CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte;
+	CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
+	CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
+	CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
+	CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
+	CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled;
+	CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b;
+	CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5;
+	CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte;
+	CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
+	CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
+
+	// Calculate DET size, swath height here.
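+	// Besides swath height and DET size, the call reports unbounded request mode and the remaining compressed buffer size; CheckGlobalPrefetchAdmissibility below reuses both.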
+	CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
+
+	// DSC Delay
+	for (k = 0; k < s->num_active_planes; ++k) {
+		mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable,
+			mode_lib->mp.ODMMode[k],
+			mode_lib->ip.maximum_dsc_bits_per_component,
+			s->OutputBpp[k],
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+			cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+			display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+			((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+			s->PixelClockBackEnd[k]);
+	}
+
+	// Prefetch
+	if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
+		for (k = 0; k < s->num_active_planes; ++k)
+			mode_lib->mp.SurfaceSizeInTheMALL[k] = 0;
+	} else {
+		CalculateSurfaceSizeInMall(
+			display_cfg,
+			s->num_active_planes,
+			mode_lib->soc.mall_allocated_for_dcn_mbytes,
+			mode_lib->mp.BytePerPixelY,
+			mode_lib->mp.BytePerPixelC,
+			mode_lib->mp.Read256BlockWidthY,
+			mode_lib->mp.Read256BlockWidthC,
+			mode_lib->mp.Read256BlockHeightY,
+			mode_lib->mp.Read256BlockHeightC,
+			mode_lib->mp.MacroTileWidthY,
+			mode_lib->mp.MacroTileWidthC,
+			mode_lib->mp.MacroTileHeightY,
+			mode_lib->mp.MacroTileHeightC,
+
+			/* Output */
+			mode_lib->mp.SurfaceSizeInTheMALL,
+			&s->dummy_boolean[0]); /* bool *ExceededMALLSize */
+	}
+
+	for (k = 0; k < s->num_active_planes; ++k) {
+		s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+		s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k];
+		s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
+		s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+		s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+		s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
+		s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
+		s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
+		s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
+		s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k];
+		s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k];
+		s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k];
+		s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k];
+		s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
+		s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+		s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+		s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
+		s->SurfaceParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
+		s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
+		s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
+		s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
+		s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+		s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+		s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+		s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+		s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
+		s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
+		s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+		s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+		s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+		s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
+		s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+		s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
+		s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k];
+		s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k];
+		s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
+		s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
+	}
+
+	CalculateVMRowAndSwath_params->display_cfg = display_cfg;
+	CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes;
+	CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
+	CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL;
+	CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
+	CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
+	CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
+	CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY;
+	CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC;
+	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
+	CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
+	CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+	// output
+	CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
+	CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
+	CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
+	CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height;
+	CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
+	CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = mode_lib->mp.dpte_row_height_linear;
+	CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma;
+	CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes;
+	CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
+	CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
+	CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
+	CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
+	CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
+	CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
+	CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
+	CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
+	CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
+	CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
+	CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
+	CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l;
+	CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c;
+	CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY;
+	CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC;
+	CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY;
+	CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC;
+	CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY;
+	CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC;
+	CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
+	CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow;
+	CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
+	CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
+	CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes;
+	CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
+	CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip;
+	CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss;
+	CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE;
+	CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE;
+	CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
+	CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw;
+	CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes;
+	CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
+	CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
+	CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width;
+	CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height;
+	CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width;
+	CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height;
+	CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l;
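+	// Corresponding chroma-plane meta request/row geometry outputs.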
+	CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
+	CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
+	CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
+	CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
+	CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c;
+
+	CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
+
+	memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
+	if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) {
+		for (k = 0; k < s->num_active_planes; k++) {
+			mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0;
+			mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0;
+			mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
+			mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
+			mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
+			mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
+		}
+	} else {
+		for (k = 0; k < s->num_active_planes; k++) {
+			calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+			calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
+			calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
+			calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
+			calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
+			calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+			calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+
+			calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
+			calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
+			calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+			calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
+			calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
+
+			calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+			calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+			calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
+			calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+			calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k];
+			calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k];
+			calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
+			calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
+			calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
+			calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k];
+
+			calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
+			calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+			calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
+			calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+			calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k];
+			calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k];
+			calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
+			calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
+			calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
+			calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k];
+
+			// output
+			calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k];
+			calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k];
+			calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k];
+			calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k];
+
+			calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k];
+			calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k];
+			calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->mp.mcache_row_bytes_per_channel_l[k];
+			calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k];
+			calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k];
+
+			calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k];
+			calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k];
+			calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->mp.mcache_row_bytes_per_channel_c[k];
+			calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k];
+			calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k];
+
+			calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k];
+			calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k];
+			calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k];
+			calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
+		}
+
+		calculate_mall_bw_overhead_factor(
+			mode_lib->mp.mall_prefetch_sdp_overhead_factor,
+			mode_lib->mp.mall_prefetch_dram_overhead_factor,
+
+			// input
+			display_cfg,
+			s->num_active_planes);
+	}
+
+	// Calculate all the bandwidth available
+	calculate_bandwidth_available(
+		mode_lib->mp.avg_bandwidth_available_min,
+		mode_lib->mp.avg_bandwidth_available,
+		mode_lib->mp.urg_bandwidth_available_min,
+		mode_lib->mp.urg_bandwidth_available,
+		mode_lib->mp.urg_bandwidth_available_vm_only,
+		mode_lib->mp.urg_bandwidth_available_pixel_and_vm,
+
+		&mode_lib->soc,
+		display_cfg->hostvm_enable,
+		mode_lib->mp.Dcfclk,
+		mode_lib->mp.FabricClock,
+		mode_lib->mp.dram_bw_mbps);
+
+	calculate_hostvm_inefficiency_factor(
+		&s->HostVMInefficiencyFactor,
+		&s->HostVMInefficiencyFactorPrefetch,
+
+		display_cfg->gpuvm_enable,
+		display_cfg->hostvm_enable,
+		mode_lib->ip.remote_iommu_outstanding_translations,
+		mode_lib->soc.max_outstanding_reqs,
+		mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
+		mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
+
+	s->TotalDCCActiveDPP = 0;
+	s->TotalActiveDPP = 0;
+	for (k = 0; k < s->num_active_planes; ++k) {
+		s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k];
+		if (display_cfg->plane_descriptors[k].surface.dcc.enable)
+			s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k];
+	}
+	// Calculate tdlut schedule related terms
+	for (k = 0; k <= s->num_active_planes - 1; k++) {
+		calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk;
+		calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+		calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
+		calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
+		calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
+		calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+		calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+
+		// output
+		calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
+		calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
+		calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
+		calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
+		calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
+		calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
+		calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
+		calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
+	}
+
+	if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
+		s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
+			mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
+			mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
+
+	CalculateExtraLatency(
+		display_cfg,
+		mode_lib->ip.rob_buffer_size_kbytes,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
+		s->ReorderingBytes,
+		mode_lib->mp.Dcfclk,
+		mode_lib->mp.FabricClock,
+		mode_lib->ip.pixel_chunk_size_kbytes,
+		mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active],
+		s->num_active_planes,
+		mode_lib->mp.NoOfDPP,
+		mode_lib->mp.dpte_group_bytes,
+		s->tdlut_bytes_per_group,
+		s->HostVMInefficiencyFactor,
+		s->HostVMInefficiencyFactorPrefetch,
+		mode_lib->soc.hostvm_min_page_size_kbytes,
+		mode_lib->soc.qos_parameters.qos_type,
+		!(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
+		mode_lib->soc.max_outstanding_reqs,
+		mode_lib->mp.request_size_bytes_luma,
+		mode_lib->mp.request_size_bytes_chroma,
+		mode_lib->ip.meta_chunk_size_kbytes,
+		mode_lib->ip.dchub_arb_to_ret_delay,
+		mode_lib->mp.TripToMemory,
+		mode_lib->ip.hostvm_mode,
+
+		// output
+		&mode_lib->mp.ExtraLatency,
+		&mode_lib->mp.ExtraLatency_sr,
+		&mode_lib->mp.ExtraLatencyPrefetch);
+
+	mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep;
+
+	for (k = 0; k < s->num_active_planes; ++k) {
+		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+			mode_lib->mp.WritebackDelay[k] =
+				mode_lib->soc.qos_parameters.writeback.base_latency_us +
+				CalculateWriteBackDelay(
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
+					display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk;
+		} else
+			mode_lib->mp.WritebackDelay[k] = 0;
+	}
+
+	/* VActive bytes to fetch for UCLK P-State */
+	calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+	calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = s->num_active_planes;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->mp.NoOfDPP;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = mode_lib->mp.meta_row_height;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->mp.dpte_row_height;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->mp.dpte_row_height_chroma;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->mp.BytePerPixelY;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->mp.BytePerPixelC;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->mp.SwathWidthY;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->mp.SwathWidthC;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->mp.SwathHeightY;
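+	// The latency to hide is the DRAM clock-change blackout; the outputs are the per-plane byte counts that must be buffered to ride it out.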
+	calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->mp.SwathHeightC;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
+
+	/* outputs */
+	calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l;
+	calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c;
+
+	calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
+
+	/* Excess VActive bandwidth required to fill DET */
+	calculate_excess_vactive_bandwidth_required(
+		display_cfg,
+		s->num_active_planes,
+		s->pstate_bytes_required_l,
+		s->pstate_bytes_required_c,
+		/* outputs */
+		mode_lib->mp.excess_vactive_fill_bw_l,
+		mode_lib->mp.excess_vactive_fill_bw_c);
+
+	mode_lib->mp.UrgentLatency = CalculateUrgentLatency(
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
+		mode_lib->soc.do_urgent_latency_adjustment,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
+		mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
+		mode_lib->mp.FabricClock,
+		mode_lib->mp.uclk_freq_mhz,
+		mode_lib->soc.qos_parameters.qos_type,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+	mode_lib->mp.TripToMemory = CalculateTripToMemory(
+		mode_lib->mp.UrgentLatency,
+		mode_lib->mp.FabricClock,
+		mode_lib->mp.uclk_freq_mhz,
+		mode_lib->soc.qos_parameters.qos_type,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+	mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory);
+
+	mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory(
+		mode_lib->mp.UrgentLatency,
+		mode_lib->mp.FabricClock,
+		mode_lib->mp.uclk_freq_mhz,
+		mode_lib->soc.qos_parameters.qos_type,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
+		mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+	for (k = 0; k < s->num_active_planes; ++k) {
+		bool cursor_not_enough_urgent_latency_hiding = false;
+		s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+			((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+
+		s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
+
+		s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+			mode_lib->mp.NoOfDPP[k],
+			display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
+			display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+			display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+		s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+			mode_lib->mp.NoOfDPP[k],
+			display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
+			display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+			display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+		if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
+			calculate_cursor_req_attributes(
+				display_cfg->plane_descriptors[k].cursor.cursor_width,
+				display_cfg->plane_descriptors[k].cursor.cursor_bpp,
+
+				// output
+				&s->cursor_lines_per_chunk[k],
+				&s->cursor_bytes_per_line[k],
+				&s->cursor_bytes_per_chunk[k],
+				&s->cursor_bytes[k]);
+
+			calculate_cursor_urgent_burst_factor(
+				mode_lib->ip.cursor_buffer_size,
+				display_cfg->plane_descriptors[k].cursor.cursor_width,
+				s->cursor_bytes_per_chunk[k],
+				s->cursor_lines_per_chunk[k],
+				s->line_times[k],
+				mode_lib->mp.UrgentLatency,
+
+				// output
+				&mode_lib->mp.UrgentBurstFactorCursor[k],
+				&cursor_not_enough_urgent_latency_hiding);
+		}
+		mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k];
+
+		CalculateUrgentBurstFactor(
+			&display_cfg->plane_descriptors[k],
+			mode_lib->mp.swath_width_luma_ub[k],
+			mode_lib->mp.swath_width_chroma_ub[k],
+			mode_lib->mp.SwathHeightY[k],
+			mode_lib->mp.SwathHeightC[k],
+			s->line_times[k],
+			mode_lib->mp.UrgentLatency,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+			display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+			mode_lib->mp.BytePerPixelInDETY[k],
+			mode_lib->mp.BytePerPixelInDETC[k],
+			mode_lib->mp.DETBufferSizeY[k],
+			mode_lib->mp.DETBufferSizeC[k],
+
+			/* output */
+			&mode_lib->mp.UrgentBurstFactorLuma[k],
+			&mode_lib->mp.UrgentBurstFactorChroma[k],
+			&mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
+
+		mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
+	}
+
+	for (k = 0; k < s->num_active_planes; ++k) {
+		s->MaxVStartupLines[k] = CalculateMaxVStartup(
+			mode_lib->ip.ptoi_supported,
+			mode_lib->ip.vblank_nom_default_us,
+			&display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
+			mode_lib->mp.WritebackDelay[k]);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]);
+#endif
+	}
+
+	s->immediate_flip_required = false;
+	for (k = 0; k < s->num_active_planes; ++k) {
+		s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip;
+	}
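+	// immediate_flip_required is the OR across planes; the flip-bandwidth checks further down still consult the per-plane flags.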
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required);
+#endif
+
+	if (s->num_active_planes > 1) {
+		CheckGlobalPrefetchAdmissibility_params->num_active_planes = s->num_active_planes;
+		CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
+		CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+		CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+		CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
+		CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
+		CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->mp.SwathHeightY;
+		CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->mp.SwathHeightC;
+		CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+		CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte;
+		CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->mp.DETBufferSizeY;
+		CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->mp.DETBufferSizeC;
+		CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
+		CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
+		CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
+		CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = 0; // don't care
+		CheckGlobalPrefetchAdmissibility_params->Tpre_oto = 0; // don't care
+		CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
+		CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = mode_lib->mp.Dcfclk;
+		CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
+		CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->mp.dst_y_prefetch;
+
+		// if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
+		CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->dummy_boolean[0];
+		CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
+		CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); // don't care about the check output for mode programming
+	}
+
+	{
+		s->DestinationLineTimesForPrefetchLessThan2 = false;
+		s->VRatioPrefetchMoreThanMax = false;
+
+		DML_LOG_VERBOSE("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__);
+
+		for (k = 0; k < s->num_active_planes; ++k) {
+			struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
+
+			DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
+			mode_lib->mp.TWait[k] = CalculateTWait(
+				display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
+				mode_lib->mp.UrgentLatency,
+				mode_lib->mp.TripToMemory,
+				!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
+					get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0);
+
+			myPipe->Dppclk = mode_lib->mp.Dppclk[k];
+			myPipe->Dispclk = mode_lib->mp.Dispclk;
+			myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+			myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
+			myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k];
+			myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
+			myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+			myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+			myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+			myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+			myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
+			myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
+			myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
+			myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
+			myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
+			myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
+			myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
+			myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
+			myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
+			myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+			myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
+			myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+			myPipe->ODMMode = mode_lib->mp.ODMMode[k];
+			myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
+			myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
+			myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
+			myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
+
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
+#endif
+			CalculatePrefetchSchedule_params->display_cfg = display_cfg;
+			CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
+			CalculatePrefetchSchedule_params->myPipe = myPipe;
+			CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k];
+			CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
+			CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
+			CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
+			CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
+			CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
+			CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
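+			// Remaining schedule inputs: vstartup bound, dynamic metadata, PTE/meta row sizes, tdlut and cursor terms.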
+			CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
+			CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
+			CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k];
+			CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
+			CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
+			CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
+			CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
+			CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
+			CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->mp.UrgentLatency;
+			CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch;
+			CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc;
+			CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k];
+			CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k];
+			CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k];
+			CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k];
+			CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k];
+			CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k];
+			CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k];
+			CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k];
+			CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k];
+			CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k];
+			CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->mp.SwathHeightY[k];
+			CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k];
+			CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k];
+			CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory;
+			CalculatePrefetchSchedule_params->Turg = mode_lib->mp.UrgentLatency;
+			CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+			CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
+			CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
+			CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
+			CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
+			CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
+			CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
+			CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
+			CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+			CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+			CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k];
+			CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k];
+			CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
+			CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->mp.vactive_sw_bw_l[k];
+			CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->mp.vactive_sw_bw_c[k];
+
+			// output
+			CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k];
+			CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k];
+			CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k];
+			CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k];
+			CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k];
+			CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k];
+			CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k];
+			CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k];
+			CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k];
+			CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &s->dummy_single_array[0][k];
+			CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k];
+			CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k];
+			CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k];
+			CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k];
+			CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k];
+			CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k];
+			CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k];
+			CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
+			CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
+			CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
+			CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
+			CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
+			CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
+			CalculatePrefetchSchedule_params->VUpdateOffsetPix = &mode_lib->mp.VUpdateOffsetPix[k];
+			CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k];
+			CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k];
+			CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k];
+			CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
+			CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
+			CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
+			CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0];
+
+			mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
+
+			if (s->impacted_dst_y_pre[k] > 0)
+				mode_lib->mp.impacted_prefetch_margin_us[k] = (mode_lib->mp.dst_y_prefetch[k] - s->impacted_dst_y_pre[k]) * s->line_times[k];
+			else
+				mode_lib->mp.impacted_prefetch_margin_us[k] = 0;
+
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
+#endif
+			mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k];
+		} // for k
+
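+		// Fold the per-plane schedule results into a single pass/fail verdict for this vstartup.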
+		mode_lib->mp.PrefetchModeSupported = true;
+		for (k = 0; k < s->num_active_planes; ++k) {
+			if (mode_lib->mp.NoTimeToPrefetch[k] == true ||
+				mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] ||
+				mode_lib->mp.DSTYAfterScaler[k] > 8) {
+				DML_LOG_VERBOSE("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 8)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]);
+				mode_lib->mp.PrefetchModeSupported = false;
+			}
+			if (mode_lib->mp.dst_y_prefetch[k] < 2)
+				s->DestinationLineTimesForPrefetchLessThan2 = true;
+
+			if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
+				mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
+				s->VRatioPrefetchMoreThanMax = true;
+				DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchY=%f (should not be > %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
+				DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchC=%f (should not be > %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
+				DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
+			}
+
+			if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) {
+				DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
+				mode_lib->mp.PrefetchModeSupported = false;
+			}
+		}
+
+		if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) {
+			DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
+			DML_LOG_VERBOSE("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
+			mode_lib->mp.PrefetchModeSupported = false;
+		}
+
+		DML_LOG_VERBOSE("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__,
+			mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup);
+
+		// Prefetch schedule OK, now check prefetch bw
+		if (mode_lib->mp.PrefetchModeSupported == true) {
+			for (k = 0; k < s->num_active_planes; ++k) {
+				double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+					((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+				CalculateUrgentBurstFactor(
+					&display_cfg->plane_descriptors[k],
+					mode_lib->mp.swath_width_luma_ub[k],
+					mode_lib->mp.swath_width_chroma_ub[k],
+					mode_lib->mp.SwathHeightY[k],
+					mode_lib->mp.SwathHeightC[k],
+					line_time_us,
+					mode_lib->mp.UrgentLatency,
+					mode_lib->mp.VRatioPrefetchY[k],
+					mode_lib->mp.VRatioPrefetchC[k],
+					mode_lib->mp.BytePerPixelInDETY[k],
+					mode_lib->mp.BytePerPixelInDETC[k],
+					mode_lib->mp.DETBufferSizeY[k],
+					mode_lib->mp.DETBufferSizeC[k],
+					/* Output */
+					&mode_lib->mp.UrgentBurstFactorLumaPre[k],
+					&mode_lib->mp.UrgentBurstFactorChromaPre[k],
+					&mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
+
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]);
+
+				DML_LOG_VERBOSE("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
+
+				DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]);
+#endif
+			}
+
+			for (k = 0; k <= s->num_active_planes - 1; k++)
+				mode_lib->mp.final_flip_bw[k] = 0;
+
+			calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->mp.urg_vactive_bandwidth_required;
+			calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required;
+			calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->mp.urg_bandwidth_required_qual;
+			calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required;
+			calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
+			calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
+
+			calculate_peak_bandwidth_params->display_cfg = display_cfg;
+			calculate_peak_bandwidth_params->inc_flip_bw = 0;
+			calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
+			calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
+			calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
+			calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
+			calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
+			calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
+			calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
+			calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
+
+			calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
+			calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
+			calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
+			calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
+			calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0];
+			calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
+			calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
+			calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
+			calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
+			calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
+			calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
+			calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
+			calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
+			calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
+			calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
+			calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
+			calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
+			calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
+			calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
+
+			calculate_peak_bandwidth_required(
+				&mode_lib->scratch,
+				calculate_peak_bandwidth_params);
+
+			// Check urg peak bandwidth against available urg bw
+			// check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
+			check_urgent_bandwidth_support(
+				&mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth
+				&mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall
+				&s->dummy_boolean[1], // vactive bw ok
+				&mode_lib->mp.PrefetchModeSupported, // prefetch bw ok
+
+				mode_lib->soc.mall_allocated_for_dcn_mbytes,
+				mode_lib->mp.non_urg_bandwidth_required,
+				mode_lib->mp.urg_vactive_bandwidth_required,
+				mode_lib->mp.urg_bandwidth_required,
mode_lib->mp.urg_bandwidth_available); + + if (!mode_lib->mp.PrefetchModeSupported) + DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__); + + for (k = 0; k < s->num_active_planes; ++k) { + if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) { + DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); + mode_lib->mp.PrefetchModeSupported = false; + } + } + } // prefetch schedule ok + + // Prefetch schedule and prefetch bw ok, now check flip bw + if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw + + mode_lib->mp.BandwidthAvailableForImmediateFlip = + get_bandwidth_available_for_immediate_flip( + dml2_core_internal_soc_state_sys_active, + mode_lib->mp.urg_bandwidth_required_qual, // no flip + mode_lib->mp.urg_bandwidth_available); + mode_lib->mp.TotImmediateFlipBytes = 0; + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].immediate_flip) { + s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(s->HostVMInefficiencyFactor, + mode_lib->mp.vm_bytes[k], + mode_lib->mp.PixelPTEBytesPerRow[k], + mode_lib->mp.meta_row_bytes[k]); + } else { + s->per_pipe_flip_bytes[k] = 0; + } + mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k]; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k = %u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes); +#endif + } + for (k = 0; k < s->num_active_planes; ++k) { + CalculateFlipSchedule( + &mode_lib->scratch, + display_cfg->plane_descriptors[k].immediate_flip, + 0, // use_lb_flip_bw + s->HostVMInefficiencyFactor, + s->Tvm_trips_flip[k], + s->Tr0_trips_flip[k], + s->Tvm_trips_flip_rounded[k], + s->Tr0_trips_flip_rounded[k], + display_cfg->gpuvm_enable, + mode_lib->mp.vm_bytes[k], + mode_lib->mp.PixelPTEBytesPerRow[k], + mode_lib->mp.BandwidthAvailableForImmediateFlip, + mode_lib->mp.TotImmediateFlipBytes, + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->mp.Tno_bw[k], + mode_lib->mp.dpte_row_height[k], + mode_lib->mp.dpte_row_height_chroma[k], + mode_lib->mp.use_one_row_for_frame_flip[k], + mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, + s->per_pipe_flip_bytes[k], + mode_lib->mp.meta_row_bytes[k], + mode_lib->mp.meta_row_height[k], + mode_lib->mp.meta_row_height_chroma[k], + mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, + + // Output + &mode_lib->mp.dst_y_per_vm_flip[k], + &mode_lib->mp.dst_y_per_row_flip[k], + &mode_lib->mp.final_flip_bw[k], + &mode_lib->mp.ImmediateFlipSupportedForPipe[k]); + } + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required 
= s->dummy_bw; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; + calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 1; + calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0]; + calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma; + calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); + + calculate_immediate_flip_bandwidth_support( + &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip + &mode_lib->mp.ImmediateFlipSupported, // bool* 
flip_bandwidth_support_ok + + dml2_core_internal_soc_state_sys_active, + mode_lib->mp.urg_bandwidth_required_flip, + mode_lib->mp.non_urg_bandwidth_required_flip, + mode_lib->mp.urg_bandwidth_available); + + if (!mode_lib->mp.ImmediateFlipSupported) + DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for flip!\n", __func__); + + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) { + mode_lib->mp.ImmediateFlipSupported = false; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k); +#endif + } + } + } else { // flip or prefetch not supported + mode_lib->mp.ImmediateFlipSupported = false; + } + + // consider flip support okay if the flip bw is ok, or when the user doesn't require an iflip and there is no host vm + must_support_iflip = display_cfg->hostvm_enable || s->immediate_flip_required; + mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported)); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported); + for (k = 0; k < s->num_active_planes; ++k) + DML_LOG_VERBOSE("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); + DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported); + DML_LOG_VERBOSE("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); +#endif + DML_LOG_VERBOSE("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]); + } + + for (k = 0; k < s->num_active_planes; ++k) + DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + + if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) { + DML_LOG_VERBOSE("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__); + } else { + DML_LOG_VERBOSE("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__); + + // DCC Configuration + for (k = 0; k < s->num_active_planes; ++k) { +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); +#endif + CalculateDCCConfiguration( + display_cfg->plane_descriptors[k].surface.dcc.enable, + display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown, + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].surface.plane0.width, + display_cfg->plane_descriptors[k].surface.plane1.width, + display_cfg->plane_descriptors[k].surface.plane0.height, + display_cfg->plane_descriptors[k].surface.plane1.height, + s->NomDETInKByte, + mode_lib->mp.Read256BlockHeightY[k], + mode_lib->mp.Read256BlockHeightC[k], + display_cfg->plane_descriptors[k].surface.tiling, + mode_lib->mp.BytePerPixelY[k], + mode_lib->mp.BytePerPixelC[k], + mode_lib->mp.BytePerPixelInDETY[k], + mode_lib->mp.BytePerPixelInDETC[k], + display_cfg->plane_descriptors[k].composition.rotation_angle, + + /* Output */ + &mode_lib->mp.RequestLuma[k], + &mode_lib->mp.RequestChroma[k], + &mode_lib->mp.DCCYMaxUncompressedBlock[k], + &mode_lib->mp.DCCCMaxUncompressedBlock[k], + &mode_lib->mp.DCCYMaxCompressedBlock[k], + &mode_lib->mp.DCCCMaxCompressedBlock[k], +
&mode_lib->mp.DCCYIndependentBlock[k], + &mode_lib->mp.DCCCIndependentBlock[k]); + } + + //Watermarks and NB P-State/DRAM Clock Change Support + s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency; + s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency; + s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr; + s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; + s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; + s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; + s->mmSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; + s->mmSOCParameters.USRRetrainingLatency = 0; + s->mmSOCParameters.SMNLatency = 0; + s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index); + s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->mp.uclk_freq_mhz, mode_lib->mp.FabricClock, in_out_params->min_clk_index); + s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->mp.FabricClock; + s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; + + CalculateWatermarks_params->display_cfg = display_cfg; + CalculateWatermarks_params->USRRetrainingRequired = false; + CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk; + CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; + CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = s->SOCCLK; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; + CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY; + CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC; + CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY; + CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC; + 
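/*
+ * The remaining inputs below are per-surface prefetch and DET geometry;
+ * CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport() then derives the
+ * watermark set plus the global DRAM and FCLK change support flags.
+ */ +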
CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler; + CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte; + CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height; + CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma; + CalculateWatermarks_params->DPPPerSurface = mode_lib->mp.NoOfDPP; + + // Output + CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark; + CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport; + CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported; + CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL; + CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport; + CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported; + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport; + CalculateWatermarks_params->g6_temp_read_support = &mode_lib->mp.g6_temp_read_support; + CalculateWatermarks_params->VActiveLatencyHidingMargin = 0; + CalculateWatermarks_params->VActiveLatencyHidingUs = 0; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); + + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); + mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark); + } else { + mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0; + mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0; + } + } + + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines); + + DML_LOG_VERBOSE("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); + DML_LOG_VERBOSE("DML::%s: DEBUG PixelClock = %ld kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); + + //Display Pipeline Delivery Time in Prefetch, Groups + CalculatePixelDeliveryTimes( + display_cfg, + cfg_support_info, + s->num_active_planes, + mode_lib->mp.VRatioPrefetchY, + mode_lib->mp.VRatioPrefetchC, + mode_lib->mp.swath_width_luma_ub, 
+ mode_lib->mp.swath_width_chroma_ub, + mode_lib->mp.PSCL_THROUGHPUT, + mode_lib->mp.PSCL_THROUGHPUT_CHROMA, + mode_lib->mp.Dppclk, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.req_per_swath_ub_l, + mode_lib->mp.req_per_swath_ub_c, + + /* Output */ + mode_lib->mp.DisplayPipeLineDeliveryTimeLuma, + mode_lib->mp.DisplayPipeLineDeliveryTimeChroma, + mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch, + mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch, + mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma, + mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma, + mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch, + mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch); + + CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch; + CalculateMetaAndPTETimes_params->display_cfg = display_cfg; + CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame; + CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank; + CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip; + CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY; + CalculateMetaAndPTETimes_params->BytePerPixelC = mode_lib->mp.BytePerPixelC; + CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height; + CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma; + CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; + CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY; + CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC; + CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY; + CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY; + CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC; + CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC; + CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub; + CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub; + CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub; + CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes; + CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes; + CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width; + CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma; + CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height; + CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma; + CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width; + CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma; + CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height; + CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma; + + CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group; + CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = 
mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L; + CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C; + CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma; + CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma; + CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma; + CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma; + CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma; + CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma; + CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L; + CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C; + CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal; + CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal; + CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank; + CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank; + CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip; + CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = mode_lib->mp.TimePerChromaMetaChunkFlip; + + CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params); + + CalculateVMGroupAndRequestTimes( + display_cfg, + s->num_active_planes, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.dst_y_per_vm_vblank, + mode_lib->mp.dst_y_per_vm_flip, + mode_lib->mp.dpte_row_width_luma_ub, + mode_lib->mp.dpte_row_width_chroma_ub, + mode_lib->mp.vm_group_bytes, + mode_lib->mp.dpde0_bytes_per_frame_ub_l, + mode_lib->mp.dpde0_bytes_per_frame_ub_c, + s->tdlut_pte_bytes_per_frame, + mode_lib->mp.meta_pte_bytes_per_frame_ub_l, + mode_lib->mp.meta_pte_bytes_per_frame_ub_c, + mode_lib->ip.dcn_mrq_present, + + /* Output */ + mode_lib->mp.TimePerVMGroupVBlank, + mode_lib->mp.TimePerVMGroupFlip, + mode_lib->mp.TimePerVMRequestVBlank, + mode_lib->mp.TimePerVMRequestFlip); + + // VStartup Adjustment + for (k = 0; k < s->num_active_planes; ++k) { + bool isInterlaceTiming; + + mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency; + if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable) + mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k]; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); +#endif + s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); + DML_LOG_VERBOSE("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, 
mode_lib->mp.MinTTUVBlank[k]); +#endif + + mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin; + if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) { + mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin; + } + + isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported); + + // The actual positioning of the vstartup + mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]); + + s->dlg_vblank_start = ((isInterlaceTiming ? math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) : + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); + s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0; + s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k]; + + if (s->blank_lines_remaining < 0) { + DML_LOG_VERBOSE("ERROR: Vstartup is larger than vblank!?\n"); + s->blank_lines_remaining = 0; + DML_ASSERT(0); + } + mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup; + + // debug only + if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + mode_lib->mp.VReadyOffsetPix[k]) / (double) display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <= + (isInterlaceTiming ? 
+ math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) : + (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) { + mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true; + } else { + mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, HTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total); + DML_LOG_VERBOSE("DML::%s: k=%u, VTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); + DML_LOG_VERBOSE("DML::%s: k=%u, VActive = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active); + DML_LOG_VERBOSE("DML::%s: k=%u, VFrontPorch = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); + DML_LOG_VERBOSE("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]); +#endif + } + + //Maximum Bandwidth Used + mode_lib->mp.TotalWRBandwidth = 0; + for (k = 0; k < display_cfg->num_streams; ++k) { + s->WRBandwidth = 0; + if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) { + s->WRBandwidth = display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_height + * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_width / + (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height + / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000)) + * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 
4.0 : 8.0); + mode_lib->mp.TotalWRBandwidth = mode_lib->mp.TotalWRBandwidth + s->WRBandwidth; + } + } + + mode_lib->mp.TotalDataReadBandwidth = 0; + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k]; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); +#endif + } + + CalculateStutterEfficiency_params->display_cfg = display_cfg; + CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte; + CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; + CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries; + CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries; + CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes; + CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth; + CalculateStutterEfficiency_params->DCFCLK = mode_lib->mp.Dcfclk; + CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active]; + CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b; + CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs; + CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark; + CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark; + CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank; + CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP; + CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; + CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY; + CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; + CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY; + CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY; + CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC; + CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY; + CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY; + CalculateStutterEfficiency_params->BlockHeight256BytesC = 
mode_lib->mp.Read256BlockHeightC; + CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC; + CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock; + CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.vactive_sw_bw_l; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.vactive_sw_bw_c; + CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; + CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw; + CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present; + + // output + CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank; + CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency; + CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame; + CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank; + CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency; + CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; + CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod; + CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; + + // Stutter Efficiency + CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params); + +#ifdef __DML_VBA_ALLOW_DELTA__ + // Calculate z8 stutter eff assuming 0 reserved space + CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0; + CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0; + + CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase; + CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase; + CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase; + CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase; + + // Stutter Efficiency + CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params); +#else + mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank; + mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency; + mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; + mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod; +#endif + } // PrefetchAndImmediateFlipSupported + + max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0; + min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz; + mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles; + mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles); + 
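/*
+ * Deep sleep hysteresis is 3/4 of a pixel chunk expressed in 64-byte
+ * returns, less the minimum return latency, floored at 32 DCFCLK cycles:
+ * e.g. an 8 KiB chunk gives 8192 * 3 / 4 / 64 = 96 cycles before the
+ * latency term is subtracted. The assert below keeps the result within
+ * what is presumably an 8-bit register field.
+ */ +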
DML_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz); + DML_LOG_VERBOSE("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz); + DML_LOG_VERBOSE("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: min_return_uclk_cycles = %ld\n", __func__, min_return_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: min_return_fclk_cycles = %ld\n", __func__, min_return_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles); + DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis); + DML_LOG_VERBOSE("DML::%s: --- END --- \n", __func__); +#endif + return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported); +} + +bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) +{ + DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__); + bool result = dml_core_mode_programming(in_out_params); + + DML_LOG_VERBOSE("DML::%s: result = %0d\n", __func__, result); + DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__); + return result; +} + +void dml2_core_calcs_get_dpte_row_height( + unsigned int *dpte_row_height, + struct dml2_core_internal_display_mode_lib *mode_lib, + bool is_plane1, + enum dml2_source_format_class SourcePixelFormat, + enum dml2_swizzle_mode SurfaceTiling, + enum dml2_rotation_angle ScanDirection, + unsigned int pitch, + unsigned int GPUVMMinPageSizeKBytes) +{ + unsigned int BytePerPixelY; + unsigned int BytePerPixelC; + double BytePerPixelInDETY; + double BytePerPixelInDETC; + unsigned int BlockHeight256BytesY; + unsigned int BlockHeight256BytesC; + unsigned int BlockWidth256BytesY; + unsigned int BlockWidth256BytesC; + unsigned int MacroTileWidthY; + unsigned int MacroTileWidthC; + unsigned int MacroTileHeightY; + unsigned int MacroTileHeightC; + bool surf_linear_128_l = false; + bool surf_linear_128_c = false; + + CalculateBytePerPixelAndBlockSizes( + SourcePixelFormat, + SurfaceTiling, + pitch, + pitch, + + /* Output */ + &BytePerPixelY, + &BytePerPixelC, + &BytePerPixelInDETY, + &BytePerPixelInDETC, + &BlockHeight256BytesY, + &BlockHeight256BytesC, + &BlockWidth256BytesY, + &BlockWidth256BytesC, + &MacroTileHeightY, + &MacroTileHeightC, + &MacroTileWidthY, + &MacroTileWidthC, + &surf_linear_128_l, + &surf_linear_128_c); + + unsigned int BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY; + unsigned int BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY; + unsigned int BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY; + unsigned int MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY; + unsigned int MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY; + unsigned int PTEBufferSizeInRequests = is_plane1 ? 
mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML: %s: is_plane1 = %u\n", __func__, is_plane1); + DML_LOG_VERBOSE("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel); + DML_LOG_VERBOSE("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); + DML_LOG_VERBOSE("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); + DML_LOG_VERBOSE("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); + DML_LOG_VERBOSE("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); + DML_LOG_VERBOSE("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); + DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); + DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); + DML_LOG_VERBOSE("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); +#endif + unsigned int dummy_integer[21]; + + mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportStationary = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCEnable = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.NumberOfDPPs = 1; + mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockHeight256Bytes = BlockHeight256Bytes; + mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockWidth256Bytes = BlockWidth256Bytes; + mode_lib->scratch.calculate_vm_and_row_bytes_params.SourcePixelFormat = SourcePixelFormat; + mode_lib->scratch.calculate_vm_and_row_bytes_params.SurfaceTiling = SurfaceTiling; + mode_lib->scratch.calculate_vm_and_row_bytes_params.BytePerPixel = BytePerPixel; + mode_lib->scratch.calculate_vm_and_row_bytes_params.RotationAngle = ScanDirection; + mode_lib->scratch.calculate_vm_and_row_bytes_params.SwathWidth = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportHeight = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportXStart = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportYStart = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMEnable = 1; + mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = 4; + mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = GPUVMMinPageSizeKBytes; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = PTEBufferSizeInRequests; + mode_lib->scratch.calculate_vm_and_row_bytes_params.Pitch = pitch; + mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileWidth = MacroTileWidth; + mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileHeight = MacroTileHeight; + mode_lib->scratch.calculate_vm_and_row_bytes_params.is_phantom = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCMetaPitch = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.mrq_present = 0; + + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &dummy_integer[1]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &dummy_integer[2]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub = &dummy_integer[3]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height = dpte_row_height; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_linear = &dummy_integer[4]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = 
&dummy_integer[5]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &dummy_integer[6]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &dummy_integer[7]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_width = &dummy_integer[8]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_height = &dummy_integer[9]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &dummy_integer[11]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &dummy_integer[12]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PTERequestSize = &dummy_integer[13]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &dummy_integer[14]; + + mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_bytes = &dummy_integer[15]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestWidth = &dummy_integer[16]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestHeight = &dummy_integer[17]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_width = &dummy_integer[18]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_height = &dummy_integer[19]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &dummy_integer[20]; + + // just supply enough parameters to calculate dpte + CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); +#endif +} + +static bool is_dual_plane(enum dml2_source_format_class source_format) +{ + bool ret_val = false; + + if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) + ret_val = true; + + return ret_val; +} + +static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) +{ + unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; + return plane_idx; +} + +static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs) +{ + double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ?
(double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + + wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); + wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); + wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); + wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); + wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + wm_regs->usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); + wm_regs->refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.UrgentLatency * refclk_freq_in_mhz); + wm_regs->refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.MetaTripToMemory * refclk_freq_in_mhz); + wm_regs->frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000); + wm_regs->frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000); + wm_regs->frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000); +} + +static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) +{ + if (a == 0) + return 0; + + return (math_log2_approx(a) - subtrahend); +} + +void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p) +{ + int dst_x_offset = (int) ((p->cursor_x_position + (p->cursor_stereo_en == 0 ? 0 : math_max2(p->cursor_primary_offset, p->cursor_secondary_offset)) - + (p->cursor_hotspot_x * (p->cursor_2x_magnify == 0 ? 1 : 2))) * p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio); + cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? 
dst_x_offset : 0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position); + DML_LOG_VERBOSE("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz); + DML_LOG_VERBOSE("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz); + DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset); + DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset); +#endif + + cursor_dlg_regs->chunk_hdl_adjust = 3; + cursor_dlg_regs->dst_y_offset = 0; + + cursor_dlg_regs->qos_level_fixed = 8; + cursor_dlg_regs->qos_ramp_disable = 0; +} + +static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_display_mode_lib *mode_lib, + unsigned int pipe_idx) +{ + unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); + enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format; + enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling; + bool dual_plane = is_dual_plane((enum dml2_source_format_class)(source_format)); + + unsigned int pixel_chunk_bytes = 0; + unsigned int min_pixel_chunk_bytes = 0; + unsigned int dpte_group_bytes = 0; + unsigned int mpte_group_bytes = 0; + + unsigned int p1_pixel_chunk_bytes = 0; + unsigned int p1_min_pixel_chunk_bytes = 0; + unsigned int p1_dpte_group_bytes = 0; + unsigned int p1_mpte_group_bytes = 0; + + unsigned int detile_buf_plane1_addr = 0; + unsigned int detile_buf_size_in_bytes; + double stored_swath_l_bytes; + double stored_swath_c_bytes; + bool is_phantom_pipe; + + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); + + pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024); + min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes); + + if (pixel_chunk_bytes == 64 * 1024) + min_pixel_chunk_bytes = 0; + + dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx)); + mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx)); + + p1_pixel_chunk_bytes = pixel_chunk_bytes; + p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes; + p1_dpte_group_bytes = dpte_group_bytes; + p1_mpte_group_bytes = mpte_group_bytes; + + if (source_format == dml2_rgbe_alpha) + p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024); + + rq_regs->unbounded_request_enabled = dml_get_unbounded_request_enabled(mode_lib); + rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10); + rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10); + + if (min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_l.min_chunk_size = 0; + else + rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1); + + if (p1_min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_c.min_chunk_size = 0; + else + rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1); + + rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6); + rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6); + rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6); + rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6); + + 
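/*
+ * The RQ size fields above are log2-encoded by
+ * log_and_substract_if_non_zero(): chunk_size = log2(bytes) - 10, i.e.
+ * log2 of the size in KiB, so an 8 KiB pixel chunk is programmed as
+ * 13 - 10 = 3, while the dpte/mpte group sizes use a 64-byte unit via
+ * the subtrahend of 6.
+ */ +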
detile_buf_size_in_bytes = (unsigned int)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024); + + if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) { + unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx)); +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear); +#endif + DML_ASSERT(p0_pte_row_height_linear >= 8); + + rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3; + if (dual_plane) { + unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx)); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear); +#endif + if (sw_mode == dml2_sw_linear) { + DML_ASSERT(p1_pte_row_height_linear >= 8); + } + rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3; + } + } else { + rq_regs->rq_regs_l.pte_row_height_linear = 0; + rq_regs->rq_regs_c.pte_row_height_linear = 0; + } + + rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_l(mode_lib, pipe_idx), 0); + rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_c(mode_lib, pipe_idx), 0); + + // FIXME_DCN4, programming guide has dGPU condition + if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + + stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx); + stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx); + is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx); + + // Note: detile_buf_plane1_addr is in unit of 1KB + if (dual_plane) { + if (is_phantom_pipe) { + detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma + } else { + if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { + detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } else { + detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); + DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); + DML_LOG_VERBOSE("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); +#endif + //DML_LOG_VERBOSE_rq_regs_st(rq_regs); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for 
pipe[%d] done\n", __func__, pipe_idx); +} + +static void rq_dlg_get_dlg_reg( + struct dml2_core_internal_scratch *s, + struct dml2_display_dlg_regs *disp_dlg_regs, + struct dml2_display_ttu_regs *disp_ttu_regs, + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_display_mode_lib *mode_lib, + const unsigned int pipe_idx) +{ + struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals; + + memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals)); + + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); + + l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); + DML_ASSERT(l->plane_idx < DML2_MAX_PLANES); + + l->source_format = dml2_444_8; + l->odm_mode = dml2_odm_mode_bypass; + l->dual_plane = false; + l->htotal = 0; + l->hactive = 0; + l->hblank_end = 0; + l->vblank_end = 0; + l->interlaced = false; + l->pclk_freq_in_mhz = 0.0; + l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + l->ref_freq_to_pix_freq = 0.0; + + if (l->plane_idx < DML2_MAX_PLANES) { + + l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing; + l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format; + l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx]; + + l->dual_plane = is_dual_plane(l->source_format); + + l->htotal = l->timing->h_total; + l->hactive = l->timing->h_active; + l->hblank_end = l->timing->h_blank_end; + l->vblank_end = l->timing->v_blank_end; + l->interlaced = l->timing->interlaced; + l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000; + l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz; + + DML_LOG_VERBOSE("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx); + DML_LOG_VERBOSE("DML_DLG: %s: htotal = %d\n", __func__, l->htotal); + DML_LOG_VERBOSE("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: soc.refclk_mhz = %d\n", __func__, mode_lib->soc.dchub_refclk_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + DML_LOG_VERBOSE("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced); + + DML_ASSERT(l->refclk_freq_in_mhz != 0); + DML_ASSERT(l->pclk_freq_in_mhz != 0); + DML_ASSERT(l->ref_freq_to_pix_freq < 4.0); + + // Need to figure out which side of odm combine we're in + // Assume the pipe instance under the same plane is in order + + if (l->odm_mode == dml2_odm_mode_bypass) { + disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq); + } else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) { + // find out how many pipe are in this plane + l->num_active_pipes = mode_lib->mp.num_active_pipes; + l->first_pipe_idx_in_plane = DML2_MAX_PLANES; + l->pipe_idx_in_combine = 0; // pipe index within the plane + l->odm_combine_factor = 2; + + if (l->odm_mode == dml2_odm_mode_combine_3to1) + l->odm_combine_factor = 3; + else if (l->odm_mode == dml2_odm_mode_combine_4to1) + l->odm_combine_factor = 4; + + for (unsigned int i = 0; i < l->num_active_pipes; i++) { 
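+ /*
+ * Scan all active pipes for the lowest pipe index mapped to this plane;
+ * the offset from that first pipe then selects which horizontal slice
+ * of the ODM-combined hactive this pipe drives.
+ */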
+ if (dml_get_plane_idx(mode_lib, i) == l->plane_idx) {
+ if (i < l->first_pipe_idx_in_plane) {
+ l->first_pipe_idx_in_plane = i;
+ }
+ }
+ }
+ l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes pipes in the same plane have contiguous indexing (i.e. plane 0 uses pipes 0 and 1, plane 1 uses pipes 2 and 3, etc.)
+
+ disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq);
+ DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx);
+ DML_LOG_VERBOSE("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane);
+ DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine);
+ DML_LOG_VERBOSE("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor);
+ }
+ DML_LOG_VERBOSE("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end);
+
+ DML_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13));
+
+ disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19));
+ disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8));
+ disp_dlg_regs->dlg_vblank_end = l->interlaced ? (l->vblank_end / 2) : l->vblank_end; // 15 bits
+
+ l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]];
+ l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]);
+
+ DML_LOG_VERBOSE("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank);
+ DML_LOG_VERBOSE("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start);
+ DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
+
+ l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]);
+ disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0;
+
+ DML_LOG_VERBOSE("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0);
+
+ l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
+ l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
+
+ DML_LOG_VERBOSE("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler);
+ DML_LOG_VERBOSE("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler);
+
+ l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]];
+ l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
+ l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
+ l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]];
+ l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]];
+
+ DML_LOG_VERBOSE("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch);
+ DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip);
+ DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip);
+ DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank);
+ DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__,
l->dst_y_per_row_vblank); + + if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) { + DML_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank)); + } + + l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]]; + l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]]; + + DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c); + + // Active + l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l); + + l->refcyc_per_line_delivery_pre_c = 0.0; + l->refcyc_per_line_delivery_c = 0.0; + + if (l->dual_plane) { + l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c); + } + + disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + + l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l); + + l->refcyc_per_req_delivery_pre_c = 0.0; + l->refcyc_per_req_delivery_c = 0.0; + if (l->dual_plane) { + l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c); + } + + // TTU - Cursor + DML_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1); + + // Assign to register structures + disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2)); + DML_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18)); + + disp_dlg_regs->dst_y_after_scaler = 
l->dst_y_after_scaler; // in terms of line + disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk + disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2)); + + disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19)); + disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19)); + + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + + disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10)); + disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10)); + + l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]]; + l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2)); + + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l); + disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c); + 
disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l); + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int)(l->refcyc_per_pte_group_vblank_c); + disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l); + disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c); + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1); + disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1); + + l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]]; + l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2)); + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int)(l->refcyc_per_meta_chunk_nom_l); + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c); + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l); + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c); + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l); + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c); + + disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group); + disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off + + disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10)); + disp_ttu_regs->qos_level_low_wm = 0; + + disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq); + + disp_ttu_regs->qos_level_flip = 14; + disp_ttu_regs->qos_level_fixed_l = 8; + disp_ttu_regs->qos_level_fixed_c = 8; + disp_ttu_regs->qos_ramp_disable_l = 
0; + disp_ttu_regs->qos_ramp_disable_c = 0; + disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz); + + // CHECK for HW registers' range, DML_ASSERT or clamp + DML_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13)); + if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1); + + + DML_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); + DML_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13)); + + if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) { + DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1); + l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1; + } + if (l->dual_plane) { + if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) { + DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1); + l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1; + } + } + + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1); + if (l->dual_plane) { + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1); + } + DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13)); + if (l->dual_plane) { + DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13)); + } + + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14)); + DML_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14)); + DML_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24)); + + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); + + } +} + +static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param) +{ + double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? 
(double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
+
+ arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs;
+ arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this is set to max
+ arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4;
+ arb_param->sdpif_request_rate_limit = arb_param->sdpif_request_rate_limit < 96 ? 96 : arb_param->sdpif_request_rate_limit;
+ arb_param->sat_level_us = 60;
+ arb_param->hvm_max_qos_commit_threshold = 0xf;
+ arb_param->hvm_min_req_outstand_commit_threshold = 0xa;
+ arb_param->compbuf_reserved_space_kbytes = dml_get_compbuf_reserved_space_64b(mode_lib) * 64 / 1024;
+ arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
+ arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib);
+ arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
+ arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding);
+ DML_LOG_VERBOSE("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit);
+ DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes);
+ DML_LOG_VERBOSE("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req);
+ DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis);
+#endif
+
+}
+
+void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out)
+{
+ rq_dlg_get_wm_regs(display_cfg, mode_lib, out);
+}
+
+void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out)
+{
+ rq_dlg_get_arb_params(display_cfg, mode_lib, out);
+}
+
+void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg,
+ struct dml2_core_internal_display_mode_lib *mode_lib,
+ struct dml2_dchub_per_pipe_register_set *out, int pipe_index)
+{
+ rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index);
+ rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index);
+ out->det_size = dml_get_det_buffer_size_kbytes(mode_lib, pipe_index) / mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
+}
+
+void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index)
+{
+ out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index);
+ out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index);
+ out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index);
+ out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index);
+ out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index);
+}
+
+void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct
dml2_per_stream_programming *out, int pipe_index) +{ + dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index); +} + +void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, + const struct display_configuation_with_meta *display_cfg, + struct dmub_cmd_fams2_global_config *fams2_global_config) +{ + fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required; + + if (fams2_global_config->features.bits.enable) { + fams2_global_config->features.bits.enable_stall_recovery = true; + fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START; + + fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us; + fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us; + fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us; + fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us; + + fams2_global_config->num_streams = display_cfg->display_config.num_streams; + } +} + +void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, + const struct display_configuation_with_meta *display_cfg, + union dmub_cmd_fams2_config *fams2_base_programming, + union dmub_cmd_fams2_config *fams2_sub_programming, + enum dml2_pstate_method pstate_method, + int plane_index) +{ + const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index]; + const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index]; + const struct dml2_pstate_meta *stream_pstate_meta = &display_cfg->stage3.stream_pstate_meta[plane_descriptor->stream_index]; + + struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base; + union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state; + + unsigned int i; + + if (display_cfg->display_config.overrides.all_streams_blanked) { + /* stream is blanked, so do nothing */ + return; + } + + /* from display configuration */ + base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; + base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; + base_programming->vblank_start = (uint16_t)(stream_pstate_meta->nom_vtotal - + stream_descriptor->timing.v_front_porch); + base_programming->vblank_end = (uint16_t)(stream_pstate_meta->nom_vtotal - + stream_descriptor->timing.v_front_porch - + stream_descriptor->timing.v_active); + base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled; + + /* from meta */ + base_programming->otg_vline_time_ns = + (unsigned int)(stream_pstate_meta->otg_vline_time_us * 1000.0); + base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_pstate_meta->scheduling_delay_otg_vlines; + base_programming->contention_delay_otg_vlines = (uint8_t)stream_pstate_meta->contention_delay_otg_vlines; + base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_pstate_meta->vertical_interrupt_ack_delay_otg_vlines; + base_programming->drr_keepout_otg_vline = (uint16_t)(stream_pstate_meta->nom_vtotal - + stream_descriptor->timing.v_front_porch - + stream_pstate_meta->method_drr.programming_delay_otg_vlines); + base_programming->allow_to_target_delay_otg_vlines = 
(uint8_t)stream_pstate_meta->allow_to_target_delay_otg_vlines; + base_programming->max_vtotal = (uint16_t)stream_pstate_meta->max_vtotal; + + /* from core */ + base_programming->config.bits.min_ttu_vblank_usable = true; + for (i = 0; i < display_cfg->display_config.num_planes; i++) { + /* check if all planes support p-state in blank */ + if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index && + mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) { + base_programming->config.bits.min_ttu_vblank_usable = false; + break; + } + } + + switch (pstate_method) { + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: + /* legacy vactive */ + base_programming->type = FAMS2_STREAM_TYPE_VACTIVE; + sub_programming->legacy.vactive_det_fill_delay_otg_vlines = + (uint8_t)stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; + base_programming->allow_start_otg_vline = + (uint16_t)stream_pstate_meta->method_vactive.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_pstate_meta->method_vactive.common.allow_end_otg_vline; + base_programming->config.bits.clamp_vtotal_min = true; + break; + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: + /* legacy vblank */ + base_programming->type = FAMS2_STREAM_TYPE_VBLANK; + base_programming->allow_start_otg_vline = + (uint16_t)stream_pstate_meta->method_vblank.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_pstate_meta->method_vblank.common.allow_end_otg_vline; + base_programming->config.bits.clamp_vtotal_min = true; + break; + case dml2_pstate_method_fw_drr: + /* drr */ + base_programming->type = FAMS2_STREAM_TYPE_DRR; + sub_programming->drr.programming_delay_otg_vlines = + (uint8_t)stream_pstate_meta->method_drr.programming_delay_otg_vlines; + sub_programming->drr.nom_stretched_vtotal = + (uint16_t)stream_pstate_meta->method_drr.stretched_vtotal; + base_programming->allow_start_otg_vline = + (uint16_t)stream_pstate_meta->method_drr.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_pstate_meta->method_drr.common.allow_end_otg_vline; + /* drr only clamps to vtotal min for single display */ + base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1; + sub_programming->drr.only_stretch_if_required = true; + break; + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: + /* subvp */ + base_programming->type = FAMS2_STREAM_TYPE_SUBVP; + sub_programming->subvp.vratio_numerator = + (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0); + sub_programming->subvp.vratio_denominator = 1000; + sub_programming->subvp.programming_delay_otg_vlines = + (uint8_t)stream_pstate_meta->method_subvp.programming_delay_otg_vlines; + sub_programming->subvp.prefetch_to_mall_otg_vlines = + (uint8_t)stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines; + sub_programming->subvp.phantom_vtotal = + (uint16_t)stream_pstate_meta->method_subvp.phantom_vtotal; + sub_programming->subvp.phantom_vactive = + (uint16_t)stream_pstate_meta->method_subvp.phantom_vactive; + sub_programming->subvp.config.bits.is_multi_planar = + plane_descriptor->surface.plane1.height > 0; + sub_programming->subvp.config.bits.is_yuv420 = + plane_descriptor->pixel_format == dml2_420_8 || + plane_descriptor->pixel_format == dml2_420_10 || + plane_descriptor->pixel_format == 
dml2_420_12; + + base_programming->allow_start_otg_vline = + (uint16_t)stream_pstate_meta->method_subvp.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_pstate_meta->method_subvp.common.allow_end_otg_vline; + base_programming->config.bits.clamp_vtotal_min = true; + break; + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_na: + case dml2_pstate_method_count: + default: + /* this should never happen */ + break; + } +} + +void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx) +{ + unsigned int n; + + out->num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, plane_idx); + out->num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, plane_idx); + out->shift_granularity.p0 = dml_get_plane_mcache_shift_granularity_plane0(mode_lib, plane_idx); + out->shift_granularity.p1 = dml_get_plane_mcache_shift_granularity_plane1(mode_lib, plane_idx); + + for (n = 0; n < out->num_mcaches_plane0; n++) + out->mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, plane_idx, n); + + for (n = 0; n < out->num_mcaches_plane1; n++) + out->mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, plane_idx, n); + + out->last_slice_sharing.mall_comb_mcache_p0 = dml_get_plane_mall_comb_mcache_l(mode_lib, plane_idx); + out->last_slice_sharing.mall_comb_mcache_p1 = dml_get_plane_mall_comb_mcache_c(mode_lib, plane_idx); + out->last_slice_sharing.plane0_plane1 = dml_get_plane_lc_comb_mcache(mode_lib, plane_idx); + out->informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, plane_idx); + out->informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, plane_idx); + + out->valid = true; +} + +void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index) +{ + *out = dml_get_surface_size_in_mall_bytes(mode_lib, pipe_index); +} + +void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx) +{ + out->mall_svp_size_requirement_ways = 0; + + out->nominal_vblank_pstate_latency_hiding_us = + (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]); + + out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx]; + + out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx]; + + out->dram_change_vactive_det_fill_delay_us = (unsigned int)math_ceil(mode_lib->ms.dram_change_vactive_det_fill_delay_us[plane_idx]); +} + +void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index) +{ + double phantom_processing_delay_pix; + unsigned int phantom_processing_delay_lines; + unsigned int phantom_min_v_active_lines; + unsigned int phantom_v_active_lines; + unsigned int phantom_v_startup_lines; + 
unsigned int phantom_v_blank_lines;
+ unsigned int main_v_blank_lines;
+ unsigned int rem;
+
+ phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) *
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000));
+ phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total);
+ dml2_core_div_rem(phantom_processing_delay_pix,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total,
+ &rem);
+ if (rem)
+ phantom_processing_delay_lines++;
+
+ phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index);
+ phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) /
+ display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio);
+ phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines;
+
+ // phantom_vblank = vbp(vstartup) + vfp(always 1) + vsync(can be 1), clamped so it does not exceed main_vblank
+ phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1;
+ main_v_blank_lines = display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_active;
+ if (phantom_v_blank_lines > main_v_blank_lines)
+ phantom_v_blank_lines = main_v_blank_lines;
+
+ out->phantom_v_active = phantom_v_active_lines;
+ // phantom_vtotal = vactive + vblank
+ out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines;
+
+ out->phantom_min_v_active = phantom_min_v_active_lines;
+ out->phantom_v_startup = phantom_v_startup_lines;
+
+ out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000;
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us);
+ DML_LOG_VERBOSE("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us);
+ DML_LOG_VERBOSE("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines);
+ DML_LOG_VERBOSE("DML::%s: vblank_reserved_time_us = %u\n", __func__, out->vblank_reserved_time_us);
+#endif
+}
+
+void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out)
+{
+ unsigned int k, n;
+
+ out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport;
+ out->informative.mode_support_info.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupport;
+ out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport;
+ out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport;
+ out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport;
+ out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420;
+ out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false;
+ out->informative.mode_support_info.DSC422NativeNotSupported =
mode_lib->ms.support.DSC422NativeNotSupported; + out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion; + out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated; + out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated; + out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP; + out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink; + out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO; + out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport; + out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport; + out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support; + out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport; + out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport; + out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport; + out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface; + out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false; + out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe; + out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen; + out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState; + out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize; + out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits; + out->informative.mode_support_info.temp_read_or_ppt_support = mode_lib->ms.support.temp_read_or_ppt_support; + out->informative.mode_support_info.g6_temp_read_support = mode_lib->ms.support.g6_temp_read_support; + + out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots; + out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits; + out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices; + out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport; + out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported; + out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported; + out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport; + + out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport; + out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport; + out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = 
mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance; + out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded; + out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded; + + out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport; + out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport; + out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport; + + out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported; + out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported; + out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported; + out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support; + out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport; + out->informative.mode_support_info.NumberOfTDLUT33cubeSupport = mode_lib->ms.support.NumberOfTDLUT33cubeSupport; + out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport; + out->informative.mode_support_info.qos_bandwidth_support = mode_lib->ms.support.qos_bandwidth_support; + out->informative.mode_support_info.dcfclk_support = mode_lib->ms.support.dcfclk_support; + + for (k = 0; k < out->display_config.num_planes; k++) { + + out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k]; + out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k]; + out->informative.mode_support_info.ODMMode[k] = mode_lib->ms.support.ODMMode[k]; + out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k]; + out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k]; + out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k]; + out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k]; + out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k]; + + if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl; + + if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_unknown) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown; + else if 
(mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr20) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_8x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_16x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_16x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_20x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_20x4; + + out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k]; + out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k]; + } + + out->informative.watermarks.urgent_us = dml_get_wm_urgent(mode_lib); + out->informative.watermarks.writeback_urgent_us = dml_get_wm_writeback_urgent(mode_lib); + out->informative.watermarks.writeback_pstate_us = dml_get_wm_writeback_dram_clock_change(mode_lib); + out->informative.watermarks.writeback_fclk_pstate_us = dml_get_wm_writeback_fclk_change(mode_lib); + + out->informative.watermarks.cstate_exit_us = dml_get_wm_stutter_exit(mode_lib); + out->informative.watermarks.cstate_enter_plus_exit_us = dml_get_wm_stutter_enter_exit(mode_lib); + out->informative.watermarks.z8_cstate_exit_us = dml_get_wm_z8_stutter_exit(mode_lib); + out->informative.watermarks.z8_cstate_enter_plus_exit_us = dml_get_wm_z8_stutter_enter_exit(mode_lib); + out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib); + 
out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib); + out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib); + out->informative.watermarks.temp_read_or_ppt_watermark_us = dml_get_wm_temp_read_or_ppt(mode_lib); + + out->informative.mall.total_surface_size_in_mall_bytes = 0; + out->informative.dpp.total_num_dpps_required = 0; + for (k = 0; k < out->display_config.num_planes; ++k) { + out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k]; + out->informative.dpp.total_num_dpps_required += mode_lib->mp.NoOfDPP[k]; + } + + out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk; + out->informative.qos.urgent_latency_us = dml_get_urgent_latency(mode_lib); + + out->informative.qos.max_urgent_latency_us = dml_get_max_urgent_latency_us(mode_lib); + out->informative.qos.avg_non_urgent_latency_us = dml_get_avg_non_urgent_latency_us(mode_lib); + out->informative.qos.avg_urgent_latency_us = dml_get_avg_urgent_latency_us(mode_lib); + + out->informative.qos.wm_memory_trip_us = dml_get_wm_memory_trip(mode_lib); + out->informative.qos.meta_trip_memory_us = dml_get_meta_trip_memory_us(mode_lib); + out->informative.qos.fraction_of_urgent_bandwidth = dml_get_fraction_of_urgent_bandwidth(mode_lib); + out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(mode_lib); + out->informative.qos.fraction_of_urgent_bandwidth_mall = dml_get_fraction_of_urgent_bandwidth_mall(mode_lib); + + out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_required_sdp(mode_lib); + out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_required_dram(mode_lib); + out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_required_sdp(mode_lib); + out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_required_dram(mode_lib); + + out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_available_sdp(mode_lib); + out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_available_dram(mode_lib); + out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_available_sdp(mode_lib); + out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_available_dram(mode_lib); + + out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_available_sdp(mode_lib); + out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_available_dram(mode_lib); + out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps = dml_get_sys_active_urg_bw_available_dram_vm_only(mode_lib); + + out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_available_sdp(mode_lib); + out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram(mode_lib); + out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram_vm_only(mode_lib); + + out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp(mode_lib); + out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram(mode_lib); + 
out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp(mode_lib); + out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram(mode_lib); + + out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp(mode_lib); + out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram(mode_lib); + out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp(mode_lib); + out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram(mode_lib); + + out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp_flip(mode_lib); + out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram_flip(mode_lib); + out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp_flip(mode_lib); + out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram_flip(mode_lib); + + out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp_flip(mode_lib); + out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram_flip(mode_lib); + out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp_flip(mode_lib); + out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram_flip(mode_lib); + + out->informative.crb.comp_buffer_size_kbytes = dml_get_comp_buffer_size_kbytes(mode_lib); + out->informative.crb.UnboundedRequestEnabled = dml_get_unbounded_request_enabled(mode_lib); + + out->informative.crb.compbuf_reserved_space_64b = dml_get_compbuf_reserved_space_64b(mode_lib); + out->informative.misc.hw_debug5 = dml_get_hw_debug5(mode_lib); + out->informative.misc.dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib); + + out->informative.power_management.stutter_efficiency = dml_get_stutter_efficiency_no_vblank(mode_lib); + out->informative.power_management.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib); + out->informative.power_management.stutter_num_bursts = dml_get_stutter_num_bursts(mode_lib); + + out->informative.power_management.z8.stutter_efficiency = dml_get_stutter_efficiency_z8(mode_lib); + out->informative.power_management.z8.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib); + out->informative.power_management.z8.stutter_num_bursts = dml_get_stutter_num_bursts_z8(mode_lib); + out->informative.power_management.z8.stutter_period = dml_get_stutter_period(mode_lib); + + out->informative.power_management.z8.bestcase.stutter_efficiency = dml_get_stutter_efficiency_z8_bestcase(mode_lib); + out->informative.power_management.z8.bestcase.stutter_num_bursts = dml_get_stutter_num_bursts_z8_bestcase(mode_lib); + out->informative.power_management.z8.bestcase.stutter_period = dml_get_stutter_period_bestcase(mode_lib); + + out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib); + + out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib); + + 
out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib); + + out->informative.misc.LowestPrefetchMargin = 10 * 1000 * 1000; + + for (k = 0; k < out->display_config.num_planes; k++) { + + if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us) + && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us) + && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)) + out->informative.misc.PrefetchMode[k] = 0; + else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us) + && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)) + out->informative.misc.PrefetchMode[k] = 1; + else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us) + out->informative.misc.PrefetchMode[k] = 2; + else + out->informative.misc.PrefetchMode[k] = 3; + + out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k]; + out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k]; + out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k]; + out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k]; + out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k]; + out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k]; + out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k]; + out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k]; + out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k]; + out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k]; + out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k]; + out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k]; + out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k]; + out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k]; + out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k]; + out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k]; + out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k]; + out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k]; + out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k]; + out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k]; + out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]; + out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k]; + out->informative.plane_info[k].swath_width_plane0 = 
mode_lib->mp.SwathWidthY[k]; + out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k]; + out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k]; + out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k]; + out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k]; + out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k]; + out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k]; + out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k]; + out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k]; + out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k]; + out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k]; + out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k]; + out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k]; + out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k]; + out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k]; + out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k]; + out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k]; + out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k]; + + out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k]; + out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k]; + out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k]; + out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k]; + out->informative.misc.time_per_pte_group_vblank_luma[k] = mode_lib->mp.time_per_pte_group_vblank_luma[k]; + out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k]; + out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k]; + out->informative.misc.time_per_pte_group_flip_chroma[k] = mode_lib->mp.time_per_pte_group_flip_chroma[k]; + out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k]; + out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k]; + out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k]; + out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k]; + out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k]; + out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k]; + out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = 
		mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k];
+		out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k];
+		out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k];
+
+		out->informative.misc.WritebackRequiredBandwidth = mode_lib->mp.TotalWRBandwidth / 1000.0;
+		out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k];
+		out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k];
+		out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k];
+		out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k];
+		out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k];
+		out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k];
+		out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k];
+
+		if (mode_lib->mp.impacted_prefetch_margin_us[k] < out->informative.misc.LowestPrefetchMargin)
+			out->informative.misc.LowestPrefetchMargin = mode_lib->mp.impacted_prefetch_margin_us[k];
+	}
+
+	// For this DV informative layer, all pipes in the same plane just use the same id;
+	// the optimization and helper layer will be added later on.
+	// This only works when the "mcache" is large enough to fit everything without thrashing the cache.
+	for (k = 0; k < out->display_config.num_planes; k++) {
+		out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, k);
+		out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, k);
+
+		for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) {
+			out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, k, n);
+			out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k;
+		}
+
+		out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, k);
+		out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, k);
+
+		for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) {
+			out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, k, n);
+			out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k;
+		}
+	}
+	out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib);
+
+	if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
+		if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
+			/ mode_lib->ms.support.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) {
+			out->informative.misc.ROBUrgencyAvoidance = true;
+		} else {
+			out->informative.misc.ROBUrgencyAvoidance = false;
+		}
+	} else {
+		out->informative.misc.ROBUrgencyAvoidance = true;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.h
new file mode 100644
index 000000000000..27ef0e096b25
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.h
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_CORE_DCN4_CALCS_H__
+#define __DML2_CORE_DCN4_CALCS_H__
+
+#include "dml2_core_shared_types.h"
+
+struct dml2_dchub_watermark_regs;
+struct dml2_display_arb_regs;
+struct dml2_per_stream_programming;
+struct dml2_dchub_per_pipe_register_set;
+struct core_plane_support_info;
+struct core_stream_support_info;
+struct dml2_cursor_dlg_regs;
+struct display_configuation_with_meta;
+
+unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params);
+bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params);
+void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out);
+void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out);
+void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *dml2_display_cfg, struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_per_pipe_register_set *out, int pipe_index);
+void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index);
+void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index);
+void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_index);
+void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_index);
+void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out);
+void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index);
+void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index);
+void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, union dmub_cmd_fams2_config *fams2_base_programming, union dmub_cmd_fams2_config *fams2_sub_programming, enum dml2_pstate_method pstate_method, int plane_index);
+void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config);
+
+void dml2_core_calcs_get_dpte_row_height(unsigned int *dpte_row_height, struct dml2_core_internal_display_mode_lib *mode_lib, bool is_plane1, enum dml2_source_format_class SourcePixelFormat, enum dml2_swizzle_mode SurfaceTiling, enum dml2_rotation_angle ScanDirection, unsigned int pitch, unsigned int GPUVMMinPageSizeKBytes);
+void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p);
+const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type);
+const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.c
new file mode 100644
index 000000000000..cc4f0663c6d6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_core_factory.h"
+#include "dml2_core_dcn4.h"
+#include "dml2_external_lib_deps.h"
+
+bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance *out)
+{
+	bool result = false;
+
+	if (!out)
+		return false;
+
+	memset(out, 0, sizeof(struct dml2_core_instance));
+
+	out->project_id = project_id;
+
+	switch (project_id) {
+	case dml2_project_dcn4x_stage1:
+		result = false;
+		break;
+	case dml2_project_dcn4x_stage2:
+	case dml2_project_dcn4x_stage2_auto_drr_svp:
+		out->initialize = &core_dcn4_initialize;
+		out->mode_support = &core_dcn4_mode_support;
+		out->mode_programming = &core_dcn4_mode_programming;
+		out->populate_informative = &core_dcn4_populate_informative;
+		out->calculate_mcache_allocation = &core_dcn4_calculate_mcache_allocation;
+		result = true;
+		break;
+	case dml2_project_invalid:
+	default:
+		break;
+	}
+
+	return result;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.h
new file mode 100644
index 000000000000..411c514fe65c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_CORE_FACTORY_H__
+#define __DML2_CORE_FACTORY_H__
+
+#include "dml2_internal_shared_types.h"
+#include "dml_top_types.h"
+
+bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance *out);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h
new file mode 100644
index 000000000000..051c31ec2f0e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h
@@ -0,0 +1,2335 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
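For orientation, dml2_core_create() in dml2_core_factory.c above is a small factory: it maps a dml2_project_id onto a vtable of core entry points, and only the DCN4.x stage-2 projects are backed by an implementation, so callers must check the return value before touching the instance. A minimal caller sketch follows; the wrapper function and the dml2_core_initialize_in_out parameter type are illustrative assumptions, not part of this patch:

	/* Hypothetical caller of the factory above; everything local here is
	 * illustration only. */
	static bool example_create_core(void)
	{
		struct dml2_core_instance core;
		struct dml2_core_initialize_in_out init_params = { 0 }; /* assumed parameter type */

		if (!dml2_core_create(dml2_project_dcn4x_stage2, &core))
			return false; /* stage1 and invalid project IDs leave the vtable unpopulated */

		/* All subsequent core operations go through the pointers the factory set. */
		return core.initialize(&init_params);
	}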
+ +#ifndef __DML2_CORE_SHARED_TYPES_H__ +#define __DML2_CORE_SHARED_TYPES_H__ + +#include "dml2_external_lib_deps.h" +#include "dml_top_display_cfg_types.h" +#include "dml_top_types.h" + +#define __DML_VBA_DEBUG__ +#define __DML2_CALCS_MAX_VRATIO_PRE_OTO__ 4.0 // 0); + return dividend / divisor; + +} + +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 1; + break; + case dml2_420_10: + val = 1; + break; + case dml2_420_12: + val = 1; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + case dml2_422_planar_8: + val = 0; + break; + case dml2_422_planar_10: + val = 0; + break; + case dml2_422_planar_12: + val = 0; + break; + case dml2_422_packed_8: + val = 0; + break; + case dml2_422_packed_10: + val = 0; + break; + case dml2_422_packed_12: + val = 0; + break; + default: + DML_ASSERT(0); + break; + } + return val; +} + +bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 0; + break; + case dml2_420_10: + val = 0; + break; + case dml2_420_12: + val = 0; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + case dml2_422_planar_8: + val = 1; + break; + case dml2_422_planar_10: + val = 1; + break; + case dml2_422_planar_12: + val = 1; + break; + case dml2_422_packed_8: + val = 0; + break; + case dml2_422_packed_10: + val = 0; + break; + case dml2_422_packed_12: + val = 0; + break; + default: + DML_ASSERT(0); + break; + } + return val; +} + +bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 0; + break; + case dml2_420_10: + val = 0; + break; + case dml2_420_12: + val = 0; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + case dml2_422_planar_8: + val = 0; + break; + case dml2_422_planar_10: + val = 0; + break; + case dml2_422_planar_12: + val = 0; + break; + case dml2_422_packed_8: + val = 1; + break; + case dml2_422_packed_10: + val = 1; + break; + case dml2_422_packed_12: + val = 1; + break; + default: + DML_ASSERT(0); + break; + } + return val; +} + +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool 
fail_only) +{ + DML_LOG_VERBOSE("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->ViewportSizeSupport == 0) + DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) + DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + if (!fail_only || support->MultistreamWithHDMIOreDP == 1) + DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + if (!fail_only || support->NotEnoughLanesForMSO == 1) + DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->P2IWith420 == 1) + DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->NotEnoughDSCUnits == 1) + DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + DML_LOG_VERBOSE("DML: support: 
DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + if (!fail_only || support->ROBSupport == 0) + DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); + if (!fail_only || support->OutstandingRequestsSupport == 0) + DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) + DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + if (!fail_only || support->PrefetchSupported == 0) + DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->DynamicMetadataSupported == 0) + DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->PTEBufferSizeNotExceeded == 0) + DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) + DML_LOG_VERBOSE("DML: support: 
DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + + if (!fail_only || support->ModeSupport == 0) + DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport); + DML_LOG_VERBOSE("DML: ===================================== \n"); +} + +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + + +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) +{ + for (unsigned int k = 0; k < display_cfg->num_planes; k++) { + double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { + switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { + case dml2_444: + out_bpp[k] = bpc * 3; + break; + case dml2_s422: + out_bpp[k] = bpc * 2; + break; + case dml2_n422: + out_bpp[k] = bpc * 2; + break; + case dml2_420: + default: + out_bpp[k] = bpc * 1.5; + break; + } + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { + out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; + } else { + out_bpp[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); +#endif + } +} + +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up) +{ + unsigned int remainder; + + if (multiple == 0) + return num; + + remainder = num % multiple; + if (remainder == 0) + return num; + + if (up) + return (num + multiple - remainder); + else + return (num - remainder); +} + +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) +{ + unsigned int num_active_pipes = 0; + + for (unsigned int k = 0; k < num_planes; k++) { + num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; + } + +#ifdef 
__DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); +#endif + return num_active_pipes; +} + +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) +{ + unsigned int pipe_idx = 0; + + for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { + pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; + } + + for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { + for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) +{ + bool is_phantom = false; + + if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || + plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { + is_phantom = true; + } + + return is_phantom; +} + +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) +{ + + if (sw_mode == dml2_sw_linear) + return 256; + else if (sw_mode == dml2_sw_256b_2d) + return 256; + else if (sw_mode == dml2_sw_4kb_2d) + return 4096; + else if (sw_mode == dml2_sw_64kb_2d) + return 65536; + else if (sw_mode == dml2_sw_256kb_2d) + return 262144; + else if (sw_mode == dml2_gfx11_sw_linear) + return 256; + else if (sw_mode == dml2_gfx11_sw_64kb_d) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_t) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_r_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_256kb_d_x) + return 262144; + else if (sw_mode == dml2_gfx11_sw_256kb_r_x) + return 262144; + else { + DML_ASSERT(0); + return 256; + }; +} + +bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) +{ + return (byte_per_pixel != 2); +} + +bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode) +{ + return sw_mode == dml2_sw_linear; +}; + + +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan) +{ + bool is_vert = false; + if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { + is_vert = true; + } else { + is_vert = false; + } + return is_vert; +} + +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) +{ + int unsigned version = 0; + + if (sw_mode == dml2_sw_linear || + sw_mode == dml2_sw_256b_2d || + sw_mode == dml2_sw_4kb_2d || + sw_mode == dml2_sw_64kb_2d || + sw_mode == dml2_sw_256kb_2d) + version = 12; + else if (sw_mode == dml2_gfx11_sw_linear || + sw_mode == dml2_gfx11_sw_64kb_d || + sw_mode == dml2_gfx11_sw_64kb_d_t || + sw_mode == dml2_gfx11_sw_64kb_d_x || + sw_mode == dml2_gfx11_sw_64kb_r_x || + sw_mode == dml2_gfx11_sw_256kb_d_x || + sw_mode == dml2_gfx11_sw_256kb_r_x) + version = 11; + else { + DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! 
val=%u\n", sw_mode); + DML_ASSERT(0); + } + + return version; +} + +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params) +{ + unsigned int i; + unsigned int index = 0; + + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + + if (i == 0) + index = 0; + else + index = i - 1; + + if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz || + per_uclk_dpm_params[i].minimum_uclk_khz == 0) { + break; + } + } +#if defined(__DML_VBA_DEBUG__) + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index); +#endif + return index; +} + +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) +{ + unsigned int i; + bool clk_entry_found = false; + + for (i = 0; i < clk_table->uclk.num_clk_values; i++) { + DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + + if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { + clk_entry_found = true; + break; + } + } + + if (!clk_entry_found) + DML_ASSERT(clk_entry_found); +#if defined(__DML_VBA_DEBUG__) + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i); +#endif + return i; +} + +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format) +{ + bool ret_val = false; + + if (dml2_core_utils_is_420(source_format) || dml2_core_utils_is_422_planar(source_format) || (source_format == dml2_rgbe_alpha)) + ret_val = true; + + return ret_val; +} + +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) +{ + if (a == 0) + return 0; + + return (math_log2_approx(a) - subtrahend); +} + +static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, + const struct dml2_implicit_svp_meta *meta) +{ + memcpy(phantom, main, sizeof(struct dml2_stream_parameters)); + + phantom->timing.v_total = meta->v_total; + phantom->timing.v_active = meta->v_active; + phantom->timing.v_front_porch = meta->v_front_porch; + phantom->timing.v_blank_end = phantom->timing.v_total - phantom->timing.v_front_porch - phantom->timing.v_active; + phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; + phantom->timing.drr_config.enabled = false; +} + +static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, + const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) +{ + memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); + + phantom->stream_index = phantom_stream_index; + phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; + phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) 
math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); + phantom->immediate_flip = false; + phantom->dynamic_meta_data.enable = false; + phantom->cursor.num_cursors = 0; + phantom->cursor.cursor_width = 0; + phantom->tdlut.setup_for_tdlut = false; +} + +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index; + const struct dml2_plane_parameters *main_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); + + if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) + return; + + /* disable unbounded requesting for all planes until stage 3 has been performed */ + if (!display_cfg->stage3.performed) { + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true; + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false; + } + // Create the phantom streams + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + main_stream = &display_cfg->display_config.stream_descriptors[stream_index]; + scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index; + + if (display_cfg->stage3.stream_svp_meta[stream_index].valid) { + // Create the phantom stream + create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], + main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]); + + // Associate this phantom stream to the main stream + scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams; + + // Increment num streams + svp_expanded_display_cfg->num_streams++; + } + } + + // Create the phantom planes + for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { + main_plane = &display_cfg->display_config.plane_descriptors[plane_index]; + + if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { + main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]]; + create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], + main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream); + + // Associate this phantom plane to the main plane + scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index; + scratch->main_plane_index_to_phantom_plane_index[plane_index] = 
svp_expanded_display_cfg->num_planes; + + // Increment num planes + svp_expanded_display_cfg->num_planes++; + + // Adjust the main plane settings + svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe; + } + } +} + +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + return true; + case dml2_none: + default: + return false; + } +} +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmifrl: + return true; + case dml2_hdmi: + case dml2_none: + default: + return false; + } +} + + +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_edp: + return true; + case dml2_dp2p0: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp2p0: + return true; + case dml2_dp: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + return dml2_core_utils_is_dio_dp_encoder(stream_descriptor) + || dml2_core_utils_is_hpo_dp_encoder(stream_descriptor); +} + + +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + return true; + case dml2_dp_rate_na: + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + return true; + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + case dml2_dp_rate_na: + default: + return false; + } +} + +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode) +{ + switch (odm_mode) { + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + return true; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_combine_2to1: + case dml2_odm_mode_combine_3to1: + case dml2_odm_mode_combine_4to1: + default: + return false; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h new file mode 100644 index 000000000000..95f0d017add4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
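Before the declarations below, a quick sanity check on dml2_core_utils_round_to_multiple() from dml2_core_utils.c above: it passes a zero multiple through unchanged and otherwise rounds in the requested direction. The sample values in this sketch are arbitrary illustrations, not taken from this patch:

	/* Expected results derived from the implementation above; inputs are arbitrary. */
	static void example_round_to_multiple(void)
	{
		DML_ASSERT(dml2_core_utils_round_to_multiple(1920, 256, true) == 2048);  /* round up */
		DML_ASSERT(dml2_core_utils_round_to_multiple(1920, 256, false) == 1792); /* round down */
		DML_ASSERT(dml2_core_utils_round_to_multiple(2048, 256, true) == 2048);  /* already aligned */
		DML_ASSERT(dml2_core_utils_round_to_multiple(100, 0, true) == 100);      /* zero multiple passes through */
	}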
+ +#ifndef __DML2_CORE_UTILS_H__ +#define __DML2_CORE_UTILS_H__ +#include "dml2_internal_shared_types.h" +#include "dml2_debug.h" +#include "lib_float_math.h" + +double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder); +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format); +bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format); +bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format); +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only); +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg); +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up); +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info); +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane); +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg); +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); +bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan); +bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode); +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode); +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params); +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table); +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format); +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend); +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch); +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode); + +#endif /* __DML2_CORE_UTILS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c new file mode 100644 index 000000000000..22969a533a7b --- /dev/null +++ 
b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -0,0 +1,785 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml2_dpmm_dcn4.h" +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" +#include "lib_float_math.h" + +static double dram_bw_kbps_to_uclk_khz(unsigned long long bandwidth_kbps, const struct dml2_dram_params *dram_config) +{ + double uclk_khz = 0; + unsigned long uclk_mbytes_per_tick = 0; + + uclk_mbytes_per_tick = dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock; + + uclk_khz = (double)bandwidth_kbps / uclk_mbytes_per_tick; + + return uclk_khz; +} + +static void get_minimum_clocks_for_latency(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, + double *uclk, + double *fclk, + double *dcfclk) +{ + int min_clock_index_for_latency; + + if (in_out->display_cfg->stage3.success) + min_clock_index_for_latency = in_out->display_cfg->stage3.min_clk_index_for_latency; + else + min_clock_index_for_latency = in_out->display_cfg->stage1.min_clk_index_for_latency; + + *dcfclk = in_out->min_clk_table->dram_bw_table.entries[min_clock_index_for_latency].min_dcfclk_khz; + *fclk = in_out->min_clk_table->dram_bw_table.entries[min_clock_index_for_latency].min_fclk_khz; + *uclk = dram_bw_kbps_to_uclk_khz(in_out->min_clk_table->dram_bw_table.entries[min_clock_index_for_latency].pre_derate_dram_bw_kbps, + &in_out->soc_bb->clk_table.dram_config); +} + +static unsigned long dml_round_up(double a) +{ + if (a - (unsigned long)a > 0) { + return ((unsigned long)a) + 1; + } + return (unsigned long)a; +} + +static void calculate_system_active_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + double min_uclk_avg, min_uclk_urgent, min_uclk_bw; + double min_fclk_avg, min_fclk_urgent, min_fclk_bw; + double min_dcfclk_avg, min_dcfclk_urgent, min_dcfclk_bw; + double min_uclk_latency, min_fclk_latency, min_dcfclk_latency; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100); + + min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + if (in_out->display_cfg->display_config.hostvm_enable) + min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100); + else + min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100); + + min_uclk_bw = min_uclk_urgent > min_uclk_avg ? 
min_uclk_urgent : min_uclk_avg; + + min_fclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100); + + min_fclk_urgent = (double)mode_support_result->global.active.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100); + + min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg; + + min_dcfclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100); + + min_dcfclk_urgent = (double)mode_support_result->global.active.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100); + + min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg; + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4x.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); +} + +static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + double min_uclk_avg, min_uclk_urgent, min_uclk_bw; + double min_fclk_avg, min_fclk_urgent, min_fclk_bw; + double min_dcfclk_avg, min_dcfclk_urgent, min_dcfclk_bw; + double min_fclk_latency, min_dcfclk_latency; + double min_uclk_latency; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + /* assumes DF throttling is enabled */ + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100); + + min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100); + + min_uclk_bw = min_uclk_urgent > min_uclk_avg ? 
min_uclk_urgent : min_uclk_avg; + + min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100); + + min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100); + + min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg; + + min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100); + + min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100); + + min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg; + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); + + /* assumes DF throttling is disabled */ + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100); + + min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100); + + min_uclk_bw = min_uclk_urgent > min_uclk_avg ? min_uclk_urgent : min_uclk_avg; + + min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100); + + min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100); + + min_fclk_bw = min_fclk_urgent > min_fclk_avg ? 
min_fclk_urgent : min_fclk_avg; + + min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100); + + min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100); + + min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg; + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); +} + +static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + double min_uclk_avg; + double min_fclk_avg; + double min_dcfclk_avg; + double min_uclk_latency, min_fclk_latency, min_dcfclk_latency; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.dram_derate_percent_pixel / 100); + + min_fclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.fclk_derate_percent / 100); + + min_dcfclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.dcfclk_derate_percent / 100); + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4x.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? min_fclk_avg : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? 
min_dcfclk_avg : min_dcfclk_latency); +} + +static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double margin, unsigned long vco_freq_khz, unsigned long *rounded_khz, uint32_t *divider_id) +{ + enum dentist_divider_range { + DFS_DIVIDER_RANGE_1_START = 8, /* 2.00 */ + DFS_DIVIDER_RANGE_1_STEP = 1, /* 0.25 */ + DFS_DIVIDER_RANGE_2_START = 64, /* 16.00 */ + DFS_DIVIDER_RANGE_2_STEP = 2, /* 0.50 */ + DFS_DIVIDER_RANGE_3_START = 128, /* 32.00 */ + DFS_DIVIDER_RANGE_3_STEP = 4, /* 1.00 */ + DFS_DIVIDER_RANGE_4_START = 248, /* 62.00 */ + DFS_DIVIDER_RANGE_4_STEP = 264, /* 66.00 */ + DFS_DIVIDER_RANGE_SCALE_FACTOR = 4 + }; + + enum DFS_base_divider_id { + DFS_BASE_DID_1 = 0x08, + DFS_BASE_DID_2 = 0x40, + DFS_BASE_DID_3 = 0x60, + DFS_BASE_DID_4 = 0x7e, + DFS_MAX_DID = 0x7f + }; + + unsigned int divider; + + if (clock_khz < 1 || vco_freq_khz < 1 || clock_khz > vco_freq_khz) + return false; + + clock_khz *= 1.0 + margin; + + divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); + + /* we want to floor here to get higher clock than required rather than lower */ + if (divider < DFS_DIVIDER_RANGE_2_START) { + if (divider < DFS_DIVIDER_RANGE_1_START) + *divider_id = DFS_BASE_DID_1; + else + *divider_id = DFS_BASE_DID_1 + ((divider - DFS_DIVIDER_RANGE_1_START) / DFS_DIVIDER_RANGE_1_STEP); + } else if (divider < DFS_DIVIDER_RANGE_3_START) { + *divider_id = DFS_BASE_DID_2 + ((divider - DFS_DIVIDER_RANGE_2_START) / DFS_DIVIDER_RANGE_2_STEP); + } else if (divider < DFS_DIVIDER_RANGE_4_START) { + *divider_id = DFS_BASE_DID_3 + ((divider - DFS_DIVIDER_RANGE_3_START) / DFS_DIVIDER_RANGE_3_STEP); + } else { + *divider_id = DFS_BASE_DID_4 + ((divider - DFS_DIVIDER_RANGE_4_START) / DFS_DIVIDER_RANGE_4_STEP); + if (*divider_id > DFS_MAX_DID) + *divider_id = DFS_MAX_DID; + } + + *rounded_khz = vco_freq_khz * DFS_DIVIDER_RANGE_SCALE_FACTOR / divider; + + return true; +} + +static bool round_to_non_dfs_granularity(unsigned long dispclk_khz, unsigned long dpprefclk_khz, unsigned long dtbrefclk_khz, + unsigned long *rounded_dispclk_khz, unsigned long *rounded_dpprefclk_khz, unsigned long *rounded_dtbrefclk_khz) +{ + unsigned long pll_frequency_khz; + + pll_frequency_khz = (unsigned long) math_max2(600000, math_ceil2(math_max3(dispclk_khz, dpprefclk_khz, dtbrefclk_khz), 1000)); + + *rounded_dispclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dispclk_khz, 32); + + *rounded_dpprefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dpprefclk_khz, 32); + + if (dtbrefclk_khz > 0) { + *rounded_dtbrefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dtbrefclk_khz, 32); + } else { + *rounded_dtbrefclk_khz = 0; + } + + return true; +} + +static bool round_up_and_copy_to_next_dpm(unsigned long min_value, unsigned long *rounded_value, const struct dml2_clk_table *clock_table) +{ + bool result = false; + int index = 0; + + if (clock_table->num_clk_values > 2) { + while (index < clock_table->num_clk_values && clock_table->clk_values_khz[index] < min_value) + index++; + + if (index < clock_table->num_clk_values) { + *rounded_value = clock_table->clk_values_khz[index]; + result = true; + } + } else if (clock_table->clk_values_khz[clock_table->num_clk_values - 1] >= min_value) { + *rounded_value = min_value; + result = true; + } + return result; +} + +static bool round_up_to_next_dpm(unsigned long *clock_value, const struct dml2_clk_table *clock_table) +{ + return 
round_up_and_copy_to_next_dpm(*clock_value, clock_value, clock_table); +} + +static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table) +{ + bool result; + + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.dcfclk_khz, &state_table->dcfclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.fclk_khz, &state_table->fclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.uclk_khz, &state_table->uclk); + + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz, &state_table->dcfclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz, &state_table->fclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz, &state_table->uclk); + + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.dcfclk_khz, &state_table->dcfclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.fclk_khz, &state_table->fclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk); + + /* these clocks are optional, so they can fail to map, in which case map all to 0 */ + if (result) { + if (!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz, &state_table->dcfclk) || + !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz, &state_table->fclk) || + !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz, &state_table->uclk)) { + display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = 0; + } + } + + return result; +} + +static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table) +{ + bool result; + int index; + + result = false; + for (index = 0; index < state_table->uclk.num_clk_values; index++) { + if (display_cfg->min_clocks.dcn4x.active.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.active.fclk_khz <= state_table->fclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.active.uclk_khz <= state_table->uclk.clk_values_khz[index]) { + display_cfg->min_clocks.dcn4x.active.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.active.fclk_khz = state_table->fclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.active.uclk_khz = state_table->uclk.clk_values_khz[index]; + result = true; + break; + } + } + + if (result) { + result = false; + for (index = 0; index < state_table->uclk.num_clk_values; index++) { + if (display_cfg->min_clocks.dcn4x.idle.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.idle.fclk_khz <= state_table->fclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.idle.uclk_khz <= state_table->uclk.clk_values_khz[index]) { + display_cfg->min_clocks.dcn4x.idle.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.idle.fclk_khz = state_table->fclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.idle.uclk_khz = state_table->uclk.clk_values_khz[index]; + result = true; + 
break;
+			}
+		}
+	}
+
+	// SVP is not supported on any coarse grained SoCs
+	display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = 0;
+	display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz = 0;
+	display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz = 0;
+
+	return result;
+}
+
+static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mode_support_result, struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table)
+{
+	bool result = false;
+	bool dcfclk_fine_grained = false, fclk_fine_grained = false, clock_state_count_identical = false;
+	unsigned int i;
+
+	if (!state_table || !display_cfg)
+		return false;
+
+	if (state_table->dcfclk.num_clk_values == 2) {
+		dcfclk_fine_grained = true;
+	}
+
+	if (state_table->fclk.num_clk_values == 2) {
+		fclk_fine_grained = true;
+	}
+
+	if (state_table->fclk.num_clk_values == state_table->dcfclk.num_clk_values &&
+		state_table->fclk.num_clk_values == state_table->uclk.num_clk_values) {
+		clock_state_count_identical = true;
+	}
+
+	if (dcfclk_fine_grained || fclk_fine_grained || !clock_state_count_identical)
+		result = map_soc_min_clocks_to_dpm_fine_grained(display_cfg, state_table);
+	else
+		result = map_soc_min_clocks_to_dpm_coarse_grained(display_cfg, state_table);
+
+	if (result)
+		result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dispclk_khz, &state_table->dispclk);
+
+	for (i = 0; i < DML2_MAX_DCN_PIPES; i++) {
+		if (result)
+			result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4x.dppclk_khz, &state_table->dppclk);
+	}
+
+	for (i = 0; i < display_cfg->display_config.num_streams; i++) {
+		if (result)
+			result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dscclk_khz, &state_table->dscclk);
+		if (result)
+			result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dtbclk_khz, &state_table->dtbclk);
+		if (result)
+			result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.phyclk_khz, &state_table->phyclk);
+	}
+
+	if (result)
+		result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dpprefclk_khz, &state_table->dppclk);
+
+	if (result)
+		result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dtbrefclk_khz, &state_table->dtbclk);
+
+	return result;
+}
+
+static bool are_timings_trivially_synchronizable(struct dml2_display_cfg *display_config, int mask)
+{
+	unsigned int i;
+	bool identical = true;
+	bool contains_drr = false;
+	unsigned int remap_array[DML2_MAX_PLANES];
+	unsigned int remap_array_size = 0;
+
+	// Create a remap array to enable simple iteration through only masked stream indices
+	for (i = 0; i < display_config->num_streams; i++) {
+		if (mask & (0x1 << i)) {
+			remap_array[remap_array_size++] = i;
+		}
+	}
+
+	// 0 or 1 display is always trivially synchronizable
+	if (remap_array_size <= 1)
+		return true;
+
+	// Check that all displays' timings are the same
+	for (i = 1; i < remap_array_size; i++) {
+		if (memcmp(&display_config->stream_descriptors[remap_array[i - 1]].timing, &display_config->stream_descriptors[remap_array[i]].timing, sizeof(struct dml2_timing_cfg))) {
+			identical = false;
+			break;
+		}
+	}
+
+	// Check if any displays are DRR
+	for (i = 0; i < remap_array_size; i++) {
+		if (display_config->stream_descriptors[remap_array[i]].timing.drr_config.enabled) {
+			contains_drr = true;
+			break;
+		}
+	}
+
+	// Trivial sync is possible if all displays are identical and none are DRR
+	return !contains_drr && identical;
+}
+
+static int find_smallest_idle_time_in_vblank_us(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int mask)
+{
+	unsigned int i;
+	int min_idle_us = 0;
+	unsigned int remap_array[DML2_MAX_PLANES];
+	unsigned int remap_array_size = 0;
+	const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
+
+	// Create a remap array to enable simple iteration through only masked stream indices
+	for (i = 0; i < in_out->programming->display_config.num_streams; i++) {
+		if (mask & (0x1 << i)) {
+			remap_array[remap_array_size++] = i;
+		}
+	}
+
+	if (remap_array_size == 0)
+		return 0;
+
+	min_idle_us = mode_support_result->cfg_support_info.stream_support_info[remap_array[0]].vblank_reserved_time_us;
+
+	for (i = 1; i < remap_array_size; i++) {
+		if (min_idle_us > mode_support_result->cfg_support_info.stream_support_info[remap_array[i]].vblank_reserved_time_us)
+			min_idle_us = mode_support_result->cfg_support_info.stream_support_info[remap_array[i]].vblank_reserved_time_us;
+	}
+
+	return min_idle_us;
+}
+
+static bool determine_power_management_features_with_vblank_only(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+	int min_idle_us;
+
+	if (are_timings_trivially_synchronizable(&in_out->programming->display_config, 0xF)) {
+		min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, 0xF);
+
+		if (min_idle_us >= in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)
+			in_out->programming->uclk_pstate_supported = true;
+
+		if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us)
+			in_out->programming->fclk_pstate_supported = true;
+	}
+
+	return true;
+}
+
+static int get_displays_without_vactive_margin_mask(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int latency_hiding_requirement_us)
+{
+	unsigned int i;
+	int displays_without_vactive_margin_mask = 0x0;
+	const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
+
+	for (i = 0; i < in_out->programming->display_config.num_planes; i++) {
+		if (mode_support_result->cfg_support_info.plane_support_info[i].active_latency_hiding_us
+			< latency_hiding_requirement_us)
+			displays_without_vactive_margin_mask |= (0x1 << i);
+	}
+
+	return displays_without_vactive_margin_mask;
+}
+
+static int get_displays_with_fams_mask(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int latency_hiding_requirement_us)
+{
+	unsigned int i;
+	int displays_with_fams_mask = 0x0;
+
+	for (i = 0; i < in_out->programming->display_config.num_planes; i++) {
+		if (in_out->programming->display_config.plane_descriptors[i].overrides.legacy_svp_config != dml2_svp_mode_override_auto)
+			displays_with_fams_mask |= (0x1 << i);
+	}
+
+	return displays_with_fams_mask;
+}
+
+static bool determine_power_management_features_with_vactive_and_vblank(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+	int displays_without_vactive_margin_mask = 0x0;
+	int min_idle_us = 0;
+
+	if (in_out->programming->uclk_pstate_supported == false) {
+		displays_without_vactive_margin_mask =
+			get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us));
+
+		if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) {
+			min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask);
+
+			if (min_idle_us >= in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)
+				in_out->programming->uclk_pstate_supported = true;
+		}
+	}
+
+	if (in_out->programming->fclk_pstate_supported == false) {
+		displays_without_vactive_margin_mask =
+			get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.fclk_change_blackout_us));
+
+		if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) {
+			min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask);
+
+			if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us)
+				in_out->programming->fclk_pstate_supported = true;
+		}
+	}
+
+	return true;
+}
+
+static bool determine_power_management_features_with_fams(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+	int displays_without_vactive_margin_mask = 0x0;
+	int displays_with_fams_mask = 0x0;
+
+	displays_without_vactive_margin_mask =
+		get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us));
+
+	displays_with_fams_mask =
+		get_displays_with_fams_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us));
+
+	// uclk p-state is supported if every display lacking vactive margin has FAMS
+	if ((displays_without_vactive_margin_mask & ~displays_with_fams_mask) == 0)
+		in_out->programming->uclk_pstate_supported = true;
+
+	return true;
+}
+
+static void clamp_uclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+	in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1];
+	in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1];
+	in_out->programming->min_clocks.dcn4x.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1];
+}
+
+static void clamp_fclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+	in_out->programming->min_clocks.dcn4x.active.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1];
+	in_out->programming->min_clocks.dcn4x.idle.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1];
+}
+
+static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+	int i;
+	bool result;
+	double dispclk_khz;
+	const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
+
+	calculate_system_active_minimums(in_out);
+	calculate_svp_prefetch_minimums(in_out);
+	calculate_idle_minimums(in_out);
+
+	// In NV4, there's no support for FCLK or DCFCLK DPM change before SVP prefetch starts, therefore
+	// active minimums must be boosted to prefetch minimums
+	if (in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4x.active.uclk_khz)
+		in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz;
+
+	if (in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4x.active.fclk_khz)
+		in_out->programming->min_clocks.dcn4x.active.fclk_khz =
in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz; + + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4x.active.dcfclk_khz) + in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz; + + // need some massaging for the dispclk ramping cases: + dispclk_khz = mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0) * (1.0 + in_out->ip->dispclk_ramp_margin_percent / 100.0); + // ramping margin should not make dispclk exceed the maximum dispclk speed: + dispclk_khz = math_min2(dispclk_khz, in_out->min_clk_table->max_clocks_khz.dispclk); + // but still the required dispclk can be more than the maximum dispclk speed: + dispclk_khz = math_max2(dispclk_khz, mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); + + // DPP Ref is always set to max of all DPP clocks + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + if (in_out->programming->min_clocks.dcn4x.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; + } + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = (unsigned long) (in_out->programming->min_clocks.dcn4x.dpprefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); + + // DTB Ref is always set to max of all DTB clocks + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + if (in_out->programming->min_clocks.dcn4x.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; + } + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); + + if (in_out->soc_bb->no_dfs) { + round_to_non_dfs_granularity((unsigned long)dispclk_khz, in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, + &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz); + } else { + add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); + } + + + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 + * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); + } + + 
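+	/* deepsleep DCFCLK and SOCCLK come straight from mode support; unlike
+	 * the clocks above, they are not rounded to DPM levels below.
+	 */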
in_out->programming->min_clocks.dcn4x.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; + in_out->programming->min_clocks.dcn4x.socclk_khz = mode_support_result->global.socclk_khz; + + result = map_min_clocks_to_dpm(mode_support_result, in_out->programming, &in_out->soc_bb->clk_table); + + // By default, all power management features are not enabled + in_out->programming->fclk_pstate_supported = false; + in_out->programming->uclk_pstate_supported = false; + + return result; +} + +bool dpmm_dcn3_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + bool result; + + result = map_mode_to_soc_dpm(in_out); + + // Check if any can be enabled by nominal vblank idle time + determine_power_management_features_with_vblank_only(in_out); + + // Check if any can be enabled in vactive/vblank + determine_power_management_features_with_vactive_and_vblank(in_out); + + // Check if any can be enabled via fams + determine_power_management_features_with_fams(in_out); + + if (in_out->programming->uclk_pstate_supported == false) + clamp_uclk_to_max(in_out); + + if (in_out->programming->fclk_pstate_supported == false) + clamp_fclk_to_max(in_out); + + return result; +} + +bool dpmm_dcn4_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + bool result; + int displays_without_vactive_margin_mask = 0x0; + int min_idle_us = 0; + + result = map_mode_to_soc_dpm(in_out); + + if (in_out->display_cfg->stage3.success) + in_out->programming->uclk_pstate_supported = true; + + displays_without_vactive_margin_mask = + get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.fclk_change_blackout_us)); + + if (displays_without_vactive_margin_mask == 0) { + in_out->programming->fclk_pstate_supported = true; + } else { + if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) { + min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask); + + if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us) + in_out->programming->fclk_pstate_supported = true; + } + } + + if (in_out->programming->uclk_pstate_supported == false) + clamp_uclk_to_max(in_out); + + if (in_out->programming->fclk_pstate_supported == false) + clamp_fclk_to_max(in_out); + + min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, 0xFF); + if (in_out->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && + min_idle_us >= in_out->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) + in_out->programming->stutter.supported_in_blank = true; + else + in_out->programming->stutter.supported_in_blank = false; + + // TODO: Fix me Sam + if (in_out->soc_bb->power_management_parameters.z8_min_idle_time > 0 && + in_out->programming->informative.power_management.z8.stutter_period >= in_out->soc_bb->power_management_parameters.z8_min_idle_time) + in_out->programming->z8_stutter.meets_eco = true; + else + in_out->programming->z8_stutter.meets_eco = false; + + if (in_out->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && + min_idle_us >= in_out->soc_bb->power_management_parameters.z8_stutter_exit_latency_us) + in_out->programming->z8_stutter.supported_in_blank = true; + else + in_out->programming->z8_stutter.supported_in_blank = false; + + return result; +} + +bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out) +{ + const struct 
dml2_display_cfg *display_cfg = &in_out->display_cfg->display_config;
+	const struct dml2_core_internal_display_mode_lib *mode_lib = &in_out->core->clean_me_up.mode_lib;
+	struct dml2_dchub_global_register_set *dchubbub_regs = &in_out->programming->global_regs;
+
+	double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
+
+	/* set A */
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].fclk_pstate = (unsigned int)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter = (unsigned int)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit = (unsigned int)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter_z8 = (unsigned int)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit_z8 = (unsigned int)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (unsigned int)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].uclk_pstate = (unsigned int)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].urgent = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].usr = (unsigned int)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000);
+
+	/* set B */
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].fclk_pstate = (unsigned int)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter = (unsigned int)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit = (unsigned int)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter_z8 = (unsigned int)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit_z8 = (unsigned int)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (unsigned int)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].uclk_pstate = (unsigned int)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].urgent = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].usr = (unsigned int)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000);
+	dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000);
+
+	dchubbub_regs->num_watermark_sets = 2;
+
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h
new file mode 100644
index 000000000000..e7b58f2efda4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_DPMM_DCN4_H__
+#define __DML2_DPMM_DCN4_H__
+
+#include "dml2_internal_shared_types.h"
+
+bool dpmm_dcn3_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out);
+bool dpmm_dcn4_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out);
+bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c
new file mode 100644
index 000000000000..dfd01440737d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
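+//
+// DPMM factory: selects the mode-to-DPM mapping and watermark programming
+// callbacks for the given DML2 project id; early-stage projects use the
+// no-op stubs below.
+//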
+ +#include "dml2_dpmm_factory.h" +#include "dml2_dpmm_dcn4.h" +#include "dml2_external_lib_deps.h" + +static bool dummy_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + return true; +} + +static bool dummy_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out) +{ + return true; +} + +bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance *out) +{ + bool result = false; + + if (!out) + return false; + + memset(out, 0, sizeof(struct dml2_dpmm_instance)); + + switch (project_id) { + case dml2_project_dcn4x_stage1: + out->map_mode_to_soc_dpm = &dummy_map_mode_to_soc_dpm; + out->map_watermarks = &dummy_map_watermarks; + result = true; + break; + case dml2_project_dcn4x_stage2: + out->map_mode_to_soc_dpm = &dpmm_dcn3_map_mode_to_soc_dpm; + out->map_watermarks = &dummy_map_watermarks; + result = true; + break; + case dml2_project_dcn4x_stage2_auto_drr_svp: + out->map_mode_to_soc_dpm = &dpmm_dcn4_map_mode_to_soc_dpm; + out->map_watermarks = &dpmm_dcn4_map_watermarks; + result = true; + break; + case dml2_project_invalid: + default: + break; + } + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.h new file mode 100644 index 000000000000..20ba2e446f1d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_DPMM_FACTORY_H__ +#define __DML2_DPMM_FACTORY_H__ + +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" + +bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance *out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.c new file mode 100644 index 000000000000..a265f254152c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#include "dml2_mcg_dcn4.h" +#include "dml_top_soc_parameter_types.h" + +static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table); + +bool mcg_dcn4_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out) +{ + return build_min_clock_table(in_out->soc_bb, in_out->min_clk_table); +} + +static unsigned long long uclk_to_dram_bw_kbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config) +{ + unsigned long long bw_kbps = 0; + + bw_kbps = (unsigned long long) uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock; + + return bw_kbps; +} + +static unsigned long round_up_to_quantized_values(unsigned long value, const unsigned long *quantized_values, int num_quantized_values) +{ + int i; + + if (!quantized_values) + return 0; + + for (i = 0; i < num_quantized_values; i++) { + if (quantized_values[i] > value) + return quantized_values[i]; + } + + return 0; +} + +static bool build_min_clk_table_fine_grained(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table) +{ + bool dcfclk_fine_grained = false, fclk_fine_grained = false; + + int i; + unsigned int j; + + unsigned long min_dcfclk_khz = 0; + unsigned long min_fclk_khz = 0; + unsigned long prev_100, cur_50; + + if (soc_bb->clk_table.dcfclk.num_clk_values == 2) { + dcfclk_fine_grained = true; + } + + if (soc_bb->clk_table.fclk.num_clk_values == 2) { + fclk_fine_grained = true; + } + + min_dcfclk_khz = soc_bb->clk_table.dcfclk.clk_values_khz[0]; + min_fclk_khz = soc_bb->clk_table.fclk.clk_values_khz[0]; + + // First calculate the table for "balanced" bandwidths across UCLK/FCLK + for (i = 0; i < soc_bb->clk_table.uclk.num_clk_values; i++) { + min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps = uclk_to_dram_bw_kbps(soc_bb->clk_table.uclk.clk_values_khz[i], &soc_bb->clk_table.dram_config); + + min_table->dram_bw_table.entries[i].min_fclk_khz = (unsigned long)((((double)min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps * soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100) / ((double)soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100)) / soc_bb->fabric_datapath_to_dcn_data_return_bytes); + } + min_table->dram_bw_table.num_entries = soc_bb->clk_table.uclk.num_clk_values; + + // To create the minium table, effectively shift "up" all the dcfclk/fclk entries by 1, and then replace the lowest entry with min fclk/dcfclk + for (i = min_table->dram_bw_table.num_entries - 1; i > 0; i--) { + prev_100 = min_table->dram_bw_table.entries[i - 1].min_fclk_khz; + cur_50 = min_table->dram_bw_table.entries[i].min_fclk_khz / 2; + min_table->dram_bw_table.entries[i].min_fclk_khz = prev_100 > cur_50 ? 
prev_100 : cur_50; + + if (!fclk_fine_grained) { + min_table->dram_bw_table.entries[i].min_fclk_khz = round_up_to_quantized_values(min_table->dram_bw_table.entries[i].min_fclk_khz, soc_bb->clk_table.fclk.clk_values_khz, soc_bb->clk_table.fclk.num_clk_values); + } + } + min_table->dram_bw_table.entries[0].min_fclk_khz /= 2; + + // Clamp to minimums and maximums + for (i = 0; i < (int)min_table->dram_bw_table.num_entries; i++) { + if (min_table->dram_bw_table.entries[i].min_dcfclk_khz < min_dcfclk_khz) + min_table->dram_bw_table.entries[i].min_dcfclk_khz = min_dcfclk_khz; + + if (min_table->dram_bw_table.entries[i].min_fclk_khz < min_fclk_khz) + min_table->dram_bw_table.entries[i].min_fclk_khz = min_fclk_khz; + + if (soc_bb->max_fclk_for_uclk_dpm_khz > 0 && + min_table->dram_bw_table.entries[i].min_fclk_khz > soc_bb->max_fclk_for_uclk_dpm_khz) + min_table->dram_bw_table.entries[i].min_fclk_khz = soc_bb->max_fclk_for_uclk_dpm_khz; + + min_table->dram_bw_table.entries[i].min_dcfclk_khz = + min_table->dram_bw_table.entries[i].min_fclk_khz * + soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent; + + min_table->dram_bw_table.entries[i].min_dcfclk_khz = + min_table->dram_bw_table.entries[i].min_dcfclk_khz * soc_bb->fabric_datapath_to_dcn_data_return_bytes / soc_bb->return_bus_width_bytes; + + if (!dcfclk_fine_grained) { + min_table->dram_bw_table.entries[i].min_dcfclk_khz = round_up_to_quantized_values(min_table->dram_bw_table.entries[i].min_dcfclk_khz, soc_bb->clk_table.dcfclk.clk_values_khz, soc_bb->clk_table.dcfclk.num_clk_values); + } + } + + // Prune states which are invalid (some clocks exceed maximum) + for (i = 0; i < (int)min_table->dram_bw_table.num_entries; i++) { + if (min_table->dram_bw_table.entries[i].min_dcfclk_khz > min_table->max_clocks_khz.dcfclk || + min_table->dram_bw_table.entries[i].min_fclk_khz > min_table->max_clocks_khz.fclk) { + min_table->dram_bw_table.num_entries = i; + break; + } + } + + // Prune duplicate states + for (i = 0; i < (int)min_table->dram_bw_table.num_entries - 1; i++) { + if (min_table->dram_bw_table.entries[i].min_dcfclk_khz == min_table->dram_bw_table.entries[i + 1].min_dcfclk_khz && + min_table->dram_bw_table.entries[i].min_fclk_khz == min_table->dram_bw_table.entries[i + 1].min_fclk_khz && + min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps == min_table->dram_bw_table.entries[i + 1].pre_derate_dram_bw_kbps) { + + // i + 1 is the same state as i, so shift everything + for (j = i + 1; j < min_table->dram_bw_table.num_entries; j++) { + min_table->dram_bw_table.entries[j].min_dcfclk_khz = min_table->dram_bw_table.entries[j + 1].min_dcfclk_khz; + min_table->dram_bw_table.entries[j].min_fclk_khz = min_table->dram_bw_table.entries[j + 1].min_fclk_khz; + min_table->dram_bw_table.entries[j].pre_derate_dram_bw_kbps = min_table->dram_bw_table.entries[j + 1].pre_derate_dram_bw_kbps; + } + min_table->dram_bw_table.num_entries--; + } + } + + return true; +} + +static bool build_min_clk_table_coarse_grained(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table) +{ + int i; + + for (i = 0; i < soc_bb->clk_table.uclk.num_clk_values; i++) { + min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps = uclk_to_dram_bw_kbps(soc_bb->clk_table.uclk.clk_values_khz[i], &soc_bb->clk_table.dram_config); + min_table->dram_bw_table.entries[i].min_dcfclk_khz = soc_bb->clk_table.dcfclk.clk_values_khz[i]; + 
min_table->dram_bw_table.entries[i].min_fclk_khz = soc_bb->clk_table.fclk.clk_values_khz[i]; + } + min_table->dram_bw_table.num_entries = soc_bb->clk_table.uclk.num_clk_values; + + return true; +} + +static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table) +{ + bool result; + bool dcfclk_fine_grained = false, fclk_fine_grained = false, clock_state_count_equal = false; + + if (!soc_bb || !min_table) + return false; + + if (soc_bb->clk_table.dcfclk.num_clk_values < 2 || soc_bb->clk_table.fclk.num_clk_values < 2) + return false; + + if (soc_bb->clk_table.uclk.num_clk_values > DML_MCG_MAX_CLK_TABLE_SIZE) + return false; + + if (soc_bb->clk_table.dcfclk.num_clk_values == 2) { + dcfclk_fine_grained = true; + } + + if (soc_bb->clk_table.fclk.num_clk_values == 2) { + fclk_fine_grained = true; + } + + if (soc_bb->clk_table.fclk.num_clk_values == soc_bb->clk_table.dcfclk.num_clk_values && + soc_bb->clk_table.fclk.num_clk_values == soc_bb->clk_table.uclk.num_clk_values) + clock_state_count_equal = true; + + min_table->fixed_clocks_khz.amclk = 0; + min_table->fixed_clocks_khz.dprefclk = soc_bb->dprefclk_mhz * 1000; + min_table->fixed_clocks_khz.pcierefclk = soc_bb->pcie_refclk_mhz * 1000; + min_table->fixed_clocks_khz.dchubrefclk = soc_bb->dchub_refclk_mhz * 1000; + min_table->fixed_clocks_khz.xtalclk = soc_bb->xtalclk_mhz * 1000; + + min_table->max_clocks_khz.dispclk = soc_bb->clk_table.dispclk.clk_values_khz[soc_bb->clk_table.dispclk.num_clk_values - 1]; + min_table->max_clocks_khz.dppclk = soc_bb->clk_table.dppclk.clk_values_khz[soc_bb->clk_table.dppclk.num_clk_values - 1]; + min_table->max_clocks_khz.dscclk = soc_bb->clk_table.dscclk.clk_values_khz[soc_bb->clk_table.dscclk.num_clk_values - 1]; + min_table->max_clocks_khz.dtbclk = soc_bb->clk_table.dtbclk.clk_values_khz[soc_bb->clk_table.dtbclk.num_clk_values - 1]; + min_table->max_clocks_khz.phyclk = soc_bb->clk_table.phyclk.clk_values_khz[soc_bb->clk_table.phyclk.num_clk_values - 1]; + + min_table->max_ss_clocks_khz.dispclk = (unsigned int)((double)min_table->max_clocks_khz.dispclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + min_table->max_ss_clocks_khz.dppclk = (unsigned int)((double)min_table->max_clocks_khz.dppclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + min_table->max_ss_clocks_khz.dtbclk = (unsigned int)((double)min_table->max_clocks_khz.dtbclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + + min_table->max_clocks_khz.dcfclk = soc_bb->clk_table.dcfclk.clk_values_khz[soc_bb->clk_table.dcfclk.num_clk_values - 1]; + min_table->max_clocks_khz.fclk = soc_bb->clk_table.fclk.clk_values_khz[soc_bb->clk_table.fclk.num_clk_values - 1]; + + if (dcfclk_fine_grained || fclk_fine_grained || !clock_state_count_equal) + result = build_min_clk_table_fine_grained(soc_bb, min_table); + else + result = build_min_clk_table_coarse_grained(soc_bb, min_table); + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.h new file mode 100644 index 000000000000..f54fde8fba90 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
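+//
+// Entry points for the DCN4 MCG minimum clock table builder.
+//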
+ +#ifndef __DML2_MCG_DCN4_H__ +#define __DML2_MCG_DCN4_H__ + +#include "dml2_internal_shared_types.h" + +bool mcg_dcn4_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out); +bool mcg_dcn4_unit_test(void); + +#endif \ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c new file mode 100644 index 000000000000..c60b8fe90819 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml2_mcg_factory.h" +#include "dml2_mcg_dcn4.h" +#include "dml2_external_lib_deps.h" + +static bool dummy_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out) +{ + return true; +} + +bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance *out) +{ + bool result = false; + + if (!out) + return false; + + memset(out, 0, sizeof(struct dml2_mcg_instance)); + + switch (project_id) { + case dml2_project_dcn4x_stage1: + out->build_min_clock_table = &dummy_build_min_clock_table; + result = true; + break; + case dml2_project_dcn4x_stage2: + case dml2_project_dcn4x_stage2_auto_drr_svp: + out->build_min_clock_table = &mcg_dcn4_build_min_clock_table; + result = true; + break; + case dml2_project_invalid: + default: + break; + } + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.h new file mode 100644 index 000000000000..ad307deca3b0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_MCG_FACTORY_H__ +#define __DML2_MCG_FACTORY_H__ + +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" + +bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance *out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.c new file mode 100644 index 000000000000..1b9579a32ff2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -0,0 +1,706 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
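+//
+// DCN3 PMO: dcc/mcache pipe allocation, dynamic ODM (vmin) and reserved
+// vblank time (p-state) optimization strategies.
+//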
+ +#include "dml2_pmo_factory.h" +#include "dml2_pmo_dcn3.h" + +static void sort(double *list_a, int list_a_size) +{ + // For all elements b[i] in list_b[] + for (int i = 0; i < list_a_size - 1; i++) { + // Find the first element of list_a that's larger than b[i] + for (int j = i; j < list_a_size - 1; j++) { + if (list_a[j] > list_a[j + 1]) + swap(list_a[j], list_a[j + 1]); + } + } +} + +static double get_max_reserved_time_on_all_planes_with_stream_index(struct display_configuation_with_meta *config, unsigned int stream_index) +{ + struct dml2_plane_parameters *plane_descriptor; + long max_reserved_time_ns = 0; + + for (unsigned int i = 0; i < config->display_config.num_planes; i++) { + plane_descriptor = &config->display_config.plane_descriptors[i]; + + if (plane_descriptor->stream_index == stream_index) + if (plane_descriptor->overrides.reserved_vblank_time_ns > max_reserved_time_ns) + max_reserved_time_ns = plane_descriptor->overrides.reserved_vblank_time_ns; + } + + return (max_reserved_time_ns / 1000.0); +} + + +static void set_reserved_time_on_all_planes_with_stream_index(struct display_configuation_with_meta *config, unsigned int stream_index, double reserved_time_us) +{ + struct dml2_plane_parameters *plane_descriptor; + + for (unsigned int i = 0; i < config->display_config.num_planes; i++) { + plane_descriptor = &config->display_config.plane_descriptors[i]; + + if (plane_descriptor->stream_index == stream_index) + plane_descriptor->overrides.reserved_vblank_time_ns = (long int)(reserved_time_us * 1000); + } +} + +static void remove_duplicates(double *list_a, int *list_a_size) +{ + int j = 0; + + if (*list_a_size == 0) + return; + + for (int i = 1; i < *list_a_size; i++) { + if (list_a[j] != list_a[i]) { + j++; + list_a[j] = list_a[i]; + } + } + + *list_a_size = j + 1; +} + +static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) +{ + if (*mpc_combine_factor < limit) { + (*mpc_combine_factor)++; + return true; + } + + return false; +} + +static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, + int free_pipes) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i; + bool result = true; + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes is already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 + if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { + in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = + in_out->cfg_support_info->plane_support_info[i].dpps_used; + // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. + if (free_pipes > 0) { + if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, + pmo->mpc_combine_limit)) { + // We've reached max pipes allocatable to a single plane, so we fail. + result = false; + break; + } else { + // Successfully added another pipe to this failing plane. + free_pipes--; + } + } else { + // No free pipes to add. 
+					result = false;
+					break;
+				}
+			} else {
+				// If the stream of this plane needs ODM combine, no further optimization can be done.
+				result = false;
+				break;
+			}
+		}
+	}
+
+	return result;
+}
+
+static bool iterate_to_next_candidate(struct dml2_pmo_instance *pmo, int size)
+{
+	int borrow_from, i;
+	bool success = false;
+
+	if (pmo->scratch.pmo_dcn3.current_candidate[0] > 0) {
+		pmo->scratch.pmo_dcn3.current_candidate[0]--;
+		success = true;
+	} else {
+		for (borrow_from = 1; borrow_from < size && pmo->scratch.pmo_dcn3.current_candidate[borrow_from] == 0; borrow_from++)
+			;
+
+		if (borrow_from < size) {
+			pmo->scratch.pmo_dcn3.current_candidate[borrow_from]--;
+			for (i = 0; i < borrow_from; i++) {
+				pmo->scratch.pmo_dcn3.current_candidate[i] = pmo->scratch.pmo_dcn3.reserved_time_candidates_count[i] - 1;
+			}
+
+			success = true;
+		}
+	}
+
+	return success;
+}
+
+static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated)
+{
+	bool result = true;
+
+	if (*odm_mode == dml2_odm_mode_auto) {
+		switch (odms_calculated) {
+		case 1:
+			*odm_mode = dml2_odm_mode_bypass;
+			break;
+		case 2:
+			*odm_mode = dml2_odm_mode_combine_2to1;
+			break;
+		case 3:
+			*odm_mode = dml2_odm_mode_combine_3to1;
+			break;
+		case 4:
+			*odm_mode = dml2_odm_mode_combine_4to1;
+			break;
+		default:
+			result = false;
+			break;
+		}
+	}
+
+	if (result) {
+		if (*odm_mode == dml2_odm_mode_bypass) {
+			*odm_mode = dml2_odm_mode_combine_2to1;
+		} else if (*odm_mode == dml2_odm_mode_combine_2to1) {
+			*odm_mode = dml2_odm_mode_combine_3to1;
+		} else if (*odm_mode == dml2_odm_mode_combine_3to1) {
+			*odm_mode = dml2_odm_mode_combine_4to1;
+		} else {
+			result = false;
+		}
+	}
+
+	return result;
+}
+
+static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index)
+{
+	unsigned int i, count;
+
+	count = 0;
+	for (i = 0; i < display_cfg->num_planes; i++) {
+		if (display_cfg->plane_descriptors[i].stream_index == stream_index)
+			count++;
+	}
+
+	return count;
+}
+
+static bool are_timings_trivially_synchronizable(struct display_configuation_with_meta *display_config, int mask)
+{
+	unsigned int i;
+	bool identical = true;
+	bool contains_drr = false;
+	unsigned int remap_array[DML2_MAX_PLANES];
+	unsigned int remap_array_size = 0;
+
+	// Create a remap array to enable simple iteration through only masked stream indices
+	for (i = 0; i < display_config->display_config.num_streams; i++) {
+		if (mask & (0x1 << i)) {
+			remap_array[remap_array_size++] = i;
+		}
+	}
+
+	// 0 or 1 display is always trivially synchronizable
+	if (remap_array_size <= 1)
+		return true;
+
+	for (i = 1; i < remap_array_size; i++) {
+		if (memcmp(&display_config->display_config.stream_descriptors[remap_array[i - 1]].timing,
+			&display_config->display_config.stream_descriptors[remap_array[i]].timing,
+			sizeof(struct dml2_timing_cfg))) {
+			identical = false;
+			break;
+		}
+	}
+
+	for (i = 0; i < remap_array_size; i++) {
+		if (display_config->display_config.stream_descriptors[remap_array[i]].timing.drr_config.enabled) {
+			contains_drr = true;
+			break;
+		}
+	}
+
+	return !contains_drr && identical;
+}
+
+bool pmo_dcn3_initialize(struct dml2_pmo_initialize_in_out *in_out)
+{
+	struct dml2_pmo_instance *pmo = in_out->instance;
+
+	pmo->soc_bb = in_out->soc_bb;
+	pmo->ip_caps = in_out->ip_caps;
+	pmo->mpc_combine_limit = 2;
+	pmo->odm_combine_limit = 4;
+	pmo->mcg_clock_table_size = in_out->mcg_clock_table_size;
+
+	pmo->options = in_out->options;
+
+	return true;
+}
+
+static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator)
+{
+	/*
+	 * Htotal, Hblank start/end, and Hsync start/end must all be divisible
+	 * by the given denominator for the horizontal timing to be considered
+	 * divisible by it. Hsync start is always 0.
+	 */
+	unsigned long h_blank_start = timing->h_total - timing->h_front_porch;
+
+	return (timing->h_total % denominator == 0) &&
+		(h_blank_start % denominator == 0) &&
+		(timing->h_blank_end % denominator == 0) &&
+		(timing->h_sync_width % denominator == 0);
+}
+
+static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type)
+{
+	switch (encoder_type) {
+	case dml2_dp:
+	case dml2_edp:
+	case dml2_dp2p0:
+	case dml2_none:
+		return true;
+	case dml2_hdmi:
+	case dml2_hdmifrl:
+	default:
+		return false;
+	}
+}
+
+bool pmo_dcn3_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out)
+{
+	unsigned int i;
+	const struct dml2_display_cfg *display_config =
+		&in_out->base_display_config->display_config;
+	const struct dml2_core_mode_support_result *mode_support_result =
+		&in_out->base_display_config->mode_support_result;
+
+	if (in_out->instance->options->disable_dyn_odm ||
+		(in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1))
+		return false;
+
+	for (i = 0; i < display_config->num_planes; i++)
+		/*
+		 * vmin optimization is required to be seamlessly switched off
+		 * at any time when the new configuration is no longer
+		 * supported. However switching from ODM combine to MPC combine
+		 * is not always seamless. When there are not enough free
+		 * pipes, we will have to use the same secondary OPP heads as
+		 * secondary DPP pipes in MPC combine in the new state. This
+		 * transition is expected to cause glitches. To avoid the
+		 * transition, we only allow vmin optimization if the stream's
+		 * base configuration doesn't require MPC combine. This
+		 * condition checks if MPC combine is enabled. If so, do not
+		 * optimize the stream.
+		 */
+		if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 &&
+			mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1)
+			in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true;
+
+	for (i = 0; i < display_config->num_streams; i++) {
+		if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm)
+			in_out->base_display_config->stage4.unoptimizable_streams[i] = true;
+		else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid &&
+			in_out->instance->options->disable_dyn_odm_for_stream_with_svp)
+			in_out->base_display_config->stage4.unoptimizable_streams[i] = true;
+		/*
+		 * ODM Combine requires horizontal timing divisible by 2 so each
+		 * ODM segment has the same size.
+		 */
+		else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2))
+			in_out->base_display_config->stage4.unoptimizable_streams[i] = true;
+		/*
+		 * Our hardware supports seamless ODM transitions for DP encoders
+		 * only.
+ */ + else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + } + + return true; +} + +bool pmo_dcn3_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) +{ + bool is_vmin = true; + + if (in_out->vmin_limits->dispclk_khz > 0 && + in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) + is_vmin = false; + + return is_vmin; +} + +static int find_highest_odm_load_stream_index( + const struct dml2_display_cfg *display_config, + const struct dml2_core_mode_support_result *mode_support_result) +{ + unsigned int i; + int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; + + for (i = 0; i < display_config->num_streams; i++) { + if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0) + odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz + / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; + else + odm_load = 0; + + if (odm_load > highest_odm_load) { + highest_odm_load_index = i; + highest_odm_load = odm_load; + } + } + + return highest_odm_load_index; +} + +bool pmo_dcn3_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) +{ + int stream_index; + const struct dml2_display_cfg *display_config = + &in_out->base_display_config->display_config; + const struct dml2_core_mode_support_result *mode_support_result = + &in_out->base_display_config->mode_support_result; + unsigned int odms_used; + struct dml2_stream_parameters *stream_descriptor; + bool optimizable = false; + + /* + * highest odm load stream must be optimizable to continue as dispclk is + * bounded by it. + */ + stream_index = find_highest_odm_load_stream_index(display_config, + mode_support_result); + + if (stream_index < 0 || + in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) + return false; + + odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; + if ((int)odms_used >= in_out->instance->odm_combine_limit) + return false; + + memcpy(in_out->optimized_display_config, + in_out->base_display_config, + sizeof(struct display_configuation_with_meta)); + + stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; + while (!optimizable && increase_odm_combine_factor( + &stream_descriptor->overrides.odm_mode, + odms_used)) { + switch (stream_descriptor->overrides.odm_mode) { + case dml2_odm_mode_combine_2to1: + optimizable = true; + break; + case dml2_odm_mode_combine_3to1: + /* + * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. + */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 3. + */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_combine_4to1: + /* + * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. 
+			 */
+			if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) {
+				if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) {
+					/*
+					 * DSC h slice count must be divisible
+					 * by 4.
+					 */
+					if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0)
+						optimizable = true;
+				} else {
+					optimizable = true;
+				}
+			}
+			break;
+		case dml2_odm_mode_auto:
+		case dml2_odm_mode_bypass:
+		case dml2_odm_mode_split_1to2:
+		case dml2_odm_mode_mso_1to2:
+		case dml2_odm_mode_mso_1to4:
+		default:
+			break;
+		}
+	}
+
+	return optimizable;
+}
+
+bool pmo_dcn3_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out)
+{
+	struct dml2_pmo_instance *pmo = in_out->instance;
+
+	unsigned int i, used_pipes, free_pipes, planes_on_stream;
+	bool result;
+
+	if (in_out->display_config != in_out->optimized_display_cfg) {
+		memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg));
+	}
+
+	// Count the number of free pipes, and check if any odm combine is in use.
+	used_pipes = 0;
+	for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) {
+		used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used;
+	}
+	free_pipes = pmo->ip_caps->pipe_count - used_pipes;
+
+	// Optimization loop
+	// The goal here is to add more pipes to any planes
+	// which are failing mcache admissibility
+	result = true;
+
+	// The optimization logic depends on whether ODM combine is enabled, and the stream count.
+	if (in_out->optimized_display_cfg->num_streams > 1) {
+		// If there are multiple streams, we can only optimize mcache failures on planes
+		// which are not ODM combined.
+
+		result = optimize_dcc_mcache_no_odm(in_out, free_pipes);
+	} else if (in_out->optimized_display_cfg->num_streams == 1) {
+		// In single stream cases, we still optimize mcache failures when there's ODM combine, with some
+		// additional logic.
+
+		if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) {
+			// If ODM combine is enabled, then the logic is to increase the ODM combine factor.
+
+			// Optimization for streams with > 1 ODM combine factor is only supported for single display.
+			planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0);
+
+			for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) {
+				// For pipes that failed dcc mcache check, we want to increase the pipe count.
+				// The logic for doing this depends on how many pipes are already being used,
+				// and whether it's mpcc or odm combine.
+				if (!in_out->dcc_mcache_supported[i]) {
+					// Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream.
+					if (free_pipes >= planes_on_stream) {
+						if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode,
+							in_out->cfg_support_info->plane_support_info[i].dpps_used)) {
+							result = false;
+						} else {
+							break;
+						}
+					} else {
+						result = false;
+						break;
+					}
+				}
+			}
+		} else {
+			// If ODM combine is not enabled, we can reuse the same logic as the multi-stream case.
+ + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } + } else { + result = true; + } + + return result; +} + +bool pmo_dcn3_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3; + const struct dml2_stream_parameters *stream_descriptor; + const struct dml2_plane_parameters *plane_descriptor; + unsigned int stream_index, plane_index, candidate_count; + double min_reserved_vblank_time = 0; + int fclk_twait_needed_mask = 0x0; + int uclk_twait_needed_mask = 0x0; + + state->performed = true; + state->min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency; + pmo->scratch.pmo_dcn3.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; + pmo->scratch.pmo_dcn3.max_latency_index = pmo->mcg_clock_table_size - 1; + pmo->scratch.pmo_dcn3.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; + + pmo->scratch.pmo_dcn3.stream_mask = 0xF; + + for (plane_index = 0; plane_index < in_out->base_display_config->display_config.num_planes; plane_index++) { + plane_descriptor = &in_out->base_display_config->display_config.plane_descriptors[plane_index]; + stream_descriptor = &in_out->base_display_config->display_config.stream_descriptors[plane_descriptor->stream_index]; + + if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[plane_index].active_latency_hiding_us < + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us && + stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate == dml2_twait_budgeting_setting_if_needed) + uclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); + + if (stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate == dml2_twait_budgeting_setting_try) + uclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); + + if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[plane_index].active_latency_hiding_us < + in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us && + stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate == dml2_twait_budgeting_setting_if_needed) + fclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); + + if (stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate == dml2_twait_budgeting_setting_try) + fclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); + + if (plane_descriptor->overrides.legacy_svp_config != dml2_svp_mode_override_auto) { + pmo->scratch.pmo_dcn3.stream_mask &= ~(0x1 << plane_descriptor->stream_index); + } + } + + for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { + stream_descriptor = &in_out->base_display_config->display_config.stream_descriptors[stream_index]; + + // The absolute minimum required time is the minimum of all the required budgets + /* + if (stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate + == dml2_twait_budgeting_setting_require) + + if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { + min_reserved_vblank_time = max_double2(min_reserved_vblank_time, + in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us); + } + + if (stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate + == dml2_twait_budgeting_setting_require) { + + if 
(are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { + min_reserved_vblank_time = max_double2(min_reserved_vblank_time, + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us); + } + } + + if (stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit + == dml2_twait_budgeting_setting_require) + min_reserved_vblank_time = max_double2(min_reserved_vblank_time, + in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us); + */ + + min_reserved_vblank_time = get_max_reserved_time_on_all_planes_with_stream_index(in_out->base_display_config, stream_index); + + // Insert the absolute minimum into the array + candidate_count = 1; + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][0] = min_reserved_vblank_time; + pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] = candidate_count; + + if (!(pmo->scratch.pmo_dcn3.stream_mask & (0x1 << stream_index))) + continue; + + // For every optional feature, we create a candidate for it only if it's larger than the minimum. + if ((fclk_twait_needed_mask & (0x1 << stream_index)) && + in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us > min_reserved_vblank_time) { + + if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] = + in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us; + } + } + + if ((uclk_twait_needed_mask & (0x1 << stream_index)) && + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us > min_reserved_vblank_time) { + + if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] = + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; + } + } + + if ((stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit == dml2_twait_budgeting_setting_try || + stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit == dml2_twait_budgeting_setting_if_needed) && + in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > min_reserved_vblank_time) { + + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] = + in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us; + } + + pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] = candidate_count; + + // Finally sort the array of candidates + sort(pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index], + pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index]); + + remove_duplicates(pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index], + &pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index]); + + pmo->scratch.pmo_dcn3.current_candidate[stream_index] = + pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] - 1; + } + + return true; +} + +bool pmo_dcn3_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i, stream_index; + + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { + stream_index = in_out->base_display_config->display_config.plane_descriptors[i].stream_index; + + if
(in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][pmo->scratch.pmo_dcn3.current_candidate[stream_index]] * 1000) { + return false; + } + } + + return true; +} + +bool pmo_dcn3_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + unsigned int stream_index; + bool success = false; + bool reached_end; + + memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); + + if (in_out->last_candidate_failed) { + if (pmo->scratch.pmo_dcn3.cur_latency_index < pmo->scratch.pmo_dcn3.max_latency_index) { + // If we haven't tried all the clock bounds to support this state, try a higher one + pmo->scratch.pmo_dcn3.cur_latency_index++; + + success = true; + } else { + // If there's nothing higher to try, then we have to try a smaller candidate + reached_end = !iterate_to_next_candidiate(pmo, in_out->optimized_display_config->display_config.num_streams); + + if (!reached_end) { + pmo->scratch.pmo_dcn3.cur_latency_index = pmo->scratch.pmo_dcn3.min_latency_index; + success = true; + } + } + } else { + success = true; + } + + if (success) { + in_out->optimized_display_config->stage3.min_clk_index_for_latency = pmo->scratch.pmo_dcn3.cur_latency_index; + + for (stream_index = 0; stream_index < in_out->optimized_display_config->display_config.num_streams; stream_index++) { + set_reserved_time_on_all_planes_with_stream_index(in_out->optimized_display_config, stream_index, + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][pmo->scratch.pmo_dcn3.current_candidate[stream_index]]); + } + } + + return success; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.h new file mode 100644 index 000000000000..f00bd9e72a86 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_PMO_DCN3_H__ +#define __DML2_PMO_DCN3_H__ + +#include "dml2_internal_shared_types.h" + +bool pmo_dcn3_initialize(struct dml2_pmo_initialize_in_out *in_out); + +bool pmo_dcn3_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); + +bool pmo_dcn3_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); +bool pmo_dcn3_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); +bool pmo_dcn3_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); + +bool pmo_dcn3_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); +bool pmo_dcn3_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); +bool pmo_dcn3_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c new file mode 100644 index 000000000000..5769c2638f9a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -0,0 +1,2394 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc.
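+ +/* + * P-state management optimizer (PMO) for DCN4 with FAMS2: expands the base + * per-stream p-state strategy lists below into stream-order permutations and + * DRR variants, groups streams with identical (synchronizable) timings, and + * checks that each candidate strategy produces a schedulable microschedule. + */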
+ +#include "dml2_pmo_factory.h" +#include "dml2_debug.h" +#include "lib_float_math.h" +#include "dml2_pmo_dcn4_fams2.h" + +static const double MIN_VACTIVE_MARGIN_PCT = 0.25; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding +static const double MIN_BLANK_STUTTER_FACTOR = 3.0; + +static const struct dml2_pmo_pstate_strategy base_strategy_list_1_display[] = { + // VActive Preferred + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Then SVP + { + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Then VBlank + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = false, + }, + + // Then DRR + { + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Finally VBlank, but allow base clocks for latency to increase + /* + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + */ +}; + +static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / sizeof(struct dml2_pmo_pstate_strategy); + +static const struct dml2_pmo_pstate_strategy base_strategy_list_2_display[] = { + // VActive only is preferred + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Then VActive + VBlank + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = false, + }, + + // Then VBlank only + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = false, + }, + + // Then SVP + VBlank + { + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = false, + }, + + // Then SVP + DRR + { + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Then SVP + SVP + { + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Then DRR + VActive + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Then DRR + DRR + { + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // Finally VBlank, but allow base clocks for latency to increase + /* + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + */ +}; + +static const int 
base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / sizeof(struct dml2_pmo_pstate_strategy); + +static const struct dml2_pmo_pstate_strategy base_strategy_list_3_display[] = { + // All VActive + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // VActive + 1 VBlank + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na }, + .allow_state_increase = false, + }, + + // All VBlank + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na }, + .allow_state_increase = false, + }, + + // All DRR + { + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + + // All VBlank, with state increase allowed + /* + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na }, + .allow_state_increase = true, + }, + */ +}; + +static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / sizeof(struct dml2_pmo_pstate_strategy); + +static const struct dml2_pmo_pstate_strategy base_strategy_list_4_display[] = { + // All VActive + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive }, + .allow_state_increase = true, + }, + + // VActive + 1 VBlank + { + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank }, + .allow_state_increase = false, + }, + + // All Vblank + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank }, + .allow_state_increase = false, + }, + + // All DRR + { + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr }, + .allow_state_increase = true, + }, + + // All VBlank, with state increase allowed + /* + { + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank }, + .allow_state_increase = true, + }, + */ +}; + +static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / sizeof(struct dml2_pmo_pstate_strategy); + + +static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) +{ + bool result = true; + + if (*odm_mode == dml2_odm_mode_auto) { + switch (odms_calculated) { + case 1: + *odm_mode = dml2_odm_mode_bypass; + break; + case 2: + *odm_mode = dml2_odm_mode_combine_2to1; + break; + case 3: + *odm_mode = dml2_odm_mode_combine_3to1; + break; + case 4: + *odm_mode = dml2_odm_mode_combine_4to1; + break; + default: + result = false; + break; + } + } + + if (result) { + if (*odm_mode == dml2_odm_mode_bypass) { + *odm_mode = dml2_odm_mode_combine_2to1; + } else if (*odm_mode == dml2_odm_mode_combine_2to1) { + *odm_mode = dml2_odm_mode_combine_3to1; + } else if (*odm_mode == dml2_odm_mode_combine_3to1) { + *odm_mode = dml2_odm_mode_combine_4to1; + } else { + result = false; + } + } + + return result; +} + +static bool increase_mpc_combine_factor(unsigned int 
*mpc_combine_factor, unsigned int limit) +{ + if (*mpc_combine_factor < limit) { + (*mpc_combine_factor)++; + return true; + } + + return false; +} + +static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index) +{ + unsigned int i, count; + + count = 0; + for (i = 0; i < display_cfg->num_planes; i++) { + if (display_cfg->plane_descriptors[i].stream_index == stream_index) + count++; + } + + return count; +} + +static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, + int free_pipes) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i; + bool result = true; + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes are already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 + if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { + in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = + in_out->cfg_support_info->plane_support_info[i].dpps_used; + // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. + if (free_pipes > 0) { + if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, + pmo->mpc_combine_limit)) { + // We've reached max pipes allocatable to a single plane, so we fail. + result = false; + break; + } else { + // Successfully added another pipe to this failing plane. + free_pipes--; + } + } else { + // No free pipes to add. + result = false; + break; + } + } else { + // If the stream of this plane needs ODM combine, no further optimization can be done. + result = false; + break; + } + } + } + + return result; +} + +bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i, used_pipes, free_pipes, planes_on_stream; + bool result; + + if (in_out->display_config != in_out->optimized_display_cfg) { + memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg)); + } + + // Count number of free pipes, and check if any odm combine is in use. + used_pipes = 0; + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used; + } + free_pipes = pmo->ip_caps->pipe_count - used_pipes; + + // Optimization loop + // The goal here is to add more pipes to any planes + // which are failing mcache admissibility + result = true; + + // The optimization logic depends on whether ODM combine is enabled, and the stream count. + if (in_out->optimized_display_cfg->num_streams > 1 || in_out->instance->options->disable_dyn_odm) { + // If there are multiple streams, we are only able to optimize mcache failures on planes + // which are not ODM combined. + + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } else if (in_out->optimized_display_cfg->num_streams == 1) { + // In single stream cases, we still optimize mcache failures when there's ODM combine with some + // additional logic.
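+ // Note: raising the ODM combine factor consumes one extra pipe per plane on the stream, hence the free_pipes >= planes_on_stream check below.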
+ + if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) { + // If ODM combine is enabled, then the logic is to increase ODM combine factor. + + // Optimization for streams with > 1 ODM combine factor is only supported for single display. + planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0); + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes are already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream. + if (free_pipes >= planes_on_stream) { + if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode, + in_out->cfg_support_info->plane_support_info[i].dpps_used)) { + result = false; + } else { + break; + } + } else { + result = false; + break; + } + } + } + } else { + // If ODM combine is not enabled, then we can actually use the same logic as before. + + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } + } else { + result = true; + } + + return result; +} + +static enum dml2_pstate_method convert_strategy_to_drr_variant(const enum dml2_pstate_method base_strategy) +{ + enum dml2_pstate_method variant_strategy = 0; + + switch (base_strategy) { + case dml2_pstate_method_vactive: + variant_strategy = dml2_pstate_method_fw_vactive_drr; + break; + case dml2_pstate_method_vblank: + variant_strategy = dml2_pstate_method_fw_vblank_drr; + break; + case dml2_pstate_method_fw_svp: + variant_strategy = dml2_pstate_method_fw_svp_drr; + break; + case dml2_pstate_method_fw_vactive_drr: + case dml2_pstate_method_fw_vblank_drr: + case dml2_pstate_method_fw_svp_drr: + case dml2_pstate_method_fw_drr: + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_count: + case dml2_pstate_method_na: + default: + /* no variant for this mode */ + variant_strategy = base_strategy; + } + + return variant_strategy; +} + +static struct dml2_pmo_pstate_strategy *get_expanded_strategy_list(struct dml2_pmo_init_data *init_data, int stream_count) +{ + struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; + + switch (stream_count) { + case 1: + expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_1_display; + break; + case 2: + expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_2_display; + break; + case 3: + expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_3_display; + break; + case 4: + expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_4_display; + break; + default: + break; + } + + return expanded_strategy_list; +} + +static unsigned int get_num_expanded_strategies( + struct dml2_pmo_init_data *init_data, + int stream_count) +{ + return init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]; +} + +static void insert_strategy_into_expanded_list( + const struct dml2_pmo_pstate_strategy *per_stream_pstate_strategy, + const int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) +{ + if (expanded_strategy_list && num_expanded_strategies) { + memcpy(&expanded_strategy_list[*num_expanded_strategies], per_stream_pstate_strategy,
sizeof(struct dml2_pmo_pstate_strategy)); + + (*num_expanded_strategies)++; + } +} + +static void expand_base_strategy( + const struct dml2_pmo_pstate_strategy *base_strategy, + const unsigned int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) +{ + bool skip_to_next_stream; + bool expanded_strategy_added; + bool skip_iteration; + unsigned int i, j; + unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int stream_iteration_indices[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + struct dml2_pmo_pstate_strategy cur_strategy_list = { 0 }; + + /* determine number of displays per method */ + for (i = 0; i < stream_count; i++) { + /* increment the count of the earliest index with the same method */ + for (j = 0; j < stream_count; j++) { + if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { + num_streams_per_method[j] = num_streams_per_method[j] + 1; + break; + } + } + } + + cur_strategy_list.allow_state_increase = base_strategy->allow_state_increase; + + i = 0; + /* uses a while loop instead of recursion to build permutations of base strategy */ + while (stream_iteration_indices[0] < stream_count) { + skip_to_next_stream = false; + expanded_strategy_added = false; + skip_iteration = false; + + /* determine what to do for this iteration */ + if (stream_iteration_indices[i] < stream_count && num_streams_per_method[stream_iteration_indices[i]] != 0) { + /* decrement count and assign method */ + cur_strategy_list.per_stream_pstate_method[i] = base_strategy->per_stream_pstate_method[stream_iteration_indices[i]]; + num_streams_per_method[stream_iteration_indices[i]] -= 1; + + if (i >= stream_count - 1) { + /* insert into strategy list */ + insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, expanded_strategy_list, num_expanded_strategies); + expanded_strategy_added = true; + } else { + /* skip to next stream */ + skip_to_next_stream = true; + } + } else { + skip_iteration = true; + } + + /* prepare for next iteration */ + if (skip_to_next_stream) { + i++; + } else { + /* restore count */ + if (!skip_iteration) { + num_streams_per_method[stream_iteration_indices[i]] += 1; + } + + /* increment iteration count */ + stream_iteration_indices[i]++; + + /* if iterations are complete, or last stream was reached */ + if ((stream_iteration_indices[i] >= stream_count || expanded_strategy_added) && i > 0) { + /* reset per stream index, decrement i */ + stream_iteration_indices[i] = 0; + i--; + + /* restore previous stream's count and increment index */ + num_streams_per_method[stream_iteration_indices[i]] += 1; + stream_iteration_indices[i]++; + } + } + } +} + + +static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_strategy, + const struct dml2_pmo_pstate_strategy *variant_strategy, + const unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS], + const unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS], + const unsigned int stream_count) +{ + bool valid = true; + unsigned int i; + + /* check all restrictions are met */ + for (i = 0; i < stream_count; i++) { + /* vblank + vblank_drr variants are invalid */ + if (base_strategy->per_stream_pstate_method[i] == dml2_pstate_method_vblank && + ((num_streams_per_base_method[i] > 0 && num_streams_per_variant_method[i] > 0) || + num_streams_per_variant_method[i] > 1)) { + valid = false; + break; + } + } + + return valid; +} + +static void expand_variant_strategy( + const struct 
dml2_pmo_pstate_strategy *base_strategy, + const unsigned int stream_count, + const bool should_permute, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) +{ + bool variant_found; + unsigned int i, j; + unsigned int method_index; + unsigned int stream_index; + unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + enum dml2_pstate_method per_stream_variant_method[DML2_MAX_PLANES]; + struct dml2_pmo_pstate_strategy variant_strategy = { 0 }; + + /* determine number of displays per method */ + for (i = 0; i < stream_count; i++) { + /* increment the count of the earliest index with the same method */ + for (j = 0; j < stream_count; j++) { + if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { + num_streams_per_method[j] = num_streams_per_method[j] + 1; + break; + } + } + + per_stream_variant_method[i] = convert_strategy_to_drr_variant(base_strategy->per_stream_pstate_method[i]); + } + memcpy(num_streams_per_base_method, num_streams_per_method, sizeof(unsigned int) * PMO_DCN4_MAX_DISPLAYS); + + memcpy(&variant_strategy, base_strategy, sizeof(struct dml2_pmo_pstate_strategy)); + + method_index = 0; + /* uses a while loop instead of recursion to build permutations of base strategy */ + while (num_streams_per_base_method[0] > 0 || method_index != 0) { + if (method_index == stream_count) { + /* construct variant strategy */ + variant_found = false; + stream_index = 0; + + for (i = 0; i < stream_count; i++) { + for (j = 0; j < num_streams_per_base_method[i]; j++) { + variant_strategy.per_stream_pstate_method[stream_index++] = base_strategy->per_stream_pstate_method[i]; + } + + for (j = 0; j < num_streams_per_variant_method[i]; j++) { + variant_strategy.per_stream_pstate_method[stream_index++] = per_stream_variant_method[i]; + if (base_strategy->per_stream_pstate_method[i] != per_stream_variant_method[i]) { + variant_found = true; + } + } + } + + if (variant_found && is_variant_method_valid(base_strategy, &variant_strategy, num_streams_per_base_method, num_streams_per_variant_method, stream_count)) { + if (should_permute) { + /* permutations are permitted, proceed to expand */ + expand_base_strategy(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies); + } else { + /* no permutations allowed, so add to list now */ + insert_strategy_into_expanded_list(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies); + } + } + + /* rollback to earliest method with bases remaining */ + for (method_index = stream_count - 1; method_index > 0; method_index--) { + if (num_streams_per_base_method[method_index]) { + /* bases remaining */ + break; + } else { + /* reset counters */ + num_streams_per_base_method[method_index] = num_streams_per_method[method_index]; + num_streams_per_variant_method[method_index] = 0; + } + } + } + + if (num_streams_per_base_method[method_index]) { + num_streams_per_base_method[method_index]--; + num_streams_per_variant_method[method_index]++; + + method_index++; + } else if (method_index != 0) { + method_index++; + } + } +} + +void pmo_dcn4_fams2_expand_base_pstate_strategies( + const struct dml2_pmo_pstate_strategy *base_strategies_list, + const unsigned int num_base_strategies, + const unsigned int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + 
unsigned int *num_expanded_strategies) +{ + unsigned int i; + + /* expand every explicit base strategy (except all DRR) */ + for (i = 0; i < num_base_strategies; i++) { + expand_base_strategy(&base_strategies_list[i], stream_count, expanded_strategy_list, num_expanded_strategies); + expand_variant_strategy(&base_strategies_list[i], stream_count, true, expanded_strategy_list, num_expanded_strategies); + } +} + +bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) +{ + int i = 0; + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int base_list_size = 0; + const struct dml2_pmo_pstate_strategy *base_list = NULL; + unsigned int *expanded_list_size = NULL; + struct dml2_pmo_pstate_strategy *expanded_list = NULL; + + pmo->soc_bb = in_out->soc_bb; + pmo->ip_caps = in_out->ip_caps; + pmo->mpc_combine_limit = 2; + pmo->odm_combine_limit = 4; + pmo->mcg_clock_table_size = in_out->mcg_clock_table_size; + + pmo->fams_params.v2.subvp.refresh_rate_limit_max = 175; + pmo->fams_params.v2.subvp.refresh_rate_limit_min = 0; + pmo->fams_params.v2.drr.refresh_rate_limit_max = 1000; + pmo->fams_params.v2.drr.refresh_rate_limit_min = 119; + + pmo->options = in_out->options; + + /* generate permutations of p-state configs from base strategy list */ + for (i = 0; i < PMO_DCN4_MAX_DISPLAYS; i++) { + switch (i+1) { + case 1: + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = base_strategy_list_1_display; + base_list_size = base_strategy_list_1_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_1_display; + + break; + case 2: + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = base_strategy_list_2_display; + base_list_size = base_strategy_list_2_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_2_display; + + break; + case 3: + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = base_strategy_list_3_display; + base_list_size = base_strategy_list_3_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_3_display; + + break; + case 4: + if (pmo->options->override_strategy_lists[i] && pmo->options->num_override_strategies_per_list[i]) { + base_list = pmo->options->override_strategy_lists[i]; + base_list_size = pmo->options->num_override_strategies_per_list[i]; + } else { + base_list = base_strategy_list_4_display; + base_list_size = base_strategy_list_4_display_size; + } + + expanded_list_size = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i]; + expanded_list = pmo->init_data.pmo_dcn4.expanded_strategy_list_4_display; + + break; + } + + DML_ASSERT(base_list_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + + /* populate list */ + 
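/* expands the chosen base list into every stream-order permutation plus DRR variants */ +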
pmo_dcn4_fams2_expand_base_pstate_strategies( + base_list, + base_list_size, + i + 1, + expanded_list, + expanded_list_size); + } + + return true; +} + +static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator) +{ + /* + * Htotal, Hblank start/end, and Hsync start/end all must be divisible + * by the denominator in order for the horizontal timing params to be + * considered divisible. Hsync start is always 0. + */ + unsigned long h_blank_start = timing->h_total - timing->h_front_porch; + + return (timing->h_total % denominator == 0) && + (h_blank_start % denominator == 0) && + (timing->h_blank_end % denominator == 0) && + (timing->h_sync_width % denominator == 0); +} + +static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type) +{ + switch (encoder_type) { + case dml2_dp: + case dml2_edp: + case dml2_dp2p0: + case dml2_none: + return true; + case dml2_hdmi: + case dml2_hdmifrl: + default: + return false; + } +} + +bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) +{ + unsigned int i; + const struct dml2_display_cfg *display_config = + &in_out->base_display_config->display_config; + const struct dml2_core_mode_support_result *mode_support_result = + &in_out->base_display_config->mode_support_result; + struct dml2_optimization_stage4_state *state = + &in_out->base_display_config->stage4; + + if (in_out->instance->options->disable_dyn_odm || + (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) + return false; + + for (i = 0; i < display_config->num_planes; i++) + /* + * vmin optimization is required to be seamlessly switched off + * at any time when the new configuration is no longer + * supported. However switching from ODM combine to MPC combine + * is not always seamless. When there are not enough free pipes, + * we will have to use the same secondary OPP heads as secondary + * DPP pipes in MPC combine in the new state. This transition is + * expected to cause glitches. To avoid the transition, we only + * allow vmin optimization if the stream's base configuration + * doesn't require MPC combine. This condition checks if MPC + * combine is enabled. If so, do not optimize the stream. + */ + if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && + mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) + state->unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; + + for (i = 0; i < display_config->num_streams; i++) { + if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) + state->unoptimizable_streams[i] = true; + else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && + in_out->instance->options->disable_dyn_odm_for_stream_with_svp) + state->unoptimizable_streams[i] = true; + /* + * ODM Combine requires horizontal timing divisible by 2 so each + * ODM segment has the same size. + */ + else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) + state->unoptimizable_streams[i] = true; + /* + * Our hardware supports seamless ODM transitions for DP encoders + * only.
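+ * HDMI and HDMI FRL streams therefore stay marked unoptimizable below.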
+ */ + else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) + state->unoptimizable_streams[i] = true; + } + + state->performed = true; + + return true; +} + +bool pmo_dcn4_fams2_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) +{ + bool is_vmin = true; + + if (in_out->vmin_limits->dispclk_khz > 0 && + in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) + is_vmin = false; + + return is_vmin; +} + +static int find_highest_odm_load_stream_index( + const struct dml2_display_cfg *display_config, + const struct dml2_core_mode_support_result *mode_support_result) +{ + unsigned int i; + int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; + + for (i = 0; i < display_config->num_streams; i++) { + if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0) + odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz + / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; + else + odm_load = 0; + + if (odm_load > highest_odm_load) { + highest_odm_load_index = i; + highest_odm_load = odm_load; + } + } + + return highest_odm_load_index; +} + +bool pmo_dcn4_fams2_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) +{ + int stream_index; + const struct dml2_display_cfg *display_config = + &in_out->base_display_config->display_config; + const struct dml2_core_mode_support_result *mode_support_result = + &in_out->base_display_config->mode_support_result; + unsigned int odms_used; + struct dml2_stream_parameters *stream_descriptor; + bool optimizable = false; + + /* + * highest odm load stream must be optimizable to continue as dispclk is + * bounded by it. + */ + stream_index = find_highest_odm_load_stream_index(display_config, + mode_support_result); + + if (stream_index < 0 || + in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) + return false; + + odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; + if ((int)odms_used >= in_out->instance->odm_combine_limit) + return false; + + memcpy(in_out->optimized_display_config, + in_out->base_display_config, + sizeof(struct display_configuation_with_meta)); + + stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; + while (!optimizable && increase_odm_combine_factor( + &stream_descriptor->overrides.odm_mode, + odms_used)) { + switch (stream_descriptor->overrides.odm_mode) { + case dml2_odm_mode_combine_2to1: + optimizable = true; + break; + case dml2_odm_mode_combine_3to1: + /* + * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. + */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 3. + */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_combine_4to1: + /* + * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. 
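+ * With DSC enabled, the slice count must also divide evenly + * across the four ODM segments (checked below).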
+ */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 4. + */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + default: + break; + } + } + + return optimizable; +} + +static void set_bit_in_bitfield(unsigned int *bit_field, unsigned int bit_offset) +{ + *bit_field = *bit_field | (0x1 << bit_offset); +} + +static bool is_bit_set_in_bitfield(unsigned int bit_field, unsigned int bit_offset) +{ + if (bit_field & (0x1 << bit_offset)) + return true; + + return false; +} + +static void build_synchronized_timing_groups( + struct dml2_pmo_instance *pmo, + struct display_configuation_with_meta *display_config) +{ + unsigned int i, j; + struct dml2_timing_cfg *master_timing; + + unsigned int stream_mapped_mask = 0; + unsigned int num_timing_groups = 0; + unsigned int timing_group_idx = 0; + struct dml2_pmo_scratch *s = &pmo->scratch; + + /* clear all group masks */ + memset(s->pmo_dcn4.synchronized_timing_group_masks, 0, sizeof(s->pmo_dcn4.synchronized_timing_group_masks)); + memset(s->pmo_dcn4.group_is_drr_enabled, 0, sizeof(s->pmo_dcn4.group_is_drr_enabled)); + memset(s->pmo_dcn4.group_is_drr_active, 0, sizeof(s->pmo_dcn4.group_is_drr_active)); + memset(s->pmo_dcn4.group_line_time_us, 0, sizeof(s->pmo_dcn4.group_line_time_us)); + s->pmo_dcn4.num_timing_groups = 0; + + for (i = 0; i < display_config->display_config.num_streams; i++) { + master_timing = &display_config->display_config.stream_descriptors[i].timing; + + /* only need to build a group if this stream is not in a group already */ + if (is_bit_set_in_bitfield(stream_mapped_mask, i)) { + continue; + } + set_bit_in_bitfield(&stream_mapped_mask, i); + timing_group_idx = num_timing_groups; + num_timing_groups++; + + /* trivially set default timing group to itself */ + set_bit_in_bitfield(&s->pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i); + s->pmo_dcn4.group_line_time_us[timing_group_idx] = (double)master_timing->h_total / master_timing->pixel_clock_khz * 1000.0; + + /* if drr is in use, timing is not synchronizable */ + if (master_timing->drr_config.enabled) { + s->pmo_dcn4.group_is_drr_enabled[timing_group_idx] = true; + s->pmo_dcn4.group_is_drr_active[timing_group_idx] = !master_timing->drr_config.disallowed && + (master_timing->drr_config.drr_active_fixed || master_timing->drr_config.drr_active_variable); + continue; + } + + /* find synchronizable timing groups */ + for (j = i + 1; j < display_config->display_config.num_streams; j++) { + if (memcmp(master_timing, + &display_config->display_config.stream_descriptors[j].timing, + sizeof(struct dml2_timing_cfg)) == 0) { + set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j); + set_bit_in_bitfield(&stream_mapped_mask, j); + } + } + } + + s->pmo_dcn4.num_timing_groups = num_timing_groups; +} + +static bool all_timings_support_vactive(const struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_config, + unsigned int mask) +{ + unsigned int i; + bool valid = true; + + // Check only the streams in the mask for sufficient vactive margin
+ for (i = 0; i < display_config->display_config.num_streams; i++) { + if (is_bit_set_in_bitfield(mask, i)) { + /* check if stream has enough vactive margin */ + valid &= is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.stream_vactive_capability_mask, i); + } + } + + return valid; +} + +static bool all_timings_support_vblank(const struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_config, + unsigned int mask) +{ + unsigned int i; + + bool synchronizable = true; + + /* find first vblank stream index and compare the timing group mask */ + for (i = 0; i < display_config->display_config.num_streams; i++) { + if (is_bit_set_in_bitfield(mask, i)) { + if (mask != pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[i]) { + /* vblank streams are not synchronizable */ + synchronizable = false; + } + break; + } + } + + return synchronizable; +} + +static unsigned int calc_svp_microschedule(const struct dml2_pstate_meta *pstate_meta) +{ + return pstate_meta->contention_delay_otg_vlines + + pstate_meta->method_subvp.programming_delay_otg_vlines + + pstate_meta->method_subvp.phantom_vtotal + + pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + + pstate_meta->blackout_otg_vlines; +} + +static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_config, + unsigned int mask) +{ + unsigned int i; + for (i = 0; i < DML2_MAX_PLANES; i++) { + const struct dml2_stream_parameters *stream_descriptor; + const struct dml2_pstate_meta *stream_pstate_meta; + + if (is_bit_set_in_bitfield(mask, i)) { + stream_descriptor = &display_config->display_config.stream_descriptors[i]; + stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[i]; + + if (!stream_descriptor->timing.drr_config.enabled) + return false; + + /* cannot support required vtotal */ + if (stream_pstate_meta->method_drr.stretched_vtotal > stream_pstate_meta->max_vtotal) { + return false; + } + + /* check rr is within bounds */ + if (stream_pstate_meta->nom_refresh_rate_hz < pmo->fams_params.v2.drr.refresh_rate_limit_min || + stream_pstate_meta->nom_refresh_rate_hz > pmo->fams_params.v2.drr.refresh_rate_limit_max) { + return false; + } + + /* check required stretch is allowed */ + if (stream_descriptor->timing.drr_config.max_instant_vtotal_delta > 0 && + stream_pstate_meta->method_drr.stretched_vtotal - stream_pstate_meta->nom_vtotal > stream_descriptor->timing.drr_config.max_instant_vtotal_delta) { + return false; + } + } + } + + return true; +} + +static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_config, + unsigned int mask) +{ + const struct dml2_stream_parameters *stream_descriptor; + const struct dml2_plane_parameters *plane_descriptor; + const struct dml2_pstate_meta *stream_pstate_meta; + unsigned int microschedule_vlines; + unsigned int i; + unsigned int mcaches_per_plane; + unsigned int total_mcaches_required = 0; + + unsigned int num_planes_per_stream[DML2_MAX_PLANES] = { 0 }; + + /* confirm the timing is not a centered timing */ + for (i = 0; i < display_config->display_config.num_planes; i++) { + plane_descriptor = &display_config->display_config.plane_descriptors[i]; + mcaches_per_plane = 0; + + if (plane_descriptor->surface.dcc.enable) { + mcaches_per_plane += display_config->stage2.mcache_allocations[i].num_mcaches_plane0 + + display_config->stage2.mcache_allocations[i].num_mcaches_plane1 -
(display_config->stage2.mcache_allocations[i].last_slice_sharing.plane0_plane1 ? 1 : 0); + } + + if (is_bit_set_in_bitfield(mask, (unsigned char)plane_descriptor->stream_index)) { + num_planes_per_stream[plane_descriptor->stream_index]++; + + /* check recout height covers entire otg vactive, and single plane */ + if (num_planes_per_stream[plane_descriptor->stream_index] > 1 || + !plane_descriptor->composition.rect_out_height_spans_vactive || + plane_descriptor->composition.rotation_angle != dml2_rotation_0) { + return false; + } + + /* phantom requires same number of mcaches as main */ + if (plane_descriptor->surface.dcc.enable) { + mcaches_per_plane *= 2; + } + } + total_mcaches_required += mcaches_per_plane; + } + + if (total_mcaches_required > pmo->soc_bb->num_dcc_mcaches) { + /* too many mcaches required */ + return false; + } + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(mask, i)) { + stream_descriptor = &display_config->display_config.stream_descriptors[i]; + stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[i]; + + if (stream_descriptor->overrides.disable_subvp) { + return false; + } + + microschedule_vlines = calc_svp_microschedule(&pmo->scratch.pmo_dcn4.stream_pstate_meta[i]); + + /* block if using an interlaced timing */ + if (stream_descriptor->timing.interlaced) { + return false; + } + + /* 1) svp main stream's vactive must be able to fit the microschedule + * 2) refresh rate must be within the allowed bounds + */ + if (microschedule_vlines >= stream_descriptor->timing.v_active || + (stream_pstate_meta->nom_refresh_rate_hz < pmo->fams_params.v2.subvp.refresh_rate_limit_min || + stream_pstate_meta->nom_refresh_rate_hz > pmo->fams_params.v2.subvp.refresh_rate_limit_max)) { + return false; + } + } + } + + return true; +} + +static void insert_into_candidate_list(const struct dml2_pmo_pstate_strategy *pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) +{ + scratch->pmo_dcn4.pstate_strategy_candidates[scratch->pmo_dcn4.num_pstate_candidates] = *pstate_strategy; + scratch->pmo_dcn4.num_pstate_candidates++; +} + +static enum dml2_pstate_method uclk_pstate_strategy_override_to_pstate_method(const enum dml2_uclk_pstate_change_strategy override_strategy) +{ + enum dml2_pstate_method method = dml2_pstate_method_na; + + switch (override_strategy) { + case dml2_uclk_pstate_change_strategy_force_vactive: + method = dml2_pstate_method_vactive; + break; + case dml2_uclk_pstate_change_strategy_force_vblank: + method = dml2_pstate_method_vblank; + break; + case dml2_uclk_pstate_change_strategy_force_drr: + method = dml2_pstate_method_fw_drr; + break; + case dml2_uclk_pstate_change_strategy_force_mall_svp: + method = dml2_pstate_method_fw_svp; + break; + case dml2_uclk_pstate_change_strategy_force_mall_full_frame: + case dml2_uclk_pstate_change_strategy_auto: + default: + method = dml2_pstate_method_na; + } + + return method; +} + +static enum dml2_uclk_pstate_change_strategy pstate_method_to_uclk_pstate_strategy_override(const enum dml2_pstate_method method) +{ + enum dml2_uclk_pstate_change_strategy override_strategy = dml2_uclk_pstate_change_strategy_auto; + + switch (method) { + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_vactive; + break; + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_vblank; + break; + case dml2_pstate_method_fw_svp: + case 
dml2_pstate_method_fw_svp_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; + break; + case dml2_pstate_method_fw_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_drr; + break; + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_count: + case dml2_pstate_method_na: + default: + override_strategy = dml2_uclk_pstate_change_strategy_auto; + } + + return override_strategy; +} + +static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pstate_method method) +{ + unsigned int i; + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(plane_mask, i)) { + if (display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto && + display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != pstate_method_to_uclk_pstate_strategy_override(method)) + return false; + } + } + + return true; +} + +static void build_method_scheduling_params( + struct dml2_pstate_per_method_common_meta *stream_method_pstate_meta, + struct dml2_pstate_meta *stream_pstate_meta) +{ + stream_method_pstate_meta->allow_time_us = + (double)((int)stream_method_pstate_meta->allow_end_otg_vline - (int)stream_method_pstate_meta->allow_start_otg_vline) * + stream_pstate_meta->otg_vline_time_us; + if (stream_method_pstate_meta->allow_time_us >= stream_method_pstate_meta->period_us) { + /* when allow wave overlaps an entire frame, it is always schedulable (DRR can do this)*/ + stream_method_pstate_meta->disallow_time_us = 0.0; + } else { + stream_method_pstate_meta->disallow_time_us = + stream_method_pstate_meta->period_us - stream_method_pstate_meta->allow_time_us; + } +} + +static struct dml2_pstate_per_method_common_meta *get_per_method_common_meta( + struct dml2_pmo_instance *pmo, + enum dml2_pstate_method stream_pstate_method, + int stream_idx) +{ + struct dml2_pstate_per_method_common_meta *stream_method_pstate_meta = NULL; + + switch (stream_pstate_method) { + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: + stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_vactive.common; + break; + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: + stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_vblank.common; + break; + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: + stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_subvp.common; + break; + case dml2_pstate_method_fw_drr: + stream_method_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_idx].method_drr.common; + break; + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_count: + case dml2_pstate_method_na: + default: + stream_method_pstate_meta = NULL; + } + + return stream_method_pstate_meta; +} + +static bool is_timing_group_schedulable( + struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, + const struct dml2_pmo_pstate_strategy *pstate_strategy, + const unsigned int timing_group_idx, + struct dml2_pstate_per_method_common_meta 
*group_pstate_meta) +{ + unsigned int i; + struct dml2_pstate_per_method_common_meta *stream_method_pstate_meta; + + unsigned int base_stream_idx = 0; + struct dml2_pmo_scratch *s = &pmo->scratch; + + /* find base stream idx */ + for (base_stream_idx = 0; base_stream_idx < display_cfg->display_config.num_streams; base_stream_idx++) { + if (is_bit_set_in_bitfield(s->pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], base_stream_idx)) { + /* master stream found */ + break; + } + } + + /* init allow start and end lines for timing group */ + stream_method_pstate_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[base_stream_idx], base_stream_idx); + if (!stream_method_pstate_meta) + return false; + + group_pstate_meta->allow_start_otg_vline = stream_method_pstate_meta->allow_start_otg_vline; + group_pstate_meta->allow_end_otg_vline = stream_method_pstate_meta->allow_end_otg_vline; + group_pstate_meta->period_us = stream_method_pstate_meta->period_us; + for (i = base_stream_idx + 1; i < display_cfg->display_config.num_streams; i++) { + if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { + stream_method_pstate_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); + if (!stream_method_pstate_meta) + continue; + + if (group_pstate_meta->allow_start_otg_vline < stream_method_pstate_meta->allow_start_otg_vline) { + /* set group allow start to larger otg vline */ + group_pstate_meta->allow_start_otg_vline = stream_method_pstate_meta->allow_start_otg_vline; + } + + if (group_pstate_meta->allow_end_otg_vline > stream_method_pstate_meta->allow_end_otg_vline) { + /* set group allow end to smaller otg vline */ + group_pstate_meta->allow_end_otg_vline = stream_method_pstate_meta->allow_end_otg_vline; + } + + /* check waveform still has positive width */ + if (group_pstate_meta->allow_start_otg_vline >= group_pstate_meta->allow_end_otg_vline) { + /* timing group is not schedulable */ + return false; + } + } + } + + /* calculate the rest of the meta */ + build_method_scheduling_params(group_pstate_meta, &pmo->scratch.pmo_dcn4.stream_pstate_meta[base_stream_idx]); + + return group_pstate_meta->allow_time_us > 0.0 && + group_pstate_meta->disallow_time_us < pmo->ip_caps->fams2.max_allow_delay_us; +} + +static bool is_config_schedulable( + struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, + const struct dml2_pmo_pstate_strategy *pstate_strategy) +{ + unsigned int i, j; + bool schedulable; + struct dml2_pmo_scratch *s = &pmo->scratch; + + double max_allow_delay_us = 0.0; + + memset(s->pmo_dcn4.group_common_pstate_meta, 0, sizeof(s->pmo_dcn4.group_common_pstate_meta)); + memset(s->pmo_dcn4.sorted_group_gtl_disallow_index, 0, sizeof(unsigned int) * DML2_MAX_PLANES); + + /* search for a general solution to the schedule */ + + /* STAGE 0: Early return for special cases */ + if (display_cfg->display_config.num_streams == 0) { + return true; + } + + /* STAGE 1: confirm allow waves overlap for synchronizable streams */ + schedulable = true; + for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { + s->pmo_dcn4.sorted_group_gtl_disallow_index[i] = i; + s->pmo_dcn4.sorted_group_gtl_period_index[i] = i; + if (!is_timing_group_schedulable(pmo, display_cfg, pstate_strategy, i, &s->pmo_dcn4.group_common_pstate_meta[i])) { + /* synchronized timing group was not schedulable */ + schedulable = false; + break; + } + max_allow_delay_us += 
s->pmo_dcn4.group_common_pstate_meta[i].disallow_time_us; + } + + if ((schedulable && s->pmo_dcn4.num_timing_groups <= 1) || !schedulable) { + /* 1. the only timing group was schedulable, so early pass + * 2. one of the timing groups was not schedulable, so early fail */ + return schedulable; + } + + /* STAGE 2: Check allow can't be masked entirely by other disallows */ + schedulable = true; + + /* sort disallow times from greatest to least */ + for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { + bool swapped = false; + + for (j = 0; j < s->pmo_dcn4.num_timing_groups - 1; j++) { + double j_disallow_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j]].disallow_time_us; + double jp1_disallow_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]].disallow_time_us; + if (j_disallow_us < jp1_disallow_us) { + /* swap as A < B */ + swap(s->pmo_dcn4.sorted_group_gtl_disallow_index[j], + s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]); + swapped = true; + } + } + + /* sorted, exit early */ + if (!swapped) + break; + } + + /* Check worst case disallow region occurs in the middle of allow for the + * other display, or when >2 streams continue to halve the remaining allow time. + */ + for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { + if (s->pmo_dcn4.group_common_pstate_meta[i].disallow_time_us <= 0.0) { + /* this timing group always allows */ + continue; + } + + double max_allow_time_us = s->pmo_dcn4.group_common_pstate_meta[i].allow_time_us; + for (j = 0; j < s->pmo_dcn4.num_timing_groups; j++) { + unsigned int sorted_j = s->pmo_dcn4.sorted_group_gtl_disallow_index[j]; + /* stream can't overlap itself */ + if (i != sorted_j && s->pmo_dcn4.group_common_pstate_meta[sorted_j].disallow_time_us > 0.0) { + max_allow_time_us = math_min2( + s->pmo_dcn4.group_common_pstate_meta[sorted_j].allow_time_us, + (max_allow_time_us - s->pmo_dcn4.group_common_pstate_meta[sorted_j].disallow_time_us) / 2); + + if (max_allow_time_us < 0.0) { + /* failed exit early */ + break; + } + } + } + + if (max_allow_time_us <= 0.0) { + /* not enough time for microschedule in the worst case */ + schedulable = false; + break; + } + } + + if (schedulable && max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) { + return true; + } + + /* STAGE 3: check larger allow can fit period of all other streams */ + schedulable = true; + + /* sort periods from greatest to least */ + for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { + bool swapped = false; + + for (j = 0; j < s->pmo_dcn4.num_timing_groups - 1; j++) { + double j_period_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j]].period_us; + double jp1_period_us = s->pmo_dcn4.group_common_pstate_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]].period_us; + if (j_period_us < jp1_period_us) { + /* swap as A < B */ + swap(s->pmo_dcn4.sorted_group_gtl_period_index[j], + s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]); + swapped = true; + } + } + + /* sorted, exit early */ + if (!swapped) + break; + } + + /* check larger allow can fit period of all other streams */ + for (i = 0; i < s->pmo_dcn4.num_timing_groups - 1; i++) { + unsigned int sorted_i = s->pmo_dcn4.sorted_group_gtl_period_index[i]; + unsigned int sorted_ip1 = s->pmo_dcn4.sorted_group_gtl_period_index[i + 1]; + + if (s->pmo_dcn4.group_common_pstate_meta[sorted_i].allow_time_us < s->pmo_dcn4.group_common_pstate_meta[sorted_ip1].period_us || + (s->pmo_dcn4.group_is_drr_enabled[sorted_ip1] && 
s->pmo_dcn4.group_is_drr_active[sorted_ip1])) { + schedulable = false; + break; + } + } + + if (schedulable && max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) { + return true; + } + + /* STAGE 4: When using HW exclusive modes, check disallow alignments are within allowed threshold */ + if (s->pmo_dcn4.num_timing_groups == 2 && + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[0]) && + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[1])) { + double period_ratio; + double max_shift_us; + double shift_per_period; + + /* default period_0 > period_1 */ + unsigned int lrg_idx = 0; + unsigned int sml_idx = 1; + if (s->pmo_dcn4.group_common_pstate_meta[0].period_us < s->pmo_dcn4.group_common_pstate_meta[1].period_us) { + /* period_0 < period_1 */ + lrg_idx = 1; + sml_idx = 0; + } + period_ratio = s->pmo_dcn4.group_common_pstate_meta[lrg_idx].period_us / s->pmo_dcn4.group_common_pstate_meta[sml_idx].period_us; + shift_per_period = s->pmo_dcn4.group_common_pstate_meta[sml_idx].period_us * (period_ratio - math_floor(period_ratio)); + max_shift_us = s->pmo_dcn4.group_common_pstate_meta[lrg_idx].disallow_time_us - s->pmo_dcn4.group_common_pstate_meta[sml_idx].allow_time_us; + max_allow_delay_us = max_shift_us / shift_per_period * s->pmo_dcn4.group_common_pstate_meta[lrg_idx].period_us; + + if (shift_per_period > 0.0 && + shift_per_period < s->pmo_dcn4.group_common_pstate_meta[lrg_idx].allow_time_us + s->pmo_dcn4.group_common_pstate_meta[sml_idx].allow_time_us && + max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) { + schedulable = true; + } + } + + return schedulable; +} + +static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, + const enum dml2_pstate_method stream_pstate_method, + unsigned int stream_index) +{ + const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[stream_index]; + bool strategy_matches_drr_requirements = true; + + /* check if strategy is compatible with stream drr capability and strategy */ + if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && + display_cfg->display_config.num_streams > 1 && + stream_descriptor->timing.drr_config.enabled && + (stream_descriptor->timing.drr_config.drr_active_fixed || stream_descriptor->timing.drr_config.drr_active_variable)) { + /* DRR is active, so config may become unschedulable */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && + is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && + stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable) { + /* DRR is variable, fw exclusive methods require DRR to be clamped */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && + pmo->options->disable_drr_var_when_var_active && + stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable) { + /* DRR variable is active, but policy blocks DRR for p-state when this happens */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && + (pmo->options->disable_drr_var || + !stream_descriptor->timing.drr_config.enabled || + 
stream_descriptor->timing.drr_config.disallowed)) { + /* DRR variable strategies are disallowed due to settings or policy */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method) && + (pmo->options->disable_drr_clamped || + (!stream_descriptor->timing.drr_config.enabled || + (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable)) || + (pmo->options->disable_drr_clamped_when_var_active && + stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable))) { + /* DRR fixed strategies are disallowed due to settings or policy */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && + pmo->options->disable_fams2) { + /* FW modes require FAMS2 */ + strategy_matches_drr_requirements = false; + } + + return strategy_matches_drr_requirements; +} + +static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, + const struct dml2_pmo_pstate_strategy *pstate_strategy) +{ + struct dml2_pmo_scratch *s = &pmo->scratch; + + unsigned int stream_index = 0; + + unsigned int svp_count = 0; + unsigned int svp_stream_mask = 0; + unsigned int drr_count = 0; + unsigned int drr_stream_mask = 0; + unsigned int vactive_count = 0; + unsigned int vactive_stream_mask = 0; + unsigned int vblank_count = 0; + unsigned int vblank_stream_mask = 0; + + bool strategy_matches_forced_requirements = true; + bool strategy_matches_drr_requirements = true; + + // Tabulate everything + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + + if (!all_planes_match_method(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], + pstate_strategy->per_stream_pstate_method[stream_index])) { + strategy_matches_forced_requirements = false; + break; + } + + strategy_matches_drr_requirements &= + stream_matches_drr_policy(pmo, display_cfg, pstate_strategy->per_stream_pstate_method[stream_index], stream_index); + + if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { + svp_count++; + set_bit_in_bitfield(&svp_stream_mask, stream_index); + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { + drr_count++; + set_bit_in_bitfield(&drr_stream_mask, stream_index); + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { + vactive_count++; + set_bit_in_bitfield(&vactive_stream_mask, stream_index); + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { + vblank_count++; + set_bit_in_bitfield(&vblank_stream_mask, stream_index); + } + } + + if (!strategy_matches_forced_requirements || !strategy_matches_drr_requirements) + return false; + + if (vactive_count > 0 && !all_timings_support_vactive(pmo, display_cfg, vactive_stream_mask)) + return false; + + if (vblank_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vblank(pmo, display_cfg, vblank_stream_mask))) + return false; + + if 
(drr_count > 0 && (pmo->options->disable_drr_var || !all_timings_support_drr(pmo, display_cfg, drr_stream_mask))) + return false; + + if (svp_count > 0 && (pmo->options->disable_svp || !all_timings_support_svp(pmo, display_cfg, svp_stream_mask))) + return false; + + return is_config_schedulable(pmo, display_cfg, pstate_strategy); +} + +static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) +{ + unsigned int i; + int min_vactive_margin_us = 0xFFFFFFF; + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(plane_mask, i)) { + if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active < min_vactive_margin_us) + min_vactive_margin_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active; + } + } + + return min_vactive_margin_us; +} + +static unsigned int get_vactive_det_fill_latency_delay_us(const struct display_configuation_with_meta *display_cfg, int plane_mask) +{ + unsigned char i; + unsigned int max_vactive_fill_us = 0; + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(plane_mask, i)) { + if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_vactive_det_fill_delay_us > max_vactive_fill_us) + max_vactive_fill_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_vactive_det_fill_delay_us; + } + } + + return max_vactive_fill_us; +} + +static void build_pstate_meta_per_stream(struct dml2_pmo_instance *pmo, + struct display_configuation_with_meta *display_config, + int stream_index) +{ + const struct dml2_ip_capabilities *ip_caps = pmo->ip_caps; + const struct dml2_stream_parameters *stream_descriptor = &display_config->display_config.stream_descriptors[stream_index]; + const struct core_stream_support_info *stream_info = &display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index]; + const struct dml2_timing_cfg *timing = &stream_descriptor->timing; + struct dml2_pstate_meta *stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index]; + + /* worst case all other streams require some programming at the same time, 0 if only 1 stream */ + unsigned int contention_delay_us = (ip_caps->fams2.vertical_interrupt_ack_delay_us + + (unsigned int)math_max3(ip_caps->fams2.subvp_programming_delay_us, ip_caps->fams2.drr_programming_delay_us, ip_caps->fams2.allow_programming_delay_us)) * + (display_config->display_config.num_streams - 1); + + /* common */ + stream_pstate_meta->valid = true; + stream_pstate_meta->otg_vline_time_us = (double)timing->h_total / timing->pixel_clock_khz * 1000.0; + stream_pstate_meta->nom_vtotal = stream_descriptor->timing.vblank_nom + stream_descriptor->timing.v_active; + stream_pstate_meta->nom_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 / + (stream_pstate_meta->nom_vtotal * timing->h_total); + stream_pstate_meta->nom_frame_time_us = + (double)stream_pstate_meta->nom_vtotal * stream_pstate_meta->otg_vline_time_us; + stream_pstate_meta->vblank_start = timing->v_blank_end + timing->v_active; + + if (stream_descriptor->timing.drr_config.enabled == true) { + if (stream_descriptor->timing.drr_config.min_refresh_uhz != 0.0) { + stream_pstate_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz / + ((double)stream_descriptor->timing.drr_config.min_refresh_uhz * stream_descriptor->timing.h_total) * 1e9); + } 
else { + /* assume min of 48Hz */ + stream_pstate_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz / + (48000000.0 * stream_descriptor->timing.h_total) * 1e9); + } + } else { + stream_pstate_meta->max_vtotal = stream_pstate_meta->nom_vtotal; + } + stream_pstate_meta->min_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 / + (stream_pstate_meta->max_vtotal * timing->h_total); + stream_pstate_meta->max_frame_time_us = + (double)stream_pstate_meta->max_vtotal * stream_pstate_meta->otg_vline_time_us; + + stream_pstate_meta->scheduling_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.scheduling_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->vertical_interrupt_ack_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.vertical_interrupt_ack_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->contention_delay_otg_vlines = + (unsigned int)math_ceil(contention_delay_us / stream_pstate_meta->otg_vline_time_us); + /* worst case allow to target needs to account for all streams' allow events overlapping, and 1 line for error */ + stream_pstate_meta->allow_to_target_delay_otg_vlines = + (unsigned int)(math_ceil((ip_caps->fams2.vertical_interrupt_ack_delay_us + contention_delay_us + ip_caps->fams2.allow_programming_delay_us) / stream_pstate_meta->otg_vline_time_us)) + 1; + stream_pstate_meta->min_allow_width_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.min_allow_width_us / stream_pstate_meta->otg_vline_time_us); + /* this value should account for urgent latency */ + stream_pstate_meta->blackout_otg_vlines = + (unsigned int)math_ceil(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us / + stream_pstate_meta->otg_vline_time_us); + + /* scheduling params should be built based on the worst case for allow_time:disallow_time */ + + /* vactive */ + if (display_config->display_config.num_streams == 1) { + /* for single stream, guarantee at least an instant of allow */ + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = (unsigned int)math_floor( + math_max2(0.0, + timing->v_active - math_max2(1.0, stream_pstate_meta->min_allow_width_otg_vlines) - stream_pstate_meta->blackout_otg_vlines)); + } else { + /* for multi stream, bound to a max fill time defined by IP caps */ + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = + (unsigned int)math_floor((double)ip_caps->max_vactive_det_fill_delay_us / stream_pstate_meta->otg_vline_time_us); + } + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us = stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines * stream_pstate_meta->otg_vline_time_us; + + if (stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us > 0.0) { + stream_pstate_meta->method_vactive.common.allow_start_otg_vline = + timing->v_blank_end + stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; + stream_pstate_meta->method_vactive.common.allow_end_otg_vline = + stream_pstate_meta->vblank_start - + stream_pstate_meta->blackout_otg_vlines; + } else { + stream_pstate_meta->method_vactive.common.allow_start_otg_vline = 0; + stream_pstate_meta->method_vactive.common.allow_end_otg_vline = 0; + } + stream_pstate_meta->method_vactive.common.period_us = stream_pstate_meta->nom_frame_time_us; + build_method_scheduling_params(&stream_pstate_meta->method_vactive.common, stream_pstate_meta); + + /* vblank */ + stream_pstate_meta->method_vblank.common.allow_start_otg_vline = 
stream_pstate_meta->vblank_start; + stream_pstate_meta->method_vblank.common.allow_end_otg_vline = + stream_pstate_meta->method_vblank.common.allow_start_otg_vline + 1; + stream_pstate_meta->method_vblank.common.period_us = stream_pstate_meta->nom_frame_time_us; + build_method_scheduling_params(&stream_pstate_meta->method_vblank.common, stream_pstate_meta); + + /* subvp */ + stream_pstate_meta->method_subvp.programming_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.subvp_programming_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->method_subvp.df_throttle_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.subvp_df_throttle_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.subvp_prefetch_to_mall_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->method_subvp.phantom_vactive = + stream_pstate_meta->allow_to_target_delay_otg_vlines + + stream_pstate_meta->min_allow_width_otg_vlines + + stream_info->phantom_min_v_active; + stream_pstate_meta->method_subvp.phantom_vfp = + stream_pstate_meta->method_subvp.df_throttle_delay_otg_vlines; + /* phantom vtotal = v_bp(vstartup) + v_sync(1) + v_fp(throttle_delay) + v_active(allow_to_target + min_allow + min_vactive)*/ + stream_pstate_meta->method_subvp.phantom_vtotal = + stream_info->phantom_v_startup + + stream_pstate_meta->method_subvp.phantom_vfp + + 1 + + stream_pstate_meta->method_subvp.df_throttle_delay_otg_vlines + + stream_pstate_meta->method_subvp.phantom_vactive; + stream_pstate_meta->method_subvp.common.allow_start_otg_vline = + stream_descriptor->timing.v_blank_end + + stream_pstate_meta->contention_delay_otg_vlines + + stream_pstate_meta->method_subvp.programming_delay_otg_vlines + + stream_pstate_meta->method_subvp.phantom_vtotal + + stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + + stream_pstate_meta->allow_to_target_delay_otg_vlines; + stream_pstate_meta->method_subvp.common.allow_end_otg_vline = + stream_pstate_meta->vblank_start - + stream_pstate_meta->blackout_otg_vlines; + stream_pstate_meta->method_subvp.common.period_us = stream_pstate_meta->nom_frame_time_us; + build_method_scheduling_params(&stream_pstate_meta->method_subvp.common, stream_pstate_meta); + + /* drr */ + stream_pstate_meta->method_drr.programming_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.drr_programming_delay_us / stream_pstate_meta->otg_vline_time_us); + stream_pstate_meta->method_drr.common.allow_start_otg_vline = + stream_pstate_meta->vblank_start + + stream_pstate_meta->allow_to_target_delay_otg_vlines; + stream_pstate_meta->method_drr.common.period_us = stream_pstate_meta->nom_frame_time_us; + if (display_config->display_config.num_streams <= 1) { + /* only need to stretch vblank for blackout time */ + stream_pstate_meta->method_drr.stretched_vtotal = + stream_pstate_meta->nom_vtotal + + stream_pstate_meta->allow_to_target_delay_otg_vlines + + stream_pstate_meta->min_allow_width_otg_vlines + + stream_pstate_meta->blackout_otg_vlines; + } else { + /* multi display needs to always be schedulable */ + stream_pstate_meta->method_drr.stretched_vtotal = + stream_pstate_meta->nom_vtotal * 2 + + stream_pstate_meta->allow_to_target_delay_otg_vlines + + stream_pstate_meta->min_allow_width_otg_vlines + + stream_pstate_meta->blackout_otg_vlines; + } + stream_pstate_meta->method_drr.common.allow_end_otg_vline = + 
stream_pstate_meta->method_drr.stretched_vtotal -
+ stream_pstate_meta->blackout_otg_vlines;
+ build_method_scheduling_params(&stream_pstate_meta->method_drr.common, stream_pstate_meta);
+}
+
+static void build_subvp_meta_per_stream(struct dml2_pmo_instance *pmo,
+ struct display_configuation_with_meta *display_config,
+ int stream_index)
+{
+ struct dml2_implicit_svp_meta *stream_svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index];
+ struct dml2_pstate_meta *stream_pstate_meta = &pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index];
+
+ stream_svp_meta->valid = true;
+
+ /* PMO FAMS2 precalculates these values */
+ stream_svp_meta->v_active = stream_pstate_meta->method_subvp.phantom_vactive;
+ stream_svp_meta->v_front_porch = stream_pstate_meta->method_subvp.phantom_vfp;
+ stream_svp_meta->v_total = stream_pstate_meta->method_subvp.phantom_vtotal;
+}
+
+bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out)
+{
+ struct dml2_pmo_instance *pmo = in_out->instance;
+ struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3;
+ struct dml2_pmo_scratch *s = &pmo->scratch;
+
+ struct display_configuation_with_meta *display_config;
+ const struct dml2_plane_parameters *plane_descriptor;
+ const struct dml2_pmo_pstate_strategy *strategy_list = NULL;
+ struct dml2_pmo_pstate_strategy override_base_strategy = { 0 };
+ unsigned int strategy_list_size = 0;
+ unsigned int plane_index, stream_index, i;
+ bool build_override_strategy = true;
+
+ state->performed = true;
+ in_out->base_display_config->stage3.min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency;
+
+ display_config = in_out->base_display_config;
+ display_config->display_config.overrides.enable_subvp_implicit_pmo = true;
+
+ memset(s, 0, sizeof(struct dml2_pmo_scratch));
+
+ if (display_config->display_config.overrides.all_streams_blanked) {
+ return true;
+ }
+
+ pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency;
+ pmo->scratch.pmo_dcn4.max_latency_index = pmo->mcg_clock_table_size;
+ pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency;
+
+ // First build the stream plane mask (array of bitfields indexed by stream, indicating plane mapping)
+ for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
+ plane_descriptor = &display_config->display_config.plane_descriptors[plane_index];
+
+ set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index);
+
+ state->pstate_switch_modes[plane_index] = dml2_pstate_method_vactive;
+
+ build_override_strategy &= plane_descriptor->overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto;
+ override_base_strategy.per_stream_pstate_method[plane_descriptor->stream_index] =
+ uclk_pstate_strategy_override_to_pstate_method(plane_descriptor->overrides.uclk_pstate_change_strategy);
+ }
+
+ // Figure out which streams can do vactive, and also build up implicit SVP and FAMS2 meta
+ for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) {
+ if (get_vactive_pstate_margin(display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) >= (int)(MIN_VACTIVE_MARGIN_PCT * pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us))
+ set_bit_in_bitfield(&s->pmo_dcn4.stream_vactive_capability_mask, stream_index);
+
+ /* FAMS2 meta */
+
build_pstate_meta_per_stream(pmo, display_config, stream_index); + + /* SVP meta */ + build_subvp_meta_per_stream(pmo, display_config, stream_index); + } + + /* get synchronized timing groups */ + build_synchronized_timing_groups(pmo, display_config); + + if (build_override_strategy) { + /* build expanded override strategy list (no permutations) */ + override_base_strategy.allow_state_increase = true; + s->pmo_dcn4.num_expanded_override_strategies = 0; + insert_strategy_into_expanded_list(&override_base_strategy, + display_config->display_config.num_streams, + s->pmo_dcn4.expanded_override_strategy_list, + &s->pmo_dcn4.num_expanded_override_strategies); + expand_variant_strategy(&override_base_strategy, + display_config->display_config.num_streams, + false, + s->pmo_dcn4.expanded_override_strategy_list, + &s->pmo_dcn4.num_expanded_override_strategies); + + /* use override strategy list */ + strategy_list = s->pmo_dcn4.expanded_override_strategy_list; + strategy_list_size = s->pmo_dcn4.num_expanded_override_strategies; + } else { + /* use predefined strategy list */ + strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams); + strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams); + } + + if (!strategy_list || strategy_list_size == 0) + return false; + + s->pmo_dcn4.num_pstate_candidates = 0; + + for (i = 0; i < strategy_list_size && s->pmo_dcn4.num_pstate_candidates < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { + if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, &strategy_list[i])) { + insert_into_candidate_list(&strategy_list[i], display_config->display_config.num_streams, s); + } + } + + if (s->pmo_dcn4.num_pstate_candidates > 0) { + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.num_pstate_candidates-1].allow_state_increase = true; + s->pmo_dcn4.cur_pstate_candidate = -1; + return true; + } else { + return false; + } +} + +static void reset_display_configuration(struct display_configuation_with_meta *display_config) +{ + unsigned int plane_index; + unsigned int stream_index; + struct dml2_plane_parameters *plane; + + for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { + display_config->stage3.stream_svp_meta[stream_index].valid = false; + + display_config->display_config.stream_descriptors[stream_index].overrides.minimize_active_latency_hiding = false; + display_config->display_config.overrides.best_effort_min_active_latency_hiding_us = 0; + } + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + + // Unset SubVP + plane->overrides.legacy_svp_config = dml2_svp_mode_override_auto; + + // Remove reserve time + plane->overrides.reserved_vblank_time_ns = 0; + + // Reset strategy to auto + plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto; + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_na; + } +} + +static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + struct dml2_plane_parameters *plane; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + plane = 
&display_config->display_config.plane_descriptors[plane_index]; + + plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_force_drr; + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_drr; + + } + } +} + +static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + struct dml2_pmo_scratch *scratch = &pmo->scratch; + + unsigned int plane_index; + int stream_index = -1; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp; + } + } + + if (stream_index >= 0) { + memcpy(&display_config->stage3.stream_svp_meta[stream_index], + &scratch->pmo_dcn4.stream_svp_meta[stream_index], + sizeof(struct dml2_implicit_svp_meta)); + } +} + +static void setup_planes_for_svp_drr_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + struct dml2_pmo_scratch *scratch = &pmo->scratch; + + unsigned int plane_index; + int stream_index = -1; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp_drr; + } + } + + if (stream_index >= 0) { + memcpy(&display_config->stage3.stream_svp_meta[stream_index], + &scratch->pmo_dcn4.stream_svp_meta[stream_index], + sizeof(struct dml2_implicit_svp_meta)); + } +} + +static void setup_planes_for_vblank_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + struct dml2_plane_parameters *plane; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + + plane->overrides.reserved_vblank_time_ns = (long)math_max2(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000.0, + plane->overrides.reserved_vblank_time_ns); + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vblank; + + } + } +} + +static void setup_planes_for_vblank_drr_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + struct dml2_plane_parameters *plane; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000); + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vblank_drr; + } + } +} + +static void setup_planes_for_vactive_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + unsigned int stream_index; + + for (plane_index = 
0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + stream_index = display_config->display_config.plane_descriptors[plane_index].stream_index; + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vactive; + + if (!pmo->options->disable_vactive_det_fill_bw_pad) { + display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us = + (unsigned int)math_floor(pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index].method_vactive.max_vactive_det_fill_delay_us); + } + } + } +} + +static void setup_planes_for_vactive_drr_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + unsigned int stream_index; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + stream_index = display_config->display_config.plane_descriptors[plane_index].stream_index; + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vactive_drr; + + if (!pmo->options->disable_vactive_det_fill_bw_pad) { + display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us = + (unsigned int)math_floor(pmo->scratch.pmo_dcn4.stream_pstate_meta[stream_index].method_vactive.max_vactive_det_fill_delay_us); + } + } + } +} + +static bool setup_display_config(struct display_configuation_with_meta *display_config, struct dml2_pmo_instance *pmo, int strategy_index) +{ + struct dml2_pmo_scratch *scratch = &pmo->scratch; + + bool fams2_required = false; + bool success = true; + unsigned int stream_index; + + reset_display_configuration(display_config); + + for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { + + if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_na) { + success = false; + break; + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive) { + setup_planes_for_vactive_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank) { + setup_planes_for_vblank_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp) { + fams2_required = true; + setup_planes_for_svp_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { + fams2_required = true; + setup_planes_for_vactive_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { + fams2_required = true; + setup_planes_for_vblank_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if 
(scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { + fams2_required = true; + setup_planes_for_svp_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { + fams2_required = true; + setup_planes_for_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } + } + + /* copy FAMS2 meta */ + if (success) { + display_config->stage3.fams2_required = fams2_required; + memcpy(&display_config->stage3.stream_pstate_meta, + &scratch->pmo_dcn4.stream_pstate_meta, + sizeof(struct dml2_pstate_meta) * DML2_MAX_PLANES); + } + + return success; +} + +static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask) +{ + int min_time_us = 0xFFFFFF; + unsigned int plane_index = 0; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + if (min_time_us > (display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000)) + min_time_us = display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; + } + } + return min_time_us; +} + +bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) +{ + bool p_state_supported = true; + unsigned int stream_index; + struct dml2_pmo_scratch *s = &in_out->instance->scratch; + + int MIN_VACTIVE_MARGIN_VBLANK = 0; + int MIN_VACTIVE_MARGIN_DRR = 0; + int REQUIRED_RESERVED_TIME = 0; + + if (in_out->base_display_config->display_config.overrides.all_streams_blanked) { + return true; + } + + MIN_VACTIVE_MARGIN_VBLANK = INT_MIN; + MIN_VACTIVE_MARGIN_DRR = INT_MIN; + REQUIRED_RESERVED_TIME = (int)in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; + + if (s->pmo_dcn4.cur_pstate_candidate < 0) + return false; + + for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { + struct dml2_pstate_meta *stream_pstate_meta = &s->pmo_dcn4.stream_pstate_meta[stream_index]; + + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { + if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) || + get_vactive_det_fill_latency_delay_us(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_us) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { + if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < + 
REQUIRED_RESERVED_TIME || + get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { + if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { + if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pstate_method_fw_drr) || + get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_na) { + p_state_supported = false; + break; + } + } + + return p_state_supported; +} + +bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) +{ + bool success = false; + struct dml2_pmo_scratch *s = &in_out->instance->scratch; + + memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); + + if (in_out->last_candidate_failed) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].allow_state_increase && + s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index - 1) { + s->pmo_dcn4.cur_latency_index++; + + success = true; + } + } + + if (!success) { + s->pmo_dcn4.cur_latency_index = s->pmo_dcn4.min_latency_index; + s->pmo_dcn4.cur_pstate_candidate++; + + if (s->pmo_dcn4.cur_pstate_candidate < s->pmo_dcn4.num_pstate_candidates) { + success = true; + } + } + + if (success) { + in_out->optimized_display_config->stage3.min_clk_index_for_latency = s->pmo_dcn4.cur_latency_index; + setup_display_config(in_out->optimized_display_config, in_out->instance, in_out->instance->scratch.pmo_dcn4.cur_pstate_candidate); + } + + return success; +} + +bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out) +{ + bool success = true; + struct dml2_pmo_instance *pmo = in_out->instance; + bool stutter_period_meets_z8_eco = true; + bool z8_stutter_optimization_too_expensive = false; + bool stutter_optimization_too_expensive = false; + double line_time_us, vblank_nom_time_us; + + unsigned int i; + + if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && + pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && + pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us < pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) + return false; // Unexpected SoCBB setup + + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { + if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[i].active_latency_hiding_us < + pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us + pmo->soc_bb->power_management_parameters.z8_min_idle_time) { + stutter_period_meets_z8_eco = 
false; + break; + } + } + + for (i = 0; i < in_out->base_display_config->display_config.num_streams; i++) { + line_time_us = (double)in_out->base_display_config->display_config.stream_descriptors[i].timing.h_total / (in_out->base_display_config->display_config.stream_descriptors[i].timing.pixel_clock_khz * 1000) * 1000000; + vblank_nom_time_us = line_time_us * in_out->base_display_config->display_config.stream_descriptors[i].timing.vblank_nom; + + if (vblank_nom_time_us < pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * MIN_BLANK_STUTTER_FACTOR) { + z8_stutter_optimization_too_expensive = true; + break; + } + + if (vblank_nom_time_us < pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * MIN_BLANK_STUTTER_FACTOR) { + stutter_optimization_too_expensive = true; + break; + } + } + + pmo->scratch.pmo_dcn4.num_stutter_candidates = 0; + pmo->scratch.pmo_dcn4.cur_stutter_candidate = 0; + + if (stutter_period_meets_z8_eco && !z8_stutter_optimization_too_expensive) { + if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0) { + pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.num_stutter_candidates] = (unsigned int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us; + pmo->scratch.pmo_dcn4.num_stutter_candidates++; + pmo->scratch.pmo_dcn4.z8_vblank_optimizable = true; + } + } else { + pmo->scratch.pmo_dcn4.z8_vblank_optimizable = false; + } + + if (!stutter_optimization_too_expensive && pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0) { + pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.num_stutter_candidates] = (unsigned int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us; + pmo->scratch.pmo_dcn4.num_stutter_candidates++; + } + + if (pmo->scratch.pmo_dcn4.num_stutter_candidates == 0) + success = false; + + return success; +} + +bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out) +{ + bool success = true; + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i; + + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { + if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && + pmo->scratch.pmo_dcn4.z8_vblank_optimizable && + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * 1000) { + success = false; + break; + } + if (pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * 1000) { + success = false; + break; + } + } + + return success; +} + +bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out) +{ + bool success = false; + struct dml2_pmo_instance *pmo = in_out->instance; + unsigned int i; + + memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); + + if (!in_out->last_candidate_failed) { + if (pmo->scratch.pmo_dcn4.cur_stutter_candidate < pmo->scratch.pmo_dcn4.num_stutter_candidates) { + for (i = 0; i < in_out->optimized_display_config->display_config.num_planes; i++) { + /* take the max of the current and the optimal reserved time */ + 
in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns = + (long)math_max2(pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate] * 1000, + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns); + } + + success = true; + } + } + + return success; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h new file mode 100644 index 000000000000..6baab7ad6ecc --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_PMO_FAMS2_DCN4_H__ +#define __DML2_PMO_FAMS2_DCN4_H__ + +#include "dml2_internal_shared_types.h" + +bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out); + +bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); + +bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); +bool pmo_dcn4_fams2_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); +bool pmo_dcn4_fams2_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); + +bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); +bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); +bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); + +bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out); +bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out); +bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out); + +void pmo_dcn4_fams2_expand_base_pstate_strategies( + const struct dml2_pmo_pstate_strategy *base_strategies_list, + const unsigned int num_base_strategies, + const unsigned int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c new file mode 100644 index 000000000000..55d2464365d0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
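The header above exposes the FAMS2 PMO as init/test/optimize triplets; pmo_dcn4_fams2_init_for_pstate_support() leaves cur_pstate_candidate at -1, so the first test fails by design and the first optimize call selects candidate 0. A minimal sketch of that calling contract follows; the driver function itself is hypothetical, only the entry points and the in_out fields used elsewhere in this patch are real, and the mode-support re-validation a real caller performs between candidates is omitted:

	static bool drive_pstate_phase(struct dml2_pmo_instance *pmo,
			struct display_configuation_with_meta *base,
			struct display_configuation_with_meta *optimized)
	{
		struct dml2_pmo_init_for_pstate_support_in_out init = {
			.instance = pmo,
			.base_display_config = base,
		};
		struct dml2_pmo_test_for_pstate_support_in_out test = {
			.instance = pmo,
			.base_display_config = base,
		};
		struct dml2_pmo_optimize_for_pstate_support_in_out opt = {
			.instance = pmo,
			.base_display_config = base,
			.optimized_display_config = optimized,
		};

		if (!pmo_dcn4_fams2_init_for_pstate_support(&init))
			return false; /* no schedulable strategy candidates */

		/* the first test fails (candidate -1); each further failure
		 * either raises the latency index or advances to the next
		 * candidate on the sorted list */
		while (!pmo_dcn4_fams2_test_for_pstate_support(&test)) {
			opt.last_candidate_failed = true;
			if (!pmo_dcn4_fams2_optimize_for_pstate_support(&opt))
				return false; /* candidate list exhausted */
			test.base_display_config = optimized; /* re-test new config */
		}

		return true;
	}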
+ +#include "dml2_pmo_factory.h" +#include "dml2_pmo_dcn4_fams2.h" +#include "dml2_pmo_dcn3.h" +#include "dml2_external_lib_deps.h" + +static bool dummy_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out) +{ + return false; +} + +static bool dummy_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out) +{ + return true; +} + +static bool dummy_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out) +{ + return false; +} + +bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance *out) +{ + bool result = false; + + if (!out) + return false; + + memset(out, 0, sizeof(struct dml2_pmo_instance)); + + switch (project_id) { + case dml2_project_dcn4x_stage1: + out->initialize = pmo_dcn4_fams2_initialize; + out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache; + result = true; + break; + case dml2_project_dcn4x_stage2: + out->initialize = pmo_dcn3_initialize; + + out->optimize_dcc_mcache = pmo_dcn3_optimize_dcc_mcache; + + out->init_for_vmin = pmo_dcn3_init_for_vmin; + out->test_for_vmin = pmo_dcn3_test_for_vmin; + out->optimize_for_vmin = pmo_dcn3_optimize_for_vmin; + + out->init_for_uclk_pstate = pmo_dcn3_init_for_pstate_support; + out->test_for_uclk_pstate = pmo_dcn3_test_for_pstate_support; + out->optimize_for_uclk_pstate = pmo_dcn3_optimize_for_pstate_support; + + out->init_for_stutter = dummy_init_for_stutter; + out->test_for_stutter = dummy_test_for_stutter; + out->optimize_for_stutter = dummy_optimize_for_stutter; + + result = true; + break; + case dml2_project_dcn4x_stage2_auto_drr_svp: + out->initialize = pmo_dcn4_fams2_initialize; + + out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache; + + out->init_for_vmin = pmo_dcn4_fams2_init_for_vmin; + out->test_for_vmin = pmo_dcn4_fams2_test_for_vmin; + out->optimize_for_vmin = pmo_dcn4_fams2_optimize_for_vmin; + + out->init_for_uclk_pstate = pmo_dcn4_fams2_init_for_pstate_support; + out->test_for_uclk_pstate = pmo_dcn4_fams2_test_for_pstate_support; + out->optimize_for_uclk_pstate = pmo_dcn4_fams2_optimize_for_pstate_support; + + out->init_for_stutter = pmo_dcn4_fams2_init_for_stutter; + out->test_for_stutter = pmo_dcn4_fams2_test_for_stutter; + out->optimize_for_stutter = pmo_dcn4_fams2_optimize_for_stutter; + result = true; + break; + case dml2_project_invalid: + default: + break; + } + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.h new file mode 100644 index 000000000000..b90f6263cd85 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_PMO_FACTORY_H__ +#define __DML2_PMO_FACTORY_H__ + +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" + +bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance *out); + +#endif \ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c new file mode 100644 index 000000000000..e17b5ceba447 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#include "lib_float_math.h" + +#define ASSERT(condition) + +#define isNaN(number) ((number) != (number)) + + /* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +double math_mod(const double arg1, const double arg2) +{ + if (isNaN(arg1)) + return arg2; + if (isNaN(arg2)) + return arg1; + return arg1 - arg1 * ((int)(arg1 / arg2)); +} + +double math_min2(const double arg1, const double arg2) +{ + if (isNaN(arg1)) + return arg2; + if (isNaN(arg2)) + return arg1; + return arg1 < arg2 ? arg1 : arg2; +} + +double math_max2(const double arg1, const double arg2) +{ + if (isNaN(arg1)) + return arg2; + if (isNaN(arg2)) + return arg1; + return arg1 > arg2 ? arg1 : arg2; +} + +double math_floor2(const double arg, const double significance) +{ + ASSERT(significance != 0); + + return ((int)(arg / significance)) * significance; +} + +double math_floor(const double arg) +{ + return ((int)(arg)); +} + +double math_ceil(const double arg) +{ + return (int)(arg + 0.99999); +} + +double math_ceil2(const double arg, const double significance) +{ + return ((int)(arg / significance + 0.99999)) * significance; +} + +double math_max3(double v1, double v2, double v3) +{ + return v3 > math_max2(v1, v2) ? v3 : math_max2(v1, v2); +} + +double math_max4(double v1, double v2, double v3, double v4) +{ + return v4 > math_max3(v1, v2, v3) ? v4 : math_max3(v1, v2, v3); +} + +double math_max5(double v1, double v2, double v3, double v4, double v5) +{ + return math_max3(v1, v2, v3) > math_max2(v4, v5) ? math_max3(v1, v2, v3) : math_max2(v4, v5); +} + +float math_pow(float a, float exp) +{ + double temp; + if ((int)exp == 0) + return 1; + temp = math_pow(a, (float)((int)(exp / 2))); + if (((int)exp % 2) == 0) { + return (float)(temp * temp); + } else { + if ((int)exp > 0) + return (float)(a * temp * temp); + else + return (float)((temp * temp) / a); + } +} + +double math_fabs(double a) +{ + if (a > 0) + return (a); + else + return (-a); +} + +float math_log(float a, float b) +{ + int *const exp_ptr = (int *)(&a); + int x = *exp_ptr; + const int log_2 = ((x >> 23) & 255) - 128; + x &= ~(255 << 23); + x += 127 << 23; + *exp_ptr = x; + + a = ((-1.0f / 3) * a + 2) * a - 2.0f / 3; + + if (b > 2.00001 || b < 1.99999) + return (a + log_2) / math_log(b, 2); + else + return (a + log_2); +} + +float math_log2(float a) +{ + return math_log(a, 2.0); +} + +// approximate log2 value of a input +// - precise if the input pwr of 2, else the approximation will be an integer = floor(actual_log2) +unsigned int math_log2_approx(unsigned int a) +{ + unsigned int log2_val = 0; + while (a > 1) { + a = a >> 1; + log2_val++; + } + return log2_val; +} + +double math_round(double a) +{ + const double round_pt = 0.5; + + return math_floor(a + round_pt); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.h new file mode 100644 index 000000000000..e13b0c5939b0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.h @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#ifndef __LIB_FLOAT_MATH_H__ +#define __LIB_FLOAT_MATH_H__ + +double math_mod(const double arg1, const double arg2); +double math_min2(const double arg1, const double arg2); +double math_max2(const double arg1, const double arg2); +double math_floor2(const double arg, const double significance); +double math_floor(const double arg); +double math_ceil(const double arg); +double math_ceil2(const double arg, const double significance); +double math_max3(double v1, double v2, double v3); +double math_max4(double v1, double v2, double v3, double v4); +double math_max5(double v1, double v2, double v3, double v4, double v5); +float math_pow(float a, float exp); +double math_fabs(double a); +float math_log(float a, float b); +float math_log2(float a); +unsigned int math_log2_approx(unsigned int a); +double math_round(double a); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.c new file mode 100644 index 000000000000..5a33e2f357f4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml_top.h" +#include "dml2_internal_shared_types.h" +#include "dml2_top_soc15.h" + +unsigned int dml2_get_instance_size_bytes(void) +{ + return sizeof(struct dml2_instance); +} + +bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out) +{ + switch (in_out->options.project_id) { + case dml2_project_dcn4x_stage1: + case dml2_project_dcn4x_stage2: + case dml2_project_dcn4x_stage2_auto_drr_svp: + return dml2_top_soc15_initialize_instance(in_out); + case dml2_project_invalid: + default: + return false; + } +} + +bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) +{ + if (!in_out->dml2_instance->funcs.check_mode_supported) + return false; + + return in_out->dml2_instance->funcs.check_mode_supported(in_out); +} + +bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) +{ + if (!in_out->dml2_instance->funcs.build_mode_programming) + return false; + + return in_out->dml2_instance->funcs.build_mode_programming(in_out); +} + +bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out) +{ + if (!in_out->dml2_instance->funcs.build_mcache_programming) + return false; + + return in_out->dml2_instance->funcs.build_mcache_programming(in_out); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.c new file mode 100644 index 000000000000..5e14d85821e2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml2_top_legacy.h" +#include "dml2_top_soc15.h" +#include "dml2_core_factory.h" +#include "dml2_pmo_factory.h" +#include "display_mode_core_structs.h" + diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.h new file mode 100644 index 000000000000..14d0ae03dce6 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.h @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
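dml2_top_interfaces.c above is a thin dispatcher: after initialization, every call goes through in_out->dml2_instance->funcs. A sketch of the expected call order, written user-space style; the allocation and any member names beyond those dereferenced in that file are assumptions:

	#include <stdlib.h>
	#include "dml_top.h"

	static bool dml2_smoke_test(void)
	{
		struct dml2_initialize_instance_in_out init = { 0 };
		struct dml2_check_mode_supported_in_out check = { 0 };
		/* opaque instance storage, sized by the library */
		struct dml2_instance *dml = malloc(dml2_get_instance_size_bytes());

		if (!dml)
			return false;

		init.options.project_id = dml2_project_dcn4x_stage2; /* any handled id */
		/* remaining init wiring (instance pointer, SoC/IP caps) omitted;
		 * those member names are assumptions */
		if (!dml2_initialize_instance(&init))
			return false;

		check.dml2_instance = dml;
		/* dispatches through dml->funcs.check_mode_supported */
		return dml2_check_mode_supported(&check);
	}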
+ +#ifndef __DML2_TOP_LEGACY_H__ +#define __DML2_TOP_LEGACY_H__ +#include "dml2_internal_shared_types.h" +bool dml2_top_legacy_initialize_instance(struct dml2_initialize_instance_in_out *in_out); +#endif /* __DML2_TOP_LEGACY_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c new file mode 100644 index 000000000000..4a7c4c62111e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c @@ -0,0 +1,1170 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml2_top_soc15.h" +#include "dml2_mcg_factory.h" +#include "dml2_dpmm_factory.h" +#include "dml2_core_factory.h" +#include "dml2_pmo_factory.h" +#include "lib_float_math.h" +#include "dml2_debug.h" +static void setup_unoptimized_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) +{ + memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); + out->stage1.min_clk_index_for_latency = dml->min_clk_table.dram_bw_table.num_entries - 1; //dml->min_clk_table.clean_me_up.soc_bb.num_states - 1; +} + +static void setup_speculative_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) +{ + memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); + out->stage1.min_clk_index_for_latency = 0; +} + +static void copy_display_configuration_with_meta(struct display_configuation_with_meta *dst, const struct display_configuation_with_meta *src) +{ + memcpy(dst, src, sizeof(struct display_configuation_with_meta)); +} + +static bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; + + state->performed = true; + + return true; +} + +static bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; + + return state->min_clk_index_for_latency == 0; +} + +static bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params) +{ + bool result = false; + + if (params->display_config->stage1.min_clk_index_for_latency > 0) { + copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); + params->optimized_display_config->stage1.min_clk_index_for_latency--; + result = true; + } + + return result; +} + +static bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + bool mcache_success = false; + bool result = false; + + memset(l, 0, sizeof(struct dml2_optimization_test_function_locals)); + + l->test_mcache.calc_mcache_count_params.dml2_instance = params->dml; + l->test_mcache.calc_mcache_count_params.display_config = ¶ms->display_config->display_config; + l->test_mcache.calc_mcache_count_params.mcache_allocations = params->display_config->stage2.mcache_allocations; + + result = dml2_top_mcache_calc_mcache_count_and_offsets(&l->test_mcache.calc_mcache_count_params); // use core to get the basic mcache_allocations + + if (result) { + 
+		l->test_mcache.assign_global_mcache_ids_params.allocations = params->display_config->stage2.mcache_allocations;
+		l->test_mcache.assign_global_mcache_ids_params.num_allocations = params->display_config->display_config.num_planes;
+
+		dml2_top_mcache_assign_global_mcache_ids(&l->test_mcache.assign_global_mcache_ids_params);
+
+		l->test_mcache.validate_admissibility_params.dml2_instance = params->dml;
+		l->test_mcache.validate_admissibility_params.display_cfg = &params->display_config->display_config;
+		l->test_mcache.validate_admissibility_params.mcache_allocations = params->display_config->stage2.mcache_allocations;
+		l->test_mcache.validate_admissibility_params.cfg_support_info = &params->display_config->mode_support_result.cfg_support_info;
+
+		mcache_success = dml2_top_mcache_validate_admissability(&l->test_mcache.validate_admissibility_params); // also find the shift needed to make the mcache allocation work
+
+		memcpy(params->display_config->stage2.per_plane_mcache_support, l->test_mcache.validate_admissibility_params.per_plane_status, sizeof(bool) * DML2_MAX_PLANES);
+	}
+
+	return mcache_success;
+}
+
+static bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params)
+{
+	struct dml2_optimization_optimize_function_locals *l = params->locals;
+	bool optimize_success = false;
+
+	if (params->last_candidate_supported == false)
+		return false;
+
+	copy_display_configuration_with_meta(params->optimized_display_config, params->display_config);
+
+	l->optimize_mcache.optimize_mcache_params.instance = &params->dml->pmo_instance;
+	l->optimize_mcache.optimize_mcache_params.dcc_mcache_supported = params->display_config->stage2.per_plane_mcache_support;
+	l->optimize_mcache.optimize_mcache_params.display_config = &params->display_config->display_config;
+	l->optimize_mcache.optimize_mcache_params.optimized_display_cfg = &params->optimized_display_config->display_config;
+	l->optimize_mcache.optimize_mcache_params.cfg_support_info = &params->optimized_display_config->mode_support_result.cfg_support_info;
+
+	optimize_success = params->dml->pmo_instance.optimize_dcc_mcache(&l->optimize_mcache.optimize_mcache_params);
+
+	return optimize_success;
+}
+
+static bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params)
+{
+	struct dml2_optimization_init_function_locals *l = params->locals;
+
+	l->vmin.init_params.instance = &params->dml->pmo_instance;
+	l->vmin.init_params.base_display_config = params->display_config;
+	return params->dml->pmo_instance.init_for_vmin(&l->vmin.init_params);
+}
+
+static bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params)
+{
+	struct dml2_optimization_test_function_locals *l = params->locals;
+
+	l->test_vmin.pmo_test_vmin_params.instance = &params->dml->pmo_instance;
+	l->test_vmin.pmo_test_vmin_params.display_config = params->display_config;
+	l->test_vmin.pmo_test_vmin_params.vmin_limits = &params->dml->soc_bbox.vmin_limit;
+	return params->dml->pmo_instance.test_for_vmin(&l->test_vmin.pmo_test_vmin_params);
+}
+
+static bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params)
+{
+	struct dml2_optimization_optimize_function_locals *l = params->locals;
+
+	if (params->last_candidate_supported == false)
+		return false;
+
+	l->optimize_vmin.pmo_optimize_vmin_params.instance = &params->dml->pmo_instance;
+	l->optimize_vmin.pmo_optimize_vmin_params.base_display_config = params->display_config;
+	l->optimize_vmin.pmo_optimize_vmin_params.optimized_display_config = params->optimized_display_config;
+	return params->dml->pmo_instance.optimize_for_vmin(&l->optimize_vmin.pmo_optimize_vmin_params);
+}
+
+static bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params)
+{
+	struct dml2_optimization_init_function_locals *l = params->locals;
+
+	l->uclk_pstate.init_params.instance = &params->dml->pmo_instance;
+	l->uclk_pstate.init_params.base_display_config = params->display_config;
+
+	return params->dml->pmo_instance.init_for_uclk_pstate(&l->uclk_pstate.init_params);
+}
+
+static bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params)
+{
+	struct dml2_optimization_test_function_locals *l = params->locals;
+
+	l->uclk_pstate.test_params.instance = &params->dml->pmo_instance;
+	l->uclk_pstate.test_params.base_display_config = params->display_config;
+
+	return params->dml->pmo_instance.test_for_uclk_pstate(&l->uclk_pstate.test_params);
+}
+
+static bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params)
+{
+	struct dml2_optimization_optimize_function_locals *l = params->locals;
+
+	l->uclk_pstate.optimize_params.instance = &params->dml->pmo_instance;
+	l->uclk_pstate.optimize_params.base_display_config = params->display_config;
+	l->uclk_pstate.optimize_params.optimized_display_config = params->optimized_display_config;
+	l->uclk_pstate.optimize_params.last_candidate_failed = !params->last_candidate_supported;
+
+	return params->dml->pmo_instance.optimize_for_uclk_pstate(&l->uclk_pstate.optimize_params);
+}
+
+static bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params)
+{
+	struct dml2_optimization_init_function_locals *l = params->locals;
+
+	l->stutter.stutter_params.instance = &params->dml->pmo_instance;
+	l->stutter.stutter_params.base_display_config = params->display_config;
+
+	return params->dml->pmo_instance.init_for_stutter(&l->stutter.stutter_params);
+}
+
+static bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params)
+{
+	struct dml2_optimization_test_function_locals *l = params->locals;
+
+	l->stutter.stutter_params.instance = &params->dml->pmo_instance;
+	l->stutter.stutter_params.base_display_config = params->display_config;
+	return params->dml->pmo_instance.test_for_stutter(&l->stutter.stutter_params);
+}
+
+static bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params)
+{
+	struct dml2_optimization_optimize_function_locals *l = params->locals;
+
+	l->stutter.stutter_params.instance = &params->dml->pmo_instance;
+	l->stutter.stutter_params.base_display_config = params->display_config;
+	l->stutter.stutter_params.optimized_display_config = params->optimized_display_config;
+	l->stutter.stutter_params.last_candidate_failed = !params->last_candidate_supported;
+	return params->dml->pmo_instance.optimize_for_stutter(&l->stutter.stutter_params);
+}
+
+static bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params)
+{
+	bool test_passed = false;
+	bool optimize_succeeded = true;
+	bool candidate_validation_passed = true;
+	struct optimization_init_function_params init_params = { 0 };
+	struct optimization_test_function_params test_params = { 0 };
+	struct optimization_optimize_function_params optimize_params = { 0 };
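+	// Generic phase driver: initialize, test the starting config, then repeatedly ask the
+	// optimize hook for a new candidate, re-run mode support on it and re-test, until the
+	// test passes or the optimizer runs out of candidates.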
+
+	if (!params->dml ||
+		!params->optimize_function ||
+		!params->test_function ||
+		!params->display_config ||
+		!params->optimized_display_config)
+		return false;
+
+	copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config);
+
+	init_params.locals = &l->init_function_locals;
+	init_params.dml = params->dml;
+	init_params.display_config = &l->cur_candidate_display_cfg;
+
+	if (params->init_function && !params->init_function(&init_params))
+		return false;
+
+	test_params.locals = &l->test_function_locals;
+	test_params.dml = params->dml;
+	test_params.display_config = &l->cur_candidate_display_cfg;
+
+	test_passed = params->test_function(&test_params);
+
+	while (!test_passed && optimize_succeeded) {
+		memset(&optimize_params, 0, sizeof(struct optimization_optimize_function_params));
+
+		optimize_params.locals = &l->optimize_function_locals;
+		optimize_params.dml = params->dml;
+		optimize_params.display_config = &l->cur_candidate_display_cfg;
+		optimize_params.optimized_display_config = &l->next_candidate_display_cfg;
+		optimize_params.last_candidate_supported = candidate_validation_passed;
+
+		optimize_succeeded = params->optimize_function(&optimize_params);
+
+		if (optimize_succeeded) {
+			l->mode_support_params.instance = &params->dml->core_instance;
+			l->mode_support_params.display_cfg = &l->next_candidate_display_cfg;
+			l->mode_support_params.min_clk_table = &params->dml->min_clk_table;
+
+			if (l->next_candidate_display_cfg.stage3.performed)
+				l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage3.min_clk_index_for_latency;
+			else
+				l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage1.min_clk_index_for_latency;
+			candidate_validation_passed = params->dml->core_instance.mode_support(&l->mode_support_params);
+			l->next_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result;
+		}
+
+		if (optimize_succeeded && candidate_validation_passed) {
+			memset(&test_params, 0, sizeof(struct optimization_test_function_params));
+			test_params.locals = &l->test_function_locals;
+			test_params.dml = params->dml;
+			test_params.display_config = &l->next_candidate_display_cfg;
+			test_passed = params->test_function(&test_params);
+
+			copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, &l->next_candidate_display_cfg);
+
+			// If optimization is not all or nothing, then store partial progress in output
+			if (!params->all_or_nothing)
+				copy_display_configuration_with_meta(params->optimized_display_config, &l->next_candidate_display_cfg);
+		}
+	}
+
+	if (test_passed)
+		copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg);
+
+	return test_passed;
+}
+
+static bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params)
+{
+	int highest_state, lowest_state, cur_state;
+	bool supported = false;
+
+	if (!params->dml ||
+		!params->optimize_function ||
+		!params->test_function ||
+		!params->display_config ||
+		!params->optimized_display_config)
+		return false;
+
+	copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config);
+	highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency;
+	lowest_state = 0;
+
+	while (highest_state > lowest_state) {
+		cur_state = (highest_state + lowest_state) / 2;
+
+		l->mode_support_params.instance = &params->dml->core_instance;
+		l->mode_support_params.display_cfg = &l->cur_candidate_display_cfg;
+		l->mode_support_params.min_clk_table = &params->dml->min_clk_table;
+		l->mode_support_params.min_clk_index = cur_state;
+		supported = params->dml->core_instance.mode_support(&l->mode_support_params);
+
+		if (supported) {
+			l->cur_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result;
+			highest_state = cur_state;
+		} else {
+			lowest_state = cur_state + 1;
+		}
+	}
+	l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency = lowest_state;
+
+	copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg);
+
+	return true;
+}
+
+/*
+* Takes an input set of mcache boundaries and finds the appropriate setting of cache programming.
+* Returns true if a valid set of programming can be made, and false otherwise. "Valid" means
+* that the horizontal viewport does not span more than 2 cache slices.
+*
+* It can optionally also apply a constant shift to all the cache boundaries.
+*/
+static const uint32_t MCACHE_ID_UNASSIGNED = 0xF;
+static const uint32_t SPLIT_LOCATION_UNDEFINED = 0xFFFF;
+
+static bool calculate_first_second_splitting(const int *mcache_boundaries, int num_boundaries, int shift,
+	int pipe_h_vp_start, int pipe_h_vp_end, int *first_offset, int *second_offset)
+{
+	const int MAX_VP = 0xFFFFFF;
+	int left_cache_id;
+	int right_cache_id;
+	int range_start;
+	int range_end;
+	bool success = false;
+
+	if (num_boundaries <= 1) {
+		if (first_offset && second_offset) {
+			*first_offset = 0;
+			*second_offset = -1;
+		}
+		success = true;
+		return success;
+	} else {
+		range_start = 0;
+		for (left_cache_id = 0; left_cache_id < num_boundaries; left_cache_id++) {
+			range_end = mcache_boundaries[left_cache_id] - shift - 1;
+
+			if (range_start <= pipe_h_vp_start && pipe_h_vp_start <= range_end)
+				break;
+
+			range_start = range_end + 1;
+		}
+
+		range_end = MAX_VP;
+		for (right_cache_id = num_boundaries - 1; right_cache_id >= -1; right_cache_id--) {
+			if (right_cache_id >= 0)
+				range_start = mcache_boundaries[right_cache_id] - shift;
+			else
+				range_start = 0;
+
+			if (range_start <= pipe_h_vp_end && pipe_h_vp_end <= range_end) {
+				break;
+			}
+			range_end = range_start - 1;
+		}
+		right_cache_id = (right_cache_id + 1) % num_boundaries;
+
+		if (right_cache_id == left_cache_id) {
+			if (first_offset && second_offset) {
+				*first_offset = left_cache_id;
+				*second_offset = -1;
+			}
+			success = true;
+		} else if (right_cache_id == (left_cache_id + 1) % num_boundaries) {
+			if (first_offset && second_offset) {
+				*first_offset = left_cache_id;
+				*second_offset = right_cache_id;
+			}
+			success = true;
+		}
+	}
+
+	return success;
+}
+
+/*
+* For a given set of pipe start/end x positions, checks to see if it can support the input mcache splitting.
+* It also attempts to "optimize" by finding a shift if the default 0 shift does not work.
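+*
+* For example (illustrative numbers only): with boundaries {100, 200, 300} and a pipe
+* viewport of [95, 205], a shift of 0 leaves the viewport touching three slices, which
+* is invalid; a shift of 10 moves the boundaries to {90, 190, 290}, so the viewport
+* touches only the two adjacent slices 1 and 2 and the assignment becomes valid.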
+*/
+static bool find_shift_for_valid_cache_id_assignment(int *mcache_boundaries, unsigned int num_boundaries,
+	int *pipe_vp_startx, int *pipe_vp_endx, unsigned int pipe_count, int shift_granularity, int *shift)
+{
+	int max_shift = 0xFFFF;
+	unsigned int pipe_index;
+	unsigned int i, slice_width;
+	bool success = false;
+
+	for (i = 0; i < num_boundaries; i++) {
+		if (i == 0)
+			slice_width = mcache_boundaries[i];
+		else
+			slice_width = mcache_boundaries[i] - mcache_boundaries[i - 1];
+
+		if (max_shift > (int)slice_width) {
+			max_shift = slice_width;
+		}
+	}
+
+	for (*shift = 0; *shift <= max_shift; *shift += shift_granularity) {
+		success = true;
+		for (pipe_index = 0; pipe_index < pipe_count; pipe_index++) {
+			if (!calculate_first_second_splitting(mcache_boundaries, num_boundaries, *shift,
+				pipe_vp_startx[pipe_index], pipe_vp_endx[pipe_index], NULL, NULL)) {
+				success = false;
+				break;
+			}
+		}
+		if (success)
+			break;
+	}
+
+	return success;
+}
+
+/*
+* Counts the number of elements inside the input array within the given span length.
+* Formally, it returns the size of the largest subset of the array in which the largest
+* and smallest elements differ by no more than the span; e.g. for {0, 3, 5, 9} and a
+* span of 5 the result is 3, from the subset {0, 3, 5}.
+*/
+static unsigned int count_elements_in_span(int *array, unsigned int array_size, unsigned int span)
+{
+	unsigned int i;
+	unsigned int span_start_value;
+	unsigned int span_start_index;
+	unsigned int greatest_element_count;
+
+	if (array_size == 0)
+		return 1;
+
+	if (span == 0)
+		return array_size > 0 ? 1 : 0;
+
+	span_start_value = 0;
+	span_start_index = 0;
+	greatest_element_count = 0;
+
+	while (span_start_index < array_size) {
+		for (i = span_start_index; i < array_size; i++) {
+			if (array[i] - span_start_value <= span) {
+				if (i - span_start_index + 1 > greatest_element_count) {
+					greatest_element_count = i - span_start_index + 1;
+				}
+			} else
+				break;
+		}
+
+		span_start_index++;
+
+		if (span_start_index < array_size) {
+			span_start_value = array[span_start_index - 1] + 1;
+		}
+	}
+
+	return greatest_element_count;
+}
+
+static bool calculate_h_split_for_scaling_transform(int full_vp_width, int h_active, int num_pipes,
+	enum dml2_scaling_transform scaling_transform, int *pipe_vp_x_start, int *pipe_vp_x_end)
+{
+	int i, slice_width;
+	const char MAX_SCL_VP_OVERLAP = 3;
+	bool success = false;
+
+	switch (scaling_transform) {
+	case dml2_scaling_transform_centered:
+	case dml2_scaling_transform_aspect_ratio:
+	case dml2_scaling_transform_fullscreen:
+		slice_width = full_vp_width / num_pipes;
+		for (i = 0; i < num_pipes; i++) {
+			pipe_vp_x_start[i] = i * slice_width;
+			pipe_vp_x_end[i] = (i + 1) * slice_width - 1;
+
+			if (pipe_vp_x_start[i] < MAX_SCL_VP_OVERLAP)
+				pipe_vp_x_start[i] = 0;
+			else
+				pipe_vp_x_start[i] -= MAX_SCL_VP_OVERLAP;
+
+			if (pipe_vp_x_end[i] > full_vp_width - MAX_SCL_VP_OVERLAP - 1)
+				pipe_vp_x_end[i] = full_vp_width - 1;
+			else
+				pipe_vp_x_end[i] += MAX_SCL_VP_OVERLAP;
+		}
+		success = true;
+		break;
+	case dml2_scaling_transform_explicit:
+	default:
+		success = false;
+		break;
+	}
+
+	return success;
+}
+
+bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params)
+{
+	struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance;
+	struct dml2_top_mcache_validate_admissability_locals *l = &dml->scratch.mcache_validate_admissability_locals;
+
+	const int MAX_PIXEL_OVERLAP = 6;
+	int max_per_pipe_vp_p0 = 0;
+	int max_per_pipe_vp_p1 = 0;
+	int temp, p0shift, p1shift;
+	unsigned int plane_index = 0;
+	unsigned int i;
+	unsigned int odm_combine_factor;
+	unsigned int mpc_combine_factor;
+	unsigned int num_dpps;
+	unsigned int num_boundaries;
+	enum dml2_scaling_transform scaling_transform;
+	const struct dml2_plane_parameters *plane;
+	const struct dml2_stream_parameters *stream;
+
+	bool p0pass = false;
+	bool p1pass = false;
+	bool all_pass = true;
+
+	for (plane_index = 0; plane_index < params->display_cfg->num_planes; plane_index++) {
+		if (!params->display_cfg->plane_descriptors[plane_index].surface.dcc.enable)
+			continue;
+
+		plane = &params->display_cfg->plane_descriptors[plane_index];
+		stream = &params->display_cfg->stream_descriptors[plane->stream_index];
+
+		num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used;
+
+		if (odm_combine_factor == 1)
+			num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used;
+		else
+			mpc_combine_factor = 1;
+
+		if (odm_combine_factor > 1) {
+			max_per_pipe_vp_p0 = plane->surface.plane0.width;
+			temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor);
+
+			if (temp < max_per_pipe_vp_p0)
+				max_per_pipe_vp_p0 = temp;
+
+			max_per_pipe_vp_p1 = plane->surface.plane1.width;
+			temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane1.h_ratio * stream->timing.h_active / odm_combine_factor);
+
+			if (temp < max_per_pipe_vp_p1)
+				max_per_pipe_vp_p1 = temp;
+		} else {
+			max_per_pipe_vp_p0 = plane->surface.plane0.width / mpc_combine_factor;
+			max_per_pipe_vp_p1 = plane->surface.plane1.width / mpc_combine_factor;
+		}
+
+		max_per_pipe_vp_p0 += 2 * MAX_PIXEL_OVERLAP;
+		max_per_pipe_vp_p1 += MAX_PIXEL_OVERLAP;
+
+		p0shift = 0;
+		p1shift = 0;
+
+		// The last element in the unshifted boundary array will always be the first pixel outside the
+		// plane, which means there's no mcache associated with it, so -1
+		num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1;
+		if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0,
+			num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) {
+			p0pass = true;
+		}
+		num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1;
+		if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1,
+			num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) {
+			p1pass = true;
+		}
+
+		if (!p0pass || !p1pass) {
+			if (odm_combine_factor > 1) {
+				num_dpps = odm_combine_factor;
+				scaling_transform = plane->composition.scaling_transform;
+			} else {
+				num_dpps = mpc_combine_factor;
+				scaling_transform = dml2_scaling_transform_fullscreen;
+			}
+
+			if (!p0pass) {
+				if (plane->composition.viewport.stationary) {
+					calculate_h_split_for_scaling_transform(plane->surface.plane0.width,
+						stream->timing.h_active, num_dpps, scaling_transform,
+						&l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]);
+					p0pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane0,
+						params->mcache_allocations[plane_index].num_mcaches_plane0,
+						&l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index], num_dpps,
+						params->mcache_allocations[plane_index].shift_granularity.p0, &p0shift);
+				}
+			}
+			if (!p1pass) {
+				if (plane->composition.viewport.stationary) {
+					calculate_h_split_for_scaling_transform(plane->surface.plane1.width,
+						stream->timing.h_active, num_dpps, scaling_transform,
+						&l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index]);
+					p1pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane1,
+						params->mcache_allocations[plane_index].num_mcaches_plane1,
+						&l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index], num_dpps,
+						params->mcache_allocations[plane_index].shift_granularity.p1, &p1shift);
+				}
+			}
+		}
+
+		if (p0pass && p1pass) {
+			for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane0; i++) {
+				params->mcache_allocations[plane_index].mcache_x_offsets_plane0[i] -= p0shift;
+			}
+			for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane1; i++) {
+				params->mcache_allocations[plane_index].mcache_x_offsets_plane1[i] -= p1shift;
+			}
+		}
+
+		params->per_plane_status[plane_index] = p0pass && p1pass;
+		all_pass &= p0pass && p1pass;
+	}
+
+	return all_pass;
+}
+
+static void reset_mcache_allocations(struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs)
+{
+	// Initialize all entries to special valid MCache ID and special valid split coordinate
+	per_plane_pipe_mcache_regs->main.p0.mcache_id_first = MCACHE_ID_UNASSIGNED;
+	per_plane_pipe_mcache_regs->main.p0.mcache_id_second = MCACHE_ID_UNASSIGNED;
+	per_plane_pipe_mcache_regs->main.p0.split_location = SPLIT_LOCATION_UNDEFINED;
+
+	per_plane_pipe_mcache_regs->mall.p0.mcache_id_first = MCACHE_ID_UNASSIGNED;
+	per_plane_pipe_mcache_regs->mall.p0.mcache_id_second = MCACHE_ID_UNASSIGNED;
+	per_plane_pipe_mcache_regs->mall.p0.split_location = SPLIT_LOCATION_UNDEFINED;
+
+	per_plane_pipe_mcache_regs->main.p1.mcache_id_first = MCACHE_ID_UNASSIGNED;
+	per_plane_pipe_mcache_regs->main.p1.mcache_id_second = MCACHE_ID_UNASSIGNED;
+	per_plane_pipe_mcache_regs->main.p1.split_location = SPLIT_LOCATION_UNDEFINED;
+
+	per_plane_pipe_mcache_regs->mall.p1.mcache_id_first = MCACHE_ID_UNASSIGNED;
+	per_plane_pipe_mcache_regs->mall.p1.mcache_id_second = MCACHE_ID_UNASSIGNED;
+	per_plane_pipe_mcache_regs->mall.p1.split_location = SPLIT_LOCATION_UNDEFINED;
+}
+
+void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params)
+{
+	int i;
+	unsigned int j;
+	int next_unused_cache_id = 0;
+
+	for (i = 0; i < params->num_allocations; i++) {
+		if (!params->allocations[i].valid)
+			continue;
+
+		for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) {
+			params->allocations[i].global_mcache_ids_plane0[j] = next_unused_cache_id++;
+		}
+		for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) {
+			params->allocations[i].global_mcache_ids_plane1[j] = next_unused_cache_id++;
+		}
+
+		// The "pseudo-last" slice is always wrapped around
+		params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0] =
+			params->allocations[i].global_mcache_ids_plane0[0];
+		params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1] =
+			params->allocations[i].global_mcache_ids_plane1[0];
+
+		// If we need dedicated caches for mall requesting, then we assign them here.
+		if (params->allocations[i].requires_dedicated_mall_mcache) {
+			for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) {
+				params->allocations[i].global_mcache_ids_mall_plane0[j] = next_unused_cache_id++;
+			}
+			for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) {
+				params->allocations[i].global_mcache_ids_mall_plane1[j] = next_unused_cache_id++;
+			}
+
+			// The "pseudo-last" slice is always wrapped around
+			params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0] =
+				params->allocations[i].global_mcache_ids_mall_plane0[0];
+			params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1] =
+				params->allocations[i].global_mcache_ids_mall_plane1[0];
+		}
+
+		// If P0 and P1 are sharing caches, then the largest mcache IDs for P0 and P1 can be the same;
+		// since mcache IDs are always ascending, the largest mcache ID of P1 should equal the
+		// largest mcache ID of P0
+		if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 &&
+			params->allocations[i].last_slice_sharing.plane0_plane1) {
+			params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
+				params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1];
+		}
+
+		// If we need dedicated caches, handle last slice sharing
+		if (params->allocations[i].requires_dedicated_mall_mcache) {
+			if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 &&
+				params->allocations[i].last_slice_sharing.plane0_plane1) {
+				params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
+					params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1];
+			}
+			// If mall_comb_mcache_l is set, the largest mcache ID for MALL P0 can be the same as for regular read P0
+			if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p0) {
+				params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1] =
+					params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1];
+			}
+			// If mall_comb_mcache_c is set, the largest mcache ID for MALL P1 can be the same as for regular
+			// read P1 (which can in turn match regular read P0 if plane0_plane1 is also set)
+			if (params->allocations[i].num_mcaches_plane1 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p1) {
+				params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
+					params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1];
+			}
+		}
+
+		// If you don't need dedicated mall mcaches, the mall mcache assignments are identical to the normal requesting
+		if (!params->allocations[i].requires_dedicated_mall_mcache) {
+			memcpy(params->allocations[i].global_mcache_ids_mall_plane0, params->allocations[i].global_mcache_ids_plane0,
+				sizeof(params->allocations[i].global_mcache_ids_mall_plane0));
+			memcpy(params->allocations[i].global_mcache_ids_mall_plane1, params->allocations[i].global_mcache_ids_plane1,
+				sizeof(params->allocations[i].global_mcache_ids_mall_plane1));
+		}
+	}
+}
+
+bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params)
+{
+	struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance;
+	struct dml2_top_mcache_verify_mcache_size_locals *l = &dml->scratch.mcache_verify_mcache_size_locals;
+
+	unsigned int total_mcaches_required;
+	unsigned int i;
+	bool result = false;
+
+	if (dml->soc_bbox.num_dcc_mcaches == 0) {
+		return true;
+	}
+
+	total_mcaches_required = 0;
+	l->calc_mcache_params.instance = &dml->core_instance;
+	for (i = 0; i < params->display_config->num_planes; i++) {
+		if (!params->display_config->plane_descriptors[i].surface.dcc.enable) {
+			memset(&params->mcache_allocations[i], 0, sizeof(struct dml2_mcache_surface_allocation));
+			continue;
+		}
+
+		l->calc_mcache_params.plane_descriptor = &params->display_config->plane_descriptors[i];
+		l->calc_mcache_params.mcache_allocation = &params->mcache_allocations[i];
+		l->calc_mcache_params.plane_index = i;
+
+		if (!dml->core_instance.calculate_mcache_allocation(&l->calc_mcache_params)) {
+			result = false;
+			break;
+		}
+
+		if (params->mcache_allocations[i].valid) {
+			total_mcaches_required += params->mcache_allocations[i].num_mcaches_plane0 + params->mcache_allocations[i].num_mcaches_plane1;
+			if (params->mcache_allocations[i].last_slice_sharing.plane0_plane1)
+				total_mcaches_required--;
+		}
+	}
+	DML_LOG_VERBOSE("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required);
+
+	if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) {
+		result = false;
+	} else {
+		result = true;
+	}
+
+	return result;
+}
+
+static bool dml2_top_soc15_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out)
+{
+	struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance;
+	struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals;
+	struct dml2_display_cfg_programming *dpmm_programming = &dml->dpmm_instance.dpmm_scratch.programming;
+
+	bool result = false;
+	bool mcache_success = false;
+	memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming));
+
+	setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config);
+
+	l->mode_support_params.instance = &dml->core_instance;
+	l->mode_support_params.display_cfg = &l->base_display_config_with_meta;
+	l->mode_support_params.min_clk_table = &dml->min_clk_table;
+	l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency;
+	result = dml->core_instance.mode_support(&l->mode_support_params);
+	l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result;
+
+	if (result) {
+		struct optimization_phase_params mcache_phase = {
+			.dml = dml,
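+			/* the optimized copy is only scratch here: a pure support check needs
+			 * just the final mcache_success verdict, so all_or_nothing stays false */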
+ .display_config = &l->base_display_config_with_meta, + .test_function = dml2_top_optimization_test_function_mcache, + .optimize_function = dml2_top_optimization_optimize_function_mcache, + .optimized_display_config = &l->optimized_display_config_with_meta, + .all_or_nothing = false, + }; + mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &mcache_phase); + } + + /* + * Call DPMM to map all requirements to minimum clock state + */ + if (result) { + l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; + l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; + l->dppm_map_mode_params.programming = dpmm_programming; + l->dppm_map_mode_params.soc_bb = &dml->soc_bbox; + l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip; + result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params); + } + + in_out->is_supported = mcache_success; + result = result && in_out->is_supported; + + return result; +} + +static bool dml2_top_soc15_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_build_mode_programming_locals *l = &dml->scratch.build_mode_programming_locals; + + bool result = false; + bool mcache_success = false; + bool uclk_pstate_success = false; + bool vmin_success = false; + bool stutter_success = false; + + memset(l, 0, sizeof(struct dml2_build_mode_programming_locals)); + memset(in_out->programming, 0, sizeof(struct dml2_display_cfg_programming)); + + memcpy(&in_out->programming->display_config, in_out->display_config, sizeof(struct dml2_display_cfg)); + + setup_speculative_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + result = dml->core_instance.mode_support(&l->mode_support_params); + + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (!result) { + setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + result = dml->core_instance.mode_support(&l->mode_support_params); + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (!result) { + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = false; + dml->core_instance.populate_informative(&l->informative_params); + + return false; + } + + /* + * Phase 1: Determine minimum clocks to satisfy latency requirements for this mode + */ + memset(&l->min_clock_for_latency_phase, 0, sizeof(struct optimization_phase_params)); + l->min_clock_for_latency_phase.dml = dml; + l->min_clock_for_latency_phase.display_config = &l->base_display_config_with_meta; + l->min_clock_for_latency_phase.init_function = 
dml2_top_optimization_init_function_min_clk_for_latency; + l->min_clock_for_latency_phase.test_function = dml2_top_optimization_test_function_min_clk_for_latency; + l->min_clock_for_latency_phase.optimize_function = dml2_top_optimization_optimize_function_min_clk_for_latency; + l->min_clock_for_latency_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->min_clock_for_latency_phase.all_or_nothing = false; + + dml2_top_optimization_perform_optimization_phase_1(&l->optimization_phase_locals, &l->min_clock_for_latency_phase); + + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + } + + /* + * Phase 2: Satisfy DCC mcache requirements + */ + memset(&l->mcache_phase, 0, sizeof(struct optimization_phase_params)); + l->mcache_phase.dml = dml; + l->mcache_phase.display_config = &l->base_display_config_with_meta; + l->mcache_phase.test_function = dml2_top_optimization_test_function_mcache; + l->mcache_phase.optimize_function = dml2_top_optimization_optimize_function_mcache; + l->mcache_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->mcache_phase.all_or_nothing = true; + + mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->mcache_phase); + + if (!mcache_success) { + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = false; + + dml->core_instance.populate_informative(&l->informative_params); + + in_out->programming->informative.failed_mcache_validation = true; + return false; + } + + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + + /* + * Phase 3: Optimize for Pstate + */ + memset(&l->uclk_pstate_phase, 0, sizeof(struct optimization_phase_params)); + l->uclk_pstate_phase.dml = dml; + l->uclk_pstate_phase.display_config = &l->base_display_config_with_meta; + l->uclk_pstate_phase.init_function = dml2_top_optimization_init_function_uclk_pstate; + l->uclk_pstate_phase.test_function = dml2_top_optimization_test_function_uclk_pstate; + l->uclk_pstate_phase.optimize_function = dml2_top_optimization_optimize_function_uclk_pstate; + l->uclk_pstate_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->uclk_pstate_phase.all_or_nothing = true; + + uclk_pstate_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->uclk_pstate_phase); + + if (uclk_pstate_success) { + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage3.success = true; + } + + /* + * Phase 4: Optimize for Vmin + */ + memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params)); + l->vmin_phase.dml = dml; + l->vmin_phase.display_config = &l->base_display_config_with_meta; + l->vmin_phase.init_function = dml2_top_optimization_init_function_vmin; + l->vmin_phase.test_function = dml2_top_optimization_test_function_vmin; + l->vmin_phase.optimize_function = dml2_top_optimization_optimize_function_vmin; + l->vmin_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->vmin_phase.all_or_nothing = false; + + vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); + + if (l->optimized_display_config_with_meta.stage4.performed) 
{
+		/*
+		 * When performed is true, the optimization has been applied to
+		 * optimized_display_config_with_meta and it has passed mode
+		 * support. However, it may or may not pass the test function
+		 * that checks whether actual Vmin was reached. As long as the
+		 * voltage is optimized there is still a power benefit, even if
+		 * the Vmin level is not reached, so in this case we still copy
+		 * this optimization into the base display config.
+		 */
+		memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
+		l->base_display_config_with_meta.stage4.success = vmin_success;
+	}
+
+	/*
+	 * Phase 5: Optimize for Stutter
+	 */
+	memset(&l->stutter_phase, 0, sizeof(struct optimization_phase_params));
+	l->stutter_phase.dml = dml;
+	l->stutter_phase.display_config = &l->base_display_config_with_meta;
+	l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter;
+	l->stutter_phase.test_function = dml2_top_optimization_test_function_stutter;
+	l->stutter_phase.optimize_function = dml2_top_optimization_optimize_function_stutter;
+	l->stutter_phase.optimized_display_config = &l->optimized_display_config_with_meta;
+	l->stutter_phase.all_or_nothing = true;
+
+	stutter_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->stutter_phase);
+
+	if (stutter_success) {
+		memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
+		l->base_display_config_with_meta.stage5.success = true;
+	}
+
+	/*
+	 * Call DPMM to map all requirements to minimum clock state
+	 */
+	if (result) {
+		l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table;
+		l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta;
+		l->dppm_map_mode_params.programming = in_out->programming;
+		l->dppm_map_mode_params.soc_bb = &dml->soc_bbox;
+		l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip;
+		result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params);
+		if (!result)
+			in_out->programming->informative.failed_dpmm = true;
+	}
+
+	if (result) {
+		l->mode_programming_params.instance = &dml->core_instance;
+		l->mode_programming_params.display_cfg = &l->base_display_config_with_meta;
+		l->mode_programming_params.cfg_support_info = &l->base_display_config_with_meta.mode_support_result.cfg_support_info;
+		l->mode_programming_params.programming = in_out->programming;
+		result = dml->core_instance.mode_programming(&l->mode_programming_params);
+		if (!result)
+			in_out->programming->informative.failed_mode_programming = true;
+	}
+
+	if (result) {
+		l->dppm_map_watermarks_params.core = &dml->core_instance;
+		l->dppm_map_watermarks_params.display_cfg = &l->base_display_config_with_meta;
+		l->dppm_map_watermarks_params.programming = in_out->programming;
+		result = dml->dpmm_instance.map_watermarks(&l->dppm_map_watermarks_params);
+	}
+
+	l->informative_params.instance = &dml->core_instance;
+	l->informative_params.programming = in_out->programming;
+	l->informative_params.mode_is_supported = result;
+
+	dml->core_instance.populate_informative(&l->informative_params);
+
+	return result;
+}
+
+bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params)
+{
+	bool success = true;
+	int config_index, pipe_index;
+	int first_offset, second_offset;
+	int free_per_plane_reg_index = 0;
+
+	memset(params->per_plane_pipe_mcache_regs, 0, DML2_MAX_PLANES * DML2_MAX_DCN_PIPES * sizeof(struct dml2_hubp_pipe_mcache_regs *));
+
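+	// For each plane configuration, bind a register set from the caller-provided pool to
+	// every pipe, then translate the plane's global mcache IDs and x-offsets into the
+	// first/second mcache IDs plus the split location for P0 (and P1 when enabled), for
+	// both the regular and MALL register groups.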
+	for (config_index = 0; config_index < params->num_configurations; config_index++) {
+		for (pipe_index = 0; pipe_index < params->mcache_configurations[config_index].num_pipes; pipe_index++) {
+			// Allocate storage for the mcache regs
+			params->per_plane_pipe_mcache_regs[config_index][pipe_index] = &params->mcache_regs_set[free_per_plane_reg_index++];
+
+			reset_mcache_allocations(params->per_plane_pipe_mcache_regs[config_index][pipe_index]);
+
+			if (params->mcache_configurations[config_index].plane_descriptor->surface.dcc.enable) {
+				// P0 always enabled
+				if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0,
+					params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane0,
+					0,
+					params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start,
+					params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start +
+					params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_width - 1,
+					&first_offset, &second_offset)) {
+					success = false;
+					break;
+				}
+
+				params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_first =
+					params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[first_offset];
+
+				params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_first =
+					params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[first_offset];
+
+				if (second_offset >= 0) {
+					params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_second =
+						params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[second_offset];
+					params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.split_location =
+						params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1;
+
+					params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_second =
+						params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[second_offset];
+					params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.split_location =
+						params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1;
+				}
+
+				// Populate P1 if enabled
+				if (params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1_enabled) {
+					if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1,
+						params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane1,
+						0,
+						params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start,
+						params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start +
+						params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_width - 1,
+						&first_offset, &second_offset)) {
+						success = false;
+						break;
+					}
+
+					params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_first =
+						params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[first_offset];
+
+					params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_first =
+						params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[first_offset];
+
+					if (second_offset >= 0) {
+						
params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; + } + } + } + } + } + + return success; +} + +static const struct dml2_top_funcs soc15_funcs = { + .check_mode_supported = dml2_top_soc15_check_mode_supported, + .build_mode_programming = dml2_top_soc15_build_mode_programming, + .build_mcache_programming = dml2_top_soc15_build_mcache_programming, +}; + +bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_initialize_instance_locals *l = &dml->scratch.initialize_instance_locals; + struct dml2_core_initialize_in_out core_init_params = { 0 }; + struct dml2_mcg_build_min_clock_table_params_in_out mcg_build_min_clk_params = { 0 }; + struct dml2_pmo_initialize_in_out pmo_init_params = { 0 }; + bool result = false; + + memset(l, 0, sizeof(struct dml2_initialize_instance_locals)); + memset(dml, 0, sizeof(struct dml2_instance)); + + memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); + memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb)); + + dml->project_id = in_out->options.project_id; + dml->pmo_options = in_out->options.pmo_options; + + // Initialize All Components + result = dml2_mcg_create(in_out->options.project_id, &dml->mcg_instance); + + if (result) + result = dml2_dpmm_create(in_out->options.project_id, &dml->dpmm_instance); + + if (result) + result = dml2_core_create(in_out->options.project_id, &dml->core_instance); + + if (result) { + mcg_build_min_clk_params.soc_bb = &in_out->soc_bb; + mcg_build_min_clk_params.min_clk_table = &dml->min_clk_table; + result = dml->mcg_instance.build_min_clock_table(&mcg_build_min_clk_params); + } + + if (result) { + core_init_params.project_id = in_out->options.project_id; + core_init_params.instance = &dml->core_instance; + core_init_params.minimum_clock_table = &dml->min_clk_table; + core_init_params.explicit_ip_bb = in_out->overrides.explicit_ip_bb; + core_init_params.explicit_ip_bb_size = in_out->overrides.explicit_ip_bb_size; + core_init_params.ip_caps = &in_out->ip_caps; + core_init_params.soc_bb = &in_out->soc_bb; + result = dml->core_instance.initialize(&core_init_params); + + if (core_init_params.explicit_ip_bb && core_init_params.explicit_ip_bb_size > 0) { + memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); + } + } + + if (result) + result = dml2_pmo_create(in_out->options.project_id, &dml->pmo_instance); + + if (result) { + pmo_init_params.instance = &dml->pmo_instance; + pmo_init_params.soc_bb = &dml->soc_bbox; + pmo_init_params.ip_caps = &dml->ip_caps; + pmo_init_params.mcg_clock_table_size = dml->min_clk_table.dram_bw_table.num_entries; + pmo_init_params.options = &dml->pmo_options; + 
dml->pmo_instance.initialize(&pmo_init_params); + } + dml->funcs = soc15_funcs; + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.h new file mode 100644 index 000000000000..53bd8602f9ef --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_TOP_SOC15_H__ +#define __DML2_TOP_SOC15_H__ +#include "dml2_internal_shared_types.h" +bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out); + +bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params); +void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params); +bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params); +bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params); +#endif /* __DML2_TOP_SOC15_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_debug.h new file mode 100644 index 000000000000..611c80f4f1bf --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_debug.h @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_DEBUG_H__ +#define __DML2_DEBUG_H__ + +#include "os_types.h" +#define DML_ASSERT(condition) ASSERT(condition) +#define DML_LOG_LEVEL_DEFAULT DML_LOG_LEVEL_WARN +#define DML_LOG_INTERNAL(fmt, ...) dm_output_to_console(fmt, ## __VA_ARGS__) + +/* private helper macros */ +#define _BOOL_FORMAT(field) "%s", field ? 
"true" : "false" +#define _UINT_FORMAT(field) "%u", field +#define _INT_FORMAT(field) "%d", field +#define _DOUBLE_FORMAT(field) "%lf", field +#define _ELEMENT_FUNC "function" +#define _ELEMENT_COMP_IF "component_interface" +#define _ELEMENT_TOP_IF "top_interface" +#define _LOG_ENTRY(element) do { \ + DML_LOG_INTERNAL("<"element" name=\""); \ + DML_LOG_INTERNAL(__func__); \ + DML_LOG_INTERNAL("\">\n"); \ +} while (0) +#define _LOG_EXIT(element) DML_LOG_INTERNAL("\n") +#define _LOG_SCALAR(field, format) do { \ + DML_LOG_INTERNAL(#field" = "format(field)); \ + DML_LOG_INTERNAL("\n"); \ +} while (0) +#define _LOG_ARRAY(field, size, format) do { \ + DML_LOG_INTERNAL(#field " = ["); \ + for (int _i = 0; _i < (int) size; _i++) { \ + DML_LOG_INTERNAL(format(field[_i])); \ + if (_i + 1 == (int) size) \ + DML_LOG_INTERNAL("]\n"); \ + else \ + DML_LOG_INTERNAL(", "); \ +}} while (0) +#define _LOG_2D_ARRAY(field, size0, size1, format) do { \ + DML_LOG_INTERNAL(#field" = ["); \ + for (int _i = 0; _i < (int) size0; _i++) { \ + DML_LOG_INTERNAL("\n\t["); \ + for (int _j = 0; _j < (int) size1; _j++) { \ + DML_LOG_INTERNAL(format(field[_i][_j])); \ + if (_j + 1 == (int) size1) \ + DML_LOG_INTERNAL("]"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ + if (_i + 1 == (int) size0) \ + DML_LOG_INTERNAL("]\n"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ +} while (0) +#define _LOG_3D_ARRAY(field, size0, size1, size2, format) do { \ + DML_LOG_INTERNAL(#field" = ["); \ + for (int _i = 0; _i < (int) size0; _i++) { \ + DML_LOG_INTERNAL("\n\t["); \ + for (int _j = 0; _j < (int) size1; _j++) { \ + DML_LOG_INTERNAL("["); \ + for (int _k = 0; _k < (int) size2; _k++) { \ + DML_LOG_INTERNAL(format(field[_i][_j][_k])); \ + if (_k + 1 == (int) size2) \ + DML_LOG_INTERNAL("]"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ + if (_j + 1 == (int) size1) \ + DML_LOG_INTERNAL("]"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ + if (_i + 1 == (int) size0) \ + DML_LOG_INTERNAL("]\n"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ +} while (0) + +/* fatal errors for unrecoverable DML states until a full reset */ +#define DML_LOG_LEVEL_FATAL 0 +/* unexpected but recoverable failures inside DML */ +#define DML_LOG_LEVEL_ERROR 1 +/* unexpected inputs or events to DML */ +#define DML_LOG_LEVEL_WARN 2 +/* high level tracing of DML interfaces */ +#define DML_LOG_LEVEL_INFO 3 +/* tracing of DML internal executions */ +#define DML_LOG_LEVEL_DEBUG 4 +/* detailed tracing of DML calculation procedure */ +#define DML_LOG_LEVEL_VERBOSE 5 + +#ifndef DML_LOG_LEVEL +#define DML_LOG_LEVEL DML_LOG_LEVEL_DEFAULT +#endif /* #ifndef DML_LOG_LEVEL */ + +/* public macros for DML_LOG_LEVEL_FATAL and up */ +#define DML_LOG_FATAL(fmt, ...) DML_LOG_INTERNAL("[DML FATAL] " fmt, ## __VA_ARGS__) + +/* public macros for DML_LOG_LEVEL_ERROR and up */ +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_ERROR +#define DML_LOG_ERROR(fmt, ...) DML_LOG_INTERNAL("[DML ERROR] "fmt, ## __VA_ARGS__) +#define DML_ASSERT_MSG(condition, fmt, ...) \ + do { \ + if (!(condition)) { \ + DML_LOG_ERROR("ASSERT hit in %s line %d\n", __func__, __LINE__); \ + DML_LOG_ERROR(fmt, ## __VA_ARGS__); \ + DML_ASSERT(condition); \ + } \ + } while (0) +#else +#define DML_LOG_ERROR(fmt, ...) ((void)0) +#define DML_ASSERT_MSG(condition, fmt, ...) ((void)0) +#endif + +/* public macros for DML_LOG_LEVEL_WARN and up */ +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_WARN +#define DML_LOG_WARN(fmt, ...) DML_LOG_INTERNAL("[DML WARN] "fmt, ## __VA_ARGS__) +#else +#define DML_LOG_WARN(fmt, ...) 
((void)0) +#endif + +/* public macros for DML_LOG_LEVEL_INFO and up */ +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_INFO +#define DML_LOG_INFO(fmt, ...) DML_LOG_INTERNAL("[DML INFO] "fmt, ## __VA_ARGS__) +#define DML_LOG_TOP_IF_ENTER() _LOG_ENTRY(_ELEMENT_TOP_IF) +#define DML_LOG_TOP_IF_EXIT() _LOG_EXIT(_ELEMENT_TOP_IF) +#else +#define DML_LOG_INFO(fmt, ...) ((void)0) +#define DML_LOG_TOP_IF_ENTER() ((void)0) +#define DML_LOG_TOP_IF_EXIT() ((void)0) +#endif + +/* public macros for DML_LOG_LEVEL_DEBUG and up */ +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_DEBUG +#define DML_LOG_DEBUG(fmt, ...) DML_LOG_INTERNAL(fmt, ## __VA_ARGS__) +#define DML_LOG_COMP_IF_ENTER() _LOG_ENTRY(_ELEMENT_COMP_IF) +#define DML_LOG_COMP_IF_EXIT() _LOG_EXIT(_ELEMENT_COMP_IF) +#define DML_LOG_FUNC_ENTER() _LOG_ENTRY(_ELEMENT_FUNC) +#define DML_LOG_FUNC_EXIT() _LOG_EXIT(_ELEMENT_FUNC) +#define DML_LOG_DEBUG_BOOL(field) _LOG_SCALAR(field, _BOOL_FORMAT) +#define DML_LOG_DEBUG_UINT(field) _LOG_SCALAR(field, _UINT_FORMAT) +#define DML_LOG_DEBUG_INT(field) _LOG_SCALAR(field, _INT_FORMAT) +#define DML_LOG_DEBUG_DOUBLE(field) _LOG_SCALAR(field, _DOUBLE_FORMAT) +#define DML_LOG_DEBUG_ARRAY_BOOL(field, size) _LOG_ARRAY(field, size, _BOOL_FORMAT) +#define DML_LOG_DEBUG_ARRAY_UINT(field, size) _LOG_ARRAY(field, size, _UINT_FORMAT) +#define DML_LOG_DEBUG_ARRAY_INT(field, size) _LOG_ARRAY(field, size, _INT_FORMAT) +#define DML_LOG_DEBUG_ARRAY_DOUBLE(field, size) _LOG_ARRAY(field, size, _DOUBLE_FORMAT) +#define DML_LOG_DEBUG_2D_ARRAY_BOOL(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _BOOL_FORMAT) +#define DML_LOG_DEBUG_2D_ARRAY_UINT(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _UINT_FORMAT) +#define DML_LOG_DEBUG_2D_ARRAY_INT(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _INT_FORMAT) +#define DML_LOG_DEBUG_2D_ARRAY_DOUBLE(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _DOUBLE_FORMAT) +#define DML_LOG_DEBUG_3D_ARRAY_BOOL(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _BOOL_FORMAT) +#define DML_LOG_DEBUG_3D_ARRAY_UINT(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _UINT_FORMAT) +#define DML_LOG_DEBUG_3D_ARRAY_INT(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _INT_FORMAT) +#define DML_LOG_DEBUG_3D_ARRAY_DOUBLE(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _DOUBLE_FORMAT) +#else +#define DML_LOG_DEBUG(fmt, ...) 
((void)0) +#define DML_LOG_COMP_IF_ENTER() ((void)0) +#define DML_LOG_COMP_IF_EXIT() ((void)0) +#define DML_LOG_FUNC_ENTER() ((void)0) +#define DML_LOG_FUNC_EXIT() ((void)0) +#define DML_LOG_DEBUG_BOOL(field) ((void)0) +#define DML_LOG_DEBUG_UINT(field) ((void)0) +#define DML_LOG_DEBUG_INT(field) ((void)0) +#define DML_LOG_DEBUG_DOUBLE(field) ((void)0) +#define DML_LOG_DEBUG_ARRAY_BOOL(field, size) ((void)0) +#define DML_LOG_DEBUG_ARRAY_UINT(field, size) ((void)0) +#define DML_LOG_DEBUG_ARRAY_INT(field, size) ((void)0) +#define DML_LOG_DEBUG_ARRAY_DOUBLE(field, size) ((void)0) +#define DML_LOG_DEBUG_2D_ARRAY_BOOL(field, size0, size1) ((void)0) +#define DML_LOG_DEBUG_2D_ARRAY_UINT(field, size0, size1) ((void)0) +#define DML_LOG_DEBUG_2D_ARRAY_INT(field, size0, size1) ((void)0) +#define DML_LOG_DEBUG_2D_ARRAY_DOUBLE(field, size0, size1) ((void)0) +#define DML_LOG_DEBUG_3D_ARRAY_BOOL(field, size0, size1, size2) ((void)0) +#define DML_LOG_DEBUG_3D_ARRAY_UINT(field, size0, size1, size2) ((void)0) +#define DML_LOG_DEBUG_3D_ARRAY_INT(field, size0, size1, size2) ((void)0) +#define DML_LOG_DEBUG_3D_ARRAY_DOUBLE(field, size0, size1, size2) ((void)0) +#endif + +/* public macros for DML_LOG_LEVEL_VERBOSE */ +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE +#define DML_LOG_VERBOSE(fmt, ...) DML_LOG_INTERNAL(fmt, ## __VA_ARGS__) +#else +#define DML_LOG_VERBOSE(fmt, ...) ((void)0) +#endif /* #if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE */ +#endif /* __DML2_DEBUG_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h new file mode 100644 index 000000000000..9f562f0c4797 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h @@ -0,0 +1,1004 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+
+#ifndef __DML2_INTERNAL_SHARED_TYPES_H__
+#define __DML2_INTERNAL_SHARED_TYPES_H__
+
+#include "dml2_external_lib_deps.h"
+#include "dml_top_types.h"
+#include "dml2_core_shared_types.h"
+/*
+* DML2 MCG Types and Interfaces
+*/
+
+#define DML_MCG_MAX_CLK_TABLE_SIZE 20
+
+struct dram_bw_to_min_clk_table_entry {
+	unsigned long long pre_derate_dram_bw_kbps;
+	unsigned long min_fclk_khz;
+	unsigned long min_dcfclk_khz;
+};
+
+struct dml2_mcg_dram_bw_to_min_clk_table {
+	struct dram_bw_to_min_clk_table_entry entries[DML_MCG_MAX_CLK_TABLE_SIZE];
+
+	unsigned int num_entries;
+};
+
+struct dml2_mcg_min_clock_table {
+	struct {
+		unsigned int dispclk;
+		unsigned int dppclk;
+		unsigned int dscclk;
+		unsigned int dtbclk;
+		unsigned int phyclk;
+		unsigned int fclk;
+		unsigned int dcfclk;
+	} max_clocks_khz;
+
+	struct {
+		unsigned int dispclk;
+		unsigned int dppclk;
+		unsigned int dtbclk;
+	} max_ss_clocks_khz;
+
+	struct {
+		unsigned int dprefclk;
+		unsigned int xtalclk;
+		unsigned int pcierefclk;
+		unsigned int dchubrefclk;
+		unsigned int amclk;
+	} fixed_clocks_khz;
+
+	struct dml2_mcg_dram_bw_to_min_clk_table dram_bw_table;
+};
+
+struct dml2_mcg_build_min_clock_table_params_in_out {
+	/*
+	 * Input
+	 */
+	struct dml2_soc_bb *soc_bb;
+	struct {
+		bool perform_pseudo_build;
+	} clean_me_up;
+
+	/*
+	 * Output
+	 */
+	struct dml2_mcg_min_clock_table *min_clk_table;
+};
+struct dml2_mcg_instance {
+	bool (*build_min_clock_table)(struct dml2_mcg_build_min_clock_table_params_in_out *in_out);
+};
+
+/*
+* DML2 DPMM Types and Interfaces
+*/
+
+struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out {
+	/*
+	 * Input
+	 */
+	struct dml2_core_ip_params *ip;
+	struct dml2_soc_bb *soc_bb;
+	struct dml2_mcg_min_clock_table *min_clk_table;
+	const struct display_configuation_with_meta *display_cfg;
+	struct {
+		bool perform_pseudo_map;
+		struct dml2_core_internal_soc_bb *soc_bb;
+	} clean_me_up;
+
+	/*
+	 * Output
+	 */
+	struct dml2_display_cfg_programming *programming;
+};
+
+struct dml2_dpmm_map_watermarks_params_in_out {
+	/*
+	 * Input
+	 */
+	const struct display_configuation_with_meta *display_cfg;
+	const struct dml2_core_instance *core;
+
+	/*
+	 * Output
+	 */
+	struct dml2_display_cfg_programming *programming;
+};
+
+struct dml2_dpmm_scratch {
+	struct dml2_display_cfg_programming programming;
+};
+
+struct dml2_dpmm_instance {
+	bool (*map_mode_to_soc_dpm)(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out);
+	bool (*map_watermarks)(struct dml2_dpmm_map_watermarks_params_in_out *in_out);
+
+	struct dml2_dpmm_scratch dpmm_scratch;
+};
+
+/*
+* DML2 Core Types and Interfaces
+*/
+
+struct dml2_core_initialize_in_out {
+	enum dml2_project_id project_id;
+	struct dml2_core_instance *instance;
+	struct dml2_soc_bb *soc_bb;
+	struct dml2_ip_capabilities *ip_caps;
+
+	struct dml2_mcg_min_clock_table *minimum_clock_table;
+
+	void *explicit_ip_bb;
+	unsigned int explicit_ip_bb_size;
+
+	// FIXME_STAGE2: can be removed, but the dcn3 version still needs this
+	struct {
+		struct soc_bounding_box_st *soc_bb;
+		struct soc_states_st *soc_states;
+	} legacy;
+};
+
+struct core_bandwidth_requirements {
+	int urgent_bandwidth_kbytes_per_sec;
+	int average_bandwidth_kbytes_per_sec;
+};
+
+struct core_plane_support_info {
+	int dpps_used;
+	int dram_change_latency_hiding_margin_in_active;
+	int active_latency_hiding_us;
+	int mall_svp_size_requirement_ways;
+	int nominal_vblank_pstate_latency_hiding_us;
+	unsigned int dram_change_vactive_det_fill_delay_us;
+};
+
+struct core_stream_support_info {
+	unsigned int odms_used;
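+	/* number of ODM slices in use for this stream (inferred from usage) */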
+	unsigned int num_odm_output_segments; // for odm split mode (e.g. a value of 2 for odm_mode_mso_1to2)
+
+	/* FAMS2 SubVP support info */
+	unsigned int phantom_min_v_active;
+	unsigned int phantom_v_startup;
+
+	unsigned int phantom_v_active;
+	unsigned int phantom_v_total;
+	int vblank_reserved_time_us;
+	int num_dsc_slices;
+	bool dsc_enable;
+};
+
+struct core_display_cfg_support_info {
+	bool is_supported;
+
+	struct core_stream_support_info stream_support_info[DML2_MAX_PLANES];
+	struct core_plane_support_info plane_support_info[DML2_MAX_PLANES];
+
+	struct {
+		struct dml2_core_internal_mode_support_info support_info;
+	} clean_me_up;
+};
+
+struct dml2_core_mode_support_result {
+	struct {
+		struct {
+			unsigned long urgent_bw_sdp_kbps;
+			unsigned long average_bw_sdp_kbps;
+			unsigned long urgent_bw_dram_kbps;
+			unsigned long average_bw_dram_kbps;
+			unsigned long dcfclk_khz;
+			unsigned long fclk_khz;
+		} svp_prefetch;
+
+		struct {
+			unsigned long urgent_bw_sdp_kbps;
+			unsigned long average_bw_sdp_kbps;
+			unsigned long urgent_bw_dram_kbps;
+			unsigned long average_bw_dram_kbps;
+			unsigned long dcfclk_khz;
+			unsigned long fclk_khz;
+		} active;
+
+		unsigned int dispclk_khz;
+		unsigned int dpprefclk_khz;
+		unsigned int dtbrefclk_khz;
+		unsigned int dcfclk_deepsleep_khz;
+		unsigned int socclk_khz;
+
+		unsigned int uclk_pstate_supported;
+		unsigned int fclk_pstate_supported;
+	} global;
+
+	struct {
+		unsigned int dscclk_khz;
+		unsigned int dtbclk_khz;
+		unsigned int phyclk_khz;
+	} per_stream[DML2_MAX_PLANES];
+
+	struct {
+		unsigned int dppclk_khz;
+		unsigned int mall_svp_allocation_mblks;
+		unsigned int mall_full_frame_allocation_mblks;
+	} per_plane[DML2_MAX_PLANES];
+
+	struct core_display_cfg_support_info cfg_support_info;
+};
+
+struct dml2_optimization_stage1_state {
+	bool performed;
+	bool success;
+
+	int min_clk_index_for_latency;
+};
+
+struct dml2_optimization_stage2_state {
+	bool performed;
+	bool success;
+
+	// Whether or not each plane supports mcache
+	// The number of valid elements == display_cfg.num_planes
+	// The indexing of per_plane_mcache_support matches plane_descriptors[]
+	bool per_plane_mcache_support[DML2_MAX_PLANES];
+	struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES];
+};
+
+#define DML2_PMO_LEGACY_PREFETCH_MAX_TWAIT_OPTIONS 8
+#define DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE 10
+#define DML2_PMO_STUTTER_CANDIDATE_LIST_SIZE 3
+
+struct dml2_implicit_svp_meta {
+	bool valid;
+	unsigned long v_active;
+	unsigned long v_total;
+	unsigned long v_front_porch;
+};
+
+struct dml2_pstate_per_method_common_meta {
+	/* generic params */
+	unsigned int allow_start_otg_vline;
+	unsigned int allow_end_otg_vline;
+	/* scheduling params */
+	double allow_time_us;
+	double disallow_time_us;
+	double period_us;
+};
+
+struct dml2_pstate_meta {
+	bool valid;
+	double otg_vline_time_us;
+	unsigned int scheduling_delay_otg_vlines;
+	unsigned int vertical_interrupt_ack_delay_otg_vlines;
+	unsigned int allow_to_target_delay_otg_vlines;
+	unsigned int contention_delay_otg_vlines;
+	unsigned int min_allow_width_otg_vlines;
+	unsigned int nom_vtotal;
+	unsigned int vblank_start;
+	double nom_refresh_rate_hz;
+	double nom_frame_time_us;
+	unsigned int max_vtotal;
+	double min_refresh_rate_hz;
+	double max_frame_time_us;
+	unsigned int blackout_otg_vlines;
+	struct {
+		double max_vactive_det_fill_delay_us;
+		unsigned int max_vactive_det_fill_delay_otg_vlines;
+		struct dml2_pstate_per_method_common_meta common;
+	} method_vactive;
+	struct {
+		struct dml2_pstate_per_method_common_meta common;
+	} method_vblank;
+	struct {
+		unsigned int programming_delay_otg_vlines;
+		unsigned int df_throttle_delay_otg_vlines;
+		unsigned int prefetch_to_mall_delay_otg_vlines;
+		unsigned long phantom_vactive;
+		unsigned long phantom_vfp;
+		unsigned long phantom_vtotal;
+		struct dml2_pstate_per_method_common_meta common;
+	} method_subvp;
+	struct {
+		unsigned int programming_delay_otg_vlines;
+		unsigned int stretched_vtotal;
+		struct dml2_pstate_per_method_common_meta common;
+	} method_drr;
+};
+
+/* mask of synchronized timings by stream index */
+struct dml2_pmo_synchronized_timing_groups {
+	unsigned int num_timing_groups;
+	unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES];
+	bool group_is_drr_enabled[DML2_MAX_PLANES];
+	bool group_is_drr_active[DML2_MAX_PLANES];
+	double group_line_time_us[DML2_MAX_PLANES];
+};
+
+struct dml2_optimization_stage3_state {
+	bool performed;
+	bool success;
+
+	// The pstate support mode for each plane
+	// The number of valid elements == display_cfg.num_planes
+	// The indexing of pstate_switch_modes matches plane_descriptors[]
+	enum dml2_pstate_method pstate_switch_modes[DML2_MAX_PLANES];
+
+	// Meta-data for implicit SVP generation, indexed by stream index
+	struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES];
+
+	// Meta-data for FAMS2
+	bool fams2_required;
+	struct dml2_pstate_meta stream_pstate_meta[DML2_MAX_PLANES];
+
+	int min_clk_index_for_latency;
+};
+
+struct dml2_optimization_stage4_state {
+	bool performed;
+	bool success;
+	bool unoptimizable_streams[DML2_MAX_DCN_PIPES];
+};
+
+struct dml2_optimization_stage5_state {
+	bool performed;
+	bool success;
+
+	bool optimal_reserved_time_in_vblank_us;
+	bool vblank_includes_z8_optimization;
+};
+
+struct display_configuation_with_meta {
+	struct dml2_display_cfg display_config;
+
+	struct dml2_core_mode_support_result mode_support_result;
+
+	// Stage 1 = Min Clocks for Latency
+	struct dml2_optimization_stage1_state stage1;
+
+	// Stage 2 = MCache
+	struct dml2_optimization_stage2_state stage2;
+
+	// Stage 3 = UCLK PState
+	struct dml2_optimization_stage3_state stage3;
+
+	// Stage 4 = Vmin
+	struct dml2_optimization_stage4_state stage4;
+
+	// Stage 5 = Stutter
+	struct dml2_optimization_stage5_state stage5;
+};
+
+struct dml2_pmo_pstate_strategy {
+	enum dml2_pstate_method per_stream_pstate_method[DML2_MAX_PLANES];
+	bool allow_state_increase;
+};
+struct dml2_core_mode_support_in_out {
+	/*
+	 * Inputs
+	 */
+	struct dml2_core_instance *instance;
+	const struct display_configuation_with_meta *display_cfg;
+
+	struct dml2_mcg_min_clock_table *min_clk_table;
+	int min_clk_index;
+	/*
+	 * Outputs
+	 */
+	struct dml2_core_mode_support_result mode_support_result;
+
+	struct {
+		// Inputs
+		struct dml_display_cfg_st *display_cfg;
+
+		// Outputs
+		struct dml_mode_support_info_st *support_info;
+		unsigned int out_lowest_state_idx;
+		unsigned int min_fclk_khz;
+		unsigned int min_dcfclk_khz;
+		unsigned int min_dram_speed_mts;
+		unsigned int min_socclk_khz;
+		unsigned int min_dscclk_khz;
+		unsigned int min_dtbclk_khz;
+		unsigned int min_phyclk_khz;
+	} legacy;
+};
+
+struct dml2_core_mode_programming_in_out {
+	/*
+	 * Inputs
+	 */
+	struct dml2_core_instance *instance;
+	const struct display_configuation_with_meta *display_cfg;
+	const struct core_display_cfg_support_info *cfg_support_info;
+	/*
+	 * Outputs (the clock frequencies in the programming struct are also used as inputs)
+	 */
+	struct dml2_display_cfg_programming *programming;
+
+};
+
+struct 
dml2_core_populate_informative_in_out { + /* + * Inputs + */ + struct dml2_core_instance *instance; + + // If this is set, then the mode was supported, and mode programming + // was successfully run. + // Otherwise, mode programming was not run, because mode support failed. + bool mode_is_supported; + + /* + * Outputs + */ + struct dml2_display_cfg_programming *programming; +}; + +struct dml2_calculate_mcache_allocation_in_out { + /* + * Inputs + */ + struct dml2_core_instance *instance; + const struct dml2_plane_parameters *plane_descriptor; + unsigned int plane_index; + + /* + * Outputs + */ + struct dml2_mcache_surface_allocation *mcache_allocation; +}; + +struct dml2_core_internal_state_inputs { + unsigned int dummy; +}; + +struct dml2_core_internal_state_intermediates { + unsigned int dummy; +}; + +struct dml2_core_mode_support_locals { + union { + struct dml2_core_calcs_mode_support_ex mode_support_ex_params; + }; + struct dml2_display_cfg svp_expanded_display_cfg; + struct dml2_calculate_mcache_allocation_in_out calc_mcache_allocation_params; +}; + +struct dml2_core_mode_programming_locals { + union { + struct dml2_core_calcs_mode_programming_ex mode_programming_ex_params; + }; + struct dml2_display_cfg svp_expanded_display_cfg; +}; + +struct dml2_core_scratch { + struct dml2_core_mode_support_locals mode_support_locals; + struct dml2_core_mode_programming_locals mode_programming_locals; + int main_stream_index_from_svp_stream_index[DML2_MAX_PLANES]; + int svp_stream_index_from_main_stream_index[DML2_MAX_PLANES]; + int main_plane_index_to_phantom_plane_index[DML2_MAX_PLANES]; + int phantom_plane_index_to_main_plane_index[DML2_MAX_PLANES]; +}; + +struct dml2_core_instance { + enum dml2_project_id project_id; + struct dml2_mcg_min_clock_table *minimum_clock_table; + struct dml2_core_internal_state_inputs inputs; + struct dml2_core_internal_state_intermediates intermediates; + + struct dml2_core_scratch scratch; + + bool (*initialize)(struct dml2_core_initialize_in_out *in_out); + bool (*mode_support)(struct dml2_core_mode_support_in_out *in_out); + bool (*mode_programming)(struct dml2_core_mode_programming_in_out *in_out); + bool (*populate_informative)(struct dml2_core_populate_informative_in_out *in_out); + bool (*calculate_mcache_allocation)(struct dml2_calculate_mcache_allocation_in_out *in_out); + + struct { + struct dml2_core_internal_display_mode_lib mode_lib; + } clean_me_up; +}; + +/* +* DML2 PMO Types and Interfaces +*/ + +struct dml2_pmo_initialize_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct dml2_soc_bb *soc_bb; + struct dml2_ip_capabilities *ip_caps; + struct dml2_pmo_options *options; + int mcg_clock_table_size; +}; + +struct dml2_pmo_optimize_dcc_mcache_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + const struct dml2_display_cfg *display_config; + bool *dcc_mcache_supported; + struct core_display_cfg_support_info *cfg_support_info; + + /* + * Output + */ + struct dml2_display_cfg *optimized_display_cfg; +}; + +struct dml2_pmo_init_for_vmin_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_test_for_vmin_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + const struct display_configuation_with_meta *display_config; + const struct dml2_soc_vmin_clock_limits *vmin_limits; +}; + +struct dml2_pmo_optimize_for_vmin_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct 
display_configuation_with_meta *base_display_config; + + /* + * Output + */ + struct display_configuation_with_meta *optimized_display_config; +}; + +struct dml2_pmo_init_for_pstate_support_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_test_for_pstate_support_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_optimize_for_pstate_support_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; + bool last_candidate_failed; + + /* + * Output + */ + struct display_configuation_with_meta *optimized_display_config; +}; + +struct dml2_pmo_init_for_stutter_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_test_for_stutter_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_optimize_for_stutter_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; + bool last_candidate_failed; + + /* + * Output + */ + struct display_configuation_with_meta *optimized_display_config; +}; + +#define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw - dml2_pstate_method_na + 1)) - 1) << dml2_pstate_method_na) +#define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr) +#define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_clamped - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr) +#define PMO_DRR_VAR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_drr + 1)) - 1) << dml2_pstate_method_fw_drr) +#define PMO_FW_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_svp + 1)) - 1) << dml2_pstate_method_fw_svp) + +#define PMO_DCN4_MAX_DISPLAYS 4 +#define PMO_DCN4_MAX_NUM_VARIANTS 2 +#define PMO_DCN4_MAX_BASE_STRATEGIES 10 + +struct dml2_scheduling_check_locals { + struct dml2_pstate_per_method_common_meta group_common_pstate_meta[DML2_MAX_PLANES]; + unsigned int sorted_group_gtl_disallow_index[DML2_MAX_PLANES]; + unsigned int sorted_group_gtl_period_index[DML2_MAX_PLANES]; +}; + +struct dml2_pmo_scratch { + union { + struct { + double reserved_time_candidates[DML2_MAX_PLANES][DML2_PMO_LEGACY_PREFETCH_MAX_TWAIT_OPTIONS]; + int reserved_time_candidates_count[DML2_MAX_PLANES]; + int current_candidate[DML2_MAX_PLANES]; + int min_latency_index; + int max_latency_index; + int cur_latency_index; + int stream_mask; + } pmo_dcn3; + struct { + struct dml2_pmo_pstate_strategy expanded_override_strategy_list[2 * 2 * 2 * 2]; + unsigned int num_expanded_override_strategies; + struct dml2_pmo_pstate_strategy pstate_strategy_candidates[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; + int num_pstate_candidates; + int cur_pstate_candidate; + + unsigned int stream_plane_mask[DML2_MAX_PLANES]; + + unsigned int stream_vactive_capability_mask; + + int min_latency_index; + int max_latency_index; + int cur_latency_index; + + // Stores all the implicit SVP meta information indexed by stream index of the display + // configuration under inspection, built at 
optimization stage init + struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES]; + struct dml2_pstate_meta stream_pstate_meta[DML2_MAX_PLANES]; + + unsigned int optimal_vblank_reserved_time_for_stutter_us[DML2_PMO_STUTTER_CANDIDATE_LIST_SIZE]; + unsigned int num_stutter_candidates; + unsigned int cur_stutter_candidate; + bool z8_vblank_optimizable; + + /* mask of synchronized timings by stream index */ + unsigned int num_timing_groups; + unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES]; + bool group_is_drr_enabled[DML2_MAX_PLANES]; + bool group_is_drr_active[DML2_MAX_PLANES]; + double group_line_time_us[DML2_MAX_PLANES]; + + /* scheduling check locals */ + struct dml2_pstate_per_method_common_meta group_common_pstate_meta[DML2_MAX_PLANES]; + unsigned int sorted_group_gtl_disallow_index[DML2_MAX_PLANES]; + unsigned int sorted_group_gtl_period_index[DML2_MAX_PLANES]; + double group_phase_offset[DML2_MAX_PLANES]; + } pmo_dcn4; + }; +}; + +struct dml2_pmo_init_data { + union { + struct { + /* populated once during initialization */ + struct dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 4 * 4]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 6 * 6]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 8 * 8 * 8 * 8]; + unsigned int num_expanded_strategies_per_list[PMO_DCN4_MAX_DISPLAYS]; + } pmo_dcn4; + }; +}; + +struct dml2_pmo_instance { + struct dml2_soc_bb *soc_bb; + struct dml2_ip_capabilities *ip_caps; + + struct dml2_pmo_options *options; + + int disp_clk_vmin_threshold; + int mpc_combine_limit; + int odm_combine_limit; + int mcg_clock_table_size; + union { + struct { + struct { + int prefetch_end_to_mall_start_us; + int fw_processing_delay_us; + int refresh_rate_limit_min; + int refresh_rate_limit_max; + } subvp; + } v1; + struct { + struct { + int refresh_rate_limit_min; + int refresh_rate_limit_max; + } subvp; + struct { + int refresh_rate_limit_min; + int refresh_rate_limit_max; + } drr; + } v2; + } fams_params; + + bool (*initialize)(struct dml2_pmo_initialize_in_out *in_out); + bool (*optimize_dcc_mcache)(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); + + bool (*init_for_vmin)(struct dml2_pmo_init_for_vmin_in_out *in_out); + bool (*test_for_vmin)(struct dml2_pmo_test_for_vmin_in_out *in_out); + bool (*optimize_for_vmin)(struct dml2_pmo_optimize_for_vmin_in_out *in_out); + + bool (*init_for_uclk_pstate)(struct dml2_pmo_init_for_pstate_support_in_out *in_out); + bool (*test_for_uclk_pstate)(struct dml2_pmo_test_for_pstate_support_in_out *in_out); + bool (*optimize_for_uclk_pstate)(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); + + bool (*init_for_stutter)(struct dml2_pmo_init_for_stutter_in_out *in_out); + bool (*test_for_stutter)(struct dml2_pmo_test_for_stutter_in_out *in_out); + bool (*optimize_for_stutter)(struct dml2_pmo_optimize_for_stutter_in_out *in_out); + + struct dml2_pmo_init_data init_data; + struct dml2_pmo_scratch scratch; +}; + +/* +* DML2 MCache Types +*/ + +struct top_mcache_validate_admissability_in_out { + struct dml2_instance *dml2_instance; + + const struct dml2_display_cfg *display_cfg; + const struct core_display_cfg_support_info *cfg_support_info; + struct dml2_mcache_surface_allocation *mcache_allocations; + + bool per_plane_status[DML2_MAX_PLANES]; + + struct { + const struct 
dml_mode_support_info_st *mode_support_info; + } legacy; +}; + +struct top_mcache_assign_ids_in_out { + /* + * Input + */ + const struct dml2_mcache_surface_allocation *mcache_allocations; + int plane_count; + + int per_pipe_viewport_x_start[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; + int per_pipe_viewport_x_end[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; + int pipe_count_per_plane[DML2_MAX_PLANES]; + + struct dml2_display_mcache_regs *current_mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; //One set per pipe/hubp + + /* + * Output + */ + struct dml2_display_mcache_regs mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; //One set per pipe/hubp + struct dml2_build_mcache_programming_in_out *mcache_programming; +}; + +struct top_mcache_calc_mcache_count_and_offsets_in_out { + /* + * Inputs + */ + struct dml2_instance *dml2_instance; + const struct dml2_display_cfg *display_config; + + /* + * Outputs + */ + struct dml2_mcache_surface_allocation *mcache_allocations; +}; + +struct top_mcache_assign_global_mcache_ids_in_out { + /* + * Inputs/Outputs + */ + struct dml2_mcache_surface_allocation *allocations; + int num_allocations; +}; + +/* +* DML2 Top Types +*/ + +struct dml2_initialize_instance_locals { + int dummy; +}; + +struct dml2_optimization_init_function_locals { + union { + struct { + struct dml2_pmo_init_for_pstate_support_in_out init_params; + } uclk_pstate; + struct { + struct dml2_pmo_init_for_stutter_in_out stutter_params; + } stutter; + struct { + struct dml2_pmo_init_for_vmin_in_out init_params; + } vmin; + }; +}; + +struct dml2_optimization_test_function_locals { + union { + struct { + struct top_mcache_calc_mcache_count_and_offsets_in_out calc_mcache_count_params; + struct top_mcache_assign_global_mcache_ids_in_out assign_global_mcache_ids_params; + struct top_mcache_validate_admissability_in_out validate_admissibility_params; + } test_mcache; + struct { + struct dml2_pmo_test_for_vmin_in_out pmo_test_vmin_params; + } test_vmin; + struct { + struct dml2_pmo_test_for_pstate_support_in_out test_params; + } uclk_pstate; + struct { + struct dml2_pmo_test_for_stutter_in_out stutter_params; + } stutter; + }; +}; + +struct dml2_optimization_optimize_function_locals { + union { + struct { + struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params; + } optimize_mcache; + struct { + struct dml2_pmo_optimize_for_vmin_in_out pmo_optimize_vmin_params; + } optimize_vmin; + struct { + struct dml2_pmo_optimize_for_pstate_support_in_out optimize_params; + } uclk_pstate; + struct { + struct dml2_pmo_optimize_for_stutter_in_out stutter_params; + } stutter; + }; +}; + +struct dml2_optimization_phase_locals { + struct display_configuation_with_meta cur_candidate_display_cfg; + struct display_configuation_with_meta next_candidate_display_cfg; + struct dml2_core_mode_support_in_out mode_support_params; + struct dml2_optimization_init_function_locals init_function_locals; + struct dml2_optimization_test_function_locals test_function_locals; + struct dml2_optimization_optimize_function_locals optimize_function_locals; +}; + +struct dml2_check_mode_supported_locals { + struct dml2_display_cfg display_cfg_working_copy; + struct dml2_core_mode_support_in_out mode_support_params; + struct dml2_optimization_phase_locals optimization_phase_locals; + struct display_configuation_with_meta base_display_config_with_meta; + struct display_configuation_with_meta optimized_display_config_with_meta; + struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out dppm_map_mode_params; +}; + +struct 
optimization_init_function_params {
+	struct dml2_optimization_init_function_locals *locals;
+	struct dml2_instance *dml;
+	struct display_configuation_with_meta *display_config;
+};
+
+struct optimization_test_function_params {
+	struct dml2_optimization_test_function_locals *locals;
+	struct dml2_instance *dml;
+	struct display_configuation_with_meta *display_config;
+};
+
+struct optimization_optimize_function_params {
+	bool last_candidate_supported;
+	struct dml2_optimization_optimize_function_locals *locals;
+	struct dml2_instance *dml;
+	struct display_configuation_with_meta *display_config;
+	struct display_configuation_with_meta *optimized_display_config;
+};
+
+struct optimization_phase_params {
+	struct dml2_instance *dml;
+	const struct display_configuation_with_meta *display_config; // Initial Display Configuration
+	bool (*init_function)(const struct optimization_init_function_params *params); // Function which initializes the optimization phase
+	bool (*test_function)(const struct optimization_test_function_params *params); // Test function to determine if optimization is complete
+	bool (*optimize_function)(const struct optimization_optimize_function_params *params); // Function which produces a more optimized display configuration
+	struct display_configuation_with_meta *optimized_display_config; // The optimized display configuration
+
+	bool all_or_nothing;
+};
+
+struct dml2_build_mode_programming_locals {
+	struct dml2_core_mode_support_in_out mode_support_params;
+	struct dml2_core_mode_programming_in_out mode_programming_params;
+	struct dml2_core_populate_informative_in_out informative_params;
+	struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params;
+	struct display_configuation_with_meta base_display_config_with_meta;
+	struct display_configuation_with_meta optimized_display_config_with_meta;
+	struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out dppm_map_mode_params;
+	struct dml2_dpmm_map_watermarks_params_in_out dppm_map_watermarks_params;
+	struct dml2_optimization_phase_locals optimization_phase_locals;
+	struct optimization_phase_params min_clock_for_latency_phase;
+	struct optimization_phase_params mcache_phase;
+	struct optimization_phase_params uclk_pstate_phase;
+	struct optimization_phase_params vmin_phase;
+	struct optimization_phase_params stutter_phase;
+};
+
+struct dml2_legacy_core_build_mode_programming_wrapper_locals {
+	struct dml2_core_mode_support_in_out mode_support_params;
+	struct dml2_core_mode_programming_in_out mode_programming_params;
+	struct dml2_core_populate_informative_in_out informative_params;
+	struct top_mcache_calc_mcache_count_and_offsets_in_out calc_mcache_count_params;
+	struct top_mcache_validate_admissability_in_out validate_admissibility_params;
+	struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES];
+	struct top_mcache_assign_global_mcache_ids_in_out assign_global_mcache_ids_params;
+	struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params;
+	struct dml2_display_cfg optimized_display_cfg;
+	struct core_display_cfg_support_info core_support_info;
+};
+
+struct dml2_top_mcache_verify_mcache_size_locals {
+	struct dml2_calculate_mcache_allocation_in_out calc_mcache_params;
+};
+
+struct dml2_top_mcache_validate_admissability_locals {
+	struct {
+		int pipe_vp_startx[DML2_MAX_DCN_PIPES];
+		int pipe_vp_endx[DML2_MAX_DCN_PIPES];
+	} plane0;
+	struct {
+		int pipe_vp_startx[DML2_MAX_DCN_PIPES];
+		int pipe_vp_endx[DML2_MAX_DCN_PIPES];
+	} plane1;
+};
+
+struct 
dml2_top_display_cfg_support_info { + const struct dml2_display_cfg *display_config; + struct core_display_cfg_support_info core_info; +}; + +struct dml2_top_funcs { + bool (*check_mode_supported)(struct dml2_check_mode_supported_in_out *in_out); + bool (*build_mode_programming)(struct dml2_build_mode_programming_in_out *in_out); + bool (*build_mcache_programming)(struct dml2_build_mcache_programming_in_out *in_out); +}; + +struct dml2_instance { + enum dml2_project_id project_id; + + struct dml2_core_instance core_instance; + struct dml2_mcg_instance mcg_instance; + struct dml2_dpmm_instance dpmm_instance; + struct dml2_pmo_instance pmo_instance; + + struct dml2_soc_bb soc_bbox; + struct dml2_ip_capabilities ip_caps; + + struct dml2_mcg_min_clock_table min_clk_table; + struct dml2_pmo_options pmo_options; + struct dml2_top_funcs funcs; + + struct { + struct dml2_initialize_instance_locals initialize_instance_locals; + struct dml2_top_mcache_verify_mcache_size_locals mcache_verify_mcache_size_locals; + struct dml2_top_mcache_validate_admissability_locals mcache_validate_admissability_locals; + struct dml2_check_mode_supported_locals check_mode_supported_locals; + struct dml2_build_mode_programming_locals build_mode_programming_locals; + } scratch; + + struct { + struct { + struct dml2_legacy_core_build_mode_programming_wrapper_locals legacy_core_build_mode_programming_wrapper_locals; + } scratch; + } legacy; +}; +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c new file mode 100644 index 000000000000..4cfe64aa8492 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c @@ -0,0 +1,1174 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dml2_mall_phantom.h" + +#include "dml2_dc_types.h" +#include "dml2_internal_types.h" +#include "dml2_utils.h" +#include "dml2_dc_resource_mgmt.h" + +#define MAX_ODM_FACTOR 4 +#define MAX_MPCC_FACTOR 4 + +struct dc_plane_pipe_pool { + int pipes_assigned_to_plane[MAX_ODM_FACTOR][MAX_MPCC_FACTOR]; + bool pipe_used[MAX_ODM_FACTOR][MAX_MPCC_FACTOR]; + int num_pipes_assigned_to_plane_for_mpcc_combine; + int num_pipes_assigned_to_plane_for_odm_combine; +}; + +struct dc_pipe_mapping_scratch { + struct { + unsigned int odm_factor; + unsigned int odm_slice_end_x[MAX_PIPES]; + struct pipe_ctx *next_higher_pipe_for_odm_slice[MAX_PIPES]; + } odm_info; + struct { + unsigned int mpc_factor; + struct pipe_ctx *prev_odm_pipe; + } mpc_info; + + struct dc_plane_pipe_pool pipe_pool; +}; + +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) +{ + int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; + + if (!plane_id) + return false; + + for (i = 0; i < state->stream_count; i++) { + if (state->streams[i]->stream_id == stream_id) { + for (j = 0; j < state->stream_status[i].plane_count; j++) { + if (state->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (j == plane_index))) { + *plane_id = (i << 16) | j; + return true; + } + } + } + } + + return false; +} + +static int find_disp_cfg_idx_by_plane_id(struct dml2_dml_to_dc_pipe_mapping *mapping, unsigned int plane_id) +{ + int i; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (mapping->disp_cfg_to_plane_id_valid[i] && mapping->disp_cfg_to_plane_id[i] == plane_id) + return i; + } + + ASSERT(false); + return __DML2_WRAPPER_MAX_STREAMS_PLANES__; +} + +static int find_disp_cfg_idx_by_stream_id(struct dml2_dml_to_dc_pipe_mapping *mapping, unsigned int stream_id) +{ + int i; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (mapping->disp_cfg_to_stream_id_valid[i] && mapping->disp_cfg_to_stream_id[i] == stream_id) + return i; + } + + ASSERT(false); + return __DML2_WRAPPER_MAX_STREAMS_PLANES__; +} + +// The master pipe of a stream is defined as the top pipe in odm slice 0 +static struct pipe_ctx *find_master_pipe_of_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int stream_id) +{ + int i; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + if (state->res_ctx.pipe_ctx[i].stream && state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) { + if (!state->res_ctx.pipe_ctx[i].prev_odm_pipe && !state->res_ctx.pipe_ctx[i].top_pipe) + return &state->res_ctx.pipe_ctx[i]; + } + } + + return NULL; +} + +static struct pipe_ctx *find_master_pipe_of_plane(struct dml2_context *ctx, + struct dc_state *state, unsigned int plane_id) +{ + int i; + unsigned int plane_id_assigned_to_pipe; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(ctx, state, state->res_ctx.pipe_ctx[i].plane_state, + state->res_ctx.pipe_ctx[i].stream->stream_id, + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id_assigned_to_pipe)) { + if (plane_id_assigned_to_pipe == plane_id) + return &state->res_ctx.pipe_ctx[i]; + } + } + + return NULL; +} + +static unsigned int find_pipes_assigned_to_plane(struct dml2_context *ctx, + struct dc_state *state, unsigned int plane_id, unsigned int *pipes) +{ + 
int i; + unsigned int num_found = 0; + unsigned int plane_id_assigned_to_pipe = -1; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (!pipe->plane_state || !pipe->stream) + continue; + + get_plane_id(ctx, state, pipe->plane_state, pipe->stream->stream_id, + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[pipe->pipe_idx], + &plane_id_assigned_to_pipe); + if (plane_id_assigned_to_pipe == plane_id && !pipe->prev_odm_pipe + && (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) { + while (pipe) { + struct pipe_ctx *mpc_pipe = pipe; + + while (mpc_pipe) { + pipes[num_found++] = mpc_pipe->pipe_idx; + mpc_pipe = mpc_pipe->bottom_pipe; + if (!mpc_pipe) + break; + if (mpc_pipe->plane_state != pipe->plane_state) + mpc_pipe = NULL; + } + pipe = pipe->next_odm_pipe; + } + break; + } + } + + return num_found; +} + +static bool validate_pipe_assignment(const struct dml2_context *ctx, const struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, const struct dml2_dml_to_dc_pipe_mapping *mapping) +{ +// int i, j, k; +// +// unsigned int plane_id; +// +// unsigned int disp_cfg_index; +// +// unsigned int pipes_assigned_to_plane[MAX_PIPES]; +// unsigned int num_pipes_assigned_to_plane; +// +// struct pipe_ctx *top_pipe; +// +// for (i = 0; i < state->stream_count; i++) { +// for (j = 0; j < state->stream_status[i]->plane_count; j++) { +// if (get_plane_id(state, state->stream_status.plane_states[j], &plane_id)) { +// disp_cfg_index = find_disp_cfg_idx_by_plane_id(mapping, plane_id); +// num_pipes_assigned_to_plane = find_pipes_assigned_to_plane(ctx, state, plane_id, pipes_assigned_to_plane); +// +// if (disp_cfg_index >= 0 && num_pipes_assigned_to_plane > 0) { +// // Verify the number of pipes assigned matches +// if (disp_cfg->hw.DPPPerSurface != num_pipes_assigned_to_plane) +// return false; +// +// top_pipe = find_top_pipe_in_tree(state->res_ctx.pipe_ctx[pipes_assigned_to_plane[0]]); +// +// // Verify MPC and ODM combine +// if (disp_cfg->hw.ODMMode == dml_odm_mode_bypass) { +// verify_combine_tree(top_pipe, state->streams[i]->stream_id, plane_id, state, false); +// } else { +// verify_combine_tree(top_pipe, state->streams[i]->stream_id, plane_id, state, true); +// } +// +// // TODO: could also do additional verification that the pipes in tree are the same as +// // pipes_assigned_to_plane +// } else { +// ASSERT(false); +// return false; +// } +// } else { +// ASSERT(false); +// return false; +// } +// } +// } + return true; +} + +static bool is_plane_using_pipe(const struct pipe_ctx *pipe) +{ + if (pipe->plane_state) + return true; + + return false; +} + +static bool is_pipe_free(const struct pipe_ctx *pipe) +{ + if (!pipe->plane_state && !pipe->stream) + return true; + + return false; +} + +static unsigned int find_preferred_pipe_candidates(const struct dc_state *existing_state, + const int pipe_count, + const unsigned int stream_id, + unsigned int *preferred_pipe_candidates) +{ + unsigned int num_preferred_candidates = 0; + int i; + + /* There is only one case which we consider for adding a pipe to the preferred + * pipe candidate array: + * + * 1. If the existing stream id of the pipe is equivalent to the stream id + * of the stream we are trying to achieve MPC/ODM combine for. This allows + * us to minimize the changes in pipe topology during the transition. + * + * However this condition comes with a caveat. 
We need to ignore pipes that will
+	 * require a change in OPP but still have the same stream id. For example during
+	 * an MPC to ODM transition.
+	 *
+	 * We also avoid selecting a head pipe by using the dc resource helper
+	 * resource_get_primary_dpp_pipe() and comparing the pipe index.
+	 */
+	if (existing_state) {
+		for (i = 0; i < pipe_count; i++) {
+			if (existing_state->res_ctx.pipe_ctx[i].stream && existing_state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) {
+				struct pipe_ctx *head_pipe =
+					resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ?
+							resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) :
+							NULL;
+
+				// we should always respect the head pipe from selection
+				if (head_pipe && head_pipe->pipe_idx == i)
+					continue;
+				if (existing_state->res_ctx.pipe_ctx[i].plane_res.hubp &&
+						existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i &&
+						(existing_state->res_ctx.pipe_ctx[i].prev_odm_pipe ||
+						existing_state->res_ctx.pipe_ctx[i].next_odm_pipe))
+					continue;
+
+				preferred_pipe_candidates[num_preferred_candidates++] = i;
+			}
+		}
+	}
+
+	return num_preferred_candidates;
+}
+
+static unsigned int find_last_resort_pipe_candidates(const struct dc_state *existing_state,
+		const int pipe_count,
+		const unsigned int stream_id,
+		unsigned int *last_resort_pipe_candidates)
+{
+	unsigned int num_last_resort_candidates = 0;
+	int i;
+
+	/* There are two cases where we would like to add a given pipe into the last
+	 * candidate array:
+	 *
+	 * 1. If the pipe requires a change in OPP, for example during an MPC
+	 * to ODM transition.
+	 *
+	 * 2. If the pipe already has an enabled OTG.
+	 */
+	if (existing_state) {
+		for (i = 0; i < pipe_count; i++) {
+			struct pipe_ctx *head_pipe =
+				resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ?
+					resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) :
+					NULL;
+
+			// we should always respect the head pipe from selection
+			if (head_pipe && head_pipe->pipe_idx == i)
+				continue;
+			if ((existing_state->res_ctx.pipe_ctx[i].plane_res.hubp &&
+					existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) ||
+					existing_state->res_ctx.pipe_ctx[i].stream_res.tg)
+				last_resort_pipe_candidates[num_last_resort_candidates++] = i;
+		}
+	}
+
+	return num_last_resort_candidates;
+}
+
+static bool is_pipe_in_candidate_array(const unsigned int pipe_idx,
+		const unsigned int *candidate_array,
+		const unsigned int candidate_array_size)
+{
+	int i;
+
+	for (i = 0; i < candidate_array_size; i++) {
+		if (candidate_array[i] == pipe_idx)
+			return true;
+	}
+
+	return false;
+}
+
+static bool find_more_pipes_for_stream(struct dml2_context *ctx,
+		struct dc_state *state, // The state we want to find a free mapping in
+		unsigned int stream_id, // The stream we want this pipe to drive
+		int *assigned_pipes,
+		int *assigned_pipe_count,
+		int pipes_needed,
+		const struct dc_state *existing_state) // The state (optional) that we want to minimize remapping relative to
+{
+	struct pipe_ctx *pipe = NULL;
+	unsigned int preferred_pipe_candidates[MAX_PIPES] = {0};
+	unsigned int last_resort_pipe_candidates[MAX_PIPES] = {0};
+	unsigned int num_preferred_candidates = 0;
+	unsigned int num_last_resort_candidates = 0;
+	int i;
+
+	if (existing_state) {
+		num_preferred_candidates =
+			find_preferred_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, preferred_pipe_candidates);
+
+		num_last_resort_candidates =
+			find_last_resort_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, last_resort_pipe_candidates);
+	}
+
+	// First see if any of the preferred are unmapped, and choose those instead
+	for (i = 0; pipes_needed > 0 && i < num_preferred_candidates; i++) {
+		pipe = &state->res_ctx.pipe_ctx[preferred_pipe_candidates[i]];
+		if (!is_plane_using_pipe(pipe)) {
+			pipes_needed--;
+			// TODO: This doesn't really make sense, pipe_idx should always be valid
+			pipe->pipe_idx = preferred_pipe_candidates[i];
+			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+		}
+	}
+
+	// We like to pair pipes starting from the higher-order indices for combining
+	for (i = ctx->config.dcn_pipe_count - 1; pipes_needed > 0 && i >= 0; i--) {
+		// Ignore any pipes that are the preferred or last resort candidate
+		if (is_pipe_in_candidate_array(i, preferred_pipe_candidates, num_preferred_candidates) ||
+				is_pipe_in_candidate_array(i, last_resort_pipe_candidates, num_last_resort_candidates))
+			continue;
+
+		pipe = &state->res_ctx.pipe_ctx[i];
+		if (!is_plane_using_pipe(pipe)) {
+			pipes_needed--;
+			// TODO: This doesn't really make sense, pipe_idx should always be valid
+			pipe->pipe_idx = i;
+			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+		}
+	}
+
+	// Only use the last resort pipe candidates as a last resort
+	for (i = 0; pipes_needed > 0 && i < num_last_resort_candidates; i++) {
+		pipe = &state->res_ctx.pipe_ctx[last_resort_pipe_candidates[i]];
+		if (!is_plane_using_pipe(pipe)) {
+			pipes_needed--;
+			// TODO: This doesn't really make sense, pipe_idx should always be valid
+			pipe->pipe_idx = last_resort_pipe_candidates[i];
+			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+		}
+	}
+
+	ASSERT(pipes_needed <= 0); // Validation should prevent us from building a pipe context that exceeds the number of HW resources available
+
+	return pipes_needed <= 0;
+}
+
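+/*
+ * Note: find_more_free_pipes() below mirrors find_more_pipes_for_stream()
+ * above; the only difference is the acceptance test. The variant above
+ * takes any pipe that is not currently driving a plane
+ * (is_plane_using_pipe()), while the one below only takes pipes with
+ * neither a plane nor a stream attached (is_pipe_free()).
+ */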
+static bool find_more_free_pipes(struct dml2_context *ctx,
+		struct dc_state *state, // The state we want to find a free mapping in
+		unsigned int stream_id, // The stream we want this pipe to drive
+		int *assigned_pipes,
+		int *assigned_pipe_count,
+		int pipes_needed,
+		const struct dc_state *existing_state) // The state (optional) that we want to minimize remapping relative to
+{
+	struct pipe_ctx *pipe = NULL;
+	unsigned int preferred_pipe_candidates[MAX_PIPES] = {0};
+	unsigned int last_resort_pipe_candidates[MAX_PIPES] = {0};
+	unsigned int num_preferred_candidates = 0;
+	unsigned int num_last_resort_candidates = 0;
+	int i;
+
+	if (existing_state) {
+		num_preferred_candidates =
+			find_preferred_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, preferred_pipe_candidates);
+
+		num_last_resort_candidates =
+			find_last_resort_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, last_resort_pipe_candidates);
+	}
+
+	// First see if any of the preferred are unmapped, and choose those instead
+	for (i = 0; pipes_needed > 0 && i < num_preferred_candidates; i++) {
+		pipe = &state->res_ctx.pipe_ctx[preferred_pipe_candidates[i]];
+		if (is_pipe_free(pipe)) {
+			pipes_needed--;
+			// TODO: This doesn't really make sense, pipe_idx should always be valid
+			pipe->pipe_idx = preferred_pipe_candidates[i];
+			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+		}
+	}
+
+	// We like to pair pipes starting from the higher-order indices for combining
+	for (i = ctx->config.dcn_pipe_count - 1; pipes_needed > 0 && i >= 0; i--) {
+		// Ignore any pipes that are the preferred or last resort candidate
+		if (is_pipe_in_candidate_array(i, preferred_pipe_candidates, num_preferred_candidates) ||
+				is_pipe_in_candidate_array(i, last_resort_pipe_candidates, num_last_resort_candidates))
+			continue;
+
+		pipe = &state->res_ctx.pipe_ctx[i];
+		if (is_pipe_free(pipe)) {
+			pipes_needed--;
+			// TODO: This doesn't really make sense, pipe_idx should always be valid
+			pipe->pipe_idx = i;
+			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+		}
+	}
+
+	// Only use the last resort pipe candidates as a last resort
+	for (i = 0; pipes_needed > 0 && i < num_last_resort_candidates; i++) {
+		pipe = &state->res_ctx.pipe_ctx[last_resort_pipe_candidates[i]];
+		if (is_pipe_free(pipe)) {
+			pipes_needed--;
+			// TODO: This doesn't really make sense, pipe_idx should always be valid
+			pipe->pipe_idx = last_resort_pipe_candidates[i];
+			assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+		}
+	}
+
+	ASSERT(pipes_needed == 0); // Validation should prevent us from building a pipe context that exceeds the number of HW resources available
+
+	return pipes_needed == 0;
+}
+
+static void sort_pipes_for_splitting(struct dc_plane_pipe_pool *pipes)
+{
+	bool sorted, swapped;
+	unsigned int cur_index;
+	int odm_slice_index;
+
+	for (odm_slice_index = 0; odm_slice_index < pipes->num_pipes_assigned_to_plane_for_odm_combine; odm_slice_index++) {
+		// Sort each MPCC set
+		// Un-optimized bubble sort, but that's okay for array sizes <= 6
+
+		if (pipes->num_pipes_assigned_to_plane_for_mpcc_combine <= 1)
+			sorted = true;
+		else
+			sorted = false;
+
+		cur_index = 0;
+		swapped = false;
+		while (!sorted) {
+			if (pipes->pipes_assigned_to_plane[odm_slice_index][cur_index] > pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1]) {
+				swap(pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1],
+				     pipes->pipes_assigned_to_plane[odm_slice_index][cur_index]);
+
+				swapped = true;
+			}
+
+			cur_index++;
+
+			if (cur_index == pipes->num_pipes_assigned_to_plane_for_mpcc_combine - 1) {
+				cur_index = 0;
+
+				if (swapped)
+					sorted = false;
+				else
+					sorted = true;
+
+				swapped = false;
+			}
+
+		}
+	}
+}
+
+// For example, 3840 x 2160 with ODM2:1 has a slice array of [1919, 3839], meaning slice0 spans h_pixels 0->1919 and slice1 spans 1920->3839
+static void calculate_odm_slices(const struct dc_stream_state *stream, unsigned int odm_factor, unsigned int *odm_slice_end_x)
+{
+	unsigned int slice_size = 0;
+	int i;
+
+	if (odm_factor < 1 || odm_factor > 4) {
+		ASSERT(false);
+		return;
+	}
+
+	slice_size = stream->src.width / odm_factor;
+
+	for (i = 0; i < odm_factor; i++)
+		odm_slice_end_x[i] = (slice_size * (i + 1)) - 1;
+
+	odm_slice_end_x[odm_factor - 1] = stream->src.width - 1;
+}
+
+static void add_odm_slice_to_odm_tree(struct dml2_context *ctx,
+		struct dc_state *state,
+		struct dc_pipe_mapping_scratch *scratch,
+		unsigned int odm_slice_index)
+{
+	struct pipe_ctx *pipe = NULL;
+	int i;
+
+	// MPCC Combine + ODM Combine is not supported, so there should never be a case where the current plane
+	// has more than 1 pipe mapped to it for a given slice.
+	ASSERT(scratch->pipe_pool.num_pipes_assigned_to_plane_for_mpcc_combine == 1 || scratch->pipe_pool.num_pipes_assigned_to_plane_for_odm_combine == 1);
+
+	for (i = 0; i < scratch->pipe_pool.num_pipes_assigned_to_plane_for_mpcc_combine; i++) {
+		pipe = &state->res_ctx.pipe_ctx[scratch->pipe_pool.pipes_assigned_to_plane[odm_slice_index][i]];
+
+		if (scratch->mpc_info.prev_odm_pipe)
+			scratch->mpc_info.prev_odm_pipe->next_odm_pipe = pipe;
+
+		pipe->prev_odm_pipe = scratch->mpc_info.prev_odm_pipe;
+		pipe->next_odm_pipe = NULL;
+	}
+	scratch->mpc_info.prev_odm_pipe = pipe;
+}
+
+static struct pipe_ctx *add_plane_to_blend_tree(struct dml2_context *ctx,
+		struct dc_state *state,
+		const struct dc_plane_state *plane,
+		struct dc_plane_pipe_pool *pipe_pool,
+		unsigned int odm_slice,
+		struct pipe_ctx *top_pipe)
+{
+	int i;
+
+	for (i = 0; i < pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine; i++) {
+		if (top_pipe)
+			top_pipe->bottom_pipe = &state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]];
+
+		pipe_pool->pipe_used[odm_slice][i] = true;
+
+		state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]].top_pipe = top_pipe;
+		state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]].bottom_pipe = NULL;
+
+		top_pipe = &state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]];
+	}
+
+	// After running the above loop, the top pipe actually ends up pointing to the bottom of this MPCC combine tree, so we are actually
+	// returning the bottom pipe here
+	return top_pipe;
+}
+
+static unsigned int find_pipes_assigned_to_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int stream_id, unsigned int *pipes)
+{
+	int i;
+	unsigned int num_found = 0;
+
+	for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+		struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+		if (pipe->stream && pipe->stream->stream_id == stream_id && !pipe->top_pipe && !pipe->prev_odm_pipe) {
+			while (pipe) {
+				pipes[num_found++] = pipe->pipe_idx;
+				pipe = pipe->next_odm_pipe;
+			}
+			break;
+		}
+	}
+
+	return num_found;
+}
+
+static struct pipe_ctx *assign_pipes_to_stream(struct dml2_context *ctx, struct dc_state *state,
+		const struct dc_stream_state *stream,
+		int odm_factor,
+		struct dc_plane_pipe_pool *pipe_pool,
+		const struct dc_state *existing_state)
+{
+	struct pipe_ctx *master_pipe;
+	unsigned int pipes_needed;
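+	/*
+	 * One pipe is needed per ODM slice: reuse the pipes already driving
+	 * this stream, then pull completely free pipes for the remainder.
+	 */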
+ unsigned int pipes_assigned; + unsigned int pipes[MAX_PIPES] = {0}; + unsigned int next_pipe_to_assign; + int odm_slice; + + pipes_needed = odm_factor; + + master_pipe = find_master_pipe_of_stream(ctx, state, stream->stream_id); + ASSERT(master_pipe); + + pipes_assigned = find_pipes_assigned_to_stream(ctx, state, stream->stream_id, pipes); + + find_more_free_pipes(ctx, state, stream->stream_id, pipes, &pipes_assigned, pipes_needed - pipes_assigned, existing_state); + + ASSERT(pipes_assigned == pipes_needed); + + next_pipe_to_assign = 0; + for (odm_slice = 0; odm_slice < odm_factor; odm_slice++) + pipe_pool->pipes_assigned_to_plane[odm_slice][0] = pipes[next_pipe_to_assign++]; + + pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine = 1; + pipe_pool->num_pipes_assigned_to_plane_for_odm_combine = odm_factor; + + return master_pipe; +} + +static struct pipe_ctx *assign_pipes_to_plane(struct dml2_context *ctx, struct dc_state *state, + const struct dc_stream_state *stream, + const struct dc_plane_state *plane, + int odm_factor, + int mpc_factor, + int plane_index, + struct dc_plane_pipe_pool *pipe_pool, + const struct dc_state *existing_state) +{ + struct pipe_ctx *master_pipe = NULL; + unsigned int plane_id; + unsigned int pipes_needed; + unsigned int pipes_assigned; + unsigned int pipes[MAX_PIPES] = {0}; + unsigned int next_pipe_to_assign; + int odm_slice, mpc_slice; + + if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) { + ASSERT(false); + return master_pipe; + } + + pipes_needed = mpc_factor * odm_factor; + + master_pipe = find_master_pipe_of_plane(ctx, state, plane_id); + ASSERT(master_pipe); + + pipes_assigned = find_pipes_assigned_to_plane(ctx, state, plane_id, pipes); + + find_more_pipes_for_stream(ctx, state, stream->stream_id, pipes, &pipes_assigned, pipes_needed - pipes_assigned, existing_state); + + ASSERT(pipes_assigned >= pipes_needed); + + next_pipe_to_assign = 0; + for (odm_slice = 0; odm_slice < odm_factor; odm_slice++) + for (mpc_slice = 0; mpc_slice < mpc_factor; mpc_slice++) + pipe_pool->pipes_assigned_to_plane[odm_slice][mpc_slice] = pipes[next_pipe_to_assign++]; + + pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine = mpc_factor; + pipe_pool->num_pipes_assigned_to_plane_for_odm_combine = odm_factor; + + return master_pipe; +} + +static bool is_pipe_used(const struct dc_plane_pipe_pool *pool, unsigned int pipe_idx) +{ + int i, j; + + for (i = 0; i < pool->num_pipes_assigned_to_plane_for_odm_combine; i++) { + for (j = 0; j < pool->num_pipes_assigned_to_plane_for_mpcc_combine; j++) { + if (pool->pipes_assigned_to_plane[i][j] == pipe_idx && pool->pipe_used[i][j]) + return true; + } + } + + return false; +} + +static void free_pipe(struct pipe_ctx *pipe) +{ + memset(pipe, 0, sizeof(struct pipe_ctx)); +} + +static void free_unused_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, + const struct dc_plane_state *plane, const struct dc_plane_pipe_pool *pool, unsigned int stream_id, int plane_index) +{ + int i; + bool is_plane_duplicate = ctx->v20.scratch.plane_duplicate_exists; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + if (state->res_ctx.pipe_ctx[i].plane_state == plane && + state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id && + (!is_plane_duplicate || + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx] == plane_index) && + !is_pipe_used(pool, state->res_ctx.pipe_ctx[i].pipe_idx)) { + free_pipe(&state->res_ctx.pipe_ctx[i]); + } + } +} + +static 
void remove_pipes_from_blend_trees(struct dml2_context *ctx, struct dc_state *state, struct dc_plane_pipe_pool *pipe_pool, unsigned int odm_slice) +{ + struct pipe_ctx *pipe; + int i; + + for (i = 0; i < pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine; i++) { + pipe = &state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][0]]; + if (pipe->top_pipe) + pipe->top_pipe->bottom_pipe = pipe->bottom_pipe; + + if (pipe->bottom_pipe) + pipe->bottom_pipe = pipe->top_pipe; + + pipe_pool->pipe_used[odm_slice][i] = true; + } +} + +static void map_pipes_for_stream(struct dml2_context *ctx, struct dc_state *state, const struct dc_stream_state *stream, + struct dc_pipe_mapping_scratch *scratch, const struct dc_state *existing_state) +{ + int odm_slice_index; + struct pipe_ctx *master_pipe = NULL; + + + master_pipe = assign_pipes_to_stream(ctx, state, stream, scratch->odm_info.odm_factor, &scratch->pipe_pool, existing_state); + sort_pipes_for_splitting(&scratch->pipe_pool); + + for (odm_slice_index = 0; odm_slice_index < scratch->odm_info.odm_factor; odm_slice_index++) { + remove_pipes_from_blend_trees(ctx, state, &scratch->pipe_pool, odm_slice_index); + + add_odm_slice_to_odm_tree(ctx, state, scratch, odm_slice_index); + + ctx->config.callbacks.acquire_secondary_pipe_for_mpc_odm(ctx->config.callbacks.dc, state, + master_pipe, &state->res_ctx.pipe_ctx[scratch->pipe_pool.pipes_assigned_to_plane[odm_slice_index][0]], true); + } +} + +static void map_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, const struct dc_stream_state *stream, const struct dc_plane_state *plane, + int plane_index, struct dc_pipe_mapping_scratch *scratch, const struct dc_state *existing_state) +{ + int odm_slice_index; + unsigned int plane_id; + struct pipe_ctx *master_pipe = NULL; + int i; + + if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) { + ASSERT(false); + return; + } + + master_pipe = assign_pipes_to_plane(ctx, state, stream, plane, scratch->odm_info.odm_factor, + scratch->mpc_info.mpc_factor, plane_index, &scratch->pipe_pool, existing_state); + sort_pipes_for_splitting(&scratch->pipe_pool); + + for (odm_slice_index = 0; odm_slice_index < scratch->odm_info.odm_factor; odm_slice_index++) { + // Now we have a list of all pipes to be used for this plane/stream, now setup the tree. 
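+		// For this slice: build the plane's MPCC blend chain
+		// (add_plane_to_blend_tree), then link the slice into the
+		// stream's ODM chain (add_odm_slice_to_odm_tree).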
+ scratch->odm_info.next_higher_pipe_for_odm_slice[odm_slice_index] = add_plane_to_blend_tree(ctx, state, + plane, + &scratch->pipe_pool, + odm_slice_index, + scratch->odm_info.next_higher_pipe_for_odm_slice[odm_slice_index]); + + add_odm_slice_to_odm_tree(ctx, state, scratch, odm_slice_index); + + for (i = 0; i < scratch->pipe_pool.num_pipes_assigned_to_plane_for_mpcc_combine; i++) { + + ctx->config.callbacks.acquire_secondary_pipe_for_mpc_odm(ctx->config.callbacks.dc, state, + master_pipe, &state->res_ctx.pipe_ctx[scratch->pipe_pool.pipes_assigned_to_plane[odm_slice_index][i]], true); + } + } + + free_unused_pipes_for_plane(ctx, state, plane, &scratch->pipe_pool, stream->stream_id, plane_index); +} + +static unsigned int get_target_mpc_factor(struct dml2_context *ctx, + struct dc_state *state, + const struct dml_display_cfg_st *disp_cfg, + struct dml2_dml_to_dc_pipe_mapping *mapping, + const struct dc_stream_status *status, + const struct dc_stream_state *stream, + int plane_idx) +{ + unsigned int plane_id; + unsigned int cfg_idx; + unsigned int mpc_factor; + + if (ctx->architecture == dml2_architecture_20) { + get_plane_id(ctx, state, status->plane_states[plane_idx], + stream->stream_id, plane_idx, &plane_id); + cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id); + mpc_factor = (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx]; + } else if (ctx->architecture == dml2_architecture_21) { + if (ctx->config.svp_pstate.callbacks.get_stream_subvp_type(state, stream) == SUBVP_PHANTOM) { + struct dc_stream_state *main_stream; + struct dc_stream_status *main_stream_status; + + /* get stream id of main stream */ + main_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(state, stream); + if (!main_stream) { + ASSERT(false); + return 1; + } + + main_stream_status = ctx->config.callbacks.get_stream_status(state, main_stream); + if (!main_stream_status) { + ASSERT(false); + return 1; + } + + /* get plane id for associated main plane */ + get_plane_id(ctx, state, main_stream_status->plane_states[plane_idx], + main_stream->stream_id, plane_idx, &plane_id); + } else { + get_plane_id(ctx, state, status->plane_states[plane_idx], + stream->stream_id, plane_idx, &plane_id); + } + + cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id); + mpc_factor = ctx->v21.mode_programming.programming->plane_programming[cfg_idx].num_dpps_required; + } else { + mpc_factor = 1; + ASSERT(false); + } + + /* For stereo timings, we need to pipe split */ + if (dml2_is_stereo_timing(stream)) + mpc_factor = 2; + + return mpc_factor; +} + +static unsigned int get_target_odm_factor( + const struct dml2_context *ctx, + struct dc_state *state, + const struct dml_display_cfg_st *disp_cfg, + struct dml2_dml_to_dc_pipe_mapping *mapping, + const struct dc_stream_state *stream) +{ + unsigned int cfg_idx; + + if (ctx->architecture == dml2_architecture_20) { + cfg_idx = find_disp_cfg_idx_by_stream_id( + mapping, stream->stream_id); + switch (disp_cfg->hw.ODMMode[cfg_idx]) { + case dml_odm_mode_bypass: + return 1; + case dml_odm_mode_combine_2to1: + return 2; + case dml_odm_mode_combine_4to1: + return 4; + default: + break; + } + } else if (ctx->architecture == dml2_architecture_21) { + if (ctx->config.svp_pstate.callbacks.get_stream_subvp_type(state, stream) == SUBVP_PHANTOM) { + struct dc_stream_state *main_stream; + + /* get stream id of main stream */ + main_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(state, stream); + if (!main_stream) + goto failed; + + /* get cfg idx for associated main 
stream */ + cfg_idx = find_disp_cfg_idx_by_stream_id( + mapping, main_stream->stream_id); + } else { + cfg_idx = find_disp_cfg_idx_by_stream_id( + mapping, stream->stream_id); + } + + return ctx->v21.mode_programming.programming->stream_programming[cfg_idx].num_odms_required; + } + +failed: + ASSERT(false); + return 1; +} + +static unsigned int get_source_odm_factor(const struct dml2_context *ctx, + struct dc_state *state, + const struct dc_stream_state *stream) +{ + struct pipe_ctx *otg_master = ctx->config.callbacks.get_otg_master_for_stream(&state->res_ctx, stream); + + if (!otg_master) + return 0; + + return ctx->config.callbacks.get_odm_slice_count(otg_master); +} + +static unsigned int get_source_mpc_factor(const struct dml2_context *ctx, + struct dc_state *state, + const struct dc_plane_state *plane) +{ + struct pipe_ctx *dpp_pipes[MAX_PIPES] = {0}; + int dpp_pipe_count = ctx->config.callbacks.get_dpp_pipes_for_plane(plane, + &state->res_ctx, dpp_pipes); + + ASSERT(dpp_pipe_count > 0); + return ctx->config.callbacks.get_mpc_slice_count(dpp_pipes[0]); +} + + +static void populate_mpc_factors_for_stream( + struct dml2_context *ctx, + const struct dml_display_cfg_st *disp_cfg, + struct dml2_dml_to_dc_pipe_mapping *mapping, + struct dc_state *state, + unsigned int stream_idx, + struct dml2_pipe_combine_factor odm_factor, + struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES]) +{ + const struct dc_stream_status *status = &state->stream_status[stream_idx]; + int i; + + for (i = 0; i < status->plane_count; i++) { + mpc_factors[i].source = get_source_mpc_factor(ctx, state, status->plane_states[i]); + mpc_factors[i].target = (odm_factor.target == 1) ? + get_target_mpc_factor(ctx, state, disp_cfg, mapping, status, state->streams[stream_idx], i) : 1; + } +} + +static void populate_odm_factors(const struct dml2_context *ctx, + const struct dml_display_cfg_st *disp_cfg, + struct dml2_dml_to_dc_pipe_mapping *mapping, + struct dc_state *state, + struct dml2_pipe_combine_factor odm_factors[MAX_PIPES]) +{ + int i; + + for (i = 0; i < state->stream_count; i++) { + odm_factors[i].source = get_source_odm_factor(ctx, state, state->streams[i]); + odm_factors[i].target = get_target_odm_factor( + ctx, state, disp_cfg, mapping, state->streams[i]); + } +} + +static bool unmap_dc_pipes_for_stream(struct dml2_context *ctx, + struct dc_state *state, + const struct dc_state *existing_state, + const struct dc_stream_state *stream, + const struct dc_stream_status *status, + struct dml2_pipe_combine_factor odm_factor, + struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES]) +{ + int plane_idx; + bool result = true; + + for (plane_idx = 0; plane_idx < status->plane_count; plane_idx++) + if (mpc_factors[plane_idx].target < mpc_factors[plane_idx].source) + result &= ctx->config.callbacks.update_pipes_for_plane_with_slice_count( + state, + existing_state, + ctx->config.callbacks.dc->res_pool, + status->plane_states[plane_idx], + mpc_factors[plane_idx].target); + if (odm_factor.target < odm_factor.source) + result &= ctx->config.callbacks.update_pipes_for_stream_with_slice_count( + state, + existing_state, + ctx->config.callbacks.dc->res_pool, + stream, + odm_factor.target); + return result; +} + +static bool map_dc_pipes_for_stream(struct dml2_context *ctx, + struct dc_state *state, + const struct dc_state *existing_state, + const struct dc_stream_state *stream, + const struct dc_stream_status *status, + struct dml2_pipe_combine_factor odm_factor, + struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES]) +{ + int 
plane_idx; + bool result = true; + + for (plane_idx = 0; plane_idx < status->plane_count; plane_idx++) + if (mpc_factors[plane_idx].target > mpc_factors[plane_idx].source) + result &= ctx->config.callbacks.update_pipes_for_plane_with_slice_count( + state, + existing_state, + ctx->config.callbacks.dc->res_pool, + status->plane_states[plane_idx], + mpc_factors[plane_idx].target); + if (odm_factor.target > odm_factor.source) + result &= ctx->config.callbacks.update_pipes_for_stream_with_slice_count( + state, + existing_state, + ctx->config.callbacks.dc->res_pool, + stream, + odm_factor.target); + return result; +} + +static bool map_dc_pipes_with_callbacks(struct dml2_context *ctx, + struct dc_state *state, + const struct dml_display_cfg_st *disp_cfg, + struct dml2_dml_to_dc_pipe_mapping *mapping, + const struct dc_state *existing_state) +{ + int i; + bool result = true; + + populate_odm_factors(ctx, disp_cfg, mapping, state, ctx->pipe_combine_scratch.odm_factors); + for (i = 0; i < state->stream_count; i++) + populate_mpc_factors_for_stream(ctx, disp_cfg, mapping, state, + i, ctx->pipe_combine_scratch.odm_factors[i], ctx->pipe_combine_scratch.mpc_factors[i]); + for (i = 0; i < state->stream_count; i++) + result &= unmap_dc_pipes_for_stream(ctx, state, existing_state, state->streams[i], + &state->stream_status[i], ctx->pipe_combine_scratch.odm_factors[i], ctx->pipe_combine_scratch.mpc_factors[i]); + for (i = 0; i < state->stream_count; i++) + result &= map_dc_pipes_for_stream(ctx, state, existing_state, state->streams[i], + &state->stream_status[i], ctx->pipe_combine_scratch.odm_factors[i], ctx->pipe_combine_scratch.mpc_factors[i]); + + return result; +} + +bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, struct dml2_dml_to_dc_pipe_mapping *mapping, const struct dc_state *existing_state) +{ + int stream_index, plane_index, i; + + unsigned int stream_disp_cfg_index; + unsigned int plane_disp_cfg_index; + unsigned int disp_cfg_index_max; + + unsigned int plane_id; + unsigned int stream_id; + + const unsigned int *ODMMode, *DPPPerSurface; + unsigned int odm_mode_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}, dpp_per_surface_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}; + struct dc_pipe_mapping_scratch scratch; + + if (ctx->config.map_dc_pipes_with_callbacks) + return map_dc_pipes_with_callbacks( + ctx, state, disp_cfg, mapping, existing_state); + + if (ctx->architecture == dml2_architecture_21) { + /* + * Extract ODM and DPP outputs from DML2.1 and map them in an array as required for pipe mapping in dml2_map_dc_pipes. + * As data cannot be directly extracted in const pointers, assign these arrays to const pointers before proceeding to + * maximize the reuse of existing code. Const pointers are required because dml2.0 dml_display_cfg_st is const. 
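+ * For example (hypothetical values): if stream_programming[0].num_odms_required
+ * is 2 and plane_programming[0].num_dpps_required is 1, the loop below yields
+ * odm_mode_array[0] = 2 and dpp_per_surface_array[0] = 1, which the legacy
+ * mapping code then consumes through the ODMMode and DPPPerSurface pointers.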
+ *
+ */
+ for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+ odm_mode_array[i] = ctx->v21.mode_programming.programming->stream_programming[i].num_odms_required;
+ dpp_per_surface_array[i] = ctx->v21.mode_programming.programming->plane_programming[i].num_dpps_required;
+ }
+
+ ODMMode = (const unsigned int *)odm_mode_array;
+ DPPPerSurface = (const unsigned int *)dpp_per_surface_array;
+ disp_cfg_index_max = __DML2_WRAPPER_MAX_STREAMS_PLANES__;
+ } else {
+ ODMMode = (unsigned int *)disp_cfg->hw.ODMMode;
+ DPPPerSurface = disp_cfg->hw.DPPPerSurface;
+ disp_cfg_index_max = __DML_NUM_PLANES__;
+ }
+
+ for (stream_index = 0; stream_index < state->stream_count; stream_index++) {
+ memset(&scratch, 0, sizeof(struct dc_pipe_mapping_scratch));
+
+ stream_id = state->streams[stream_index]->stream_id;
+ stream_disp_cfg_index = find_disp_cfg_idx_by_stream_id(mapping, stream_id);
+ if (stream_disp_cfg_index >= disp_cfg_index_max)
+ continue;
+
+ if (ctx->architecture == dml2_architecture_20) {
+ if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_bypass) {
+ scratch.odm_info.odm_factor = 1;
+ } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_2to1) {
+ scratch.odm_info.odm_factor = 2;
+ } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_4to1) {
+ scratch.odm_info.odm_factor = 4;
+ } else {
+ ASSERT(false);
+ scratch.odm_info.odm_factor = 1;
+ }
+ } else if (ctx->architecture == dml2_architecture_21) {
+ /* After the DML2.1 update, the ODM interpretation is no longer the same as for DML2.0.
+ * This is not an issue with the new resource management logic. This block ensures
+ * backward compatibility between legacy pipe management and the updated DML.
+ */
+ if (ODMMode[stream_disp_cfg_index] == 1) {
+ scratch.odm_info.odm_factor = 1;
+ } else if (ODMMode[stream_disp_cfg_index] == 2) {
+ scratch.odm_info.odm_factor = 2;
+ } else if (ODMMode[stream_disp_cfg_index] == 4) {
+ scratch.odm_info.odm_factor = 4;
+ } else {
+ ASSERT(false);
+ scratch.odm_info.odm_factor = 1;
+ }
+ }
+ calculate_odm_slices(state->streams[stream_index], scratch.odm_info.odm_factor, scratch.odm_info.odm_slice_end_x);
+
+ // If there are no planes, we still want to set up ODM...
+ if (state->stream_status[stream_index].plane_count == 0) {
+ map_pipes_for_stream(ctx, state, state->streams[stream_index], &scratch, existing_state);
+ }
+
+ for (plane_index = 0; plane_index < state->stream_status[stream_index].plane_count; plane_index++) {
+ // Planes are ordered top to bottom.
+ if (get_plane_id(ctx, state, state->stream_status[stream_index].plane_states[plane_index],
+ stream_id, plane_index, &plane_id)) {
+ plane_disp_cfg_index = find_disp_cfg_idx_by_plane_id(mapping, plane_id);
+
+ // Set up mpc_info for this plane
+ scratch.mpc_info.prev_odm_pipe = NULL;
+ if (scratch.odm_info.odm_factor == 1 && plane_disp_cfg_index < disp_cfg_index_max) {
+ // If ODM combine is not in use, then the number of pipes
+ // per plane is determined by the MPC combine factor
+ scratch.mpc_info.mpc_factor = DPPPerSurface[plane_disp_cfg_index];
+
+ // For stereo timings, we need to pipe split
+ if (dml2_is_stereo_timing(state->streams[stream_index]))
+ scratch.mpc_info.mpc_factor = 2;
+ } else {
+ // If ODM combine is enabled, then we use at most 1 pipe per
+ // odm slice per plane, i.e.
MPC combine is never used + scratch.mpc_info.mpc_factor = 1; + } + + ASSERT(scratch.odm_info.odm_factor * scratch.mpc_info.mpc_factor > 0); + + // Clear the pool assignment scratch (which is per plane) + memset(&scratch.pipe_pool, 0, sizeof(struct dc_plane_pipe_pool)); + + map_pipes_for_plane(ctx, state, state->streams[stream_index], + state->stream_status[stream_index].plane_states[plane_index], plane_index, &scratch, existing_state); + } else { + // Plane ID cannot be generated, therefore no DML mapping can be performed. + ASSERT(false); + } + } + + } + + if (!validate_pipe_assignment(ctx, state, disp_cfg, mapping)) + ASSERT(false); + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + if (!ctx->config.callbacks.build_scaling_params(pipe)) { + ASSERT(false); + } + } + + if (ctx->config.callbacks.build_test_pattern_params && + pipe->stream && + pipe->prev_odm_pipe == NULL && + pipe->top_pipe == NULL) + ctx->config.callbacks.build_test_pattern_params(&state->res_ctx, pipe); + } + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.h new file mode 100644 index 000000000000..1538b708d8be --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML2_DC_RESOURCE_MGMT_H__ +#define __DML2_DC_RESOURCE_MGMT_H__ + +#include "dml2_dc_types.h" + +struct dml2_context; +struct dml2_dml_to_dc_pipe_mapping; +struct dml_display_cfg_st; + +/* + * dml2_map_dc_pipes - Creates a pipe linkage in dc_state based on current display config. + * @ctx: Input dml2 context + * @state: Current dc_state to be updated. + * @disp_cfg: Current display config. + * @mapping: Pipe mapping logic structure to keep a track of pipes to be used. + * + * Based on ODM and DPPPersurface outputs calculated by the DML for the current display + * config, create a pipe linkage in dc_state which is then used by DC core. + * Make this function generic to be used by multiple DML versions. + * + * Return: True if pipe mapping and linking is successful, false otherwise. 
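+ * The implemented flow, in summary: for each stream, derive its ODM factor
+ * from the DML output (mapping pipes directly when the stream has no planes);
+ * for each plane, derive its MPC factor and acquire and link the DC pipes;
+ * finally, rebuild scaling and (where provided) test pattern parameters for
+ * every assigned pipe.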
+ */ + +bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, struct dml2_dml_to_dc_pipe_mapping *mapping, const struct dc_state *existing_state); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_types.h new file mode 100644 index 000000000000..7ca7f2a743c2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_types.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + + +/* + * Wrapper header for externally defined types from DC. These types come from + * dc headers when building DML2 as part of DC, but are defined here when building + * DML2 as a standalone library (such as for unit testing). + */ + +#ifndef __DML2_DC_TYPES_H__ +#define __DML2_DC_TYPES_H__ + +#include "resource.h" +#include "core_types.h" +#include "dsc.h" +#include "clk_mgr.h" +#include "dc_state_priv.h" + +#endif //__DML2_DC_TYPES_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_internal_types.h new file mode 100644 index 000000000000..55b3e3ca54f7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_internal_types.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML2_INTERNAL_TYPES_H__ +#define __DML2_INTERNAL_TYPES_H__ + +#include "dml2_dc_types.h" +#include "display_mode_core.h" +#include "dml2_wrapper.h" +#include "dml2_policy.h" + +#include "dml_top.h" +#include "dml21_wrapper.h" + +struct dml2_wrapper_optimize_configuration_params { + struct display_mode_lib_st *dml_core_ctx; + struct dml2_configuration_options *config; + struct ip_params_st *ip_params; + struct dml_display_cfg_st *cur_display_config; + struct dml_display_cfg_st *new_display_config; + const struct dml_mode_support_info_st *cur_mode_support_info; + struct dml_mode_eval_policy_st *cur_policy; + struct dml_mode_eval_policy_st *new_policy; +}; + +struct dml2_calculate_lowest_supported_state_for_temp_read_scratch { + struct dml_mode_support_info_st evaluation_info; + dml_float_t uclk_change_latencies[__DML_MAX_STATE_ARRAY_SIZE__]; + struct dml_display_cfg_st cur_display_config; + struct dml_display_cfg_st new_display_config; + struct dml_mode_eval_policy_st new_policy; + struct dml_mode_eval_policy_st cur_policy; +}; + +struct dml2_create_scratch { + struct dml2_policy_build_synthetic_soc_states_scratch build_synthetic_socbb_scratch; + struct soc_states_st in_states; +}; + +struct dml2_calculate_rq_and_dlg_params_scratch { + struct _vcs_dpi_dml_display_rq_regs_st rq_regs; + struct _vcs_dpi_dml_display_dlg_regs_st disp_dlg_regs; + struct _vcs_dpi_dml_display_ttu_regs_st disp_ttu_regs; +}; + +#define __DML2_WRAPPER_MAX_STREAMS_PLANES__ 6 + +struct dml2_dml_to_dc_pipe_mapping { + unsigned int disp_cfg_to_stream_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool disp_cfg_to_stream_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + unsigned int disp_cfg_to_plane_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool disp_cfg_to_plane_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + unsigned int dml_pipe_idx_to_stream_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool dml_pipe_idx_to_stream_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + unsigned int dml_pipe_idx_to_plane_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool dml_pipe_idx_to_plane_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + unsigned int dml_pipe_idx_to_plane_index[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool dml_pipe_idx_to_plane_index_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; +}; + +struct dml2_wrapper_scratch { + struct dml_display_cfg_st cur_display_config; + struct dml_display_cfg_st new_display_config; + struct dml_mode_eval_policy_st new_policy; + struct dml_mode_eval_policy_st cur_policy; + struct dml_mode_support_info_st mode_support_info; + struct dml_mode_support_ex_params_st mode_support_params; + + struct dummy_pstate_entry dummy_pstate_table[4]; + + struct dml2_create_scratch create_scratch; + struct dml2_calculate_lowest_supported_state_for_temp_read_scratch dml2_calculate_lowest_supported_state_for_temp_read_scratch; + struct dml2_calculate_rq_and_dlg_params_scratch calculate_rq_and_dlg_params_scratch; + + struct dml2_wrapper_optimize_configuration_params optimize_configuration_params; + struct dml2_policy_build_synthetic_soc_states_params build_synthetic_socbb_params; + + struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; + bool enable_flexible_pipe_mapping; + bool plane_duplicate_exists; + int 
hpo_stream_to_link_encoder_mapping[MAX_HPO_DP2_ENCODERS]; +}; + +struct dml2_helper_det_policy_scratch { + int dpps_per_surface[MAX_PLANES]; +}; + +enum dml2_architecture { + dml2_architecture_20, + dml2_architecture_21 +}; + +struct prepare_mcache_programming_locals { + struct dml2_build_mcache_programming_in_out build_mcache_programming_params; +}; + +struct dml21_wrapper_scratch { + struct prepare_mcache_programming_locals prepare_mcache_locals; + struct pipe_ctx temp_pipe; +}; + +struct dml2_pipe_combine_factor { + unsigned int source; + unsigned int target; +}; + +struct dml2_pipe_combine_scratch { + struct dml2_pipe_combine_factor odm_factors[MAX_PIPES]; + struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES][MAX_PIPES]; +}; + +struct dml2_context { + enum dml2_architecture architecture; + struct dml2_configuration_options config; + struct dml2_helper_det_policy_scratch det_helper_scratch; + struct dml2_pipe_combine_scratch pipe_combine_scratch; + union { + struct { + struct display_mode_lib_st dml_core_ctx; + struct dml2_wrapper_scratch scratch; + struct dcn_watermarks g6_temp_read_watermark_set; + } v20; + struct { + struct dml21_wrapper_scratch scratch; + struct dml2_initialize_instance_in_out dml_init; + struct dml2_display_cfg display_config; + struct dml2_check_mode_supported_in_out mode_support; + struct dml2_build_mode_programming_in_out mode_programming; + struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; + } v21; + }; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.c new file mode 100644 index 000000000000..66040c877d68 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.c @@ -0,0 +1,911 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dml2_dc_types.h"
+#include "dml2_internal_types.h"
+#include "dml2_utils.h"
+#include "dml2_mall_phantom.h"
+
+unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx, struct dc_state *context)
+{
+ uint32_t num_ways = 0;
+ uint32_t bytes_per_pixel = 0;
+ uint32_t cache_lines_used = 0;
+ uint32_t lines_per_way = 0;
+ uint32_t total_cache_lines = 0;
+ uint32_t bytes_in_mall = 0;
+ uint32_t num_mblks = 0;
+ uint32_t cache_lines_per_plane = 0;
+ uint32_t i = 0;
+ uint32_t mblk_width = 0;
+ uint32_t mblk_height = 0;
+ uint32_t full_vp_width_blk_aligned = 0;
+ uint32_t mall_alloc_width_blk_aligned = 0;
+ uint32_t mall_alloc_height_blk_aligned = 0;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ // Find the phantom pipes
+ if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe &&
+ ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
+ bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
+ mblk_width = ctx->config.mall_cfg.mblk_width_pixels;
+ mblk_height = bytes_per_pixel == 4 ? ctx->config.mall_cfg.mblk_height_4bpe_pixels : ctx->config.mall_cfg.mblk_height_8bpe_pixels;
+
+ /* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) -
+ * FLOOR(vp_x_start, blk_width)
+ */
+ full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x +
+ pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) -
+ (pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);
+
+ /* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */
+ mall_alloc_width_blk_aligned = full_vp_width_blk_aligned;
+
+ /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */
+ mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) /
+ mblk_height * mblk_height + mblk_height;
+
+ /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c;
+ * full_mblk_height_ub_l/c = mall_alloc_height_blk_aligned_l/c;
+ * num_mblk_l/c = (full_mblk_width_ub_l/c / mblk_width_l/c) * (full_mblk_height_ub_l/c / mblk_height_l/c);
+ * (Should be divisible, but round up if not)
+ */
+ num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
+ ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
+ bytes_in_mall = num_mblks * ctx->config.mall_cfg.mblk_size_bytes;
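+ // Worked example with hypothetical MALL parameters (128x64 pixel mblk,
+ // 64 KiB per mblk, 1920x1080 4bpe phantom viewport at x = 0):
+ // full_vp_width_blk_aligned = (2047 / 128) * 128 - 0 = 1920,
+ // mall_alloc_height_blk_aligned = (1142 / 64) * 64 + 64 = 1152,
+ // num_mblks = 15 * 18 = 270, bytes_in_mall = 270 * 65536 bytes.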
+ // cache lines used is total bytes / cache_line size. Add +2 for worst case
+ // alignment (MALL is 64-byte aligned)
+ cache_lines_per_plane = bytes_in_mall / ctx->config.mall_cfg.cache_line_size_bytes + 2;
+
+ // For DCC we must cache the meta surface, so double cache lines required
+ if (pipe->plane_state->dcc.enable)
+ cache_lines_per_plane *= 2;
+ cache_lines_used += cache_lines_per_plane;
+ }
+ }
+
+ total_cache_lines = ctx->config.mall_cfg.max_cab_allocation_bytes / ctx->config.mall_cfg.cache_line_size_bytes;
+ lines_per_way = total_cache_lines / ctx->config.mall_cfg.cache_num_ways;
+ num_ways = cache_lines_used / lines_per_way;
+ if (cache_lines_used % lines_per_way > 0)
+ num_ways++;
+
+ return num_ways;
+}
+
+static void merge_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *context)
+{
+ int i;
+
+ /* merge pipes if necessary */
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ // For now merge all pipes for SubVP since pipe split case isn't supported yet
+
+ /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */
+ if (pipe->prev_odm_pipe) {
+ /* split off odm pipe */
+ pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe;
+ if (pipe->next_odm_pipe)
+ pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe;
+
+ pipe->bottom_pipe = NULL;
+ pipe->next_odm_pipe = NULL;
+ pipe->plane_state = NULL;
+ pipe->stream = NULL;
+ pipe->top_pipe = NULL;
+ pipe->prev_odm_pipe = NULL;
+ if (pipe->stream_res.dsc)
+ ctx->config.svp_pstate.callbacks.release_dsc(&context->res_ctx, ctx->config.svp_pstate.callbacks.dc->res_pool, &pipe->stream_res.dsc);
+ memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
+ memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
+ } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
+ struct pipe_ctx *top_pipe = pipe->top_pipe;
+ struct pipe_ctx *bottom_pipe = pipe->bottom_pipe;
+
+ top_pipe->bottom_pipe = bottom_pipe;
+ if (bottom_pipe)
+ bottom_pipe->top_pipe = top_pipe;
+
+ pipe->top_pipe = NULL;
+ pipe->bottom_pipe = NULL;
+ pipe->plane_state = NULL;
+ pipe->stream = NULL;
+ memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
+ memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
+ }
+ }
+}
+
+static bool all_pipes_have_stream_and_plane(struct dml2_context *ctx, const struct dc_state *context)
+{
+ int i;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (!pipe->plane_state)
+ return false;
+ }
+ return true;
+}
+
+static bool mpo_in_use(const struct dc_state *context)
+{
+ int i;
+
+ for (i = 0; i < context->stream_count; i++) {
+ if (context->stream_status[i].plane_count > 1)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * get_num_free_pipes: Calculate number of free pipes
+ *
+ * This function assumes that a "used" pipe is a pipe that has
+ * both a stream and a plane assigned to it.
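+ * For example (hypothetical numbers): with 6 usable pipes, one stream on a
+ * single pipe and a second stream split across two pipes give num_pipes = 3,
+ * so free_pipes = 6 - 3 = 3.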
+ * + * @dc: current dc state + * @context: new dc state + * + * Return: + * Number of free pipes available in the context + */ +static unsigned int get_num_free_pipes(struct dml2_context *ctx, struct dc_state *state) +{ + unsigned int i; + unsigned int free_pipes = 0; + unsigned int num_pipes = 0; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (pipe->stream && !pipe->top_pipe) { + while (pipe) { + num_pipes++; + pipe = pipe->bottom_pipe; + } + } + } + + free_pipes = ctx->config.dcn_pipe_count - num_pipes; + return free_pipes; +} + +/* + * dcn32_assign_subvp_pipe: Function to decide which pipe will use Sub-VP. + * + * We enter this function if we are Sub-VP capable (i.e. enough pipes available) + * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if + * we are forcing SubVP P-State switching on the current config. + * + * The number of pipes used for the chosen surface must be less than or equal to the + * number of free pipes available. + * + * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK). + * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own + * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't + * support MCLK switching naturally [i.e. ACTIVE or VBLANK]). + * + * @param dc: current dc state + * @param context: new dc state + * @param index: [out] dc pipe index for the pipe chosen to have phantom pipes assigned + * + * Return: + * True if a valid pipe assignment was found for Sub-VP. Otherwise false. + */ +static bool assign_subvp_pipe(struct dml2_context *ctx, struct dc_state *context, unsigned int *index) +{ + unsigned int i, pipe_idx; + unsigned int max_frame_time = 0; + bool valid_assignment_found = false; + unsigned int free_pipes = 2; //dcn32_get_num_free_pipes(dc, context); + bool current_assignment_freesync = false; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + unsigned int num_pipes = 0; + unsigned int refresh_rate = 0; + + if (!pipe->stream) + continue; + + // Round up + refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 + + pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1) + / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total); + /* SubVP pipe candidate requirements: + * - Refresh rate < 120hz + * - Not able to switch in vactive naturally (switching in active means the + * DET provides enough buffer to hide the P-State switch latency -- trying + * to combine this with SubVP can cause issues with the scheduling). 
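+ * For example (hypothetical numbers): pix_clk_100hz = 5940000 with
+ * h_total = 4400 and v_total = 2250 gives refresh_rate =
+ * 594000000 / 9900000 = 60, which passes the < 120 check.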
+ */ + if (pipe->plane_state && !pipe->top_pipe && + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_NONE && refresh_rate < 120 && + vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) { + while (pipe) { + num_pipes++; + pipe = pipe->bottom_pipe; + } + + pipe = &context->res_ctx.pipe_ctx[i]; + if (num_pipes <= free_pipes) { + struct dc_stream_state *stream = pipe->stream; + unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total / + (double)(stream->timing.pix_clk_100hz * 100)) * 1000000; + if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) { + *index = i; + max_frame_time = frame_us; + valid_assignment_found = true; + current_assignment_freesync = false; + /* For the 2-Freesync display case, still choose the one with the + * longest frame time + */ + } else if (stream->ignore_msa_timing_param && (!valid_assignment_found || + (current_assignment_freesync && frame_us > max_frame_time))) { + *index = i; + valid_assignment_found = true; + current_assignment_freesync = true; + } + } + } + pipe_idx++; + } + return valid_assignment_found; +} + +/* + * enough_pipes_for_subvp: Function to check if there are "enough" pipes for SubVP. + * + * This function returns true if there are enough free pipes + * to create the required phantom pipes for any given stream + * (that does not already have phantom pipe assigned). + * + * e.g. For a 2 stream config where the first stream uses one + * pipe and the second stream uses 2 pipes (i.e. pipe split), + * this function will return true because there is 1 remaining + * pipe which can be used as the phantom pipe for the non pipe + * split pipe. + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * True if there are enough free pipes to assign phantom pipes to at least one + * stream that does not already have phantom pipes assigned. Otherwise false. + */ +static bool enough_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *state) +{ + unsigned int i, split_cnt, free_pipes; + unsigned int min_pipe_split = ctx->config.dcn_pipe_count + 1; // init as max number of pipes + 1 + bool subvp_possible = false; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + // Find the minimum pipe split count for non SubVP pipes + if (pipe->stream && !pipe->top_pipe && + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_NONE) { + split_cnt = 0; + while (pipe) { + split_cnt++; + pipe = pipe->bottom_pipe; + } + + if (split_cnt < min_pipe_split) + min_pipe_split = split_cnt; + } + } + + free_pipes = get_num_free_pipes(ctx, state); + + // SubVP only possible if at least one pipe is being used (i.e. free_pipes + // should not equal to the pipe_count) + if (free_pipes >= min_pipe_split && free_pipes < ctx->config.dcn_pipe_count) + subvp_possible = true; + + return subvp_possible; +} + +/* + * subvp_subvp_schedulable: Determine if SubVP + SubVP config is schedulable + * + * High level algorithm: + * 1. Find longest microschedule length (in us) between the two SubVP pipes + * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both + * pipes still allows for the maximum microschedule to fit in the active + * region for both pipes. 
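+ * For example (hypothetical numbers): with vactive1_us = vactive2_us = 15000,
+ * vblank1_us = vblank2_us = 1600 and a maximum microschedule of 500 us,
+ * (15000 - 1600) / 2 = 6700 > 500 holds for both pipes, so the
+ * SubVP + SubVP pair would be considered schedulable.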
+ * + * @dc: current dc state + * @context: new dc state + * + * Return: + * bool - True if the SubVP + SubVP config is schedulable, false otherwise + */ +static bool subvp_subvp_schedulable(struct dml2_context *ctx, struct dc_state *context) +{ + struct pipe_ctx *subvp_pipes[2]; + struct dc_stream_state *phantom = NULL; + uint32_t microschedule_lines = 0; + uint32_t index = 0; + uint32_t i; + uint32_t max_microschedule_us = 0; + int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us; + + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + uint32_t time_us = 0; + + /* Loop to calculate the maximum microschedule time between the two SubVP pipes, + * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. + */ + if (pipe->stream && pipe->plane_state && !pipe->top_pipe && + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + phantom = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream); + microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + + phantom->timing.v_addressable; + + // Round up when calculating microschedule time (+ 1 at the end) + time_us = (microschedule_lines * phantom->timing.h_total) / + (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 + + ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us + + ctx->config.svp_pstate.subvp_fw_processing_delay_us + 1; + if (time_us > max_microschedule_us) + max_microschedule_us = time_us; + + subvp_pipes[index] = pipe; + index++; + + // Maximum 2 SubVP pipes + if (index == 2) + break; + } + } + vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) / + (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) / + (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) * + subvp_pipes[0]->stream->timing.h_total) / + (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) * + subvp_pipes[1]->stream->timing.h_total) / + (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; + + if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && + (vactive2_us - vblank1_us) / 2 > max_microschedule_us) + return true; + + return false; +} + +/* + * dml2_svp_drr_schedulable: Determine if SubVP + DRR config is schedulable + * + * High level algorithm: + * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe + * 2. 
Determine the frame time for the DRR display when adding required margin for MCLK switching + * (the margin is equal to the MALL region + DRR margin (500us)) + * 3.If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR frame)) + * then report the configuration as supported + * + * @dc: current dc state + * @context: new dc state + * @drr_pipe: DRR pipe_ctx for the SubVP + DRR config + * + * Return: + * bool - True if the SubVP + DRR config is schedulable, false otherwise + */ +bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context, struct dc_crtc_timing *drr_timing) +{ + bool schedulable = false; + uint32_t i; + struct pipe_ctx *pipe = NULL; + struct dc_crtc_timing *main_timing = NULL; + struct dc_crtc_timing *phantom_timing = NULL; + struct dc_stream_state *phantom_stream; + int16_t prefetch_us = 0; + int16_t mall_region_us = 0; + int16_t drr_frame_us = 0; // nominal frame time + int16_t subvp_active_us = 0; + int16_t stretched_drr_us = 0; + int16_t drr_stretched_vblank_us = 0; + int16_t max_vblank_mallregion = 0; + + // Find SubVP pipe + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + // We check for master pipe, but it shouldn't matter since we only need + // the pipe for timing info (stream should be same for any pipe splits) + if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) + continue; + + // Find the SubVP pipe + if (ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) + break; + } + + phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream); + main_timing = &pipe->stream->timing; + phantom_timing = &phantom_stream->timing; + prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + + ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us; + subvp_active_us = main_timing->v_addressable * main_timing->h_total / + (double)(main_timing->pix_clk_100hz * 100) * 1000000; + drr_frame_us = drr_timing->v_total * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000; + // P-State allow width and FW delays already included phantom_timing->v_addressable + mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; + stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; + drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); + max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; + + /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the + * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis + * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, + * and the max of (VBLANK blanking time, MALL region)). + */ + if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && + subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) + schedulable = true; + + return schedulable; +} + + +/* + * subvp_vblank_schedulable: Determine if SubVP + VBLANK config is schedulable + * + * High level algorithm: + * 1. 
Get timing for SubVP pipe, phantom pipe, and VBLANK pipe + * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time)) + * then report the configuration as supported + * 3. If the VBLANK display is DRR, then take the DRR static schedulability path + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * bool - True if the SubVP + VBLANK/DRR config is schedulable, false otherwise + */ +static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *context) +{ + struct pipe_ctx *pipe = NULL; + struct pipe_ctx *subvp_pipe = NULL; + bool found = false; + bool schedulable = false; + uint32_t i = 0; + uint8_t vblank_index = 0; + uint16_t prefetch_us = 0; + uint16_t mall_region_us = 0; + uint16_t vblank_frame_us = 0; + uint16_t subvp_active_us = 0; + uint16_t vblank_blank_us = 0; + uint16_t max_vblank_mallregion = 0; + struct dc_crtc_timing *main_timing = NULL; + struct dc_crtc_timing *phantom_timing = NULL; + struct dc_crtc_timing *vblank_timing = NULL; + struct dc_stream_state *phantom_stream; + enum mall_stream_type pipe_mall_type; + + /* For SubVP + VBLANK/DRR cases, we assume there can only be + * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK + * is supported, it is either a single VBLANK case or two VBLANK + * displays which are synchronized (in which case they have identical + * timings). + */ + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe); + + // We check for master pipe, but it shouldn't matter since we only need + // the pipe for timing info (stream should be same for any pipe splits) + if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) + continue; + + if (!found && pipe_mall_type == SUBVP_NONE) { + // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). 
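+ // Only the first such pipe needs to be recorded here; per the
+ // assumption above, any additional VBLANK/DRR displays are
+ // synchronized with it and share its timing.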
+ vblank_index = i; + found = true; + } + + if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) + subvp_pipe = pipe; + } + // Use ignore_msa_timing_param flag to identify as DRR + if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) { + // SUBVP + DRR case + schedulable = dml2_svp_drr_schedulable(ctx, context, &context->res_ctx.pipe_ctx[vblank_index].stream->timing); + } else if (found) { + phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, subvp_pipe->stream); + main_timing = &subvp_pipe->stream->timing; + phantom_timing = &phantom_stream->timing; + vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; + // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe + // Also include the prefetch end to mallstart delay time + prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + + ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us; + // P-State allow width and FW delays already included phantom_timing->v_addressable + mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; + vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total / + (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; + vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total / + (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; + subvp_active_us = main_timing->v_addressable * main_timing->h_total / + (double)(main_timing->pix_clk_100hz * 100) * 1000000; + max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us; + + // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, + // and the max of (VBLANK blanking time, MALL region) + // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0) + if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) + schedulable = true; + } + return schedulable; +} + +/* + * subvp_validate_static_schedulability: Check which SubVP case is calculated and handle + * static analysis based on the case. + * + * Three cases: + * 1. SubVP + SubVP + * 2. SubVP + VBLANK (DRR checked internally) + * 3. SubVP + VACTIVE (currently unsupported) + * + * @dc: current dc state + * @context: new dc state + * @vlevel: Voltage level calculated by DML + * + * Return: + * bool - True if statically schedulable, false otherwise + */ +bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc_state *context, enum dml_dram_clock_change_support pstate_change_type) +{ + bool schedulable = true; // true by default for single display case + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + uint32_t i, pipe_idx; + uint8_t subvp_count = 0; + uint8_t vactive_count = 0; + + for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + enum mall_stream_type pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe); + + if (!pipe->stream) + continue; + + if (pipe->plane_state && !pipe->top_pipe && + pipe_mall_type == SUBVP_MAIN) + subvp_count++; + + // Count how many planes that aren't SubVP/phantom are capable of VACTIVE + // switching (SubVP + VACTIVE unsupported). 
In situations where we force + // SubVP for a VACTIVE plane, we don't want to increment the vactive_count. + if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 && + pipe_mall_type == SUBVP_NONE) { + vactive_count++; + } + pipe_idx++; + } + + if (subvp_count == 2) { + // Static schedulability check for SubVP + SubVP case + schedulable = subvp_subvp_schedulable(ctx, context); + } else if (pstate_change_type == dml_dram_clock_change_vblank_w_mall_sub_vp) { + // Static schedulability check for SubVP + VBLANK case. Also handle the case where + // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK) + if (vactive_count > 0) + schedulable = false; + else + schedulable = subvp_vblank_schedulable(ctx, context); + } else if (pstate_change_type == dml_dram_clock_change_vactive_w_mall_sub_vp && + vactive_count > 0) { + // For single display SubVP cases, DML will output dm_dram_clock_change_vactive_w_mall_sub_vp by default. + // We tell the difference between SubVP vs. SubVP + VACTIVE by checking the vactive_count. + // SubVP + VACTIVE currently unsupported + schedulable = false; + } + return schedulable; +} + +static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state *state, + struct pipe_ctx *ref_pipe, + struct dc_stream_state *phantom_stream, + unsigned int dc_pipe_idx, + unsigned int svp_height, + unsigned int svp_vstartup) +{ + unsigned int i; + double line_time, fp_and_sync_width_time; + struct pipe_ctx *pipe; + uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; + static const double cvt_rb_vblank_max = ((double) 460 / (1000 * 1000)); + + // Find DML pipe index (pipe_idx) using dc_pipe_idx + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { + pipe = &state->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (i == dc_pipe_idx) + break; + } + + // Calculate lines required for pstate allow width and FW processing delays + pstate_width_fw_delay_lines = ((double)(ctx->config.svp_pstate.subvp_fw_processing_delay_us + + ctx->config.svp_pstate.subvp_pstate_allow_width_us) / 1000000) * + (ref_pipe->stream->timing.pix_clk_100hz * 100) / + (double)ref_pipe->stream->timing.h_total; + + // DML calculation for MALL region doesn't take into account FW delay + // and required pstate allow width for multi-display cases + /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned + * to 2 swaths (i.e. 
16 lines) + */ + phantom_vactive = svp_height + pstate_width_fw_delay_lines + ctx->config.svp_pstate.subvp_swath_height_margin_lines; + + phantom_stream->timing.v_front_porch = 1; + + line_time = phantom_stream->timing.h_total / ((double)phantom_stream->timing.pix_clk_100hz * 100); + fp_and_sync_width_time = (phantom_stream->timing.v_front_porch + phantom_stream->timing.v_sync_width) * line_time; + + if ((svp_vstartup * line_time) + fp_and_sync_width_time > cvt_rb_vblank_max) { + svp_vstartup = (cvt_rb_vblank_max - fp_and_sync_width_time) / line_time; + } + + // For backporch of phantom pipe, use vstartup of the main pipe + phantom_bp = svp_vstartup; + + phantom_stream->dst.y = 0; + phantom_stream->dst.height = phantom_vactive; + phantom_stream->src.y = 0; + phantom_stream->src.height = phantom_vactive; + + phantom_stream->timing.v_addressable = phantom_vactive; + + phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + + phantom_stream->timing.v_front_porch + + phantom_stream->timing.v_sync_width + + phantom_bp; + phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing +} + +static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int dc_pipe_idx, unsigned int svp_height, unsigned int vstartup) +{ + struct pipe_ctx *ref_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx]; + struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_phantom_stream( + ctx->config.svp_pstate.callbacks.dc, + state, + ref_pipe->stream); + + /* stream has limited viewport and small timing */ + memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); + memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); + memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); + set_phantom_stream_timing(ctx, state, ref_pipe, phantom_stream, dc_pipe_idx, svp_height, vstartup); + + ctx->config.svp_pstate.callbacks.add_phantom_stream(ctx->config.svp_pstate.callbacks.dc, + state, + phantom_stream, + ref_pipe->stream); + return phantom_stream; +} + +static void enable_phantom_plane(struct dml2_context *ctx, + struct dc_state *state, + struct dc_stream_state *phantom_stream, + unsigned int dc_pipe_idx) +{ + struct dc_plane_state *phantom_plane = NULL; + struct dc_plane_state *prev_phantom_plane = NULL; + struct pipe_ctx *curr_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx]; + + while (curr_pipe) { + if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) { + phantom_plane = prev_phantom_plane; + } else { + phantom_plane = ctx->config.svp_pstate.callbacks.create_phantom_plane( + ctx->config.svp_pstate.callbacks.dc, + state, + curr_pipe->plane_state); + if (!phantom_plane) + return; + } + + memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); + memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, + sizeof(phantom_plane->scaling_quality)); + memcpy(&phantom_plane->src_rect, &curr_pipe->plane_state->src_rect, sizeof(phantom_plane->src_rect)); + memcpy(&phantom_plane->dst_rect, &curr_pipe->plane_state->dst_rect, sizeof(phantom_plane->dst_rect)); + memcpy(&phantom_plane->clip_rect, &curr_pipe->plane_state->clip_rect, sizeof(phantom_plane->clip_rect)); + memcpy(&phantom_plane->plane_size, &curr_pipe->plane_state->plane_size, + sizeof(phantom_plane->plane_size)); + memcpy(&phantom_plane->tiling_info, &curr_pipe->plane_state->tiling_info, + 
sizeof(phantom_plane->tiling_info));
+ memcpy(&phantom_plane->dcc, &curr_pipe->plane_state->dcc, sizeof(phantom_plane->dcc));
+ //phantom_plane->tiling_info.gfx10compatible.compat_level = curr_pipe->plane_state->tiling_info.gfx10compatible.compat_level;
+ phantom_plane->format = curr_pipe->plane_state->format;
+ phantom_plane->rotation = curr_pipe->plane_state->rotation;
+ phantom_plane->visible = curr_pipe->plane_state->visible;
+
+ /* Shadow pipe has small viewport. */
+ phantom_plane->clip_rect.y = 0;
+ phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable;
+
+ ctx->config.svp_pstate.callbacks.add_phantom_plane(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state);
+
+ curr_pipe = curr_pipe->bottom_pipe;
+ prev_phantom_plane = phantom_plane;
+ }
+}
+
+static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_state *state, unsigned int main_pipe_idx, unsigned int svp_height, unsigned int vstartup)
+{
+ struct dc_stream_state *phantom_stream = NULL;
+ unsigned int i;
+
+ // The index of the DC pipe passed into this function is guaranteed to
+ // be a valid candidate for SubVP (i.e. has a plane, stream, doesn't
+ // already have phantom pipe assigned, etc.) by previous checks.
+ phantom_stream = enable_phantom_stream(ctx, state, main_pipe_idx, svp_height, vstartup);
+ enable_phantom_plane(ctx, state, phantom_stream, main_pipe_idx);
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+ // Build scaling params for phantom pipes which were newly added.
+ // We determine which phantom pipes were added by comparing with
+ // the phantom stream.
+ if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream &&
+ ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
+ pipe->stream->use_dynamic_meta = false;
+ pipe->plane_state->flip_immediate = false;
+ if (!ctx->config.svp_pstate.callbacks.build_scaling_params(pipe)) {
+ // Log / remove phantom pipes, since building scaling params failed
+ }
+ }
+ }
+}
+
+static bool remove_all_phantom_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context)
+{
+ int i, old_plane_count;
+ struct dc_stream_status *stream_status = NULL;
+ struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 };
+
+ for (i = 0; i < context->stream_count; i++)
+ if (context->streams[i] == stream) {
+ stream_status = &context->stream_status[i];
+ break;
+ }
+
+ if (stream_status == NULL) {
+ return false;
+ }
+
+ old_plane_count = stream_status->plane_count;
+
+ for (i = 0; i < old_plane_count; i++)
+ del_planes[i] = stream_status->plane_states[i];
+
+ for (i = 0; i < old_plane_count; i++) {
+ if (!ctx->config.svp_pstate.callbacks.remove_phantom_plane(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context))
+ return false;
+ ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc, context, del_planes[i]);
+ }
+
+ return true;
+}
+
+bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state *state)
+{
+ int i;
+ bool removed_pipe = false;
+ struct dc_stream_state *phantom_stream = NULL;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+ // find and tear down phantom pipes
+ if (pipe->plane_state && pipe->stream && ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
+ phantom_stream = pipe->stream;
+
remove_all_phantom_planes_for_stream(ctx, phantom_stream, state); + ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream); + ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream); + + removed_pipe = true; + } + + if (pipe->plane_state) { + pipe->plane_state->is_phantom = false; + } + } + return removed_pipe; +} + + +/* Conditions for setting up phantom pipes for SubVP: + * 1. Not force disable SubVP + * 2. Full update (i.e. DC_VALIDATE_MODE_AND_PROGRAMMING) + * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) + * 4. Display configuration passes validation + * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) + */ +bool dml2_svp_add_phantom_pipe_to_dc_state(struct dml2_context *ctx, struct dc_state *state, struct dml_mode_support_info_st *mode_support_info) +{ + unsigned int dc_pipe_idx, dml_pipe_idx; + unsigned int svp_height, vstartup; + + if (ctx->config.svp_pstate.force_disable_subvp) + return false; + + if (!all_pipes_have_stream_and_plane(ctx, state)) + return false; + + if (mpo_in_use(state)) + return false; + + merge_pipes_for_subvp(ctx, state); + // to re-initialize viewport after the pipe merge + for (int i = 0; i < ctx->config.dcn_pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &state->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx->plane_state || !pipe_ctx->stream) + continue; + + ctx->config.svp_pstate.callbacks.build_scaling_params(pipe_ctx); + } + + if (enough_pipes_for_subvp(ctx, state) && assign_subvp_pipe(ctx, state, &dc_pipe_idx)) { + dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(ctx, state->res_ctx.pipe_ctx[dc_pipe_idx].stream->stream_id); + svp_height = mode_support_info->SubViewportLinesNeededInMALL[dml_pipe_idx]; + vstartup = dml_get_vstartup_calculated(&ctx->v20.dml_core_ctx, dml_pipe_idx); + + add_phantom_pipes_for_main_pipe(ctx, state, dc_pipe_idx, svp_height, vstartup); + + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.h new file mode 100644 index 000000000000..9d64851f54e7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DML2_MALL_PHANTOM_H__ +#define __DML2_MALL_PHANTOM_H__ + +#include "dml2_dc_types.h" +#include "display_mode_core_structs.h" + +struct dml2_svp_helper_select_best_svp_candidate_params { + const struct dml_display_cfg_st *dml_config; + const struct dml_mode_support_info_st *mode_support_info; + const unsigned int blacklist; + unsigned int *candidate_index; +}; + +struct dml2_context; + +unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx, struct dc_state *context); + +bool dml2_svp_add_phantom_pipe_to_dc_state(struct dml2_context *ctx, struct dc_state *state, struct dml_mode_support_info_st *mode_support_info); + +bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state *state); + +bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc_state *context, enum dml_dram_clock_change_support pstate_change_type); + +bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context, struct dc_crtc_timing *drr_timing); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.c new file mode 100644 index 000000000000..ef693f608d59 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.c @@ -0,0 +1,311 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dml2_policy.h" + +static void get_optimal_ntuple( + const struct soc_bounding_box_st *socbb, + struct soc_state_bounding_box_st *entry) +{ + if (entry->dcfclk_mhz > 0) { + float bw_on_sdp = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); + + entry->fabricclk_mhz = bw_on_sdp / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_sdp / (socbb->num_chans * + socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); + } else if (entry->fabricclk_mhz > 0) { + float bw_on_fabric = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); + + entry->dcfclk_mhz = bw_on_fabric / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_fabric / (socbb->num_chans * + socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); + } else if (entry->dram_speed_mts > 0) { + float bw_on_dram = (float)(entry->dram_speed_mts * socbb->num_chans * + socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); + + entry->fabricclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); + entry->dcfclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); + } +} + +static float calculate_net_bw_in_mbytes_sec(const struct soc_bounding_box_st *socbb, + struct soc_state_bounding_box_st *entry) +{ + float memory_bw_mbytes_sec = (float)(entry->dram_speed_mts * socbb->num_chans * + socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); + + float fabric_bw_mbytes_sec = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); + + float sdp_bw_mbytes_sec = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); + + float limiting_bw_mbytes_sec = memory_bw_mbytes_sec; + + if (fabric_bw_mbytes_sec < limiting_bw_mbytes_sec) + limiting_bw_mbytes_sec = fabric_bw_mbytes_sec; + + if (sdp_bw_mbytes_sec < limiting_bw_mbytes_sec) + limiting_bw_mbytes_sec = sdp_bw_mbytes_sec; + + return limiting_bw_mbytes_sec; +} + +static void insert_entry_into_table_sorted(const struct soc_bounding_box_st *socbb, + struct soc_states_st *table, + struct soc_state_bounding_box_st *entry) +{ + int index = 0; + int i = 0; + float net_bw_of_new_state = 0; + + get_optimal_ntuple(socbb, entry); + + if (table->num_states == 0) { + index = 0; + } else { + net_bw_of_new_state = calculate_net_bw_in_mbytes_sec(socbb, entry); + while (net_bw_of_new_state > calculate_net_bw_in_mbytes_sec(socbb, &table->state_array[index])) { + index++; + if (index >= (int) table->num_states) + break; + } + + for (i = table->num_states; i > index; i--) { + table->state_array[i] = table->state_array[i - 1]; + } + //ASSERT(index < MAX_CLK_TABLE_SIZE); + } + + table->state_array[index] = *entry; + table->state_array[index].dcfclk_mhz = (int)entry->dcfclk_mhz; + table->state_array[index].fabricclk_mhz = (int)entry->fabricclk_mhz; + table->state_array[index].dram_speed_mts = (int)entry->dram_speed_mts; + table->num_states++; +} + +static void remove_entry_from_table_at_index(struct soc_states_st 
*table, + unsigned int index) +{ + int i; + + if (table->num_states == 0) + return; + + for (i = index; i < (int) table->num_states - 1; i++) { + table->state_array[i] = table->state_array[i + 1]; + } + memset(&table->state_array[--table->num_states], 0, sizeof(struct soc_state_bounding_box_st)); +} + +int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s, + struct dml2_policy_build_synthetic_soc_states_params *p) +{ + int i, j; + unsigned int min_fclk_mhz = p->in_states->state_array[0].fabricclk_mhz; + unsigned int min_dcfclk_mhz = p->in_states->state_array[0].dcfclk_mhz; + unsigned int min_socclk_mhz = p->in_states->state_array[0].socclk_mhz; + + int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, + max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, + max_uclk_mhz = 0, max_socclk_mhz = 0; + + int num_uclk_dpms = 0, num_fclk_dpms = 0; + + for (i = 0; i < __DML_MAX_STATE_ARRAY_SIZE__; i++) { + if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = (int) p->in_states->state_array[i].dcfclk_mhz; + if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz) + max_fclk_mhz = (int) p->in_states->state_array[i].fabricclk_mhz; + if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz) + max_socclk_mhz = (int) p->in_states->state_array[i].socclk_mhz; + if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz) + max_uclk_mhz = (int) p->in_states->state_array[i].dram_speed_mts; + if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = (int) p->in_states->state_array[i].dispclk_mhz; + if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = (int) p->in_states->state_array[i].dppclk_mhz; + if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz; + if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz; + + if (p->in_states->state_array[i].fabricclk_mhz > 0) + num_fclk_dpms++; + if (p->in_states->state_array[i].dram_speed_mts > 0) + num_uclk_dpms++; + } + + if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dppclk_mhz || !max_phyclk_mhz || !max_dtbclk_mhz) + return -1; + + p->out_states->num_states = 0; + + s->entry = p->in_states->state_array[0]; + + s->entry.dispclk_mhz = max_dispclk_mhz; + s->entry.dppclk_mhz = max_dppclk_mhz; + s->entry.dtbclk_mhz = max_dtbclk_mhz; + s->entry.phyclk_mhz = max_phyclk_mhz; + + s->entry.dscclk_mhz = max_dispclk_mhz / 3; + s->entry.phyclk_mhz = max_phyclk_mhz; + s->entry.dtbclk_mhz = max_dtbclk_mhz; + + // Insert all the DCFCLK STAs first + for (i = 0; i < p->num_dcfclk_stas; i++) { + s->entry.dcfclk_mhz = p->dcfclk_stas_mhz[i]; + s->entry.fabricclk_mhz = 0; + s->entry.dram_speed_mts = 0; + if (i > 0) + s->entry.socclk_mhz = max_socclk_mhz; + + insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); + } + + // Insert the UCLK DPMS + for (i = 0; i < num_uclk_dpms; i++) { + s->entry.dcfclk_mhz = 0; + s->entry.fabricclk_mhz = 0; + s->entry.dram_speed_mts = p->in_states->state_array[i].dram_speed_mts; + if (i == 0) { + s->entry.socclk_mhz = min_socclk_mhz; + } else { + s->entry.socclk_mhz = max_socclk_mhz; + } + + insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); + } + + // Insert FCLK DPMs (if present) + if (num_fclk_dpms > 2) { + for (i = 0; i < num_fclk_dpms; i++) { + s->entry.dcfclk_mhz = 0; + s->entry.fabricclk_mhz = 
p->in_states->state_array[i].fabricclk_mhz;
+			s->entry.dram_speed_mts = 0;
+
+			insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
+		}
+	}
+	// Add max FCLK
+	else {
+		s->entry.dcfclk_mhz = 0;
+		s->entry.fabricclk_mhz = p->in_states->state_array[num_fclk_dpms - 1].fabricclk_mhz;
+		s->entry.dram_speed_mts = 0;
+
+		insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
+	}
+
+	// Remove states that require higher clocks than are supported
+	for (i = p->out_states->num_states - 1; i >= 0; i--) {
+		if (p->out_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz ||
+				p->out_states->state_array[i].fabricclk_mhz > max_fclk_mhz ||
+				p->out_states->state_array[i].dram_speed_mts > max_uclk_mhz)
+			remove_entry_from_table_at_index(p->out_states, i);
+	}
+
+	// At this point, the table contains all "points of interest" based on
+	// DPMs from PMFW, and STAs. Table is sorted by BW, and all clock
+	// ratios (by derate) are exact.
+
+	// Round up UCLK to DPMs
+	for (i = p->out_states->num_states - 1; i >= 0; i--) {
+		for (j = 0; j < num_uclk_dpms; j++) {
+			if (p->in_states->state_array[j].dram_speed_mts >= p->out_states->state_array[i].dram_speed_mts) {
+				p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[j].dram_speed_mts;
+				break;
+			}
+		}
+	}
+
+	// If FCLK is coarse grained, round up to next DPMs
+	if (num_fclk_dpms > 2) {
+		for (i = p->out_states->num_states - 1; i >= 0; i--) {
+			for (j = 0; j < num_fclk_dpms; j++) {
+				if (p->in_states->state_array[j].fabricclk_mhz >= p->out_states->state_array[i].fabricclk_mhz) {
+					p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[j].fabricclk_mhz;
+					break;
+				}
+			}
+		}
+	}
+
+	// Clamp to min FCLK/DCFCLK
+	for (i = p->out_states->num_states - 1; i >= 0; i--) {
+		if (p->out_states->state_array[i].fabricclk_mhz < min_fclk_mhz) {
+			p->out_states->state_array[i].fabricclk_mhz = min_fclk_mhz;
+		}
+		if (p->out_states->state_array[i].dcfclk_mhz < min_dcfclk_mhz) {
+			p->out_states->state_array[i].dcfclk_mhz = min_dcfclk_mhz;
+		}
+	}
+
+	// Remove duplicate states, note duplicate states are always neighbouring since table is sorted.
+	i = 0;
+	while (i < (int) p->out_states->num_states - 1) {
+		if (p->out_states->state_array[i].dcfclk_mhz == p->out_states->state_array[i + 1].dcfclk_mhz &&
+				p->out_states->state_array[i].fabricclk_mhz == p->out_states->state_array[i + 1].fabricclk_mhz &&
+				p->out_states->state_array[i].dram_speed_mts == p->out_states->state_array[i + 1].dram_speed_mts)
+			remove_entry_from_table_at_index(p->out_states, i);
+		else
+			i++;
+	}
+
+	return 0;
+}
+
+void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy)
+{
+	for (int i = 0; i < __DML_NUM_PLANES__; i++) {
+		policy->MPCCombineUse[i] = dml_mpc_as_needed_for_voltage; // TOREVIEW: Is this still needed? When is MPCC useful for pstate given CRB?
+		policy->ODMUse[i] = dml_odm_use_policy_combine_as_needed;
+		policy->ImmediateFlipRequirement[i] = dml_immediate_flip_required;
+		policy->AllowForPStateChangeOrStutterInVBlank[i] = dml_prefetch_support_uclk_fclk_and_stutter_if_possible;
+	}
+
+	/* Change the default policy initializations as per spreadsheet. We might need to
+	 * review and change them later on as per Jun's earlier comments.
+	 */
+	policy->UseUnboundedRequesting = dml_unbounded_requesting_enable;
+	policy->UseMinimumRequiredDCFCLK = false;
+	policy->DRAMClockChangeRequirementFinal = true; // TOREVIEW: What does this mean?
+ policy->FCLKChangeRequirementFinal = true; // TOREVIEW: What does this mean? + policy->USRRetrainingRequiredFinal = true; + policy->EnhancedPrefetchScheduleAccelerationFinal = true; // TOREVIEW: What does this mean? + policy->NomDETInKByteOverrideEnable = false; + policy->NomDETInKByteOverrideValue = 0; + policy->DCCProgrammingAssumesScanDirectionUnknownFinal = true; + policy->SynchronizeTimingsFinal = true; + policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = true; + policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = true; // TOREVIEW: What does this mean? + policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = true; // TOREVIEW: What does this mean? + if (project == dml_project_dcn35 || + project == dml_project_dcn36 || + project == dml_project_dcn351) { + policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false; + policy->EnhancedPrefetchScheduleAccelerationFinal = 0; + policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/ + policy->UseOnlyMaxPrefetchModes = 1; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.h new file mode 100644 index 000000000000..e83e05248592 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __DML2_POLICY_H__ +#define __DML2_POLICY_H__ + +#include "display_mode_core_structs.h" + +struct dml2_policy_build_synthetic_soc_states_params { + const struct soc_bounding_box_st *in_bbox; + struct soc_states_st *in_states; + struct soc_states_st *out_states; + int *dcfclk_stas_mhz; + int num_dcfclk_stas; +}; + +struct dml2_policy_build_synthetic_soc_states_scratch { + struct soc_state_bounding_box_st entry; +}; + +int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s, + struct dml2_policy_build_synthetic_soc_states_params *p); + +void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c new file mode 100644 index 000000000000..d834cb595afa --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c @@ -0,0 +1,1528 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "display_mode_core.h" +#include "dml2_internal_types.h" +#include "dml2_translation_helper.h" + +#define NUM_DCFCLK_STAS 5 +#define NUM_DCFCLK_STAS_NEW 8 + +void dml2_init_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out) +{ + switch (dml2->v20.dml_core_ctx.project) { + case dml_project_dcn32: + case dml_project_dcn321: + default: + // Hardcoded values for DCN32x + out->vblank_nom_default_us = 600; + out->rob_buffer_size_kbytes = 128; + out->config_return_buffer_size_in_kbytes = 1280; + out->config_return_buffer_segment_size_in_kbytes = 64; + out->compressed_buffer_segment_size_in_kbytes = 64; + out->meta_fifo_size_in_kentries = 22; + out->zero_size_buffer_entries = 512; + out->dpte_buffer_size_in_pte_reqs_luma = 68; + out->dpte_buffer_size_in_pte_reqs_chroma = 36; + out->dcc_meta_buffer_size_bytes = 6272; + out->gpuvm_max_page_table_levels = 4; + out->hostvm_max_page_table_levels = 0; + out->pixel_chunk_size_kbytes = 8; + //out->alpha_pixel_chunk_size_kbytes; + out->min_pixel_chunk_size_bytes = 1024; + out->meta_chunk_size_kbytes = 2; + out->min_meta_chunk_size_bytes = 256; + out->writeback_chunk_size_kbytes = 8; + out->line_buffer_size_bits = 1171920; + out->max_line_buffer_lines = 32; + out->writeback_interface_buffer_size_kbytes = 90; + //Number of pipes after DCN Pipe harvesting + out->max_num_dpp = dml2->config.dcn_pipe_count; + out->max_num_otg = dml2->config.dcn_pipe_count; + out->max_num_wb = 1; + out->max_dchub_pscl_bw_pix_per_clk = 4; + out->max_pscl_lb_bw_pix_per_clk = 2; + out->max_lb_vscl_bw_pix_per_clk = 4; + out->max_vscl_hscl_bw_pix_per_clk = 4; + out->max_hscl_ratio = 6; + out->max_vscl_ratio = 6; + out->max_hscl_taps = 8; + out->max_vscl_taps = 8; + out->dispclk_ramp_margin_percent = 1; + out->dppclk_delay_subtotal = 47; + out->dppclk_delay_scl = 50; + out->dppclk_delay_scl_lb_only = 16; + out->dppclk_delay_cnvc_formatter = 28; + out->dppclk_delay_cnvc_cursor = 6; + out->cursor_buffer_size = 16; + out->cursor_chunk_size = 2; + out->dispclk_delay_subtotal = 125; + out->max_inter_dcn_tile_repeaters = 8; + out->writeback_max_hscl_ratio = 1; + out->writeback_max_vscl_ratio = 1; + out->writeback_min_hscl_ratio = 1; + out->writeback_min_vscl_ratio = 1; + out->writeback_max_hscl_taps = 1; + out->writeback_max_vscl_taps = 1; + out->writeback_line_buffer_buffer_size = 0; + out->num_dsc = 4; + out->maximum_dsc_bits_per_component = 12; + out->maximum_pixels_per_line_per_dsc_unit = 6016; + out->dsc422_native_support = true; + out->dcc_supported = true; + out->ptoi_supported = false; + + out->gpuvm_enable = false; + out->hostvm_enable = false; + out->cursor_64bpp_support = false; + out->dynamic_metadata_vm_enabled = false; + + out->max_num_hdmi_frl_outputs = 1; + out->max_num_dp2p0_outputs = 2; + out->max_num_dp2p0_streams = 4; + break; + + case dml_project_dcn35: + case dml_project_dcn351: + case dml_project_dcn36: + out->rob_buffer_size_kbytes = 64; + out->config_return_buffer_size_in_kbytes = 1792; + out->compressed_buffer_segment_size_in_kbytes = 64; + out->meta_fifo_size_in_kentries = 32; + out->zero_size_buffer_entries = 512; + out->pixel_chunk_size_kbytes = 8; + out->alpha_pixel_chunk_size_kbytes = 4; + out->min_pixel_chunk_size_bytes = 1024; + out->meta_chunk_size_kbytes = 2; + out->min_meta_chunk_size_bytes = 256; + out->writeback_chunk_size_kbytes = 8; + out->dpte_buffer_size_in_pte_reqs_luma = 68; + out->dpte_buffer_size_in_pte_reqs_chroma = 36; + out->dcc_meta_buffer_size_bytes = 6272; + 
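+		// Unlike the DCN32x defaults above, GPU and host virtual
+		// memory are both enabled for this family.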
+		out->gpuvm_enable = 1;
+		out->hostvm_enable = 1;
+		out->gpuvm_max_page_table_levels = 1;
+		out->hostvm_max_page_table_levels = 2;
+		out->num_dsc = 4;
+		out->maximum_dsc_bits_per_component = 12;
+		out->maximum_pixels_per_line_per_dsc_unit = 6016;
+		out->dsc422_native_support = 1;
+		out->line_buffer_size_bits = 986880;
+		out->dcc_supported = 1;
+		out->max_line_buffer_lines = 32;
+		out->writeback_interface_buffer_size_kbytes = 90;
+		out->max_num_dpp = 4;
+		out->max_num_otg = 4;
+		out->max_num_hdmi_frl_outputs = 1;
+		out->max_num_dp2p0_outputs = 2;
+		out->max_num_dp2p0_streams = 4;
+		out->max_num_wb = 1;
+
+		out->max_dchub_pscl_bw_pix_per_clk = 4;
+		out->max_pscl_lb_bw_pix_per_clk = 2;
+		out->max_lb_vscl_bw_pix_per_clk = 4;
+		out->max_vscl_hscl_bw_pix_per_clk = 4;
+		out->max_hscl_ratio = 6;
+		out->max_vscl_ratio = 6;
+		out->max_hscl_taps = 8;
+		out->max_vscl_taps = 8;
+		out->dispclk_ramp_margin_percent = 1.11;
+
+		out->dppclk_delay_subtotal = 47;
+		out->dppclk_delay_scl = 50;
+		out->dppclk_delay_scl_lb_only = 16;
+		out->dppclk_delay_cnvc_formatter = 28;
+		out->dppclk_delay_cnvc_cursor = 6;
+		out->dispclk_delay_subtotal = 125;
+
+		out->dynamic_metadata_vm_enabled = false;
+		out->max_inter_dcn_tile_repeaters = 8;
+		out->cursor_buffer_size = 16; // kBytes
+		out->cursor_chunk_size = 2; // kBytes
+
+		out->writeback_line_buffer_buffer_size = 0;
+		out->writeback_max_hscl_ratio = 1;
+		out->writeback_max_vscl_ratio = 1;
+		out->writeback_min_hscl_ratio = 1;
+		out->writeback_min_vscl_ratio = 1;
+		out->writeback_max_hscl_taps = 1;
+		out->writeback_max_vscl_taps = 1;
+		out->ptoi_supported = 0;
+
+		out->vblank_nom_default_us = 668; /* not in dml, but in the programming guide; hard coded in dml2_translate_ip_params */
+		out->config_return_buffer_segment_size_in_kbytes = 64; /* required, but does not exist; hard coded in dml2_translate_ip_params */
+		break;
+
+	case dml_project_dcn401:
+		// Hardcoded values for DCN4m
+		out->vblank_nom_default_us = 668; //600;
+		out->rob_buffer_size_kbytes = 192; //128;
+		out->config_return_buffer_size_in_kbytes = 1344; //1280;
+		out->config_return_buffer_segment_size_in_kbytes = 64;
+		out->compressed_buffer_segment_size_in_kbytes = 64;
+		out->meta_fifo_size_in_kentries = 22;
+		out->dpte_buffer_size_in_pte_reqs_luma = 68;
+		out->dpte_buffer_size_in_pte_reqs_chroma = 36;
+		out->gpuvm_max_page_table_levels = 4;
+		out->pixel_chunk_size_kbytes = 8;
+		out->alpha_pixel_chunk_size_kbytes = 4;
+		out->min_pixel_chunk_size_bytes = 1024;
+		out->writeback_chunk_size_kbytes = 8;
+		out->line_buffer_size_bits = 1171920;
+		out->max_line_buffer_lines = 32;
+		out->writeback_interface_buffer_size_kbytes = 90;
+		//Number of pipes after DCN Pipe harvesting
+		out->max_num_dpp = dml2->config.dcn_pipe_count;
+		out->max_num_otg = dml2->config.dcn_pipe_count;
+		out->max_num_wb = 1;
+		out->max_dchub_pscl_bw_pix_per_clk = 4;
+		out->max_pscl_lb_bw_pix_per_clk = 2;
+		out->max_lb_vscl_bw_pix_per_clk = 4;
+		out->max_vscl_hscl_bw_pix_per_clk = 4;
+		out->max_hscl_ratio = 6;
+		out->max_vscl_ratio = 6;
+		out->max_hscl_taps = 8;
+		out->max_vscl_taps = 8;
+		out->dispclk_ramp_margin_percent = 1;
+		out->dppclk_delay_subtotal = 47;
+		out->dppclk_delay_scl = 50;
+		out->dppclk_delay_scl_lb_only = 16;
+		out->dppclk_delay_cnvc_formatter = 28;
+		out->dppclk_delay_cnvc_cursor = 6;
+		out->dispclk_delay_subtotal = 125;
+		out->cursor_buffer_size = 24; //16
+		out->cursor_chunk_size = 2;
+		out->max_inter_dcn_tile_repeaters = 8;
+		out->writeback_max_hscl_ratio = 1;
+		out->writeback_max_vscl_ratio = 1;
+		out->writeback_min_hscl_ratio = 1;
+
out->writeback_min_vscl_ratio = 1; + out->writeback_max_hscl_taps = 1; + out->writeback_max_vscl_taps = 1; + out->writeback_line_buffer_buffer_size = 0; + out->num_dsc = 4; + out->maximum_dsc_bits_per_component = 12; + out->maximum_pixels_per_line_per_dsc_unit = 5760; + out->dsc422_native_support = true; + out->dcc_supported = true; + out->ptoi_supported = false; + + out->gpuvm_enable = false; + out->hostvm_enable = false; + out->cursor_64bpp_support = true; //false; + out->dynamic_metadata_vm_enabled = false; + + out->max_num_hdmi_frl_outputs = 1; + out->max_num_dp2p0_outputs = 4; //2; + out->max_num_dp2p0_streams = 4; + break; + } +} + +void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out) +{ + out->dprefclk_mhz = dml2->config.bbox_overrides.dprefclk_mhz; + out->xtalclk_mhz = dml2->config.bbox_overrides.xtalclk_mhz; + out->pcierefclk_mhz = 100; + out->refclk_mhz = dml2->config.bbox_overrides.dchub_refclk_mhz; + + out->max_outstanding_reqs = 512; + out->pct_ideal_sdp_bw_after_urgent = 100; + out->pct_ideal_fabric_bw_after_urgent = 67; + out->pct_ideal_dram_bw_after_urgent_pixel_only = 20; + out->pct_ideal_dram_bw_after_urgent_pixel_and_vm = 60; + out->pct_ideal_dram_bw_after_urgent_vm_only = 30; + out->pct_ideal_dram_bw_after_urgent_strobe = 67; + out->max_avg_sdp_bw_use_normal_percent = 80; + out->max_avg_fabric_bw_use_normal_percent = 60; + out->max_avg_dram_bw_use_normal_percent = 15; + out->max_avg_dram_bw_use_normal_strobe_percent = 50; + + out->urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096; + out->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096; + out->urgent_out_of_order_return_per_channel_vm_only_bytes = 4096; + out->return_bus_width_bytes = 64; + out->dram_channel_width_bytes = 2; + out->fabric_datapath_to_dcn_data_return_bytes = 64; + out->hostvm_min_page_size_kbytes = 0; + out->gpuvm_min_page_size_kbytes = 256; + out->phy_downspread_percent = 0.38; + out->dcn_downspread_percent = 0.5; + out->dispclk_dppclk_vco_speed_mhz = dml2->config.bbox_overrides.disp_pll_vco_speed_mhz; + out->mall_allocated_for_dcn_mbytes = dml2->config.mall_cfg.max_cab_allocation_bytes / 1048576; // 64 or 32 MB; + + out->do_urgent_latency_adjustment = true; + + switch (dml2->v20.dml_core_ctx.project) { + + case dml_project_dcn32: + default: + out->num_chans = 24; + out->round_trip_ping_latency_dcfclk_cycles = 263; + out->smn_latency_us = 2; + break; + + case dml_project_dcn321: + out->num_chans = 8; + out->round_trip_ping_latency_dcfclk_cycles = 207; + out->smn_latency_us = 0; + break; + + case dml_project_dcn35: + case dml_project_dcn351: + case dml_project_dcn36: + out->num_chans = 4; + out->round_trip_ping_latency_dcfclk_cycles = 106; + out->smn_latency_us = 2; + out->dispclk_dppclk_vco_speed_mhz = 3600; + out->pct_ideal_dram_bw_after_urgent_pixel_only = 65.0; + break; + + + case dml_project_dcn401: + out->pct_ideal_fabric_bw_after_urgent = 76; //67; + out->max_avg_sdp_bw_use_normal_percent = 75; //80; + out->max_avg_fabric_bw_use_normal_percent = 57; //60; + + out->urgent_out_of_order_return_per_channel_pixel_only_bytes = 0; //4096; + out->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 0; //4096; + out->urgent_out_of_order_return_per_channel_vm_only_bytes = 0; //4096; + + out->num_chans = 16; + out->round_trip_ping_latency_dcfclk_cycles = 1000; //263; + out->smn_latency_us = 0; //2 us + out->mall_allocated_for_dcn_mbytes = dml2->config.mall_cfg.max_cab_allocation_bytes / 1048576; // 64; + break; + } 
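+	// The per-project values above are only defaults; any overrides
+	// supplied through dml2->config take precedence below.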
+	/* ---Overrides if available--- */
+	if (dml2->config.bbox_overrides.dram_num_chan)
+		out->num_chans = dml2->config.bbox_overrides.dram_num_chan;
+
+	if (dml2->config.bbox_overrides.dram_chanel_width_bytes)
+		out->dram_channel_width_bytes = dml2->config.bbox_overrides.dram_chanel_width_bytes;
+}
+
+void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
+		const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out)
+{
+	struct dml2_policy_build_synthetic_soc_states_scratch *s = &dml2->v20.scratch.create_scratch.build_synthetic_socbb_scratch;
+	struct dml2_policy_build_synthetic_soc_states_params *p = &dml2->v20.scratch.build_synthetic_socbb_params;
+	unsigned int dcfclk_stas_mhz[NUM_DCFCLK_STAS] = {0};
+	unsigned int dcfclk_stas_mhz_new[NUM_DCFCLK_STAS_NEW] = {0};
+	unsigned int dml_project = dml2->v20.dml_core_ctx.project;
+
+	unsigned int i = 0;
+	unsigned int transactions_per_mem_clock = 16; // project specific, depends on the memory type used
+
+	if (dml_project == dml_project_dcn351) {
+		p->dcfclk_stas_mhz = dcfclk_stas_mhz_new;
+		p->num_dcfclk_stas = NUM_DCFCLK_STAS_NEW;
+	} else {
+		p->dcfclk_stas_mhz = dcfclk_stas_mhz;
+		p->num_dcfclk_stas = NUM_DCFCLK_STAS;
+	}
+
+	p->in_bbox = in_bbox;
+	p->out_states = out;
+	p->in_states = &dml2->v20.scratch.create_scratch.in_states;
+
+
+	/* Initial hardcoded values */
+	switch (dml2->v20.dml_core_ctx.project) {
+
+	case dml_project_dcn32:
+	default:
+		p->in_states->num_states = 2;
+		transactions_per_mem_clock = 16;
+		p->in_states->state_array[0].socclk_mhz = 620.0;
+		p->in_states->state_array[0].dscclk_mhz = 716.667;
+		p->in_states->state_array[0].phyclk_mhz = 810;
+		p->in_states->state_array[0].phyclk_d18_mhz = 667;
+		p->in_states->state_array[0].phyclk_d32_mhz = 625;
+		p->in_states->state_array[0].dtbclk_mhz = 1564.0;
+		p->in_states->state_array[0].fabricclk_mhz = 450.0;
+		p->in_states->state_array[0].dcfclk_mhz = 300.0;
+		p->in_states->state_array[0].dispclk_mhz = 2150.0;
+		p->in_states->state_array[0].dppclk_mhz = 2150.0;
+		p->in_states->state_array[0].dram_speed_mts = 100 * transactions_per_mem_clock;
+
+		p->in_states->state_array[0].urgent_latency_pixel_data_only_us = 4;
+		p->in_states->state_array[0].urgent_latency_pixel_mixed_with_vm_data_us = 0;
+		p->in_states->state_array[0].urgent_latency_vm_data_only_us = 0;
+		p->in_states->state_array[0].writeback_latency_us = 12;
+		p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_component_us = 1;
+		p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_reference_mhz = 3000;
+		p->in_states->state_array[0].sr_exit_z8_time_us = 0;
+		p->in_states->state_array[0].sr_enter_plus_exit_z8_time_us = 0;
+		p->in_states->state_array[0].dram_clock_change_latency_us = 400;
+		p->in_states->state_array[0].use_ideal_dram_bw_strobe = true;
+		p->in_states->state_array[0].sr_exit_time_us = 42.97;
+		p->in_states->state_array[0].sr_enter_plus_exit_time_us = 49.94;
+		p->in_states->state_array[0].fclk_change_latency_us = 20;
+		p->in_states->state_array[0].usr_retraining_latency_us = 2;
+
+		p->in_states->state_array[1].socclk_mhz = 1200.0;
+		p->in_states->state_array[1].fabricclk_mhz = 2500.0;
+		p->in_states->state_array[1].dcfclk_mhz = 1564.0;
+		p->in_states->state_array[1].dram_speed_mts = 1125 * transactions_per_mem_clock;
+		break;
+
+	case dml_project_dcn321:
+		p->in_states->num_states = 2;
+		transactions_per_mem_clock = 16;
+		p->in_states->state_array[0].socclk_mhz = 582.0;
+		p->in_states->state_array[0].dscclk_mhz = 573.333;
+
p->in_states->state_array[0].phyclk_mhz = 810; + p->in_states->state_array[0].phyclk_d18_mhz = 667; + p->in_states->state_array[0].phyclk_d32_mhz = 313; + p->in_states->state_array[0].dtbclk_mhz = 1564.0; + p->in_states->state_array[0].fabricclk_mhz = 450.0; + p->in_states->state_array[0].dcfclk_mhz = 300.0; + p->in_states->state_array[0].dispclk_mhz = 1720.0; + p->in_states->state_array[0].dppclk_mhz = 1720.0; + p->in_states->state_array[0].dram_speed_mts = 100 * transactions_per_mem_clock; + + p->in_states->state_array[0].urgent_latency_pixel_data_only_us = 4; + p->in_states->state_array[0].urgent_latency_pixel_mixed_with_vm_data_us = 0; + p->in_states->state_array[0].urgent_latency_vm_data_only_us = 0; + p->in_states->state_array[0].writeback_latency_us = 12; + p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_component_us = 1; + p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_reference_mhz = 3000; + p->in_states->state_array[0].sr_exit_z8_time_us = 0; + p->in_states->state_array[0].sr_enter_plus_exit_z8_time_us = 0; + p->in_states->state_array[0].dram_clock_change_latency_us = 400; + p->in_states->state_array[0].use_ideal_dram_bw_strobe = true; + p->in_states->state_array[0].sr_exit_time_us = 19.95; + p->in_states->state_array[0].sr_enter_plus_exit_time_us = 24.36; + p->in_states->state_array[0].fclk_change_latency_us = 7; + p->in_states->state_array[0].usr_retraining_latency_us = 0; + + p->in_states->state_array[1].socclk_mhz = 1200.0; + p->in_states->state_array[1].fabricclk_mhz = 2250.0; + p->in_states->state_array[1].dcfclk_mhz = 1434.0; + p->in_states->state_array[1].dram_speed_mts = 1000 * transactions_per_mem_clock; + break; + + + case dml_project_dcn401: + p->in_states->num_states = 2; + transactions_per_mem_clock = 16; + p->in_states->state_array[0].socclk_mhz = 300; //620.0; + p->in_states->state_array[0].dscclk_mhz = 666.667; //716.667; + p->in_states->state_array[0].phyclk_mhz = 810; + p->in_states->state_array[0].phyclk_d18_mhz = 667; + p->in_states->state_array[0].phyclk_d32_mhz = 625; + p->in_states->state_array[0].dtbclk_mhz = 2000; //1564.0; + p->in_states->state_array[0].fabricclk_mhz = 300; //450.0; + p->in_states->state_array[0].dcfclk_mhz = 200; //300.0; + p->in_states->state_array[0].dispclk_mhz = 2000; //2150.0; + p->in_states->state_array[0].dppclk_mhz = 2000; //2150.0; + p->in_states->state_array[0].dram_speed_mts = 97 * transactions_per_mem_clock; //100 * + + p->in_states->state_array[0].urgent_latency_pixel_data_only_us = 4; + p->in_states->state_array[0].urgent_latency_pixel_mixed_with_vm_data_us = 0; + p->in_states->state_array[0].urgent_latency_vm_data_only_us = 0; + p->in_states->state_array[0].writeback_latency_us = 12; + p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_component_us = 1; + p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_reference_mhz = 1000; //3000; + p->in_states->state_array[0].sr_exit_z8_time_us = 0; + p->in_states->state_array[0].sr_enter_plus_exit_z8_time_us = 0; + p->in_states->state_array[0].dram_clock_change_latency_us = 400; + p->in_states->state_array[0].use_ideal_dram_bw_strobe = true; + p->in_states->state_array[0].sr_exit_time_us = 15.70; //42.97; + p->in_states->state_array[0].sr_enter_plus_exit_time_us = 20.20; //49.94; + p->in_states->state_array[0].fclk_change_latency_us = 0; //20; + p->in_states->state_array[0].usr_retraining_latency_us = 0; //2; + + p->in_states->state_array[1].socclk_mhz = 1600; //1200.0; + 
p->in_states->state_array[1].fabricclk_mhz = 2500; //2500.0; + p->in_states->state_array[1].dcfclk_mhz = 1800; //1564.0; + p->in_states->state_array[1].dram_speed_mts = 1125 * transactions_per_mem_clock; + break; + } + + /* Override from passed values, if available */ + for (i = 0; i < p->in_states->num_states; i++) { + if (dml2->config.bbox_overrides.sr_exit_latency_us) { + p->in_states->state_array[i].sr_exit_time_us = + dml2->config.bbox_overrides.sr_exit_latency_us; + } + + if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) { + p->in_states->state_array[i].sr_enter_plus_exit_time_us = + dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us; + } + + if (dml2->config.bbox_overrides.sr_exit_z8_time_us) { + p->in_states->state_array[i].sr_exit_z8_time_us = + dml2->config.bbox_overrides.sr_exit_z8_time_us; + } + + if (dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us) { + p->in_states->state_array[i].sr_enter_plus_exit_z8_time_us = + dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us; + } + + if (dml2->config.bbox_overrides.urgent_latency_us) { + p->in_states->state_array[i].urgent_latency_pixel_data_only_us = + dml2->config.bbox_overrides.urgent_latency_us; + } + + if (dml2->config.bbox_overrides.dram_clock_change_latency_us) { + p->in_states->state_array[i].dram_clock_change_latency_us = + dml2->config.bbox_overrides.dram_clock_change_latency_us; + } + + if (dml2->config.bbox_overrides.fclk_change_latency_us) { + p->in_states->state_array[i].fclk_change_latency_us = + dml2->config.bbox_overrides.fclk_change_latency_us; + } + } + + /* DCFCLK stas values are project specific */ + if ((dml2->v20.dml_core_ctx.project == dml_project_dcn32) || + (dml2->v20.dml_core_ctx.project == dml_project_dcn321)) { + p->dcfclk_stas_mhz[0] = p->in_states->state_array[0].dcfclk_mhz; + p->dcfclk_stas_mhz[1] = 615; + p->dcfclk_stas_mhz[2] = 906; + p->dcfclk_stas_mhz[3] = 1324; + p->dcfclk_stas_mhz[4] = p->in_states->state_array[1].dcfclk_mhz; + } else if (dml2->v20.dml_core_ctx.project != dml_project_dcn35 && + dml2->v20.dml_core_ctx.project != dml_project_dcn36 && + dml2->v20.dml_core_ctx.project != dml_project_dcn351) { + p->dcfclk_stas_mhz[0] = 300; + p->dcfclk_stas_mhz[1] = 615; + p->dcfclk_stas_mhz[2] = 906; + p->dcfclk_stas_mhz[3] = 1324; + p->dcfclk_stas_mhz[4] = 1500; + } + /* Copy clocks tables entries, if available */ + if (dml2->config.bbox_overrides.clks_table.num_states) { + p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; + for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) { + p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz; + } + + p->dcfclk_stas_mhz[0] = dml2->config.bbox_overrides.clks_table.clk_entries[0].dcfclk_mhz; + if (i > 1) + p->dcfclk_stas_mhz[4] = dml2->config.bbox_overrides.clks_table.clk_entries[i-1].dcfclk_mhz; + + for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels; i++) { + p->in_states->state_array[i].fabricclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].fclk_mhz; + } + + for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels; i++) { + p->in_states->state_array[i].dram_speed_mts = + dml2->config.bbox_overrides.clks_table.clk_entries[i].memclk_mhz * transactions_per_mem_clock; + } + + for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels; i++) { + 
p->in_states->state_array[i].socclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].socclk_mhz; + } + + for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels; i++) { + if (dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz > 0) + p->in_states->state_array[i].dtbclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz; + } + + for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels; i++) { + p->in_states->state_array[i].dispclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz; + p->in_states->state_array[i].dppclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz; + } + } + + if (dml2->v20.dml_core_ctx.project == dml_project_dcn35 || + dml2->v20.dml_core_ctx.project == dml_project_dcn36 || + dml2->v20.dml_core_ctx.project == dml_project_dcn351) { + int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0, + max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0, max_socclk_mhz = 0; + + for (i = 0; i < p->in_states->num_states; i++) { + if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = (int)p->in_states->state_array[i].dcfclk_mhz; + if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz) + max_fclk_mhz = (int)p->in_states->state_array[i].fabricclk_mhz; + if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz) + max_socclk_mhz = (int)p->in_states->state_array[i].socclk_mhz; + if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz) + max_uclk_mhz = (int)p->in_states->state_array[i].dram_speed_mts; + if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = (int)p->in_states->state_array[i].dispclk_mhz; + if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = (int)p->in_states->state_array[i].dppclk_mhz; + if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz; + if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz; + } + + for (i = 0; i < p->in_states->num_states; i++) { + /* Independent states - including base (unlisted) parameters from state 0. */ + p->out_states->state_array[i] = p->in_states->state_array[0]; + + p->out_states->state_array[i].dispclk_mhz = max_dispclk_mhz; + p->out_states->state_array[i].dppclk_mhz = max_dppclk_mhz; + p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; + p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; + + p->out_states->state_array[i].dscclk_mhz = max_dispclk_mhz / 3.0; + p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; + p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; + + /* Dependent states. 
 */
+			p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[i].dram_speed_mts;
+			p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz;
+			p->out_states->state_array[i].socclk_mhz = p->in_states->state_array[i].socclk_mhz;
+			p->out_states->state_array[i].dcfclk_mhz = p->in_states->state_array[i].dcfclk_mhz;
+		}
+
+		p->out_states->num_states = p->in_states->num_states;
+	} else {
+		dml2_policy_build_synthetic_soc_states(s, p);
+	}
+}
+
+void dml2_translate_ip_params(const struct dc *in, struct ip_params_st *out)
+{
+	const struct _vcs_dpi_ip_params_st *in_ip_params = &in->dml.ip;
+	/* Copy over the IP params to dml2_ctx */
+	out->compressed_buffer_segment_size_in_kbytes = in_ip_params->compressed_buffer_segment_size_in_kbytes;
+	out->config_return_buffer_size_in_kbytes = in_ip_params->config_return_buffer_size_in_kbytes;
+	out->cursor_buffer_size = in_ip_params->cursor_buffer_size;
+	out->cursor_chunk_size = in_ip_params->cursor_chunk_size;
+	out->dcc_meta_buffer_size_bytes = in_ip_params->dcc_meta_buffer_size_bytes;
+	out->dcc_supported = in_ip_params->dcc_supported;
+	out->dispclk_delay_subtotal = in_ip_params->dispclk_delay_subtotal;
+	out->dispclk_ramp_margin_percent = in_ip_params->dispclk_ramp_margin_percent;
+	out->dppclk_delay_cnvc_cursor = in_ip_params->dppclk_delay_cnvc_cursor;
+	out->dppclk_delay_cnvc_formatter = in_ip_params->dppclk_delay_cnvc_formatter;
+	out->dppclk_delay_scl = in_ip_params->dppclk_delay_scl;
+	out->dppclk_delay_scl_lb_only = in_ip_params->dppclk_delay_scl_lb_only;
+	out->dppclk_delay_subtotal = in_ip_params->dppclk_delay_subtotal;
+	out->dpte_buffer_size_in_pte_reqs_chroma = in_ip_params->dpte_buffer_size_in_pte_reqs_chroma;
+	out->dpte_buffer_size_in_pte_reqs_luma = in_ip_params->dpte_buffer_size_in_pte_reqs_luma;
+	out->dsc422_native_support = in_ip_params->dsc422_native_support;
+	out->dynamic_metadata_vm_enabled = in_ip_params->dynamic_metadata_vm_enabled;
+	out->gpuvm_enable = in_ip_params->gpuvm_enable;
+	out->gpuvm_max_page_table_levels = in_ip_params->gpuvm_max_page_table_levels;
+	out->hostvm_enable = in_ip_params->hostvm_enable;
+	out->hostvm_max_page_table_levels = in_ip_params->hostvm_max_page_table_levels;
+	out->line_buffer_size_bits = in_ip_params->line_buffer_size_bits;
+	out->maximum_dsc_bits_per_component = in_ip_params->maximum_dsc_bits_per_component;
+	out->maximum_pixels_per_line_per_dsc_unit = in_ip_params->maximum_pixels_per_line_per_dsc_unit;
+	out->max_dchub_pscl_bw_pix_per_clk = in_ip_params->max_dchub_pscl_bw_pix_per_clk;
+	out->max_hscl_ratio = in_ip_params->max_hscl_ratio;
+	out->max_hscl_taps = in_ip_params->max_hscl_taps;
+	out->max_inter_dcn_tile_repeaters = in_ip_params->max_inter_dcn_tile_repeaters;
+	out->max_lb_vscl_bw_pix_per_clk = in_ip_params->max_lb_vscl_bw_pix_per_clk;
+	out->max_line_buffer_lines = in_ip_params->max_line_buffer_lines;
+	out->max_num_dp2p0_outputs = in_ip_params->max_num_dp2p0_outputs;
+	out->max_num_dp2p0_streams = in_ip_params->max_num_dp2p0_streams;
+	out->max_num_dpp = in_ip_params->max_num_dpp;
+	out->max_num_hdmi_frl_outputs = in_ip_params->max_num_hdmi_frl_outputs;
+	out->max_num_otg = in_ip_params->max_num_otg;
+	out->max_num_wb = in_ip_params->max_num_wb;
+	out->max_pscl_lb_bw_pix_per_clk = in_ip_params->max_pscl_lb_bw_pix_per_clk;
+	out->max_vscl_hscl_bw_pix_per_clk = in_ip_params->max_vscl_hscl_bw_pix_per_clk;
+	out->max_vscl_ratio = in_ip_params->max_vscl_ratio;
+	out->max_vscl_taps = in_ip_params->max_vscl_taps;
+
out->meta_chunk_size_kbytes = in_ip_params->meta_chunk_size_kbytes; + out->meta_fifo_size_in_kentries = in_ip_params->meta_fifo_size_in_kentries; + out->min_meta_chunk_size_bytes = in_ip_params->min_meta_chunk_size_bytes; + out->min_pixel_chunk_size_bytes = in_ip_params->min_pixel_chunk_size_bytes; + out->num_dsc = in_ip_params->num_dsc; + out->pixel_chunk_size_kbytes = in_ip_params->pixel_chunk_size_kbytes; + out->ptoi_supported = in_ip_params->ptoi_supported; + out->rob_buffer_size_kbytes = in_ip_params->rob_buffer_size_kbytes; + out->writeback_chunk_size_kbytes = in_ip_params->writeback_chunk_size_kbytes; + out->writeback_interface_buffer_size_kbytes = in_ip_params->writeback_interface_buffer_size_kbytes; + out->writeback_line_buffer_buffer_size = in_ip_params->writeback_line_buffer_buffer_size; + out->writeback_max_hscl_ratio = in_ip_params->writeback_max_hscl_ratio; + out->writeback_max_hscl_taps = in_ip_params->writeback_max_hscl_taps; + out->writeback_max_vscl_ratio = in_ip_params->writeback_max_vscl_ratio; + out->writeback_max_vscl_taps = in_ip_params->writeback_max_vscl_taps; + out->writeback_min_hscl_ratio = in_ip_params->writeback_min_hscl_ratio; + out->writeback_min_vscl_ratio = in_ip_params->writeback_min_vscl_ratio; + out->zero_size_buffer_entries = in_ip_params->zero_size_buffer_entries; + + /* As per hardcoded reference / discussions */ + out->config_return_buffer_segment_size_in_kbytes = 64; + //out->vblank_nom_default_us = 600; + out->vblank_nom_default_us = in_ip_params->VBlankNomDefaultUS; +} + +void dml2_translate_socbb_params(const struct dc *in, struct soc_bounding_box_st *out) +{ + const struct _vcs_dpi_soc_bounding_box_st *in_soc_params = &in->dml.soc; + /* Copy over the SOCBB params to dml2_ctx */ + out->dispclk_dppclk_vco_speed_mhz = in_soc_params->dispclk_dppclk_vco_speed_mhz; + out->do_urgent_latency_adjustment = in_soc_params->do_urgent_latency_adjustment; + out->dram_channel_width_bytes = (dml_uint_t)in_soc_params->dram_channel_width_bytes; + out->fabric_datapath_to_dcn_data_return_bytes = (dml_uint_t)in_soc_params->fabric_datapath_to_dcn_data_return_bytes; + out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes / 1024; + out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes / 1024; + out->mall_allocated_for_dcn_mbytes = (dml_uint_t)in_soc_params->mall_allocated_for_dcn_mbytes; + out->max_avg_dram_bw_use_normal_percent = in_soc_params->max_avg_dram_bw_use_normal_percent; + out->max_avg_fabric_bw_use_normal_percent = in_soc_params->max_avg_fabric_bw_use_normal_percent; + out->max_avg_dram_bw_use_normal_strobe_percent = in_soc_params->max_avg_dram_bw_use_normal_strobe_percent; + out->max_avg_sdp_bw_use_normal_percent = in_soc_params->max_avg_sdp_bw_use_normal_percent; + out->max_outstanding_reqs = in_soc_params->max_request_size_bytes; + out->num_chans = in_soc_params->num_chans; + out->pct_ideal_dram_bw_after_urgent_strobe = in_soc_params->pct_ideal_dram_bw_after_urgent_strobe; + out->pct_ideal_dram_bw_after_urgent_vm_only = in_soc_params->pct_ideal_dram_sdp_bw_after_urgent_vm_only; + out->pct_ideal_fabric_bw_after_urgent = in_soc_params->pct_ideal_fabric_bw_after_urgent; + out->pct_ideal_sdp_bw_after_urgent = in_soc_params->pct_ideal_sdp_bw_after_urgent; + out->phy_downspread_percent = in_soc_params->downspread_percent; + out->refclk_mhz = 50; // As per hardcoded reference. 
+	out->return_bus_width_bytes = in_soc_params->return_bus_width_bytes;
+	out->round_trip_ping_latency_dcfclk_cycles = in_soc_params->round_trip_ping_latency_dcfclk_cycles;
+	out->smn_latency_us = in_soc_params->smn_latency_us;
+	out->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = in_soc_params->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
+	out->urgent_out_of_order_return_per_channel_pixel_only_bytes = in_soc_params->urgent_out_of_order_return_per_channel_pixel_only_bytes;
+	out->urgent_out_of_order_return_per_channel_vm_only_bytes = in_soc_params->urgent_out_of_order_return_per_channel_vm_only_bytes;
+	out->pct_ideal_dram_bw_after_urgent_pixel_and_vm = in_soc_params->pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm;
+	out->pct_ideal_dram_bw_after_urgent_pixel_only = in_soc_params->pct_ideal_dram_sdp_bw_after_urgent_pixel_only;
+	out->dcn_downspread_percent = in_soc_params->dcn_downspread_percent;
+}
+
+void dml2_translate_soc_states(const struct dc *dc, struct soc_states_st *out, int num_states)
+{
+	unsigned int i = 0;
+	out->num_states = num_states;
+
+	for (i = 0; i < out->num_states; i++) {
+		out->state_array[i].dcfclk_mhz = dc->dml.soc.clock_limits[i].dcfclk_mhz;
+		out->state_array[i].dispclk_mhz = dc->dml.soc.clock_limits[i].dispclk_mhz;
+		out->state_array[i].dppclk_mhz = dc->dml.soc.clock_limits[i].dppclk_mhz;
+		out->state_array[i].dram_speed_mts = dc->dml.soc.clock_limits[i].dram_speed_mts;
+		out->state_array[i].dtbclk_mhz = dc->dml.soc.clock_limits[i].dtbclk_mhz;
+		out->state_array[i].socclk_mhz = dc->dml.soc.clock_limits[i].socclk_mhz;
+		out->state_array[i].fabricclk_mhz = dc->dml.soc.clock_limits[i].fabricclk_mhz;
+		out->state_array[i].dscclk_mhz = dc->dml.soc.clock_limits[i].dscclk_mhz;
+		out->state_array[i].phyclk_d18_mhz = dc->dml.soc.clock_limits[i].phyclk_d18_mhz;
+		out->state_array[i].phyclk_d32_mhz = dc->dml.soc.clock_limits[i].phyclk_d32_mhz;
+		out->state_array[i].phyclk_mhz = dc->dml.soc.clock_limits[i].phyclk_mhz;
+		out->state_array[i].sr_enter_plus_exit_time_us = dc->dml.soc.sr_enter_plus_exit_time_us;
+		out->state_array[i].sr_exit_time_us = dc->dml.soc.sr_exit_time_us;
+		out->state_array[i].fclk_change_latency_us = dc->dml.soc.fclk_change_latency_us;
+		out->state_array[i].dram_clock_change_latency_us = dc->dml.soc.dram_clock_change_latency_us;
+		out->state_array[i].usr_retraining_latency_us = dc->dml.soc.usr_retraining_latency_us;
+		out->state_array[i].writeback_latency_us = dc->dml.soc.writeback_latency_us;
+		/* Driver initialized values for these are different from the spreadsheet. Use the
+		 * spreadsheet ones for now. We need to decide which ones to use.
+ */ + out->state_array[i].sr_exit_z8_time_us = dc->dml.soc.sr_exit_z8_time_us; + out->state_array[i].sr_enter_plus_exit_z8_time_us = dc->dml.soc.sr_enter_plus_exit_z8_time_us; + //out->state_array[i].sr_exit_z8_time_us = 5.20; + //out->state_array[i].sr_enter_plus_exit_z8_time_us = 9.60; + out->state_array[i].use_ideal_dram_bw_strobe = true; + out->state_array[i].urgent_latency_pixel_data_only_us = dc->dml.soc.urgent_latency_pixel_data_only_us; + out->state_array[i].urgent_latency_pixel_mixed_with_vm_data_us = dc->dml.soc.urgent_latency_pixel_mixed_with_vm_data_us; + out->state_array[i].urgent_latency_vm_data_only_us = dc->dml.soc.urgent_latency_vm_data_only_us; + out->state_array[i].urgent_latency_adjustment_fabric_clock_component_us = dc->dml.soc.urgent_latency_adjustment_fabric_clock_component_us; + out->state_array[i].urgent_latency_adjustment_fabric_clock_reference_mhz = dc->dml.soc.urgent_latency_adjustment_fabric_clock_reference_mhz; + } +} + +static void populate_dml_timing_cfg_from_stream_state(struct dml_timing_cfg_st *out, unsigned int location, const struct dc_stream_state *in) +{ + dml_uint_t hblank_start, vblank_start; + + out->HActive[location] = in->timing.h_addressable + in->timing.h_border_left + in->timing.h_border_right; + out->VActive[location] = in->timing.v_addressable + in->timing.v_border_bottom + in->timing.v_border_top; + out->RefreshRate[location] = ((in->timing.pix_clk_100hz * 100) / in->timing.h_total) / in->timing.v_total; + out->VFrontPorch[location] = in->timing.v_front_porch; + out->PixelClock[location] = in->timing.pix_clk_100hz / 10000.00; + if (in->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + out->PixelClock[location] *= 2; + out->HTotal[location] = in->timing.h_total; + out->VTotal[location] = in->timing.v_total; + out->Interlace[location] = in->timing.flags.INTERLACE; + hblank_start = in->timing.h_total - in->timing.h_front_porch; + out->HBlankEnd[location] = hblank_start + - in->timing.h_addressable + - in->timing.h_border_left + - in->timing.h_border_right; + vblank_start = in->timing.v_total - in->timing.v_front_porch; + out->VBlankEnd[location] = vblank_start + - in->timing.v_addressable + - in->timing.v_border_top + - in->timing.v_border_bottom; + out->DRRDisplay[location] = false; +} + +static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *out, unsigned int location, + const struct dc_stream_state *in, const struct pipe_ctx *pipe, struct dml2_context *dml2) +{ + unsigned int output_bpc; + + out->DSCEnable[location] = (enum dml_dsc_enable)in->timing.flags.DSC; + out->OutputLinkDPLanes[location] = 4; // As per code in dcn20_resource.c + out->DSCInputBitPerComponent[location] = 12; // As per code in dcn20_resource.c + out->DSCSlices[location] = in->timing.dsc_cfg.num_slices_h; + + switch (in->signal) { + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_DISPLAY_PORT: + out->OutputEncoder[location] = dml_dp; + if (location < MAX_HPO_DP2_ENCODERS && dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) + out->OutputEncoder[dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location]] = dml_dp2p0; + break; + case SIGNAL_TYPE_EDP: + out->OutputEncoder[location] = dml_edp; + break; + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + out->OutputEncoder[location] = dml_hdmi; + break; + default: + out->OutputEncoder[location] = dml_dp; + } + + switch (in->timing.display_color_depth) { + case COLOR_DEPTH_666: + output_bpc = 6; + break; + 
case COLOR_DEPTH_888: + output_bpc = 8; + break; + case COLOR_DEPTH_101010: + output_bpc = 10; + break; + case COLOR_DEPTH_121212: + output_bpc = 12; + break; + case COLOR_DEPTH_141414: + output_bpc = 14; + break; + case COLOR_DEPTH_161616: + output_bpc = 16; + break; + case COLOR_DEPTH_999: + output_bpc = 9; + break; + case COLOR_DEPTH_111111: + output_bpc = 11; + break; + default: + output_bpc = 8; + break; + } + + switch (in->timing.pixel_encoding) { + case PIXEL_ENCODING_RGB: + case PIXEL_ENCODING_YCBCR444: + out->OutputFormat[location] = dml_444; + out->OutputBpp[location] = (dml_float_t)output_bpc * 3; + break; + case PIXEL_ENCODING_YCBCR420: + out->OutputFormat[location] = dml_420; + out->OutputBpp[location] = (output_bpc * 3.0) / 2; + break; + case PIXEL_ENCODING_YCBCR422: + if (in->timing.flags.DSC && !in->timing.dsc_cfg.ycbcr422_simple) + out->OutputFormat[location] = dml_n422; + else + out->OutputFormat[location] = dml_s422; + out->OutputBpp[location] = (dml_float_t)output_bpc * 2; + break; + default: + out->OutputFormat[location] = dml_444; + out->OutputBpp[location] = (dml_float_t)output_bpc * 3; + break; + } + + if (in->timing.flags.DSC) { + out->OutputBpp[location] = in->timing.dsc_cfg.bits_per_pixel / 16.0; + } + + // This has been false throughout DCN32x development. If needed we can change this later on. + out->OutputMultistreamEn[location] = false; + + switch (in->signal) { + case SIGNAL_TYPE_NONE: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_LVDS: + case SIGNAL_TYPE_RGB: + case SIGNAL_TYPE_DISPLAY_PORT: + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_EDP: + case SIGNAL_TYPE_VIRTUAL: + default: + out->OutputLinkDPRate[location] = dml_dp_rate_na; + break; + } + + out->PixelClockBackEnd[location] = in->timing.pix_clk_100hz / 10000.00; + + out->AudioSampleLayout[location] = in->audio_info.modes->sample_size; + out->AudioSampleRate[location] = in->audio_info.modes->max_bit_rate; + + out->OutputDisabled[location] = true; +} + +static void populate_dummy_dml_surface_cfg(struct dml_surface_cfg_st *out, unsigned int location, const struct dc_stream_state *in) +{ + out->SurfaceWidthY[location] = in->timing.h_addressable; + out->SurfaceHeightY[location] = in->timing.v_addressable; + out->SurfaceWidthC[location] = in->timing.h_addressable; + out->SurfaceHeightC[location] = in->timing.v_addressable; + out->PitchY[location] = ((out->SurfaceWidthY[location] + 127) / 128) * 128; + out->PitchC[location] = 1; + out->DCCEnable[location] = false; + out->DCCMetaPitchY[location] = 0; + out->DCCMetaPitchC[location] = 0; + out->DCCRateLuma[location] = 1.0; + out->DCCRateChroma[location] = 1.0; + out->DCCFractionOfZeroSizeRequestsLuma[location] = 0; + out->DCCFractionOfZeroSizeRequestsChroma[location] = 0; + out->SurfaceTiling[location] = dml_sw_64kb_r_x; + out->SourcePixelFormat[location] = dml_444_32; +} + +static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_project, struct dml_surface_cfg_st *out, unsigned int location, const struct dc_plane_state *in) +{ + out->PitchY[location] = in->plane_size.surface_pitch; + out->SurfaceHeightY[location] = in->plane_size.surface_size.height; + out->SurfaceWidthY[location] = in->plane_size.surface_size.width; + out->SurfaceHeightC[location] = in->plane_size.chroma_size.height; + out->SurfaceWidthC[location] = in->plane_size.chroma_size.width; + out->PitchC[location] = in->plane_size.chroma_pitch; + out->DCCEnable[location] = in->dcc.enable; + 
out->DCCMetaPitchY[location] = in->dcc.meta_pitch; + out->DCCMetaPitchC[location] = in->dcc.meta_pitch_c; + out->DCCRateLuma[location] = 1.0; + out->DCCRateChroma[location] = 1.0; + out->DCCFractionOfZeroSizeRequestsLuma[location] = in->dcc.independent_64b_blks; + out->DCCFractionOfZeroSizeRequestsChroma[location] = in->dcc.independent_64b_blks_c; + + switch (dml2_project) { + default: + out->SurfaceTiling[location] = (enum dml_swizzle_mode)in->tiling_info.gfx9.swizzle; + break; + case dml_project_dcn401: + // Temporary use gfx11 swizzle in dml, until proper dml for DCN4x is integrated/implemented + switch (in->tiling_info.gfx_addr3.swizzle) { + case DC_ADDR3_SW_4KB_2D: + case DC_ADDR3_SW_64KB_2D: + case DC_ADDR3_SW_256KB_2D: + default: + out->SurfaceTiling[location] = dml_sw_64kb_r_x; + break; + case DC_ADDR3_SW_LINEAR: + out->SurfaceTiling[location] = dml_sw_linear; + break; + } + } + + switch (in->format) { + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + out->SourcePixelFormat[location] = dml_420_8; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + out->SourcePixelFormat[location] = dml_420_10; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + out->SourcePixelFormat[location] = dml_444_64; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + out->SourcePixelFormat[location] = dml_444_16; + break; + case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: + out->SourcePixelFormat[location] = dml_444_8; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + out->SourcePixelFormat[location] = dml_rgbe_alpha; + break; + default: + out->SourcePixelFormat[location] = dml_444_32; + break; + } +} + +static struct scaler_data *get_scaler_data_for_plane( + const struct dc_plane_state *in, + struct dc_state *context) +{ + int i; + struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; + + memset(temp_pipe, 0, sizeof(struct pipe_ctx)); + + for (i = 0; i < MAX_PIPES; i++) { + const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state == in && !pipe->prev_odm_pipe) { + temp_pipe->stream = pipe->stream; + temp_pipe->plane_state = pipe->plane_state; + temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; + temp_pipe->stream_res = pipe->stream_res; + resource_build_scaling_params(temp_pipe); + break; + } + } + + ASSERT(i < MAX_PIPES); + return &temp_pipe->plane_res.scl_data; +} + +static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, + const struct dc_stream_state *in, + const struct soc_bounding_box_st *soc) +{ + dml_uint_t width, height; + + if (in->timing.h_addressable > 3840) + width = 3840; + else + width = in->timing.h_addressable; // 4K max + + if (in->timing.v_addressable > 2160) + height = 2160; + else + height = in->timing.v_addressable; // 4K max + + out->CursorBPP[location] = dml_cur_32bit; + out->CursorWidth[location] = 256; + + out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes; + + out->ViewportWidth[location] = width; + out->ViewportHeight[location] = height; + out->ViewportStationary[location] = false; + out->ViewportWidthChroma[location] = 0; + out->ViewportHeightChroma[location] = 0; + out->ViewportXStart[location] = 0; + out->ViewportXStartC[location] = 0; + out->ViewportYStart[location] = 
0;
+ out->ViewportYStartC[location] = 0;
+
+ out->ScalerEnabled[location] = false;
+ out->HRatio[location] = 1.0;
+ out->VRatio[location] = 1.0;
+ out->HRatioChroma[location] = 0;
+ out->VRatioChroma[location] = 0;
+ out->HTaps[location] = 1;
+ out->VTaps[location] = 1;
+ out->HTapsChroma[location] = 0;
+ out->VTapsChroma[location] = 0;
+ out->SourceScan[location] = dml_rotation_0;
+ out->ScalerRecoutWidth[location] = width;
+
+ out->LBBitPerPixel[location] = 57;
+
+ out->DynamicMetadataEnable[location] = false;
+
+ out->NumberOfCursors[location] = 1;
+ out->UseMALLForStaticScreen[location] = dml_use_mall_static_screen_disable;
+ out->UseMALLForPStateChange[location] = dml_use_mall_pstate_change_disable;
+
+ out->DETSizeOverride[location] = 256;
+}
+
+static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location,
+ const struct dc_plane_state *in, struct dc_state *context,
+ const struct soc_bounding_box_st *soc)
+{
+ struct scaler_data *scaler_data = get_scaler_data_for_plane(in, context);
+
+ out->CursorBPP[location] = dml_cur_32bit;
+ out->CursorWidth[location] = 256;
+
+ out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes;
+
+ out->ViewportWidth[location] = scaler_data->viewport.width;
+ out->ViewportHeight[location] = scaler_data->viewport.height;
+ out->ViewportWidthChroma[location] = scaler_data->viewport_c.width;
+ out->ViewportHeightChroma[location] = scaler_data->viewport_c.height;
+ out->ViewportXStart[location] = scaler_data->viewport.x;
+ out->ViewportYStart[location] = scaler_data->viewport.y;
+ out->ViewportXStartC[location] = scaler_data->viewport_c.x;
+ out->ViewportYStartC[location] = scaler_data->viewport_c.y;
+ out->ViewportStationary[location] = false;
+
+ out->ScalerEnabled[location] = scaler_data->ratios.horz.value != dc_fixpt_one.value ||
+ scaler_data->ratios.horz_c.value != dc_fixpt_one.value ||
+ scaler_data->ratios.vert.value != dc_fixpt_one.value ||
+ scaler_data->ratios.vert_c.value != dc_fixpt_one.value;
+
+ /* Current driver code base uses LBBitPerPixel as 57. There is a discrepancy
+ * from the HW/DML teams about this value. Initialize LBBitPerPixel with the
+ * value currently used in Navi3x.
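+ * (LBBitPerPixel is presumably the per-pixel line-buffer depth DML
+ * assumes when sizing the scaler line buffer; treat 57 as inherited
+ * from DCN3x rather than derived here.)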
+ */ + + out->LBBitPerPixel[location] = 57; + + if (out->ScalerEnabled[location] == false) { + out->HRatio[location] = 1; + out->HRatioChroma[location] = 1; + out->VRatio[location] = 1; + out->VRatioChroma[location] = 1; + } else { + /* Follow the original dml_wrapper.c code direction to fix scaling issues */ + out->HRatio[location] = (dml_float_t)scaler_data->ratios.horz.value / (1ULL << 32); + out->HRatioChroma[location] = (dml_float_t)scaler_data->ratios.horz_c.value / (1ULL << 32); + out->VRatio[location] = (dml_float_t)scaler_data->ratios.vert.value / (1ULL << 32); + out->VRatioChroma[location] = (dml_float_t)scaler_data->ratios.vert_c.value / (1ULL << 32); + } + + if (!scaler_data->taps.h_taps) { + out->HTaps[location] = 1; + out->HTapsChroma[location] = 1; + } else { + out->HTaps[location] = scaler_data->taps.h_taps; + out->HTapsChroma[location] = scaler_data->taps.h_taps_c; + } + if (!scaler_data->taps.v_taps) { + out->VTaps[location] = 1; + out->VTapsChroma[location] = 1; + } else { + out->VTaps[location] = scaler_data->taps.v_taps; + out->VTapsChroma[location] = scaler_data->taps.v_taps_c; + } + + out->SourceScan[location] = (enum dml_rotation_angle)in->rotation; + out->ScalerRecoutWidth[location] = in->dst_rect.width; + + out->DynamicMetadataEnable[location] = false; + out->DynamicMetadataLinesBeforeActiveRequired[location] = 0; + out->DynamicMetadataTransmittedBytes[location] = 0; + + out->NumberOfCursors[location] = 1; +} + +static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml2, + const struct dc_stream_state *stream, const struct dml_display_cfg_st *dml_dispcfg) +{ + int i = 0; + int location = -1; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] && dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i] == stream->stream_id) { + location = i; + break; + } + } + + return location; +} + +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *context, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) +{ + int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; + + if (!plane_id) + return false; + + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->stream_id == stream_id) { + for (j = 0; j < context->stream_status[i].plane_count; j++) { + if (context->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (j == plane_index))) { + *plane_id = (i << 16) | j; + return true; + } + } + } + } + + return false; +} + +static unsigned int map_plane_to_dml_display_cfg(const struct dml2_context *dml2, const struct dc_plane_state *plane, + const struct dc_state *context, const struct dml_display_cfg_st *dml_dispcfg, unsigned int stream_id, int plane_index) +{ + unsigned int plane_id; + int i = 0; + int location = -1; + + if (!get_plane_id(context->bw_ctx.dml2, context, plane, stream_id, plane_index, &plane_id)) { + ASSERT(false); + return -1; + } + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[i] && dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i] == plane_id) { + location = i; + break; + } + } + + return location; +} + +static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, struct dc_state *state) +{ + unsigned int i; + unsigned int pipe_index = 0; + unsigned int plane_index = 0; + struct 
dml2_dml_to_dc_pipe_mapping *dml_to_dc_pipe_mapping = &dml2->v20.scratch.dml_to_dc_pipe_mapping;
+
+ for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+ dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[i] = false;
+ dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index[i] = 0;
+ }
+
+ for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+ struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+ if (!pipe || !pipe->stream || !pipe->plane_state)
+ continue;
+
+ while (pipe) {
+ pipe_index = pipe->pipe_idx;
+
+ if (pipe->stream && dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[pipe_index] == false) {
+ dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index[pipe_index] = plane_index;
+ plane_index++;
+ dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[pipe_index] = true;
+ }
+
+ pipe = pipe->bottom_pipe;
+ }
+
+ plane_index = 0;
+ }
+}
+
+static void populate_dml_writeback_cfg_from_stream_state(struct dml_writeback_cfg_st *out,
+ unsigned int location, const struct dc_stream_state *in)
+{
+ if (in->num_wb_info > 0) {
+ for (int i = 0; i < __DML_NUM_DMB__; i++) {
+ const struct dc_writeback_info *wb_info = &in->writeback_info[i];
+ /* Current DML supports only one DWB per stream (a DML limitation). */
+ if (wb_info->wb_enabled) {
+ out->WritebackEnable[location] = wb_info->wb_enabled;
+ out->ActiveWritebacksPerSurface[location] = wb_info->dwb_params.cnv_params.src_width;
+ out->WritebackDestinationWidth[location] = wb_info->dwb_params.dest_width;
+ out->WritebackDestinationHeight[location] = wb_info->dwb_params.dest_height;
+
+ out->WritebackSourceWidth[location] = wb_info->dwb_params.cnv_params.crop_en ?
+ wb_info->dwb_params.cnv_params.crop_width :
+ wb_info->dwb_params.cnv_params.src_width;
+
+ out->WritebackSourceHeight[location] = wb_info->dwb_params.cnv_params.crop_en ?
+ wb_info->dwb_params.cnv_params.crop_height :
+ wb_info->dwb_params.cnv_params.src_height;
+ /* Current design does not have chroma scaling; needs follow-up. */
+ out->WritebackHTaps[location] = wb_info->dwb_params.scaler_taps.h_taps > 0 ?
+ wb_info->dwb_params.scaler_taps.h_taps : 1;
+ out->WritebackVTaps[location] = wb_info->dwb_params.scaler_taps.v_taps > 0 ?
+ wb_info->dwb_params.scaler_taps.v_taps : 1;
+
+ out->WritebackHRatio[location] = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_width /
+ (double)wb_info->dwb_params.dest_width :
+ (double)wb_info->dwb_params.cnv_params.src_width /
+ (double)wb_info->dwb_params.dest_width;
+ out->WritebackVRatio[location] = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_height /
+ (double)wb_info->dwb_params.dest_height :
+ (double)wb_info->dwb_params.cnv_params.src_height /
+ (double)wb_info->dwb_params.dest_height;
+ }
+ }
+ }
+}
+
+static void dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index(struct dml2_context *dml2, struct dc_state *context)
+{
+ int i;
+ struct pipe_ctx *current_pipe_context;
+
+ /* Scratch gets reset to zero in dml, but link encoder instance can be zero, so reset to -1 */
+ for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) {
+ dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[i] = -1;
+ }
+
+ /* If an HPO stream encoder is allocated to a pipe, get the instance of its allocated HPO link encoder */
+ for (i = 0; i < MAX_PIPES; i++) {
+ current_pipe_context = &context->res_ctx.pipe_ctx[i];
+ if (current_pipe_context->stream &&
+ current_pipe_context->stream_res.hpo_dp_stream_enc &&
+ current_pipe_context->link_res.hpo_dp_link_enc &&
+ dc_is_dp_signal(current_pipe_context->stream->signal)) {
+ dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[current_pipe_context->stream_res.hpo_dp_stream_enc->inst] =
+ current_pipe_context->link_res.hpo_dp_link_enc->inst;
+ }
+ }
+}
+
+void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg)
+{
+ int i = 0, j = 0, k = 0;
+ int disp_cfg_stream_location, disp_cfg_plane_location;
+ enum mall_stream_type stream_mall_type;
+ struct pipe_ctx *current_pipe_context;
+
+ for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] = false;
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[i] = false;
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] = false;
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] = false;
+ }
+
+ //Generally these are set by referencing our latest BB/IP params in dcn32_resource.c file
+ dml_dispcfg->plane.GPUVMEnable = dml2->v20.dml_core_ctx.ip.gpuvm_enable;
+ dml_dispcfg->plane.GPUVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.gpuvm_max_page_table_levels;
+ dml_dispcfg->plane.HostVMEnable = dml2->v20.dml_core_ctx.ip.hostvm_enable;
+ dml_dispcfg->plane.HostVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.hostvm_max_page_table_levels;
+ if (dml2->v20.dml_core_ctx.ip.hostvm_enable)
+ dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter;
+
+ dml2_populate_pipe_to_plane_index_mapping(dml2, context);
+ dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index(dml2, context);
+
+ for (i = 0; i < context->stream_count; i++) {
+ current_pipe_context = NULL;
+ for (k = 0; k < MAX_PIPES; k++) {
+ /* find one pipe allocated to this stream for the purpose of getting
+ * info about the link later */
+ if (context->streams[i] == context->res_ctx.pipe_ctx[k].stream) {
+ current_pipe_context = &context->res_ctx.pipe_ctx[k];
+ break;
+ }
+ }
+ disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg);
+ stream_mall_type = dc_state_get_stream_subvp_type(context, context->streams[i]);
+
+ if (disp_cfg_stream_location < 0)
+ disp_cfg_stream_location = dml_dispcfg->num_timings++;
+
+ ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+
+ populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]);
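+ /* The output config additionally consumes the pipe found above (for
+ * link information) and the HPO stream-to-link-encoder mapping built
+ * by dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index() above. */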
+ populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2);
+ populate_dml_writeback_cfg_from_stream_state(&dml_dispcfg->writeback,
+ disp_cfg_stream_location, context->streams[i]);
+
+ switch (context->streams[i]->debug.force_odm_combine_segments) {
+ case 2:
+ dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_2to1;
+ break;
+ case 4:
+ dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_4to1;
+ break;
+ default:
+ break;
+ }
+
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_stream_location] = context->streams[i]->stream_id;
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_stream_location] = true;
+
+ if (context->stream_status[i].plane_count == 0) {
+ disp_cfg_plane_location = dml_dispcfg->num_surfaces++;
+
+ populate_dummy_dml_surface_cfg(&dml_dispcfg->surface, disp_cfg_plane_location, context->streams[i]);
+ populate_dummy_dml_plane_cfg(&dml_dispcfg->plane, disp_cfg_plane_location,
+ context->streams[i], &dml2->v20.dml_core_ctx.soc);
+
+ dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location;
+
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true;
+ } else {
+ for (j = 0; j < context->stream_status[i].plane_count; j++) {
+ disp_cfg_plane_location = map_plane_to_dml_display_cfg(dml2,
+ context->stream_status[i].plane_states[j], context, dml_dispcfg, context->streams[i]->stream_id, j);
+
+ if (disp_cfg_plane_location < 0)
+ disp_cfg_plane_location = dml_dispcfg->num_surfaces++;
+
+ ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+
+ populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]);
+ populate_dml_plane_cfg_from_plane_state(
+ &dml_dispcfg->plane, disp_cfg_plane_location,
+ context->stream_status[i].plane_states[j], context,
+ &dml2->v20.dml_core_ctx.soc);
+
+ if (stream_mall_type == SUBVP_MAIN) {
+ dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport;
+ dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize;
+ } else if (stream_mall_type == SUBVP_PHANTOM) {
+ dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe;
+ dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_disable;
+ dml2->v20.dml_core_ctx.policy.ImmediateFlipRequirement[disp_cfg_plane_location] = dml_immediate_flip_not_required;
+ } else {
+ dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_disable;
+ dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize;
+ }
+
+ dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location;
+
+ if (get_plane_id(dml2, context, context->stream_status[i].plane_states[j], context->streams[i]->stream_id, j,
+ &dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location]))
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true;
+
+ if (j >= 1) {
+
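/* Secondary planes on the same stream (MPC combine) get their own copy
+ * of the stream's timing/output config and their own timing slot;
+ * num_timings is bumped again at the bottom of this block. */
+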
populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context, dml2); + switch (context->streams[i]->debug.force_odm_combine_segments) { + case 2: + dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1; + break; + case 4: + dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_4to1; + break; + default: + break; + } + + if (stream_mall_type == SUBVP_MAIN) + dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport; + else if (stream_mall_type == SUBVP_PHANTOM) + dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe; + + dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_plane_location] = context->streams[i]->stream_id; + dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_plane_location] = true; + + dml_dispcfg->num_timings++; + } + } + } + } +} + +void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, + struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, + struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, + struct pipe_ctx *out) +{ + memset(&out->rq_regs, 0, sizeof(out->rq_regs)); + out->rq_regs.rq_regs_l.chunk_size = rq_regs->rq_regs_l.chunk_size; + out->rq_regs.rq_regs_l.min_chunk_size = rq_regs->rq_regs_l.min_chunk_size; + out->rq_regs.rq_regs_l.meta_chunk_size = rq_regs->rq_regs_l.meta_chunk_size; + out->rq_regs.rq_regs_l.min_meta_chunk_size = rq_regs->rq_regs_l.min_meta_chunk_size; + out->rq_regs.rq_regs_l.dpte_group_size = rq_regs->rq_regs_l.dpte_group_size; + out->rq_regs.rq_regs_l.mpte_group_size = rq_regs->rq_regs_l.mpte_group_size; + out->rq_regs.rq_regs_l.swath_height = rq_regs->rq_regs_l.swath_height; + out->rq_regs.rq_regs_l.pte_row_height_linear = rq_regs->rq_regs_l.pte_row_height_linear; + + out->rq_regs.rq_regs_c.chunk_size = rq_regs->rq_regs_c.chunk_size; + out->rq_regs.rq_regs_c.min_chunk_size = rq_regs->rq_regs_c.min_chunk_size; + out->rq_regs.rq_regs_c.meta_chunk_size = rq_regs->rq_regs_c.meta_chunk_size; + out->rq_regs.rq_regs_c.min_meta_chunk_size = rq_regs->rq_regs_c.min_meta_chunk_size; + out->rq_regs.rq_regs_c.dpte_group_size = rq_regs->rq_regs_c.dpte_group_size; + out->rq_regs.rq_regs_c.mpte_group_size = rq_regs->rq_regs_c.mpte_group_size; + out->rq_regs.rq_regs_c.swath_height = rq_regs->rq_regs_c.swath_height; + out->rq_regs.rq_regs_c.pte_row_height_linear = rq_regs->rq_regs_c.pte_row_height_linear; + + out->rq_regs.drq_expansion_mode = rq_regs->drq_expansion_mode; + out->rq_regs.prq_expansion_mode = rq_regs->prq_expansion_mode; + out->rq_regs.mrq_expansion_mode = rq_regs->mrq_expansion_mode; + out->rq_regs.crq_expansion_mode = rq_regs->crq_expansion_mode; + out->rq_regs.plane1_base_address = rq_regs->plane1_base_address; + + memset(&out->dlg_regs, 0, sizeof(out->dlg_regs)); + out->dlg_regs.refcyc_h_blank_end = disp_dlg_regs->refcyc_h_blank_end; + out->dlg_regs.dlg_vblank_end = disp_dlg_regs->dlg_vblank_end; + out->dlg_regs.min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start; + out->dlg_regs.refcyc_per_htotal = disp_dlg_regs->refcyc_per_htotal; + out->dlg_regs.refcyc_x_after_scaler = disp_dlg_regs->refcyc_x_after_scaler; + out->dlg_regs.dst_y_after_scaler = disp_dlg_regs->dst_y_after_scaler; + 
out->dlg_regs.dst_y_prefetch = disp_dlg_regs->dst_y_prefetch; + out->dlg_regs.dst_y_per_vm_vblank = disp_dlg_regs->dst_y_per_vm_vblank; + out->dlg_regs.dst_y_per_row_vblank = disp_dlg_regs->dst_y_per_row_vblank; + out->dlg_regs.dst_y_per_vm_flip = disp_dlg_regs->dst_y_per_vm_flip; + out->dlg_regs.dst_y_per_row_flip = disp_dlg_regs->dst_y_per_row_flip; + out->dlg_regs.ref_freq_to_pix_freq = disp_dlg_regs->ref_freq_to_pix_freq; + out->dlg_regs.vratio_prefetch = disp_dlg_regs->vratio_prefetch; + out->dlg_regs.vratio_prefetch_c = disp_dlg_regs->vratio_prefetch_c; + out->dlg_regs.refcyc_per_pte_group_vblank_l = disp_dlg_regs->refcyc_per_pte_group_vblank_l; + out->dlg_regs.refcyc_per_pte_group_vblank_c = disp_dlg_regs->refcyc_per_pte_group_vblank_c; + out->dlg_regs.refcyc_per_meta_chunk_vblank_l = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; + out->dlg_regs.refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_c; + out->dlg_regs.refcyc_per_pte_group_flip_l = disp_dlg_regs->refcyc_per_pte_group_flip_l; + out->dlg_regs.refcyc_per_pte_group_flip_c = disp_dlg_regs->refcyc_per_pte_group_flip_c; + out->dlg_regs.refcyc_per_meta_chunk_flip_l = disp_dlg_regs->refcyc_per_meta_chunk_flip_l; + out->dlg_regs.refcyc_per_meta_chunk_flip_c = disp_dlg_regs->refcyc_per_meta_chunk_flip_c; + out->dlg_regs.dst_y_per_pte_row_nom_l = disp_dlg_regs->dst_y_per_pte_row_nom_l; + out->dlg_regs.dst_y_per_pte_row_nom_c = disp_dlg_regs->dst_y_per_pte_row_nom_c; + out->dlg_regs.refcyc_per_pte_group_nom_l = disp_dlg_regs->refcyc_per_pte_group_nom_l; + out->dlg_regs.refcyc_per_pte_group_nom_c = disp_dlg_regs->refcyc_per_pte_group_nom_c; + out->dlg_regs.dst_y_per_meta_row_nom_l = disp_dlg_regs->dst_y_per_meta_row_nom_l; + out->dlg_regs.dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_c; + out->dlg_regs.refcyc_per_meta_chunk_nom_l = disp_dlg_regs->refcyc_per_meta_chunk_nom_l; + out->dlg_regs.refcyc_per_meta_chunk_nom_c = disp_dlg_regs->refcyc_per_meta_chunk_nom_c; + out->dlg_regs.refcyc_per_line_delivery_pre_l = disp_dlg_regs->refcyc_per_line_delivery_pre_l; + out->dlg_regs.refcyc_per_line_delivery_pre_c = disp_dlg_regs->refcyc_per_line_delivery_pre_c; + out->dlg_regs.refcyc_per_line_delivery_l = disp_dlg_regs->refcyc_per_line_delivery_l; + out->dlg_regs.refcyc_per_line_delivery_c = disp_dlg_regs->refcyc_per_line_delivery_c; + out->dlg_regs.refcyc_per_vm_group_vblank = disp_dlg_regs->refcyc_per_vm_group_vblank; + out->dlg_regs.refcyc_per_vm_group_flip = disp_dlg_regs->refcyc_per_vm_group_flip; + out->dlg_regs.refcyc_per_vm_req_vblank = disp_dlg_regs->refcyc_per_vm_req_vblank; + out->dlg_regs.refcyc_per_vm_req_flip = disp_dlg_regs->refcyc_per_vm_req_flip; + out->dlg_regs.dst_y_offset_cur0 = disp_dlg_regs->dst_y_offset_cur0; + out->dlg_regs.chunk_hdl_adjust_cur0 = disp_dlg_regs->chunk_hdl_adjust_cur0; + out->dlg_regs.dst_y_offset_cur1 = disp_dlg_regs->dst_y_offset_cur1; + out->dlg_regs.chunk_hdl_adjust_cur1 = disp_dlg_regs->chunk_hdl_adjust_cur1; + out->dlg_regs.vready_after_vcount0 = disp_dlg_regs->vready_after_vcount0; + out->dlg_regs.dst_y_delta_drq_limit = disp_dlg_regs->dst_y_delta_drq_limit; + out->dlg_regs.refcyc_per_vm_dmdata = disp_dlg_regs->refcyc_per_vm_dmdata; + out->dlg_regs.dmdata_dl_delta = disp_dlg_regs->dmdata_dl_delta; + + memset(&out->ttu_regs, 0, sizeof(out->ttu_regs)); + out->ttu_regs.qos_level_low_wm = disp_ttu_regs->qos_level_low_wm; + out->ttu_regs.qos_level_high_wm = disp_ttu_regs->qos_level_high_wm; + out->ttu_regs.min_ttu_vblank = disp_ttu_regs->min_ttu_vblank; + 
out->ttu_regs.qos_level_flip = disp_ttu_regs->qos_level_flip; + out->ttu_regs.refcyc_per_req_delivery_l = disp_ttu_regs->refcyc_per_req_delivery_l; + out->ttu_regs.refcyc_per_req_delivery_c = disp_ttu_regs->refcyc_per_req_delivery_c; + out->ttu_regs.refcyc_per_req_delivery_cur0 = disp_ttu_regs->refcyc_per_req_delivery_cur0; + out->ttu_regs.refcyc_per_req_delivery_cur1 = disp_ttu_regs->refcyc_per_req_delivery_cur1; + out->ttu_regs.refcyc_per_req_delivery_pre_l = disp_ttu_regs->refcyc_per_req_delivery_pre_l; + out->ttu_regs.refcyc_per_req_delivery_pre_c = disp_ttu_regs->refcyc_per_req_delivery_pre_c; + out->ttu_regs.refcyc_per_req_delivery_pre_cur0 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur0; + out->ttu_regs.refcyc_per_req_delivery_pre_cur1 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur1; + out->ttu_regs.qos_level_fixed_l = disp_ttu_regs->qos_level_fixed_l; + out->ttu_regs.qos_level_fixed_c = disp_ttu_regs->qos_level_fixed_c; + out->ttu_regs.qos_level_fixed_cur0 = disp_ttu_regs->qos_level_fixed_cur0; + out->ttu_regs.qos_level_fixed_cur1 = disp_ttu_regs->qos_level_fixed_cur1; + out->ttu_regs.qos_ramp_disable_l = disp_ttu_regs->qos_ramp_disable_l; + out->ttu_regs.qos_ramp_disable_c = disp_ttu_regs->qos_ramp_disable_c; + out->ttu_regs.qos_ramp_disable_cur0 = disp_ttu_regs->qos_ramp_disable_cur0; + out->ttu_regs.qos_ramp_disable_cur1 = disp_ttu_regs->qos_ramp_disable_cur1; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.h new file mode 100644 index 000000000000..d764773938f4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DML2_TRANSLATION_HELPER_H__ +#define __DML2_TRANSLATION_HELPER_H__ + +void dml2_init_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out); +void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out); +void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, + const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out); +void dml2_translate_ip_params(const struct dc *in_dc, struct ip_params_st *out); +void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box_st *out); +void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states); +void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); +void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out); +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe); + +#endif //__DML2_TRANSLATION_HELPER_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c new file mode 100644 index 000000000000..9a33158b63bf --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c @@ -0,0 +1,560 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +//#include "dml2_utils.h" +#include "display_mode_core.h" +#include "dml_display_rq_dlg_calc.h" +#include "dml2_internal_types.h" +#include "dml2_translation_helper.h" +#include "dml2_utils.h" + +void dml2_util_copy_dml_timing(struct dml_timing_cfg_st *dml_timing_array, unsigned int dst_index, unsigned int src_index) +{ + dml_timing_array->HTotal[dst_index] = dml_timing_array->HTotal[src_index]; + dml_timing_array->VTotal[dst_index] = dml_timing_array->VTotal[src_index]; + dml_timing_array->HBlankEnd[dst_index] = dml_timing_array->HBlankEnd[src_index]; + dml_timing_array->VBlankEnd[dst_index] = dml_timing_array->VBlankEnd[src_index]; + dml_timing_array->RefreshRate[dst_index] = dml_timing_array->RefreshRate[src_index]; + dml_timing_array->VFrontPorch[dst_index] = dml_timing_array->VFrontPorch[src_index]; + dml_timing_array->PixelClock[dst_index] = dml_timing_array->PixelClock[src_index]; + dml_timing_array->HActive[dst_index] = dml_timing_array->HActive[src_index]; + dml_timing_array->VActive[dst_index] = dml_timing_array->VActive[src_index]; + dml_timing_array->Interlace[dst_index] = dml_timing_array->Interlace[src_index]; + dml_timing_array->DRRDisplay[dst_index] = dml_timing_array->DRRDisplay[src_index]; + dml_timing_array->VBlankNom[dst_index] = dml_timing_array->VBlankNom[src_index]; +} + +void dml2_util_copy_dml_plane(struct dml_plane_cfg_st *dml_plane_array, unsigned int dst_index, unsigned int src_index) +{ + dml_plane_array->GPUVMMinPageSizeKBytes[dst_index] = dml_plane_array->GPUVMMinPageSizeKBytes[src_index]; + dml_plane_array->ForceOneRowForFrame[dst_index] = dml_plane_array->ForceOneRowForFrame[src_index]; + dml_plane_array->PTEBufferModeOverrideEn[dst_index] = dml_plane_array->PTEBufferModeOverrideEn[src_index]; + dml_plane_array->PTEBufferMode[dst_index] = dml_plane_array->PTEBufferMode[src_index]; + dml_plane_array->ViewportWidth[dst_index] = dml_plane_array->ViewportWidth[src_index]; + dml_plane_array->ViewportHeight[dst_index] = dml_plane_array->ViewportHeight[src_index]; + dml_plane_array->ViewportWidthChroma[dst_index] = dml_plane_array->ViewportWidthChroma[src_index]; + dml_plane_array->ViewportHeightChroma[dst_index] = dml_plane_array->ViewportHeightChroma[src_index]; + dml_plane_array->ViewportXStart[dst_index] = dml_plane_array->ViewportXStart[src_index]; + dml_plane_array->ViewportXStartC[dst_index] = dml_plane_array->ViewportXStartC[src_index]; + dml_plane_array->ViewportYStart[dst_index] = dml_plane_array->ViewportYStart[src_index]; + dml_plane_array->ViewportYStartC[dst_index] = dml_plane_array->ViewportYStartC[src_index]; + dml_plane_array->ViewportStationary[dst_index] = dml_plane_array->ViewportStationary[src_index]; + + dml_plane_array->ScalerEnabled[dst_index] = dml_plane_array->ScalerEnabled[src_index]; + dml_plane_array->HRatio[dst_index] = dml_plane_array->HRatio[src_index]; + dml_plane_array->VRatio[dst_index] = dml_plane_array->VRatio[src_index]; + dml_plane_array->HRatioChroma[dst_index] = dml_plane_array->HRatioChroma[src_index]; + dml_plane_array->VRatioChroma[dst_index] = dml_plane_array->VRatioChroma[src_index]; + dml_plane_array->HTaps[dst_index] = dml_plane_array->HTaps[src_index]; + dml_plane_array->VTaps[dst_index] = dml_plane_array->VTaps[src_index]; + dml_plane_array->HTapsChroma[dst_index] = dml_plane_array->HTapsChroma[src_index]; + dml_plane_array->VTapsChroma[dst_index] = dml_plane_array->VTapsChroma[src_index]; + dml_plane_array->LBBitPerPixel[dst_index] = dml_plane_array->LBBitPerPixel[src_index]; + + 
dml_plane_array->SourceScan[dst_index] = dml_plane_array->SourceScan[src_index]; + dml_plane_array->ScalerRecoutWidth[dst_index] = dml_plane_array->ScalerRecoutWidth[src_index]; + + dml_plane_array->DynamicMetadataEnable[dst_index] = dml_plane_array->DynamicMetadataEnable[src_index]; + dml_plane_array->DynamicMetadataLinesBeforeActiveRequired[dst_index] = dml_plane_array->DynamicMetadataLinesBeforeActiveRequired[src_index]; + dml_plane_array->DynamicMetadataTransmittedBytes[dst_index] = dml_plane_array->DynamicMetadataTransmittedBytes[src_index]; + dml_plane_array->DETSizeOverride[dst_index] = dml_plane_array->DETSizeOverride[src_index]; + + dml_plane_array->NumberOfCursors[dst_index] = dml_plane_array->NumberOfCursors[src_index]; + dml_plane_array->CursorWidth[dst_index] = dml_plane_array->CursorWidth[src_index]; + dml_plane_array->CursorBPP[dst_index] = dml_plane_array->CursorBPP[src_index]; + + dml_plane_array->UseMALLForStaticScreen[dst_index] = dml_plane_array->UseMALLForStaticScreen[src_index]; + dml_plane_array->UseMALLForPStateChange[dst_index] = dml_plane_array->UseMALLForPStateChange[src_index]; + + dml_plane_array->BlendingAndTiming[dst_index] = dml_plane_array->BlendingAndTiming[src_index]; +} + +void dml2_util_copy_dml_surface(struct dml_surface_cfg_st *dml_surface_array, unsigned int dst_index, unsigned int src_index) +{ + dml_surface_array->SurfaceTiling[dst_index] = dml_surface_array->SurfaceTiling[src_index]; + dml_surface_array->SourcePixelFormat[dst_index] = dml_surface_array->SourcePixelFormat[src_index]; + dml_surface_array->PitchY[dst_index] = dml_surface_array->PitchY[src_index]; + dml_surface_array->SurfaceWidthY[dst_index] = dml_surface_array->SurfaceWidthY[src_index]; + dml_surface_array->SurfaceHeightY[dst_index] = dml_surface_array->SurfaceHeightY[src_index]; + dml_surface_array->PitchC[dst_index] = dml_surface_array->PitchC[src_index]; + dml_surface_array->SurfaceWidthC[dst_index] = dml_surface_array->SurfaceWidthC[src_index]; + dml_surface_array->SurfaceHeightC[dst_index] = dml_surface_array->SurfaceHeightC[src_index]; + + dml_surface_array->DCCEnable[dst_index] = dml_surface_array->DCCEnable[src_index]; + dml_surface_array->DCCMetaPitchY[dst_index] = dml_surface_array->DCCMetaPitchY[src_index]; + dml_surface_array->DCCMetaPitchC[dst_index] = dml_surface_array->DCCMetaPitchC[src_index]; + + dml_surface_array->DCCRateLuma[dst_index] = dml_surface_array->DCCRateLuma[src_index]; + dml_surface_array->DCCRateChroma[dst_index] = dml_surface_array->DCCRateChroma[src_index]; + dml_surface_array->DCCFractionOfZeroSizeRequestsLuma[dst_index] = dml_surface_array->DCCFractionOfZeroSizeRequestsLuma[src_index]; + dml_surface_array->DCCFractionOfZeroSizeRequestsChroma[dst_index] = dml_surface_array->DCCFractionOfZeroSizeRequestsChroma[src_index]; +} + +void dml2_util_copy_dml_output(struct dml_output_cfg_st *dml_output_array, unsigned int dst_index, unsigned int src_index) +{ + dml_output_array->DSCInputBitPerComponent[dst_index] = dml_output_array->DSCInputBitPerComponent[src_index]; + dml_output_array->OutputFormat[dst_index] = dml_output_array->OutputFormat[src_index]; + dml_output_array->OutputEncoder[dst_index] = dml_output_array->OutputEncoder[src_index]; + dml_output_array->OutputMultistreamId[dst_index] = dml_output_array->OutputMultistreamId[src_index]; + dml_output_array->OutputMultistreamEn[dst_index] = dml_output_array->OutputMultistreamEn[src_index]; + dml_output_array->OutputBpp[dst_index] = dml_output_array->OutputBpp[src_index]; + 
dml_output_array->PixelClockBackEnd[dst_index] = dml_output_array->PixelClockBackEnd[src_index]; + dml_output_array->DSCEnable[dst_index] = dml_output_array->DSCEnable[src_index]; + dml_output_array->OutputLinkDPLanes[dst_index] = dml_output_array->OutputLinkDPLanes[src_index]; + dml_output_array->OutputLinkDPRate[dst_index] = dml_output_array->OutputLinkDPRate[src_index]; + dml_output_array->ForcedOutputLinkBPP[dst_index] = dml_output_array->ForcedOutputLinkBPP[src_index]; + dml_output_array->AudioSampleRate[dst_index] = dml_output_array->AudioSampleRate[src_index]; + dml_output_array->AudioSampleLayout[dst_index] = dml_output_array->AudioSampleLayout[src_index]; +} + +unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, enum dml_output_encoder_class encoder, bool dsc_enabled) +{ + switch (encoder) { + case dml_dp: + case dml_edp: + return 2; + case dml_dp2p0: + if (dsc_enabled || force_odm_4to1) + return 4; + else + return 2; + case dml_hdmi: + return 1; + case dml_hdmifrl: + if (force_odm_4to1) + return 4; + else + return 2; + default: + return 1; + } +} + +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) +{ + if (pipe_ctx == NULL || pipe_ctx->stream == NULL) + return false; + + /* If this assert is hit then we have a link encoder dynamic management issue */ + ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); + + return (pipe_ctx->stream_res.hpo_dp_stream_enc && + pipe_ctx->link_res.hpo_dp_link_enc && + dc_is_dp_signal(pipe_ctx->stream->signal)); +} + +bool is_dtbclk_required(const struct dc *dc, struct dc_state *context) +{ + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i])) + return true; + } + return false; +} + +void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_state *context) +{ + context->bw_ctx.bw.dcn.clk.dispclk_khz = out_clks->dispclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = out_clks->dcfclk_khz; + context->bw_ctx.bw.dcn.clk.dramclk_khz = out_clks->uclk_mts / 16; + context->bw_ctx.bw.dcn.clk.fclk_khz = out_clks->fclk_khz; + context->bw_ctx.bw.dcn.clk.phyclk_khz = out_clks->phyclk_khz; + context->bw_ctx.bw.dcn.clk.socclk_khz = out_clks->socclk_khz; + context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = out_clks->ref_dtbclk_khz; + context->bw_ctx.bw.dcn.clk.p_state_change_support = out_clks->p_state_supported; +} + +int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id) +{ + int i; + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] && ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[i] == stream_id) + return i; + } + + return -1; +} + +static int find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id) +{ + int i; + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] && ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[i] == plane_id) + return i; + } + + return -1; +} + +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) +{ + unsigned int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; + + if (!plane_id) 
+ return false; + + for (i = 0; i < state->stream_count; i++) { + if (state->streams[i]->stream_id == stream_id) { + for (j = 0; j < state->stream_status[i].plane_count; j++) { + if (state->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (j == plane_index))) { + *plane_id = (i << 16) | j; + return true; + } + } + } + } + + return false; +} + +static void populate_pipe_ctx_dlg_params_from_dml(struct pipe_ctx *pipe_ctx, struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx) +{ + unsigned int hactive, vactive, hblank_start, vblank_start, hblank_end, vblank_end; + struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; + + hactive = timing->h_addressable + timing->h_border_left + timing->h_border_right; + vactive = timing->v_addressable + timing->v_border_bottom + timing->v_border_top; + hblank_start = pipe_ctx->stream->timing.h_total - pipe_ctx->stream->timing.h_front_porch; + vblank_start = pipe_ctx->stream->timing.v_total - pipe_ctx->stream->timing.v_front_porch; + + hblank_end = hblank_start - timing->h_addressable - timing->h_border_left - timing->h_border_right; + vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom; + + pipe_ctx->pipe_dlg_param.vstartup_start = dml_get_vstartup_calculated(mode_lib, pipe_idx); + pipe_ctx->pipe_dlg_param.vupdate_offset = dml_get_vupdate_offset(mode_lib, pipe_idx); + pipe_ctx->pipe_dlg_param.vupdate_width = dml_get_vupdate_width(mode_lib, pipe_idx); + pipe_ctx->pipe_dlg_param.vready_offset = dml_get_vready_offset(mode_lib, pipe_idx); + + pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst; + + pipe_ctx->pipe_dlg_param.hactive = hactive; + pipe_ctx->pipe_dlg_param.vactive = vactive; + pipe_ctx->pipe_dlg_param.htotal = pipe_ctx->stream->timing.h_total; + pipe_ctx->pipe_dlg_param.vtotal = pipe_ctx->stream->timing.v_total; + pipe_ctx->pipe_dlg_param.hblank_end = hblank_end; + pipe_ctx->pipe_dlg_param.vblank_end = vblank_end; + pipe_ctx->pipe_dlg_param.hblank_start = hblank_start; + pipe_ctx->pipe_dlg_param.vblank_start = vblank_start; + pipe_ctx->pipe_dlg_param.vfront_porch = pipe_ctx->stream->timing.v_front_porch; + pipe_ctx->pipe_dlg_param.pixel_rate_mhz = pipe_ctx->stream->timing.pix_clk_100hz / 10000.00; + pipe_ctx->pipe_dlg_param.refresh_rate = ((timing->pix_clk_100hz * 100) / timing->h_total) / timing->v_total; + pipe_ctx->pipe_dlg_param.vtotal_max = pipe_ctx->stream->adjust.v_total_max; + pipe_ctx->pipe_dlg_param.vtotal_min = pipe_ctx->stream->adjust.v_total_min; + pipe_ctx->pipe_dlg_param.recout_height = pipe_ctx->plane_res.scl_data.recout.height; + pipe_ctx->pipe_dlg_param.recout_width = pipe_ctx->plane_res.scl_data.recout.width; + pipe_ctx->pipe_dlg_param.full_recout_height = pipe_ctx->plane_res.scl_data.recout.height; + pipe_ctx->pipe_dlg_param.full_recout_width = pipe_ctx->plane_res.scl_data.recout.width; +} + +void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt) +{ + unsigned int dc_pipe_ctx_index, dml_pipe_idx, plane_id; + enum mall_stream_type pipe_mall_type; + struct dml2_calculate_rq_and_dlg_params_scratch *s = &in_ctx->v20.scratch.calculate_rq_and_dlg_params_scratch; + + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (unsigned int)in_ctx->v20.dml_core_ctx.mp.DCFCLKDeepSleep * 1000; + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + + if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported) + 
context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false; + else + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true; + + if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) + context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; + + context->bw_ctx.bw.dcn.compbuf_size_kb = in_ctx->v20.dml_core_ctx.ip.config_return_buffer_size_in_kbytes; + + for (dc_pipe_ctx_index = 0; dc_pipe_ctx_index < pipe_cnt; dc_pipe_ctx_index++) { + if (!context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream) + continue; + /* The DML2 and the DC logic of determining pipe indices are different from each other so + * there is a need to know which DML pipe index maps to which DC pipe. The code below + * finds a dml_pipe_index from the plane id if a plane is valid. If a plane is not valid then + * it finds a dml_pipe_index from the stream id. */ + if (get_plane_id(in_ctx, context, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state, + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id, + in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[context->res_ctx.pipe_ctx[dc_pipe_ctx_index].pipe_idx], &plane_id)) { + dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id); + } else { + dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id); + } + + if (dml_pipe_idx == 0xFFFFFFFF) + continue; + ASSERT(in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[dml_pipe_idx]); + ASSERT(in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_pipe_idx] == context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id); + + /* Use the dml_pipe_index here for the getters to fetch the correct values and dc_pipe_index in the pipe_ctx to populate them + * at the right locations. + */ + populate_pipe_ctx_dlg_params_from_dml(&context->res_ctx.pipe_ctx[dc_pipe_ctx_index], &context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx); + + pipe_mall_type = dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[dc_pipe_ctx_index]); + if (pipe_mall_type == SUBVP_PHANTOM) { + // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = 0; + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = false; + } else { + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = dml_get_det_buffer_size_kbytes(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx); + // Unbounded requesting should not ever be used when more than 1 pipe is enabled. 
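+ // The DML-computed value is copied through here on the assumption
+ // that DML already accounts for the active pipe count.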
+ context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = in_ctx->v20.dml_core_ctx.ms.UnboundedRequestEnabledThisState; + } + + context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb; + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_res.bw.dppclk_khz = dml_get_dppclk_calculated(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx) * 1000; + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_res.bw.dppclk_khz) + context->bw_ctx.bw.dcn.clk.dppclk_khz = context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_res.bw.dppclk_khz; + + dml_rq_dlg_get_rq_reg(&s->rq_regs, &in_ctx->v20.dml_core_ctx, dml_pipe_idx); + dml_rq_dlg_get_dlg_reg(&s->disp_dlg_regs, &s->disp_ttu_regs, &in_ctx->v20.dml_core_ctx, dml_pipe_idx); + dml2_update_pipe_ctx_dchub_regs(&s->rq_regs, &s->disp_dlg_regs, &s->disp_ttu_regs, &out_new_hw_state->pipe_ctx[dc_pipe_ctx_index]); + + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes = dml_get_surface_size_for_mall(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx); + + /* Reuse MALL Allocation Sizes logic from dcn32_fpu.c */ + /* Count from active, top pipes per plane only. Only add mall_ss_size_bytes for each unique plane. */ + if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream && context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state && + (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe == NULL || + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state != context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe->plane_state) && + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].prev_odm_pipe == NULL) { + /* SS: all active surfaces stored in MALL */ + if (pipe_mall_type != SUBVP_PHANTOM) { + context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes; + } else { + /* SUBVP: phantom surfaces only stored in MALL */ + context->bw_ctx.bw.dcn.mall_subvp_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes; + } + } + } + + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; + + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = in_ctx->v20.dml_core_ctx.states.state_array[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx].dppclk_mhz + * 1000; + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = in_ctx->v20.dml_core_ctx.states.state_array[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx].dispclk_mhz + * 1000; + + if (dc->config.forced_clocks || dc->debug.max_disp_clk) { + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz ; + } +} + +void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx) +{ + watermark->urgent_ns = dml_get_wm_urgent(dml_core_ctx) * 1000; + watermark->cstate_pstate.cstate_enter_plus_exit_ns = dml_get_wm_stutter_enter_exit(dml_core_ctx) * 1000; + watermark->cstate_pstate.cstate_exit_ns = dml_get_wm_stutter_exit(dml_core_ctx) * 1000; + watermark->cstate_pstate.pstate_change_ns = dml_get_wm_dram_clock_change(dml_core_ctx) * 1000; + watermark->pte_meta_urgent_ns = dml_get_wm_memory_trip(dml_core_ctx) * 1000; + watermark->frac_urg_bw_nom = dml_get_fraction_of_urgent_bandwidth(dml_core_ctx) * 1000; + 
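/* The dml_get_wm_*() getters report times in microseconds; the * 1000
+ * converts to the nanoseconds the *_ns fields expect (e.g. 4.0 us
+ * becomes 4000 ns). The frac_urg_bw_* values are fractions, apparently
+ * scaled by 1000 to stay integral. */
+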
watermark->frac_urg_bw_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(dml_core_ctx) * 1000;
+ watermark->urgent_latency_ns = dml_get_urgent_latency(dml_core_ctx) * 1000;
+ watermark->cstate_pstate.fclk_pstate_change_ns = dml_get_wm_fclk_change(dml_core_ctx) * 1000;
+ watermark->usr_retraining_ns = dml_get_wm_usr_retraining(dml_core_ctx) * 1000;
+ watermark->cstate_pstate.cstate_enter_plus_exit_z8_ns = dml_get_wm_z8_stutter_enter_exit(dml_core_ctx) * 1000;
+ watermark->cstate_pstate.cstate_exit_z8_ns = dml_get_wm_z8_stutter(dml_core_ctx) * 1000;
+}
+
+unsigned int dml2_calc_max_scaled_time(
+ unsigned int time_per_pixel,
+ enum mmhubbub_wbif_mode mode,
+ unsigned int urgent_watermark)
+{
+ unsigned int time_per_byte = 0;
+ unsigned int total_free_entry = 0xb40;
+ unsigned int buf_lh_capability;
+ unsigned int max_scaled_time;
+
+ if (mode == PACKED_444) /* packed mode 32 bpp */
+ time_per_byte = time_per_pixel/4;
+ else if (mode == PACKED_444_FP16) /* packed mode 64 bpp */
+ time_per_byte = time_per_pixel/8;
+
+ if (time_per_byte == 0)
+ time_per_byte = 1;
+
+ buf_lh_capability = (total_free_entry*time_per_byte*32) >> 6; /* time_per_byte is in u6.6 */
+ max_scaled_time = buf_lh_capability - urgent_watermark;
+ return max_scaled_time;
+}
+
+void dml2_extract_writeback_wm(struct dc_state *context, struct display_mode_lib_st *dml_core_ctx)
+{
+ int i, j = 0;
+ struct mcif_arb_params *wb_arb_params = NULL;
+ struct dcn_bw_writeback *bw_writeback = NULL;
+ enum mmhubbub_wbif_mode wbif_mode = PACKED_444_FP16; /* for now */
+
+ if (context->stream_count != 0) {
+ for (i = 0; i < context->stream_count; i++) {
+ if (context->streams[i]->num_wb_info != 0)
+ j++;
+ }
+ }
+ if (j == 0) /* no DWB active */
+ return;
+ for (i = 0; i < __DML_NUM_DMB__; i++) {
+ bw_writeback = &context->bw_ctx.bw.dcn.bw_writeback;
+ wb_arb_params = &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[i];
+
+ for (j = 0; j < 4; j++) {
+ /* Current DML only has one set of watermarks; needs follow-up. */
+ bw_writeback->mcif_wb_arb[i].cli_watermark[j] =
+ dml_get_wm_writeback_urgent(dml_core_ctx) * 1000;
+ bw_writeback->mcif_wb_arb[i].pstate_watermark[j] =
+ dml_get_wm_writeback_dram_clock_change(dml_core_ctx) * 1000;
+ }
+ if (context->res_ctx.pipe_ctx[i].stream->phy_pix_clk != 0) {
+ /* time_per_pixel should be in u6.6 format */
+ bw_writeback->mcif_wb_arb[i].time_per_pixel =
+ (1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk;
+ }
+ bw_writeback->mcif_wb_arb[i].slice_lines = 32;
+ bw_writeback->mcif_wb_arb[i].arbitration_slice = 2;
+ bw_writeback->mcif_wb_arb[i].max_scaled_time =
+ dml2_calc_max_scaled_time(wb_arb_params->time_per_pixel,
+ wbif_mode, wb_arb_params->cli_watermark[0]);
+ /* not required any more */
+ bw_writeback->mcif_wb_arb[i].dram_speed_change_duration =
+ dml_get_wm_writeback_dram_clock_change(dml_core_ctx) * 1000;
+
+ }
+}
+
+void dml2_initialize_det_scratch(struct dml2_context *in_ctx)
+{
+ int i;
+
+ for (i = 0; i < MAX_PLANES; i++) {
+ in_ctx->det_helper_scratch.dpps_per_surface[i] = 1;
+ }
+}
+
+static unsigned int find_planes_per_stream_and_stream_count(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg, int *num_of_planes_per_stream)
+{
+ unsigned int plane_index, stream_index = 0, num_of_streams;
+
+ for (plane_index = 0; plane_index < dml_dispcfg->num_surfaces; plane_index++) {
+ /* Number of planes per stream */
+ num_of_planes_per_stream[stream_index] += 1;
+
+ if (plane_index + 1 < dml_dispcfg->num_surfaces && dml_dispcfg->plane.BlendingAndTiming[plane_index] !=
dml_dispcfg->plane.BlendingAndTiming[plane_index + 1])
+ stream_index++;
+ }
+
+ num_of_streams = stream_index + 1;
+
+ return num_of_streams;
+}
+
+void dml2_apply_det_buffer_allocation_policy(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg)
+{
+ unsigned int num_of_streams = 0, plane_index = 0, max_det_size, stream_index = 0;
+ int num_of_planes_per_stream[__DML_NUM_PLANES__] = { 0 };
+
+ max_det_size = in_ctx->config.det_segment_size * in_ctx->config.max_segments_per_hubp;
+
+ num_of_streams = find_planes_per_stream_and_stream_count(in_ctx, dml_dispcfg, num_of_planes_per_stream);
+
+ for (plane_index = 0; plane_index < dml_dispcfg->num_surfaces; plane_index++) {
+
+ if (in_ctx->config.override_det_buffer_size_kbytes)
+ dml_dispcfg->plane.DETSizeOverride[plane_index] = max_det_size / in_ctx->config.dcn_pipe_count;
+ else {
+ dml_dispcfg->plane.DETSizeOverride[plane_index] = ((max_det_size / num_of_streams) / num_of_planes_per_stream[stream_index] / in_ctx->det_helper_scratch.dpps_per_surface[plane_index]);
+
+ /* If the override size is not divisible by det_segment_size then round it
+ * down to the nearest multiple of det_segment_size, as this is a requirement
+ * (the mask below assumes 64 kB segments).
+ */
+ if (dml_dispcfg->plane.DETSizeOverride[plane_index] % in_ctx->config.det_segment_size != 0) {
+ dml_dispcfg->plane.DETSizeOverride[plane_index] = dml_dispcfg->plane.DETSizeOverride[plane_index] & ~0x3F;
+ }
+
+ if (plane_index + 1 < dml_dispcfg->num_surfaces && dml_dispcfg->plane.BlendingAndTiming[plane_index] != dml_dispcfg->plane.BlendingAndTiming[plane_index + 1])
+ stream_index++;
+ }
+ }
+}
+
+bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc_state *display_state, struct dml2_helper_det_policy_scratch *det_scratch)
+{
+ unsigned int i = 0, dml_pipe_idx = 0, plane_id = 0;
+ unsigned int max_det_size, total_det_allocated = 0;
+ bool need_recalculation = false;
+
+ max_det_size = in_ctx->config.det_segment_size * in_ctx->config.max_segments_per_hubp;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ if (!display_state->res_ctx.pipe_ctx[i].stream)
+ continue;
+ if (get_plane_id(in_ctx, display_state, display_state->res_ctx.pipe_ctx[i].plane_state,
+ display_state->res_ctx.pipe_ctx[i].stream->stream_id,
+ in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[display_state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id))
+ dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id);
+ else
+ dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, display_state->res_ctx.pipe_ctx[i].stream->stream_id);
+
+ if (dml_pipe_idx == 0xFFFFFFFF)
+ continue;
+ total_det_allocated += dml_get_det_buffer_size_kbytes(&in_ctx->v20.dml_core_ctx, dml_pipe_idx);
+ if (total_det_allocated > max_det_size) {
+ need_recalculation = true;
+ }
+ }
+
+ /* Store the DPPPerSurface for correctly determining the number of planes in the next call.
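dml2_apply_det_buffer_allocation_policy() then divides each stream's DET share by these values on its next invocation.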
*/ + for (i = 0; i < MAX_PLANES; i++) { + det_scratch->dpps_per_surface[i] = in_ctx->v20.scratch.cur_display_config.hw.DPPPerSurface[i]; + } + + return need_recalculation; +} + +bool dml2_is_stereo_timing(const struct dc_stream_state *stream) +{ + bool is_stereo = false; + + if ((stream->view_format == + VIEW_3D_FORMAT_SIDE_BY_SIDE || + stream->view_format == + VIEW_3D_FORMAT_TOP_AND_BOTTOM) && + (stream->timing.timing_3d_format == + TIMING_3D_FORMAT_TOP_AND_BOTTOM || + stream->timing.timing_3d_format == + TIMING_3D_FORMAT_SIDE_BY_SIDE)) + is_stereo = true; + + return is_stereo; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.h new file mode 100644 index 000000000000..04fcfe637119 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+#ifndef _DML2_UTILS_H_
+#define _DML2_UTILS_H_
+
+#include "os_types.h"
+#include "dml2_dc_types.h"
+
+struct dc;
+struct dml_timing_cfg_st;
+struct dml2_dcn_clocks;
+struct dc_state;
+
+void dml2_util_copy_dml_timing(struct dml_timing_cfg_st *dml_timing_array, unsigned int dst_index, unsigned int src_index);
+void dml2_util_copy_dml_plane(struct dml_plane_cfg_st *dml_plane_array, unsigned int dst_index, unsigned int src_index);
+void dml2_util_copy_dml_surface(struct dml_surface_cfg_st *dml_surface_array, unsigned int dst_index, unsigned int src_index);
+void dml2_util_copy_dml_output(struct dml_output_cfg_st *dml_output_array, unsigned int dst_index, unsigned int src_index);
+unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, enum dml_output_encoder_class encoder, bool dsc_enabled);
+void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_state *context);
+void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx);
+void dml2_extract_writeback_wm(struct dc_state *context, struct display_mode_lib_st *dml_core_ctx);
+int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id);
+bool is_dtbclk_required(const struct dc *dc, struct dc_state *context);
+bool dml2_is_stereo_timing(const struct dc_stream_state *stream);
+unsigned int dml2_calc_max_scaled_time(
+		unsigned int time_per_pixel,
+		enum mmhubbub_wbif_mode mode,
+		unsigned int urgent_watermark);
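+
+/*
+ * These helpers are consumed by the wrapper after mode programming. A minimal
+ * sketch of the extraction sequence, with identifiers as used later in
+ * dml2_wrapper.c (out_clks and context are the caller's locals there):
+ *
+ *	dml2_copy_clocks_to_dc_state(&out_clks, context);
+ *	dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.a, &dml2->v20.dml_core_ctx);
+ *	dml2_extract_writeback_wm(context, &dml2->v20.dml_core_ctx);
+ */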
+
+/*
+ * dml2_dc_construct_pipes - This function will determine if we need additional pipes based
+ * on the DML calculated outputs for MPC, ODM and allocate them as necessary. This function
+ * could be called in dml_validate_build_resource after dml_mode_programming, like:
+ * {
+ * ...
+ * map_hw_resources(&s->cur_display_config, &s->mode_support_info);
+ * result = dml_mode_programming(&in_ctx->dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true);
+ * dml2_dc_construct_pipes(in_display_state, s->mode_support_info, out_hw_context);
+ * ...
+ * }
+ *
+ * @context: To obtain res_ctx and read other information like stream ID etc.
+ * @dml_mode_support_st : To get the ODM, MPC outputs as determined by the DML.
+ * @out_hw_context : Handle to the new hardware context.
+ *
+ *
+ * Return: None.
+ */
+void dml2_dc_construct_pipes(struct dc_state *context, struct dml_mode_support_info_st *dml_mode_support_st,
+		struct resource_context *out_hw_context);
+
+/*
+ * dml2_predict_pipe_split - This function is the dml2 version of predict split pipe. It predicts
+ * whether a pipe split is required or not and returns the result as a bool.
+ * @context : dc_state.
+ * @pipe : old_index is the index of the pipe as derived from pipe_idx.
+ * @index : index of the pipe
+ *
+ *
+ * Return: Returns the result as a boolean.
+ */
+bool dml2_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index);
+
+/*
+ * dml2_build_mapped_resource - This function is the dml2 version of build_mapped_resource.
+ * In case of ODM, we need to build pipe hardware params again as done in dcn20_build_mapped_resource.
+ * @dc : struct dc
+ * @context : struct dc_state.
+ * @stream : stream whose corresponding pipe params need to be modified.
+ *
+ *
+ * Return: Returns DC_OK if successful.
+ */
+enum dc_status dml2_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream);
+
+/*
+ * dml2_extract_rq_regs - This function will extract information needed for struct _vcs_dpi_display_rq_regs_st
+ * and populate it.
+ * @context: To obtain and populate the res_ctx->pipe_ctx->rq_regs with DML outputs.
+ * @support : This structure has the DML intermediate outputs required to populate rq_regs.
+ *
+ *
+ * Return: None.
+ */
+
+/*
+ * dml2_calculate_rq_and_dlg_params - This function will call into DML2 functions needed
+ * for populating rq, ttu and dlg param structures and populate it.
+ * @dc : struct dc
+ * @context : dc_state provides a handle to selectively populate pipe_ctx
+ * @out_new_hw_state: To obtain and populate the rq, dlg and ttu regs in
+ * out_new_hw_state->pipe_ctx with DML outputs.
+ * @in_ctx : This structure has the pointer to display_mode_lib_st.
+ * @pipe_cnt : DML functions to obtain RQ, TTU and DLG params need a pipe_index.
+ * This helps provide pipe_index in the pipe_cnt loop.
+ *
+ *
+ * Return: None.
+ */
+void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt);
+
+/*
+ * dml2_apply_det_buffer_allocation_policy - This function will determine the DET Buffer size
+ * and return the number of streams.
+ * @dml2 : Handle for dml2 context
+ * @dml_dispcfg : dml_dispcfg is the DML2 struct representing the current display config
+ * Return : None.
+ */
+void dml2_apply_det_buffer_allocation_policy(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg);
+
+/*
+ * dml2_verify_det_buffer_configuration - This function will verify if the allocated DET buffer exceeds
+ * the total available DET size and outputs a boolean to indicate if recalculation is needed.
+ * @dml2 : Handle for dml2 context
+ * @dml_dispcfg : dml_dispcfg is the DML2 struct representing the current display config
+ * @det_scratch : Pointer to the DET helper scratch
+ * Return : returns true if recalculation is required, false otherwise.
+ */
+bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc_state *display_state, struct dml2_helper_det_policy_scratch *det_scratch);
+
+/*
+ * dml2_initialize_det_scratch - This function will initialize the DET scratch space as per requirements.
+ * @dml2 : Handle for dml2 context
+ * Return : None
+ */
+void dml2_initialize_det_scratch(struct dml2_context *in_ctx);
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c
new file mode 100644
index 000000000000..9deb03a18ccc
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c
@@ -0,0 +1,704 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "display_mode_core.h" +#include "dml2_internal_types.h" +#include "dml2_utils.h" +#include "dml2_policy.h" +#include "dml2_translation_helper.h" +#include "dml2_mall_phantom.h" +#include "dml2_dc_resource_mgmt.h" +#include "dml21_wrapper.h" + +static void initialize_dml2_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out) +{ + if (dml2->config.use_native_soc_bb_construction) + dml2_init_ip_params(dml2, in_dc, out); + else + dml2_translate_ip_params(in_dc, out); +} + +static void initialize_dml2_soc_bbox(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out) +{ + if (dml2->config.use_native_soc_bb_construction) + dml2_init_socbb_params(dml2, in_dc, out); + else + dml2_translate_socbb_params(in_dc, out); +} + +static void initialize_dml2_soc_states(struct dml2_context *dml2, + const struct dc *in_dc, const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out) +{ + if (dml2->config.use_native_soc_bb_construction) + dml2_init_soc_states(dml2, in_dc, in_bbox, out); + else + dml2_translate_soc_states(in_dc, out, in_dc->dml.soc.num_states); +} + +static void map_hw_resources(struct dml2_context *dml2, + struct dml_display_cfg_st *in_out_display_cfg, struct dml_mode_support_info_st *mode_support_info) +{ + unsigned int num_pipes = 0; + int i, j; + + for (i = 0; i < __DML_NUM_PLANES__; i++) { + in_out_display_cfg->hw.ODMMode[i] = mode_support_info->ODMMode[i]; + in_out_display_cfg->hw.DPPPerSurface[i] = mode_support_info->DPPPerSurface[i]; + in_out_display_cfg->hw.DSCEnabled[i] = mode_support_info->DSCEnabled[i]; + in_out_display_cfg->hw.NumberOfDSCSlices[i] = mode_support_info->NumberOfDSCSlices[i]; + in_out_display_cfg->hw.DLGRefClkFreqMHz = 24; + if (dml2->v20.dml_core_ctx.project != dml_project_dcn35 && + dml2->v20.dml_core_ctx.project != dml_project_dcn36 && + dml2->v20.dml_core_ctx.project != dml_project_dcn351) { + /*dGPU default as 50Mhz*/ + in_out_display_cfg->hw.DLGRefClkFreqMHz = 50; + } + for (j = 0; j < mode_support_info->DPPPerSurface[i]; j++) { + if (i >= __DML2_WRAPPER_MAX_STREAMS_PLANES__) { + dml_print("DML::%s: Index out of bounds: i=%d, __DML2_WRAPPER_MAX_STREAMS_PLANES__=%d\n", + __func__, i, __DML2_WRAPPER_MAX_STREAMS_PLANES__); + break; + } + dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i]; + dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[num_pipes] = true; + dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i]; + dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[num_pipes] = true; + num_pipes++; + } + } +} + +static unsigned int pack_and_call_dml_mode_support_ex(struct dml2_context *dml2, + const struct dml_display_cfg_st *display_cfg, + struct dml_mode_support_info_st *evaluation_info, + enum dc_validate_mode validate_mode) +{ + struct 
dml2_wrapper_scratch *s = &dml2->v20.scratch; + + s->mode_support_params.mode_lib = &dml2->v20.dml_core_ctx; + s->mode_support_params.in_display_cfg = display_cfg; + if (validate_mode == DC_VALIDATE_MODE_ONLY) + s->mode_support_params.in_start_state_idx = dml2->v20.dml_core_ctx.states.num_states - 1; + else + s->mode_support_params.in_start_state_idx = 0; + s->mode_support_params.out_evaluation_info = evaluation_info; + + memset(evaluation_info, 0, sizeof(struct dml_mode_support_info_st)); + s->mode_support_params.out_lowest_state_idx = 0; + + return dml_mode_support_ex(&s->mode_support_params); +} + +static bool optimize_configuration(struct dml2_context *dml2, struct dml2_wrapper_optimize_configuration_params *p) +{ + int unused_dpps = p->ip_params->max_num_dpp; + int i; + int odms_needed; + int largest_blend_and_timing = 0; + bool optimization_done = false; + + for (i = 0; i < (int) p->cur_display_config->num_timings; i++) { + if (p->cur_display_config->plane.BlendingAndTiming[i] > largest_blend_and_timing) + largest_blend_and_timing = p->cur_display_config->plane.BlendingAndTiming[i]; + } + + if (p->new_policy != p->cur_policy) + *p->new_policy = *p->cur_policy; + + if (p->new_display_config != p->cur_display_config) + *p->new_display_config = *p->cur_display_config; + + + // Optimize Clocks + if (!optimization_done) { + if (largest_blend_and_timing == 0 && p->cur_policy->ODMUse[0] == dml_odm_use_policy_combine_as_needed && dml2->config.minimize_dispclk_using_odm) { + odms_needed = dml2_util_get_maximum_odm_combine_for_output(dml2->config.optimize_odm_4to1, + p->cur_display_config->output.OutputEncoder[0], p->cur_mode_support_info->DSCEnabled[0]) - 1; + + if (odms_needed <= unused_dpps) { + if (odms_needed == 1) { + p->new_policy->ODMUse[0] = dml_odm_use_policy_combine_2to1; + optimization_done = true; + } else if (odms_needed == 3) { + p->new_policy->ODMUse[0] = dml_odm_use_policy_combine_4to1; + optimization_done = true; + } else + optimization_done = false; + } + } + } + + return optimization_done; +} + +static int calculate_lowest_supported_state_for_temp_read(struct dml2_context *dml2, struct dc_state *display_state, + enum dc_validate_mode validate_mode) +{ + struct dml2_calculate_lowest_supported_state_for_temp_read_scratch *s = &dml2->v20.scratch.dml2_calculate_lowest_supported_state_for_temp_read_scratch; + struct dml2_wrapper_scratch *s_global = &dml2->v20.scratch; + + unsigned int dml_result = 0; + int result = -1, i, j; + + build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); + + /* Zero out before each call before proceeding */ + memset(s, 0, sizeof(struct dml2_calculate_lowest_supported_state_for_temp_read_scratch)); + memset(&s_global->mode_support_params, 0, sizeof(struct dml_mode_support_ex_params_st)); + memset(&s_global->dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); + + for (i = 0; i < dml2->config.dcn_pipe_count; i++) { + /* Calling resource_build_scaling_params will populate the pipe params + * with the necessary information needed for correct DML calculations + * This is also done in DML1 driver code path and hence display_state + * cannot be const. 
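+	 *
+	 * A minimal sketch of how a resource layer wires this callback up at
+	 * creation time (field names from struct dml2_dc_callbacks in
+	 * dml2_wrapper.h; the surrounding locals are hypothetical):
+	 *
+	 *	struct dml2_configuration_options opt = { 0 };
+	 *
+	 *	opt.dcn_pipe_count = pool->pipe_count;
+	 *	opt.callbacks.dc = dc;
+	 *	opt.callbacks.build_scaling_params = &resource_build_scaling_params;
+	 *	dml2_create(dc, &opt, &context->bw_ctx.dml2);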
+ */ + struct pipe_ctx *pipe = &display_state->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + if (!dml2->config.callbacks.build_scaling_params(pipe)) { + ASSERT(false); + return false; + } + } + } + + map_dc_state_into_dml_display_cfg(dml2, display_state, &s->cur_display_config); + + for (i = 0; i < dml2->v20.dml_core_ctx.states.num_states; i++) { + s->uclk_change_latencies[i] = dml2->v20.dml_core_ctx.states.state_array[i].dram_clock_change_latency_us; + } + + for (i = 0; i < 4; i++) { + for (j = 0; j < dml2->v20.dml_core_ctx.states.num_states; j++) { + dml2->v20.dml_core_ctx.states.state_array[j].dram_clock_change_latency_us = s_global->dummy_pstate_table[i].dummy_pstate_latency_us; + } + + dml_result = pack_and_call_dml_mode_support_ex(dml2, &s->cur_display_config, &s->evaluation_info, + validate_mode); + + if (dml_result && s->evaluation_info.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive) { + map_hw_resources(dml2, &s->cur_display_config, &s->evaluation_info); + dml_result = dml_mode_programming(&dml2->v20.dml_core_ctx, s_global->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true); + + ASSERT(dml_result); + + dml2_extract_watermark_set(&dml2->v20.g6_temp_read_watermark_set, &dml2->v20.dml_core_ctx); + dml2->v20.g6_temp_read_watermark_set.cstate_pstate.fclk_pstate_change_ns = dml2->v20.g6_temp_read_watermark_set.cstate_pstate.pstate_change_ns; + + result = s_global->mode_support_params.out_lowest_state_idx; + + while (dml2->v20.dml_core_ctx.states.state_array[result].dram_speed_mts < s_global->dummy_pstate_table[i].dram_speed_mts) + result++; + + break; + } + } + + for (i = 0; i < dml2->v20.dml_core_ctx.states.num_states; i++) { + dml2->v20.dml_core_ctx.states.state_array[i].dram_clock_change_latency_us = s->uclk_change_latencies[i]; + } + + return result; +} + +static void copy_dummy_pstate_table(struct dummy_pstate_entry *dest, struct dummy_pstate_entry *src, unsigned int num_entries) +{ + for (int i = 0; i < num_entries; i++) { + dest[i] = src[i]; + } +} + +static bool are_timings_requiring_odm_doing_blending(const struct dml_display_cfg_st *display_cfg, + const struct dml_mode_support_info_st *evaluation_info) +{ + unsigned int planes_per_timing[__DML_NUM_PLANES__] = {0}; + int i; + + for (i = 0; i < display_cfg->num_surfaces; i++) + planes_per_timing[display_cfg->plane.BlendingAndTiming[i]]++; + + for (i = 0; i < __DML_NUM_PLANES__; i++) { + if (planes_per_timing[i] > 1 && evaluation_info->ODMMode[i] != dml_odm_mode_bypass) + return true; + } + + return false; +} + +static bool does_configuration_meet_sw_policies(struct dml2_context *ctx, const struct dml_display_cfg_st *display_cfg, + const struct dml_mode_support_info_st *evaluation_info) +{ + bool pass = true; + + if (!ctx->config.enable_windowed_mpo_odm) { + if (are_timings_requiring_odm_doing_blending(display_cfg, evaluation_info)) + pass = false; + } + + return pass; +} + +static bool dml_mode_support_wrapper(struct dml2_context *dml2, + struct dc_state *display_state, + enum dc_validate_mode validate_mode) +{ + struct dml2_wrapper_scratch *s = &dml2->v20.scratch; + unsigned int result = 0, i; + unsigned int optimized_result = true; + + build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); + + /* Zero out before each call before proceeding */ + memset(&s->cur_display_config, 0, sizeof(struct dml_display_cfg_st)); + memset(&s->mode_support_params, 0, sizeof(struct dml_mode_support_ex_params_st)); + memset(&s->dml_to_dc_pipe_mapping, 0, 
sizeof(struct dml2_dml_to_dc_pipe_mapping));
+	memset(&s->optimize_configuration_params, 0, sizeof(struct dml2_wrapper_optimize_configuration_params));
+
+	for (i = 0; i < dml2->config.dcn_pipe_count; i++) {
+		/* Calling resource_build_scaling_params will populate the pipe params
+		 * with the necessary information needed for correct DML calculations.
+		 * This is also done in the DML1 driver code path and hence display_state
+		 * cannot be const.
+		 */
+		struct pipe_ctx *pipe = &display_state->res_ctx.pipe_ctx[i];
+
+		if (pipe->plane_state) {
+			if (!dml2->config.callbacks.build_scaling_params(pipe)) {
+				ASSERT(false);
+				return false;
+			}
+		}
+	}
+
+	map_dc_state_into_dml_display_cfg(dml2, display_state, &s->cur_display_config);
+	if (!dml2->config.skip_hw_state_mapping)
+		dml2_apply_det_buffer_allocation_policy(dml2, &s->cur_display_config);
+
+	result = pack_and_call_dml_mode_support_ex(dml2,
+		&s->cur_display_config,
+		&s->mode_support_info,
+		validate_mode);
+
+	if (result)
+		result = does_configuration_meet_sw_policies(dml2, &s->cur_display_config, &s->mode_support_info);
+
+	// Try to optimize
+	if (result) {
+		s->cur_policy = dml2->v20.dml_core_ctx.policy;
+		s->optimize_configuration_params.dml_core_ctx = &dml2->v20.dml_core_ctx;
+		s->optimize_configuration_params.config = &dml2->config;
+		s->optimize_configuration_params.ip_params = &dml2->v20.dml_core_ctx.ip;
+		s->optimize_configuration_params.cur_display_config = &s->cur_display_config;
+		s->optimize_configuration_params.cur_mode_support_info = &s->mode_support_info;
+		s->optimize_configuration_params.cur_policy = &s->cur_policy;
+		s->optimize_configuration_params.new_display_config = &s->new_display_config;
+		s->optimize_configuration_params.new_policy = &s->new_policy;
+
+		while (optimized_result && optimize_configuration(dml2, &s->optimize_configuration_params)) {
+			dml2->v20.dml_core_ctx.policy = s->new_policy;
+			optimized_result = pack_and_call_dml_mode_support_ex(dml2,
+				&s->new_display_config,
+				&s->mode_support_info,
+				validate_mode);
+
+			if (optimized_result)
+				optimized_result = does_configuration_meet_sw_policies(dml2, &s->new_display_config, &s->mode_support_info);
+
+			// If the new optimized state is supported, then set current = new
+			if (optimized_result) {
+				s->cur_display_config = s->new_display_config;
+				s->cur_policy = s->new_policy;
+			} else {
+				// Else, restore policy to current
+				dml2->v20.dml_core_ctx.policy = s->cur_policy;
+			}
+		}
+
+		// Optimize ended with a failed config, so we need to restore DML state to last passing
+		if (!optimized_result) {
+			result = pack_and_call_dml_mode_support_ex(dml2,
+				&s->cur_display_config,
+				&s->mode_support_info,
+				validate_mode);
+		}
+	}
+
+	if (result)
+		map_hw_resources(dml2, &s->cur_display_config, &s->mode_support_info);
+
+	return result;
+}
+
+static bool call_dml_mode_support_and_programming(struct dc_state *context, enum dc_validate_mode validate_mode)
+{
+	unsigned int result = 0;
+	unsigned int min_state = 0;
+	int min_state_for_g6_temp_read = 0;
+
+
+	if (!context)
+		return false;
+
+	struct dml2_context *dml2 = context->bw_ctx.dml2;
+	struct dml2_wrapper_scratch *s = &dml2->v20.scratch;
+
+	if (!context->streams[0]->sink->link->dc->caps.is_apu) {
+		min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context,
+			validate_mode);
+
+		ASSERT(min_state_for_g6_temp_read >= 0);
+	}
+
+	result = dml_mode_support_wrapper(dml2, context, validate_mode);
+
+	/* Upon trying to set certain frequencies in FRL, min_state_for_g6_temp_read is reported as
-1. This leads to an invalid value of min_state causing crashes later on. + * Use the default logic for min_state only when min_state_for_g6_temp_read is a valid value. In other cases, use the value calculated by the DML directly. + */ + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + if (min_state_for_g6_temp_read >= 0) + min_state = min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx ? min_state_for_g6_temp_read : s->mode_support_params.out_lowest_state_idx; + else + min_state = s->mode_support_params.out_lowest_state_idx; + } + + if (result) { + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + result = dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true); + } else { + result = dml_mode_programming(&dml2->v20.dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true); + } + } + return result; +} + +static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_state *context, + enum dc_validate_mode validate_mode) +{ + struct dml2_context *dml2 = context->bw_ctx.dml2; + struct dml2_wrapper_scratch *s = &dml2->v20.scratch; + struct dml2_dcn_clocks out_clks; + unsigned int result = 0; + bool need_recalculation = false; + uint32_t cstate_enter_plus_exit_z8_ns; + + if (context->stream_count == 0) { + unsigned int lowest_state_idx = 0; + + out_clks.p_state_supported = true; + out_clks.dispclk_khz = 0; /* No requirement, and lowest index will generally be maximum dispclk. */ + out_clks.dcfclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dcfclk_mhz * 1000; + out_clks.fclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].fabricclk_mhz * 1000; + out_clks.uclk_mts = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dram_speed_mts; + out_clks.phyclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].phyclk_mhz * 1000; + out_clks.socclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].socclk_mhz * 1000; + out_clks.ref_dtbclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dtbclk_mhz * 1000; + context->bw_ctx.bw.dcn.clk.dtbclk_en = false; + dml2_copy_clocks_to_dc_state(&out_clks, context); + return true; + } + + /* Zero out before each call before proceeding */ + memset(&dml2->v20.scratch, 0, sizeof(struct dml2_wrapper_scratch)); + memset(&dml2->v20.dml_core_ctx.policy, 0, sizeof(struct dml_mode_eval_policy_st)); + memset(&dml2->v20.dml_core_ctx.ms, 0, sizeof(struct mode_support_st)); + memset(&dml2->v20.dml_core_ctx.mp, 0, sizeof(struct mode_program_st)); + + /* Initialize DET scratch */ + dml2_initialize_det_scratch(dml2); + + copy_dummy_pstate_table(s->dummy_pstate_table, in_dc->clk_mgr->bw_params->dummy_pstate_table, 4); + + result = call_dml_mode_support_and_programming(context, validate_mode); + /* Call map dc pipes to map the pipes based on the DML output. For correctly determining if recalculation + * is required or not, the resource context needs to correctly reflect the number of active pipes. We would + * only know the correct number if active pipes after dml2_map_dc_pipes is called. + */ + if (result && !dml2->config.skip_hw_state_mapping) + dml2_map_dc_pipes(dml2, context, &s->cur_display_config, &s->dml_to_dc_pipe_mapping, in_dc->current_state); + + /* Verify and update DET Buffer configuration if needed. 
dml2_verify_det_buffer_configuration will check if DET Buffer + * size needs to be updated. If yes it will update the DETOverride variable and set need_recalculation flag to true. + * Based on that flag, run mode support again. Verification needs to be run after dml_mode_programming because the getters + * return correct det buffer values only after dml_mode_programming is called. + */ + if (result && !dml2->config.skip_hw_state_mapping) { + need_recalculation = dml2_verify_det_buffer_configuration(dml2, context, &dml2->det_helper_scratch); + if (need_recalculation) { + /* Engage the DML again if recalculation is required. */ + call_dml_mode_support_and_programming(context, validate_mode); + if (!dml2->config.skip_hw_state_mapping) { + dml2_map_dc_pipes(dml2, context, &s->cur_display_config, &s->dml_to_dc_pipe_mapping, in_dc->current_state); + } + need_recalculation = dml2_verify_det_buffer_configuration(dml2, context, &dml2->det_helper_scratch); + ASSERT(need_recalculation == false); + } + } + + if (result) { + unsigned int lowest_state_idx = s->mode_support_params.out_lowest_state_idx; + out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.mp.Dispclk_calculated * 1000; + out_clks.p_state_supported = s->mode_support_info.DRAMClockChangeSupport[0] != dml_dram_clock_change_unsupported; + if (in_dc->config.use_default_clock_table && + (lowest_state_idx < dml2->v20.dml_core_ctx.states.num_states - 1)) { + lowest_state_idx = dml2->v20.dml_core_ctx.states.num_states - 1; + out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dispclk_mhz * 1000; + } + + out_clks.dcfclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dcfclk_mhz * 1000; + out_clks.fclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].fabricclk_mhz * 1000; + out_clks.uclk_mts = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dram_speed_mts; + out_clks.phyclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].phyclk_mhz * 1000; + out_clks.socclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].socclk_mhz * 1000; + out_clks.ref_dtbclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dtbclk_mhz * 1000; + context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(in_dc, context); + + if (!dml2->config.skip_hw_state_mapping) { + /* Call dml2_calculate_rq_and_dlg_params */ + dml2_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml2, in_dc->res_pool->pipe_count); + } + + dml2_copy_clocks_to_dc_state(&out_clks, context); + dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.a, &dml2->v20.dml_core_ctx); + dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.b, &dml2->v20.dml_core_ctx); + if (context->streams[0]->sink->link->dc->caps.is_apu) + dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.dml_core_ctx); + else + memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c)); + dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.d, &dml2->v20.dml_core_ctx); + dml2_extract_writeback_wm(context, &dml2->v20.dml_core_ctx); + //copy for deciding zstate use + context->bw_ctx.dml.vba.StutterPeriod = context->bw_ctx.dml2->v20.dml_core_ctx.mp.StutterPeriod; + + cstate_enter_plus_exit_z8_ns = context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns; + + if 
(context->bw_ctx.dml.vba.StutterPeriod < in_dc->debug.minimum_z8_residency_time && + cstate_enter_plus_exit_z8_ns < in_dc->debug.minimum_z8_residency_time * 1000) + cstate_enter_plus_exit_z8_ns = in_dc->debug.minimum_z8_residency_time * 1000; + + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = cstate_enter_plus_exit_z8_ns; + } + + return result; +} + +static bool dml2_validate_only(struct dc_state *context, enum dc_validate_mode validate_mode) +{ + struct dml2_context *dml2; + unsigned int result = 0; + + if (!context || context->stream_count == 0) + return true; + + dml2 = context->bw_ctx.dml2; + + /* Zero out before each call before proceeding */ + memset(&dml2->v20.scratch, 0, sizeof(struct dml2_wrapper_scratch)); + memset(&dml2->v20.dml_core_ctx.policy, 0, sizeof(struct dml_mode_eval_policy_st)); + memset(&dml2->v20.dml_core_ctx.ms, 0, sizeof(struct mode_support_st)); + memset(&dml2->v20.dml_core_ctx.mp, 0, sizeof(struct mode_program_st)); + + build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); + + map_dc_state_into_dml_display_cfg(dml2, context, &dml2->v20.scratch.cur_display_config); + if (!dml2->config.skip_hw_state_mapping) + dml2_apply_det_buffer_allocation_policy(dml2, &dml2->v20.scratch.cur_display_config); + + result = pack_and_call_dml_mode_support_ex(dml2, + &dml2->v20.scratch.cur_display_config, + &dml2->v20.scratch.mode_support_info, + validate_mode); + + if (result) + result = does_configuration_meet_sw_policies(dml2, &dml2->v20.scratch.cur_display_config, &dml2->v20.scratch.mode_support_info); + + return result == 1; +} + +static void dml2_apply_debug_options(const struct dc *dc, struct dml2_context *dml2) +{ + if (dc->debug.override_odm_optimization) { + dml2->config.minimize_dispclk_using_odm = dc->debug.minimize_dispclk_using_odm; + } +} + +bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2, + enum dc_validate_mode validate_mode) +{ + bool out = false; + + if (!dml2) + return false; + dml2_apply_debug_options(in_dc, dml2); + + /* DML2.1 validation path */ + if (dml2->architecture == dml2_architecture_21) { + out = dml21_validate(in_dc, context, dml2, validate_mode); + return out; + } + + DC_FP_START(); + + /* Use dml_validate_only for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX path */ + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) + out = dml2_validate_only(context, validate_mode); + else + out = dml2_validate_and_build_resource(in_dc, context, validate_mode); + + DC_FP_END(); + + return out; +} + +static inline struct dml2_context *dml2_allocate_memory(void) +{ + return (struct dml2_context *) vzalloc(sizeof(struct dml2_context)); +} + +static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) +{ + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) { + dml21_reinit(in_dc, *dml2, config); + return; + } + + // Store config options + (*dml2)->config = *config; + + switch (in_dc->ctx->dce_version) { + case DCN_VERSION_3_5: + (*dml2)->v20.dml_core_ctx.project = dml_project_dcn35; + break; + case DCN_VERSION_3_51: + (*dml2)->v20.dml_core_ctx.project = dml_project_dcn351; + break; + case DCN_VERSION_3_6: + (*dml2)->v20.dml_core_ctx.project = dml_project_dcn36; + break; + case DCN_VERSION_3_2: + (*dml2)->v20.dml_core_ctx.project = dml_project_dcn32; + break; + case DCN_VERSION_3_21: + (*dml2)->v20.dml_core_ctx.project = 
dml_project_dcn321; + break; + case DCN_VERSION_4_01: + (*dml2)->v20.dml_core_ctx.project = dml_project_dcn401; + break; + default: + (*dml2)->v20.dml_core_ctx.project = dml_project_default; + break; + } + + DC_FP_START(); + + initialize_dml2_ip_params(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.ip); + + initialize_dml2_soc_bbox(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc); + + initialize_dml2_soc_states(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc, &(*dml2)->v20.dml_core_ctx.states); + + DC_FP_END(); +} + +bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) +{ + // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete. + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) + return dml21_create(in_dc, dml2, config); + + // Allocate Mode Lib Ctx + *dml2 = dml2_allocate_memory(); + + if (!(*dml2)) + return false; + + dml2_init(in_dc, config, dml2); + + return true; +} + +void dml2_destroy(struct dml2_context *dml2) +{ + if (!dml2) + return; + + if (dml2->architecture == dml2_architecture_21) + dml21_destroy(dml2); + vfree(dml2); +} + +void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, + unsigned int *fclk_change_support, unsigned int *dram_clk_change_support) +{ + *fclk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0]; + *dram_clk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.DRAMClockChangeSupport[0]; +} + +void dml2_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2) +{ + if (dml2->architecture == dml2_architecture_21) + dml21_prepare_mcache_programming(in_dc, context, dml2); +} + +void dml2_copy(struct dml2_context *dst_dml2, + struct dml2_context *src_dml2) +{ + if (src_dml2->architecture == dml2_architecture_21) { + dml21_copy(dst_dml2, src_dml2); + return; + } + /* copy Mode Lib Ctx */ + memcpy(dst_dml2, src_dml2, sizeof(struct dml2_context)); +} + +bool dml2_create_copy(struct dml2_context **dst_dml2, + struct dml2_context *src_dml2) +{ + if (src_dml2->architecture == dml2_architecture_21) + return dml21_create_copy(dst_dml2, src_dml2); + /* Allocate Mode Lib Ctx */ + *dst_dml2 = dml2_allocate_memory(); + + if (!(*dst_dml2)) + return false; + + /* copy Mode Lib Ctx */ + dml2_copy(*dst_dml2, src_dml2); + + return true; +} + +void dml2_reinit(const struct dc *in_dc, + const struct dml2_configuration_options *config, + struct dml2_context **dml2) +{ + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) { + dml21_reinit(in_dc, *dml2, config); + return; + } + + dml2_init(in_dc, config, dml2); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.h new file mode 100644 index 000000000000..c384e141cebc --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.h @@ -0,0 +1,309 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DML2_WRAPPER_H_ +#define _DML2_WRAPPER_H_ + +#include "os_types.h" + +#define DML2_MAX_NUM_DPM_LVL 30 + +struct dml2_context; +struct display_mode_lib_st; +struct dc; +struct pipe_ctx; +struct dc_plane_state; +struct dc_sink; +struct dc_stream_state; +struct resource_context; +struct display_stream_compressor; +struct dc_mcache_params; + +// Configuration of the MALL on the SoC +struct dml2_soc_mall_info { + // Cache line size of 0 means MALL is not enabled/present + unsigned int cache_line_size_bytes; + unsigned int cache_num_ways; + unsigned int max_cab_allocation_bytes; + + unsigned int mblk_width_pixels; + unsigned int mblk_size_bytes; + unsigned int mblk_height_4bpe_pixels; + unsigned int mblk_height_8bpe_pixels; +}; + +// Output of DML2 for clock requirements +struct dml2_dcn_clocks { + unsigned int dispclk_khz; + unsigned int dcfclk_khz; + unsigned int fclk_khz; + unsigned int uclk_mts; + unsigned int phyclk_khz; + unsigned int socclk_khz; + unsigned int ref_dtbclk_khz; + bool p_state_supported; + unsigned int cab_num_ways_required; + unsigned int dcfclk_khz_ds; +}; + +struct dml2_dc_callbacks { + struct dc *dc; + bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx); + void (*build_test_pattern_params)(struct resource_context *res_ctx, struct pipe_ctx *otg_master); + bool (*can_support_mclk_switch_using_fw_based_vblank_stretch)(struct dc *dc, struct dc_state *context); + bool (*acquire_secondary_pipe_for_mpc_odm)(const struct dc *dc, struct dc_state *state, struct pipe_ctx *pri_pipe, struct pipe_ctx *sec_pipe, bool odm); + bool (*update_pipes_for_stream_with_slice_count)( + struct dc_state *new_ctx, + const struct dc_state *cur_ctx, + const struct resource_pool *pool, + const struct dc_stream_state *stream, + int new_slice_count); + bool (*update_pipes_for_plane_with_slice_count)( + struct dc_state *new_ctx, + const struct dc_state *cur_ctx, + const struct resource_pool *pool, + const struct dc_plane_state *plane, + int slice_count); + int (*get_odm_slice_index)(const struct pipe_ctx *opp_head); + int (*get_odm_slice_count)(const struct pipe_ctx *opp_head); + int (*get_mpc_slice_index)(const struct pipe_ctx *dpp_pipe); + int (*get_mpc_slice_count)(const struct pipe_ctx *dpp_pipe); + struct pipe_ctx *(*get_opp_head)(const struct pipe_ctx *pipe_ctx); + struct pipe_ctx *(*get_otg_master_for_stream)( + struct resource_context *res_ctx, + const struct dc_stream_state 
*stream); + int (*get_opp_heads_for_otg_master)(const struct pipe_ctx *otg_master, + struct resource_context *res_ctx, + struct pipe_ctx *opp_heads[MAX_PIPES]); + int (*get_dpp_pipes_for_plane)(const struct dc_plane_state *plane, + struct resource_context *res_ctx, + struct pipe_ctx *dpp_pipes[MAX_PIPES]); + struct dc_stream_status *(*get_stream_status)( + struct dc_state *state, + const struct dc_stream_state *stream); + struct dc_stream_state *(*get_stream_from_id)(const struct dc_state *state, unsigned int id); + unsigned int (*get_max_flickerless_instant_vtotal_increase)( + struct dc_stream_state *stream, + bool is_gaming); + bool (*allocate_mcache)(struct dc_state *context, const struct dc_mcache_params *mcache_params); +}; + +struct dml2_dc_svp_callbacks { + struct dc *dc; + bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx); + struct dc_stream_state* (*create_phantom_stream)(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *main_stream); + struct dc_plane_state* (*create_phantom_plane)(const struct dc *dc, + struct dc_state *state, + struct dc_plane_state *main_plane); + enum dc_status (*add_phantom_stream)(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream, + struct dc_stream_state *main_stream); + bool (*add_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context); + bool (*remove_phantom_plane)(const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state *plane_state, + struct dc_state *context); + enum dc_status (*remove_phantom_stream)(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream); + void (*release_phantom_plane)(const struct dc *dc, + struct dc_state *state, + struct dc_plane_state *plane); + void (*release_phantom_stream)(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream); + void (*release_dsc)(struct resource_context *res_ctx, const struct resource_pool *pool, struct display_stream_compressor **dsc); + enum mall_stream_type (*get_pipe_subvp_type)(const struct dc_state *state, const struct pipe_ctx *pipe_ctx); + enum mall_stream_type (*get_stream_subvp_type)(const struct dc_state *state, const struct dc_stream_state *stream); + struct dc_stream_state *(*get_paired_subvp_stream)(const struct dc_state *state, const struct dc_stream_state *stream); + bool (*remove_phantom_streams_and_planes)( + const struct dc *dc, + struct dc_state *state); + void (*release_phantom_streams_and_planes)( + const struct dc *dc, + struct dc_state *state); + unsigned int (*calculate_mall_ways_from_bytes)( + const struct dc *dc, + unsigned int total_size_in_mall_bytes); +}; + +struct dml2_clks_table_entry { + unsigned int dcfclk_mhz; + unsigned int fclk_mhz; + unsigned int memclk_mhz; + unsigned int socclk_mhz; + unsigned int dtbclk_mhz; + unsigned int dispclk_mhz; + unsigned int dppclk_mhz; + unsigned int dram_speed_mts; /*which is based on wck_ratio*/ +}; + +struct dml2_clks_num_entries { + unsigned int num_dcfclk_levels; + unsigned int num_fclk_levels; + unsigned int num_memclk_levels; + unsigned int num_socclk_levels; + unsigned int num_dtbclk_levels; + unsigned int num_dispclk_levels; + unsigned int num_dppclk_levels; +}; + +struct dml2_clks_limit_table { + struct dml2_clks_table_entry clk_entries[DML2_MAX_NUM_DPM_LVL]; + struct dml2_clks_num_entries num_entries_per_clk; + unsigned int num_states; +}; + +// Various overrides, per ASIC or per SKU specific, or for debugging 
purpose when/if available
+struct dml2_soc_bbox_overrides {
+	double xtalclk_mhz;
+	double dchub_refclk_mhz;
+	double dprefclk_mhz;
+	double disp_pll_vco_speed_mhz;
+	double urgent_latency_us;
+	double sr_exit_latency_us;
+	double sr_enter_plus_exit_latency_us;
+	double sr_exit_z8_time_us;
+	double sr_enter_plus_exit_z8_time_us;
+	double dram_clock_change_latency_us;
+	double fclk_change_latency_us;
+	unsigned int dram_num_chan;
+	unsigned int dram_chanel_width_bytes;
+	struct dml2_clks_limit_table clks_table;
+};
+
+enum dml2_force_pstate_methods {
+	dml2_force_pstate_method_auto = 0,
+	dml2_force_pstate_method_vactive,
+	dml2_force_pstate_method_vblank,
+	dml2_force_pstate_method_drr,
+	dml2_force_pstate_method_subvp,
+};
+
+struct dml2_configuration_options {
+	int dcn_pipe_count;
+	bool use_native_pstate_optimization;
+	bool enable_windowed_mpo_odm;
+	bool use_native_soc_bb_construction;
+	bool skip_hw_state_mapping;
+	bool optimize_odm_4to1;
+	bool minimize_dispclk_using_odm;
+	bool override_det_buffer_size_kbytes;
+	struct dml2_dc_callbacks callbacks;
+	struct {
+		bool force_disable_subvp;
+		bool force_enable_subvp;
+		unsigned int subvp_fw_processing_delay_us;
+		unsigned int subvp_pstate_allow_width_us;
+		unsigned int subvp_prefetch_end_to_mall_start_us;
+		unsigned int subvp_swath_height_margin_lines;
+		struct dml2_dc_svp_callbacks callbacks;
+	} svp_pstate;
+	struct dml2_soc_mall_info mall_cfg;
+	struct dml2_soc_bbox_overrides bbox_overrides;
+	unsigned int max_segments_per_hubp;
+	unsigned int det_segment_size;
+	/* Only for debugging purposes when initializing SOCBB params via tool for DML21. */
+	struct socbb_ip_params_external *external_socbb_ip_params;
+	struct {
+		bool force_pstate_method_enable;
+		enum dml2_force_pstate_methods force_pstate_method_values[MAX_PIPES];
+	} pmo;
+	bool map_dc_pipes_with_callbacks;
+
+	bool use_clock_dc_limits;
+	bool gpuvm_enable;
+	bool force_tdlut_enable;
+	void *bb_from_dmub;
+};
+
+/*
+ * dml2_create - Creates dml2_context.
+ * @in_dc: dc.
+ * @config: dml2 configuration options.
+ * @dml2: Created dml2 context.
+ *
+ * Create and destroy of DML2 is done as part of dc_state creation
+ * and dc_state_free. DML2 IP, SOC and STATES are initialized at
+ * creation time.
+ *
+ * Return: True if dml2 is successfully created, false otherwise.
+ */
+bool dml2_create(const struct dc *in_dc,
+		const struct dml2_configuration_options *config,
+		struct dml2_context **dml2);
+
+void dml2_destroy(struct dml2_context *dml2);
+void dml2_copy(struct dml2_context *dst_dml2,
+		struct dml2_context *src_dml2);
+bool dml2_create_copy(struct dml2_context **dst_dml2,
+		struct dml2_context *src_dml2);
+void dml2_reinit(const struct dc *in_dc,
+		const struct dml2_configuration_options *config,
+		struct dml2_context **dml2);
+
+/*
+ * dml2_validate - Determines if a display configuration is supported or not.
+ * @in_dc: dc.
+ * @context: dc_state to be validated.
+ * @validate_mode: DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX will not populate context.res_ctx.
+ *
+ * DML1.0 compatible interface for validation.
+ *
+ * Based on the validate_mode option, this internally calls:
+ *
+ * -dml2_validate_and_build_resource - for DC_VALIDATE_MODE_AND_PROGRAMMING
+ *	Calculates if dc_state can be supported on the SOC, and attempts to
+ *	optimize the power management feature support versus minimum clocks.
+ *	If supported, also builds out_new_hw_state to represent the hw programming
+ *	for the new dc state.
+ *
+ * -dml2_validate_only - for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX
+ *	Calculates if dc_state can be supported on the SOC (i.e. at maximum
+ *	clocks) with all mandatory power features enabled.
+ *
+ * Context: Two threads may not invoke this function concurrently unless they reference
+ *	separate dc_states for validation.
+ * Return: True if mode is supported, false otherwise.
+ */
+bool dml2_validate(const struct dc *in_dc,
+		struct dc_state *context,
+		struct dml2_context *dml2,
+		enum dc_validate_mode validate_mode);
+
+/*
+ * dml2_extract_dram_and_fclk_change_support - Extracts the FCLK and UCLK change support info.
+ * @dml2: input dml2 context pointer.
+ * @fclk_change_support: output pointer holding the fclk change support info (vactive, vblank, unsupported).
+ * @dram_clk_change_support: output pointer holding the uclk change support info (vactive, vblank, unsupported).
+ */
+void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2,
+		unsigned int *fclk_change_support, unsigned int *dram_clk_change_support);
+void dml2_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2);
+#endif //_DML2_WRAPPER_H_
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml_assert.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_assert.h
new file mode 100644
index 000000000000..17f0972b1af7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_assert.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML_ASSERT_H__
+#define __DML_ASSERT_H__
+
+#include "os_types.h"
+
+#endif //__DML_ASSERT_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml_depedencies.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_depedencies.h
new file mode 100644
index 000000000000..d459f93cf40b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_depedencies.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+/* This header intentionally does not include an #ifdef guard as it only contains includes for other headers */
+
+/*
+ * Standard Types
+ */
+#include "os_types.h"
+#include "cmntypes.h"
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c
new file mode 100644
index 000000000000..00d22e542469
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c
@@ -0,0 +1,573 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ * + */ + +#include "dml_display_rq_dlg_calc.h" +#include "display_mode_core.h" +#include "display_mode_util.h" + +static dml_bool_t is_dual_plane(enum dml_source_format_class source_format) +{ + dml_bool_t ret_val = 0; + + if ((source_format == dml_420_12) || (source_format == dml_420_8) || (source_format == dml_420_10) || (source_format == dml_rgbe_alpha)) + ret_val = 1; + + return ret_val; +} + +void dml_rq_dlg_get_rq_reg(dml_display_rq_regs_st *rq_regs, + struct display_mode_lib_st *mode_lib, + const dml_uint_t pipe_idx) +{ + dml_uint_t plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); + enum dml_source_format_class source_format = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[plane_idx]; + enum dml_swizzle_mode sw_mode = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[plane_idx]; + dml_bool_t dual_plane = is_dual_plane((enum dml_source_format_class)(source_format)); + + uint32 pixel_chunk_bytes = 0; + uint32 min_pixel_chunk_bytes = 0; + uint32 meta_chunk_bytes = 0; + uint32 min_meta_chunk_bytes = 0; + uint32 dpte_group_bytes = 0; + uint32 mpte_group_bytes = 0; + + uint32 p1_pixel_chunk_bytes = 0; + uint32 p1_min_pixel_chunk_bytes = 0; + uint32 p1_meta_chunk_bytes = 0; + uint32 p1_min_meta_chunk_bytes = 0; + uint32 p1_dpte_group_bytes = 0; + uint32 p1_mpte_group_bytes = 0; + + dml_uint_t detile_buf_size_in_bytes; + dml_uint_t detile_buf_plane1_addr = 0; + + dml_float_t stored_swath_l_bytes; + dml_float_t stored_swath_c_bytes; + dml_bool_t is_phantom_pipe; + + dml_uint_t pte_row_height_linear; + + dml_print("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); + + memset(rq_regs, 0, sizeof(*rq_regs)); + + pixel_chunk_bytes = (dml_uint_t)(dml_get_pixel_chunk_size_in_kbyte(mode_lib) * 1024); + min_pixel_chunk_bytes = (dml_uint_t)(dml_get_min_pixel_chunk_size_in_byte(mode_lib)); + + if (pixel_chunk_bytes == 64 * 1024) + min_pixel_chunk_bytes = 0; + + meta_chunk_bytes = (dml_uint_t)(dml_get_meta_chunk_size_in_kbyte(mode_lib) * 1024); + min_meta_chunk_bytes = (dml_uint_t)(dml_get_min_meta_chunk_size_in_byte(mode_lib)); + + dpte_group_bytes = (dml_uint_t)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx)); + mpte_group_bytes = (dml_uint_t)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx)); + + p1_pixel_chunk_bytes = pixel_chunk_bytes; + p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes; + p1_meta_chunk_bytes = meta_chunk_bytes; + p1_min_meta_chunk_bytes = min_meta_chunk_bytes; + p1_dpte_group_bytes = dpte_group_bytes; + p1_mpte_group_bytes = mpte_group_bytes; + + if (source_format == dml_rgbe_alpha) + p1_pixel_chunk_bytes = (dml_uint_t)(dml_get_alpha_pixel_chunk_size_in_kbyte(mode_lib) * 1024); + + rq_regs->rq_regs_l.chunk_size = (dml_uint_t)(dml_log2((dml_float_t) pixel_chunk_bytes) - 10); + rq_regs->rq_regs_c.chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_pixel_chunk_bytes) - 10); + + if (min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_l.min_chunk_size = 0; + else + rq_regs->rq_regs_l.min_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) min_pixel_chunk_bytes) - 8 + 1); + + if (p1_min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_c.min_chunk_size = 0; + else + rq_regs->rq_regs_c.min_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_min_pixel_chunk_bytes) - 8 + 1); + + rq_regs->rq_regs_l.meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) meta_chunk_bytes) - 10); + rq_regs->rq_regs_c.meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_meta_chunk_bytes) - 10); + + if (min_meta_chunk_bytes == 0) + rq_regs->rq_regs_l.min_meta_chunk_size = 0; + else 
+ rq_regs->rq_regs_l.min_meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) min_meta_chunk_bytes) - 6 + 1); + + if (min_meta_chunk_bytes == 0) + rq_regs->rq_regs_c.min_meta_chunk_size = 0; + else + rq_regs->rq_regs_c.min_meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_min_meta_chunk_bytes) - 6 + 1); + + rq_regs->rq_regs_l.dpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) dpte_group_bytes) - 6); + rq_regs->rq_regs_l.mpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) mpte_group_bytes) - 6); + rq_regs->rq_regs_c.dpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) p1_dpte_group_bytes) - 6); + rq_regs->rq_regs_c.mpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) p1_mpte_group_bytes) - 6); + + detile_buf_size_in_bytes = (dml_uint_t)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024); + + pte_row_height_linear = (dml_uint_t)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx)); + + if (sw_mode == dml_sw_linear) + ASSERT(pte_row_height_linear >= 8); + + rq_regs->rq_regs_l.pte_row_height_linear = (dml_uint_t)(dml_floor(dml_log2((dml_float_t) pte_row_height_linear), 1) - 3); + + if (dual_plane) { + dml_uint_t p1_pte_row_height_linear = (dml_uint_t)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx)); + if (sw_mode == dml_sw_linear) + ASSERT(p1_pte_row_height_linear >= 8); + + rq_regs->rq_regs_c.pte_row_height_linear = (dml_uint_t)(dml_floor(dml_log2((dml_float_t) p1_pte_row_height_linear), 1) - 3); + } + + rq_regs->rq_regs_l.swath_height = (dml_uint_t)(dml_log2((dml_float_t) dml_get_swath_height_l(mode_lib, pipe_idx))); + rq_regs->rq_regs_c.swath_height = (dml_uint_t)(dml_log2((dml_float_t) dml_get_swath_height_c(mode_lib, pipe_idx))); + + if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + + stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx); + stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx); + is_phantom_pipe = dml_get_is_phantom_pipe(mode_lib, pipe_idx); + + // Note: detile_buf_plane1_addr is in unit of 1KB + if (dual_plane) { + if (is_phantom_pipe) { + detile_buf_plane1_addr = (dml_uint_t)((1024.0*1024.0) / 2.0 / 1024.0); // half to chroma + } else { + if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { + detile_buf_plane1_addr = (dml_uint_t)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma +#ifdef __DML_VBA_DEBUG__ + dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } else { + detile_buf_plane1_addr = (dml_uint_t)(dml_round_to_multiple((dml_uint_t)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma +#ifdef __DML_VBA_DEBUG__ + dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); + dml_print("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); + dml_print("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); + dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); + dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, 
+	dml_print("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
+#endif
+	dml_print_rq_regs_st(rq_regs);
+	dml_print("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
+}
+
+// Note: currently taken in as is.
+// Nice to decouple code from the hw register implementation and extract code that is repeated for luma and chroma.
+
+
+void dml_rq_dlg_get_dlg_reg(dml_display_dlg_regs_st *disp_dlg_regs,
+	dml_display_ttu_regs_st *disp_ttu_regs,
+	struct display_mode_lib_st *mode_lib,
+	const dml_uint_t pipe_idx)
+{
+	dml_uint_t plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
+	enum dml_source_format_class source_format = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[plane_idx];
+	struct dml_timing_cfg_st *timing = &mode_lib->ms.cache_display_cfg.timing;
+	struct dml_plane_cfg_st *plane = &mode_lib->ms.cache_display_cfg.plane;
+	struct dml_hw_resource_st *hw = &mode_lib->ms.cache_display_cfg.hw;
+	dml_bool_t dual_plane = is_dual_plane(source_format);
+	dml_uint_t num_cursors = plane->NumberOfCursors[plane_idx];
+	enum dml_odm_mode odm_mode = hw->ODMMode[plane_idx];
+
+	dml_uint_t htotal = timing->HTotal[plane_idx];
+	dml_uint_t hactive = timing->HActive[plane_idx];
+	dml_uint_t hblank_end = timing->HBlankEnd[plane_idx];
+	dml_uint_t vblank_end = timing->VBlankEnd[plane_idx];
+	dml_bool_t interlaced = timing->Interlace[plane_idx];
+	dml_float_t pclk_freq_in_mhz = (dml_float_t) timing->PixelClock[plane_idx];
+	dml_float_t refclk_freq_in_mhz = (hw->DLGRefClkFreqMHz > 0) ? (dml_float_t) hw->DLGRefClkFreqMHz : mode_lib->soc.refclk_mhz;
+	dml_float_t ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
+
+	dml_uint_t vready_after_vcount0;
+
+	dml_uint_t dst_x_after_scaler;
+	dml_uint_t dst_y_after_scaler;
+
+	dml_float_t dst_y_prefetch;
+	dml_float_t dst_y_per_vm_vblank;
+	dml_float_t dst_y_per_row_vblank;
+	dml_float_t dst_y_per_vm_flip;
+	dml_float_t dst_y_per_row_flip;
+
+	dml_float_t max_dst_y_per_vm_vblank = 32.0; //U5.2
+	dml_float_t max_dst_y_per_row_vblank = 16.0; //U4.2
+
+	dml_float_t vratio_pre_l;
+	dml_float_t vratio_pre_c;
+
+	dml_float_t refcyc_per_line_delivery_pre_l;
+	dml_float_t refcyc_per_line_delivery_l;
+	dml_float_t refcyc_per_line_delivery_pre_c = 0.;
+	dml_float_t refcyc_per_line_delivery_c = 0.;
+	dml_float_t refcyc_per_req_delivery_pre_l;
+	dml_float_t refcyc_per_req_delivery_l;
+	dml_float_t refcyc_per_req_delivery_pre_c = 0.;
+	dml_float_t refcyc_per_req_delivery_c = 0.;
+	dml_float_t refcyc_per_req_delivery_pre_cur0 = 0.;
+	dml_float_t refcyc_per_req_delivery_cur0 = 0.;
+
+	dml_float_t dst_y_per_pte_row_nom_l;
+	dml_float_t dst_y_per_pte_row_nom_c;
+	dml_float_t dst_y_per_meta_row_nom_l;
+	dml_float_t dst_y_per_meta_row_nom_c;
+	dml_float_t refcyc_per_pte_group_nom_l;
+	dml_float_t refcyc_per_pte_group_nom_c;
+	dml_float_t refcyc_per_pte_group_vblank_l;
+	dml_float_t refcyc_per_pte_group_vblank_c;
+	dml_float_t refcyc_per_pte_group_flip_l;
+	dml_float_t refcyc_per_pte_group_flip_c;
+	dml_float_t refcyc_per_meta_chunk_nom_l;
+	dml_float_t refcyc_per_meta_chunk_nom_c;
+	dml_float_t refcyc_per_meta_chunk_vblank_l;
+	dml_float_t refcyc_per_meta_chunk_vblank_c;
+	dml_float_t refcyc_per_meta_chunk_flip_l;
+	dml_float_t refcyc_per_meta_chunk_flip_c;
+
+	dml_float_t temp;
+	dml_float_t min_ttu_vblank;
+	dml_uint_t min_dst_y_next_start;
+
+	dml_print("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx);
+	dml_print("DML_DLG::%s: plane_idx = %d\n", __func__, plane_idx);
+
dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: hw->DLGRefClkFreqMHz = %3.2f\n", __func__, hw->DLGRefClkFreqMHz); + dml_print("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.refclk_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq); + dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); + + memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); + memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); + + ASSERT(refclk_freq_in_mhz != 0); + ASSERT(pclk_freq_in_mhz != 0); + ASSERT(ref_freq_to_pix_freq < 4.0); + + // Need to figure out which side of odm combine we're in + // Assume the pipe instance under the same plane is in order + + if (odm_mode == dml_odm_mode_bypass) { + disp_dlg_regs->refcyc_h_blank_end = (dml_uint_t)((dml_float_t) hblank_end * ref_freq_to_pix_freq); + } else if (odm_mode == dml_odm_mode_combine_2to1 || odm_mode == dml_odm_mode_combine_4to1) { + // find out how many pipe are in this plane + dml_uint_t num_active_pipes = dml_get_num_active_pipes(&mode_lib->ms.cache_display_cfg); + dml_uint_t first_pipe_idx_in_plane = __DML_NUM_PLANES__; + dml_uint_t pipe_idx_in_combine = 0; // pipe index within the plane + dml_uint_t odm_combine_factor = (odm_mode == dml_odm_mode_combine_2to1 ? 2 : 4); + + for (dml_uint_t i = 0; i < num_active_pipes; i++) { + if (dml_get_plane_idx(mode_lib, i) == plane_idx) { + if (i < first_pipe_idx_in_plane) { + first_pipe_idx_in_plane = i; + } + } + } + pipe_idx_in_combine = pipe_idx - first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.) + + disp_dlg_regs->refcyc_h_blank_end = (dml_uint_t)(((dml_float_t) hblank_end + (dml_float_t) pipe_idx_in_combine * (dml_float_t) hactive / (dml_float_t) odm_combine_factor) * ref_freq_to_pix_freq); + dml_print("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); + dml_print("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, first_pipe_idx_in_plane); + dml_print("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, pipe_idx_in_combine); + dml_print("DML_DLG: %s: odm_combine_factor = %d\n", __func__, odm_combine_factor); + } + dml_print("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); + + ASSERT(disp_dlg_regs->refcyc_h_blank_end < (dml_uint_t)dml_pow(2, 13)); + + disp_dlg_regs->ref_freq_to_pix_freq = (dml_uint_t)(ref_freq_to_pix_freq * dml_pow(2, 19)); + temp = dml_pow(2, 8); + disp_dlg_regs->refcyc_per_htotal = (dml_uint_t)(ref_freq_to_pix_freq * (dml_float_t)htotal * temp); + disp_dlg_regs->dlg_vblank_end = interlaced ? 
(vblank_end / 2) : vblank_end; // 15 bits + + min_ttu_vblank = dml_get_min_ttu_vblank_in_us(mode_lib, pipe_idx); + min_dst_y_next_start = (dml_uint_t)(dml_get_min_dst_y_next_start(mode_lib, pipe_idx)); + + dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank); + dml_print("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, min_dst_y_next_start); + dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq); + + vready_after_vcount0 = (dml_uint_t)(dml_get_vready_at_or_after_vsync(mode_lib, pipe_idx)); + disp_dlg_regs->vready_after_vcount0 = vready_after_vcount0; + + dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); + + dst_x_after_scaler = (dml_uint_t)(dml_get_dst_x_after_scaler(mode_lib, pipe_idx)); + dst_y_after_scaler = (dml_uint_t)(dml_get_dst_y_after_scaler(mode_lib, pipe_idx)); + + dml_print("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler); + dml_print("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler); + + dst_y_prefetch = dml_get_dst_y_prefetch(mode_lib, pipe_idx); + dst_y_per_vm_vblank = dml_get_dst_y_per_vm_vblank(mode_lib, pipe_idx); + dst_y_per_row_vblank = dml_get_dst_y_per_row_vblank(mode_lib, pipe_idx); + dst_y_per_vm_flip = dml_get_dst_y_per_vm_flip(mode_lib, pipe_idx); + dst_y_per_row_flip = dml_get_dst_y_per_row_flip(mode_lib, pipe_idx); + + // magic! + if (htotal <= 75) { + max_dst_y_per_vm_vblank = 100.0; + max_dst_y_per_row_vblank = 100.0; + } + + dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); + dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip); + dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip); + dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); + + ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank); + ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank); + ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); + + vratio_pre_l = dml_get_vratio_prefetch_l(mode_lib, pipe_idx); + vratio_pre_c = dml_get_vratio_prefetch_c(mode_lib, pipe_idx); + + dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l); + dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c); + + // Active + refcyc_per_line_delivery_pre_l = dml_get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_line_delivery_l = dml_get_refcyc_per_line_delivery_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l); + + if (dual_plane) { + refcyc_per_line_delivery_pre_c = dml_get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_line_delivery_c = dml_get_refcyc_per_line_delivery_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, refcyc_per_line_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, refcyc_per_line_delivery_c); + } + + disp_dlg_regs->refcyc_per_vm_dmdata = (dml_uint_t)(dml_get_refcyc_per_vm_dmdata_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); + 
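+	/*
+	 * The *_in_us values queried from the mode lib are in microseconds;
+	 * multiplying by refclk_freq_in_mhz converts them into refclk cycle
+	 * counts, and the dml_pow(2, 10) factors used further down store the
+	 * results in fixed point with 10 fractional bits.
+	 */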
disp_dlg_regs->dmdata_dl_delta = (dml_uint_t)(dml_get_dmdata_dl_delta_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); + + refcyc_per_req_delivery_pre_l = dml_get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_req_delivery_l = dml_get_refcyc_per_req_delivery_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l); + + if (dual_plane) { + refcyc_per_req_delivery_pre_c = dml_get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_req_delivery_c = dml_get_refcyc_per_req_delivery_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, refcyc_per_req_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c); + } + + // TTU - Cursor + ASSERT(num_cursors <= 1); + if (num_cursors > 0) { + refcyc_per_req_delivery_pre_cur0 = dml_get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_req_delivery_cur0 = dml_get_refcyc_per_cursor_req_delivery_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_pre_cur0); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_cur0); + } + + // Assign to register structures + disp_dlg_regs->min_dst_y_next_start = (dml_uint_t)((dml_float_t) min_dst_y_next_start * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->min_dst_y_next_start < (dml_uint_t)dml_pow(2, 18)); + + disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line + disp_dlg_regs->refcyc_x_after_scaler = (dml_uint_t)((dml_float_t) dst_x_after_scaler * ref_freq_to_pix_freq); // in terms of refclk + disp_dlg_regs->dst_y_prefetch = (dml_uint_t)(dst_y_prefetch * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_vblank = (dml_uint_t)(dst_y_per_vm_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_vblank = (dml_uint_t)(dst_y_per_row_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_flip = (dml_uint_t)(dst_y_per_vm_flip * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_flip = (dml_uint_t)(dst_y_per_row_flip * dml_pow(2, 2)); + + disp_dlg_regs->vratio_prefetch = (dml_uint_t)(vratio_pre_l * dml_pow(2, 19)); + disp_dlg_regs->vratio_prefetch_c = (dml_uint_t)(vratio_pre_c * dml_pow(2, 19)); + + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); + dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + + disp_dlg_regs->refcyc_per_vm_group_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_group_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); + disp_dlg_regs->refcyc_per_vm_group_flip = (dml_uint_t)(dml_get_refcyc_per_vm_group_flip_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); + disp_dlg_regs->refcyc_per_vm_req_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_req_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10)); + 
disp_dlg_regs->refcyc_per_vm_req_flip = (dml_uint_t)(dml_get_refcyc_per_vm_req_flip_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10)); + + dst_y_per_pte_row_nom_l = dml_get_dst_y_per_pte_row_nom_l(mode_lib, pipe_idx); + dst_y_per_pte_row_nom_c = dml_get_dst_y_per_pte_row_nom_c(mode_lib, pipe_idx); + dst_y_per_meta_row_nom_l = dml_get_dst_y_per_meta_row_nom_l(mode_lib, pipe_idx); + dst_y_per_meta_row_nom_c = dml_get_dst_y_per_meta_row_nom_c(mode_lib, pipe_idx); + + refcyc_per_pte_group_nom_l = dml_get_refcyc_per_pte_group_nom_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_pte_group_nom_c = dml_get_refcyc_per_pte_group_nom_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_pte_group_vblank_l = dml_get_refcyc_per_pte_group_vblank_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_pte_group_vblank_c = dml_get_refcyc_per_pte_group_vblank_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_pte_group_flip_l = dml_get_refcyc_per_pte_group_flip_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_pte_group_flip_c = dml_get_refcyc_per_pte_group_flip_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + refcyc_per_meta_chunk_nom_l = dml_get_refcyc_per_meta_chunk_nom_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_meta_chunk_nom_c = dml_get_refcyc_per_meta_chunk_nom_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_meta_chunk_vblank_l = dml_get_refcyc_per_meta_chunk_vblank_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_meta_chunk_vblank_c = dml_get_refcyc_per_meta_chunk_vblank_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_meta_chunk_flip_l = dml_get_refcyc_per_meta_chunk_flip_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + refcyc_per_meta_chunk_flip_c = dml_get_refcyc_per_meta_chunk_flip_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz; + + disp_dlg_regs->dst_y_per_pte_row_nom_l = (dml_uint_t)(dst_y_per_pte_row_nom_l * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_pte_row_nom_c = (dml_uint_t)(dst_y_per_pte_row_nom_c * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_meta_row_nom_l = (dml_uint_t)(dst_y_per_meta_row_nom_l * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_meta_row_nom_c = (dml_uint_t)(dst_y_per_meta_row_nom_c * dml_pow(2, 2)); + disp_dlg_regs->refcyc_per_pte_group_nom_l = (dml_uint_t)(refcyc_per_pte_group_nom_l); + disp_dlg_regs->refcyc_per_pte_group_nom_c = (dml_uint_t)(refcyc_per_pte_group_nom_c); + disp_dlg_regs->refcyc_per_pte_group_vblank_l = (dml_uint_t)(refcyc_per_pte_group_vblank_l); + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (dml_uint_t)(refcyc_per_pte_group_vblank_c); + disp_dlg_regs->refcyc_per_pte_group_flip_l = (dml_uint_t)(refcyc_per_pte_group_flip_l); + disp_dlg_regs->refcyc_per_pte_group_flip_c = (dml_uint_t)(refcyc_per_pte_group_flip_c); + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (dml_uint_t)(refcyc_per_meta_chunk_nom_l); + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (dml_uint_t)(refcyc_per_meta_chunk_nom_c); + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (dml_uint_t)(refcyc_per_meta_chunk_vblank_l); + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (dml_uint_t)(refcyc_per_meta_chunk_vblank_c); + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (dml_uint_t)(refcyc_per_meta_chunk_flip_l); + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (dml_uint_t)(refcyc_per_meta_chunk_flip_c); + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (dml_uint_t)dml_floor(refcyc_per_line_delivery_pre_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_l = 
(dml_uint_t)dml_floor(refcyc_per_line_delivery_l, 1);
+	disp_dlg_regs->refcyc_per_line_delivery_pre_c = (dml_uint_t)dml_floor(refcyc_per_line_delivery_pre_c, 1);
+	disp_dlg_regs->refcyc_per_line_delivery_c = (dml_uint_t)dml_floor(refcyc_per_line_delivery_c, 1);
+
+	disp_dlg_regs->chunk_hdl_adjust_cur0 = 3;
+	disp_dlg_regs->dst_y_offset_cur0 = 0;
+	disp_dlg_regs->chunk_hdl_adjust_cur1 = 3;
+	disp_dlg_regs->dst_y_offset_cur1 = 0;
+
+	disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
+
+	disp_ttu_regs->refcyc_per_req_delivery_pre_l = (dml_uint_t)(refcyc_per_req_delivery_pre_l * dml_pow(2, 10));
+	disp_ttu_regs->refcyc_per_req_delivery_l = (dml_uint_t)(refcyc_per_req_delivery_l * dml_pow(2, 10));
+	disp_ttu_regs->refcyc_per_req_delivery_pre_c = (dml_uint_t)(refcyc_per_req_delivery_pre_c * dml_pow(2, 10));
+	disp_ttu_regs->refcyc_per_req_delivery_c = (dml_uint_t)(refcyc_per_req_delivery_c * dml_pow(2, 10));
+	disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = (dml_uint_t)(refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
+	disp_ttu_regs->refcyc_per_req_delivery_cur0 = (dml_uint_t)(refcyc_per_req_delivery_cur0 * dml_pow(2, 10));
+	disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = 0;
+	disp_ttu_regs->refcyc_per_req_delivery_cur1 = 0;
+	disp_ttu_regs->qos_level_low_wm = 0;
+
+	disp_ttu_regs->qos_level_high_wm = (dml_uint_t)(4.0 * (dml_float_t)htotal * ref_freq_to_pix_freq);
+
+	disp_ttu_regs->qos_level_flip = 14;
+	disp_ttu_regs->qos_level_fixed_l = 8;
+	disp_ttu_regs->qos_level_fixed_c = 8;
+	disp_ttu_regs->qos_level_fixed_cur0 = 8;
+	disp_ttu_regs->qos_ramp_disable_l = 0;
+	disp_ttu_regs->qos_ramp_disable_c = 0;
+	disp_ttu_regs->qos_ramp_disable_cur0 = 0;
+	disp_ttu_regs->min_ttu_vblank = (dml_uint_t)(min_ttu_vblank * refclk_freq_in_mhz);
+
+	// CHECK for HW registers' range, assert or clamp
+	ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13));
+	ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
+	ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13));
+	ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
+	if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (dml_uint_t)dml_pow(2, 23))
+		disp_dlg_regs->refcyc_per_vm_group_vblank = (dml_uint_t)(dml_pow(2, 23) - 1);
+
+	if (disp_dlg_regs->refcyc_per_vm_group_flip >= (dml_uint_t)dml_pow(2, 23))
+		disp_dlg_regs->refcyc_per_vm_group_flip = (dml_uint_t)(dml_pow(2, 23) - 1);
+
+	if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (dml_uint_t)dml_pow(2, 23))
+		disp_dlg_regs->refcyc_per_vm_req_vblank = (dml_uint_t)(dml_pow(2, 23) - 1);
+
+	if (disp_dlg_regs->refcyc_per_vm_req_flip >= (dml_uint_t)dml_pow(2, 23))
+		disp_dlg_regs->refcyc_per_vm_req_flip = (dml_uint_t)(dml_pow(2, 23) - 1);
+
+
+	ASSERT(disp_dlg_regs->dst_y_after_scaler < (dml_uint_t)8);
+	ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (dml_uint_t)dml_pow(2, 13));
+	ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (dml_uint_t)dml_pow(2, 17));
+	if (dual_plane) {
+		if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (dml_uint_t)dml_pow(2, 17)) { // FIXME what's so special about chroma, can we just assert?
+ dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u > register max U15.2 %u\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (dml_uint_t)dml_pow(2, 17) - 1); + } + } + ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (dml_uint_t)dml_pow(2, 17)); + ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_c < (dml_uint_t)dml_pow(2, 17)); + + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (dml_uint_t)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_l = (dml_uint_t)(dml_pow(2, 23) - 1); + if (dual_plane) { + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (dml_uint_t)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_c = (dml_uint_t)(dml_pow(2, 23) - 1); + } + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (dml_uint_t)dml_pow(2, 13)); + if (dual_plane) { + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (dml_uint_t)dml_pow(2, 13)); + } + + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (dml_uint_t)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (dml_uint_t)(dml_pow(2, 23) - 1); + if (dual_plane) { + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (dml_uint_t)dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (dml_uint_t)(dml_pow(2, 23) - 1); + } + ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_c < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (dml_uint_t)dml_pow(2, 13)); + ASSERT(disp_ttu_regs->qos_level_low_wm < (dml_uint_t) dml_pow(2, 14)); + ASSERT(disp_ttu_regs->qos_level_high_wm < (dml_uint_t) dml_pow(2, 14)); + ASSERT(disp_ttu_regs->min_ttu_vblank < (dml_uint_t) dml_pow(2, 24)); + + dml_print_ttu_regs_st(disp_ttu_regs); + dml_print_dlg_regs_st(disp_dlg_regs); + dml_print("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); +} + +void dml_rq_dlg_get_arb_params(struct display_mode_lib_st *mode_lib, dml_display_arb_params_st *arb_param) +{ + memset(arb_param, 0, sizeof(*arb_param)); + arb_param->max_req_outstanding = 256; + arb_param->min_req_outstanding = 256; // turn off the sat level feature if this set to max + arb_param->sat_level_us = 60; + arb_param->hvm_max_qos_commit_threshold = 0xf; + arb_param->hvm_min_req_outstand_commit_threshold = 0xa; + arb_param->compbuf_reserved_space_kbytes = 2 * 8; // assume max data chunk size of 8K +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.h new file mode 100644 index 000000000000..bf491cf0582d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DML_DISPLAY_RQ_DLG_CALC_H__
+#define __DML_DISPLAY_RQ_DLG_CALC_H__
+
+#include "display_mode_core_structs.h"
+#include "display_mode_lib_defines.h"
+
+struct display_mode_lib_st;
+
+// Function: dml_rq_dlg_get_rq_reg
+// Main entry point for test to get the register values out of this DML class.
+// This function calls the <get_rq_param> and <extract_rq_regs> functions to calculate
+// and then populate the rq_regs struct
+// Input:
+// Assume mode_program is already called
+// Output:
+// rq_regs - struct that holds all the RQ register field values.
+// See also:
+
+void dml_rq_dlg_get_rq_reg(dml_display_rq_regs_st *rq_regs,
+	struct display_mode_lib_st *mode_lib,
+	const dml_uint_t pipe_idx);
+
+// Function: dml_rq_dlg_get_dlg_reg
+// Calculate and return DLG and TTU register struct given the system setting
+// Output:
+// dlg_regs - output DLG register struct
+// ttu_regs - output DLG TTU register struct
+// Input:
+// Assume mode_program is already called
+// pipe_idx - index that identifies the e2e_pipe_param that corresponds to this dlg
+void dml_rq_dlg_get_dlg_reg(dml_display_dlg_regs_st *dlg_regs,
+	dml_display_ttu_regs_st *ttu_regs,
+	struct display_mode_lib_st *mode_lib,
+	const dml_uint_t pipe_idx);
+
+// Function: dml_rq_dlg_get_arb_params
+void dml_rq_dlg_get_arb_params(struct display_mode_lib_st *mode_lib, dml_display_arb_params_st *arb_param);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml_logging.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_logging.h
new file mode 100644
index 000000000000..7fadbe6d7af4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml_logging.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML_LOGGING_H__ +#define __DML_LOGGING_H__ + +#define dml_print(...) ((void)0) + +#endif //__DML_LOGGING_H__ diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h index fdabbeec8ffa..4570b8016de5 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h @@ -31,7 +31,7 @@ #include "dcn30/dcn30_hubp.h" #include "dcn31/dcn31_hubp.h" #include "dcn32/dcn32_hubp.h" -#include "dml2/dml21/inc/dml_top_dchub_registers.h" +#include "dml2_0/dml21/inc/dml_top_dchub_registers.h" #define HUBP_3DLUT_FL_REG_LIST_DCN401(inst)\ SRI_ARR_US(_3DLUT_FL_CONFIG, HUBP, inst),\ diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index d11893f8c916..3fe590436e74 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -58,8 +58,8 @@ #include "transform.h" #include "dpp.h" -#include "dml2/dml21/inc/dml_top_dchub_registers.h" -#include "dml2/dml21/inc/dml_top_types.h" +#include "dml2_0/dml21/inc/dml_top_dchub_registers.h" +#include "dml2_0/dml21/inc/dml_top_types.h" struct resource_pool; struct dc_state; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index b0c13b506c11..c62c4c1ed7c3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -41,8 +41,8 @@ #include "mem_input.h" #include "cursor_reg_cache.h" -#include "dml2/dml21/inc/dml_top_dchub_registers.h" -#include "dml2/dml21/inc/dml_top_types.h" +#include "dml2_0/dml21/inc/dml_top_dchub_registers.h" +#include "dml2_0/dml21/inc/dml_top_types.h" #define OPP_ID_INVALID 0xf #define MAX_TTU 0xffffff diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index 42fbc70f7056..d468bc85566a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -29,7 +29,7 @@ #include "include/grph_object_id.h" #include "dml/display_mode_structs.h" -#include "dml2/dml21/inc/dml_top_dchub_registers.h" +#include "dml2_0/dml21/inc/dml_top_dchub_registers.h" struct dchub_init_data; struct cstate_pstate_watermarks_st { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 3965a7f1b64b..4c2e8aa96d1f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -92,7 +92,7 @@ #include "dc_state_priv.h" -#include "dml2/dml2_wrapper.h" +#include "dml2_0/dml2_wrapper.h" #define DC_LOGGER_INIT(logger) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index fff57f23f4f7..3a83a63d2116 100644 --- 
a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -33,7 +33,7 @@ #include "resource.h" #include "include/irq_service_interface.h" #include "dcn35_resource.h" -#include "dml2/dml2_wrapper.h" +#include "dml2_0/dml2_wrapper.h" #include "dcn20/dcn20_resource.h" #include "dcn30/dcn30_resource.h" diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 0abd163b425e..d00cc4030be3 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -83,7 +83,7 @@ #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" -#include "dml2/dml2_wrapper.h" +#include "dml2_0/dml2_wrapper.h" #include "link_enc_cfg.h" #define DC_LOGGER_INIT(logger) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c index ca125ee6c2fb..f4b28617b2de 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c @@ -11,7 +11,7 @@ #include "resource.h" #include "include/irq_service_interface.h" #include "dcn36_resource.h" -#include "dml2/dml2_wrapper.h" +#include "dml2_0/dml2_wrapper.h" #include "dcn20/dcn20_resource.h" #include "dcn30/dcn30_resource.h" diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 1d18807e4749..8e2d3781752c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -73,7 +73,7 @@ #include "dc_state_priv.h" -#include "dml2/dml2_wrapper.h" +#include "dml2_0/dml2_wrapper.h" #define DC_LOGGER_INIT(logger) diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h index 21d842857601..88c11b6be004 100644 --- a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h @@ -9,7 +9,7 @@ #include "dc.h" #include "clk_mgr.h" #include "soc_and_ip_translator.h" -#include "dml2/dml21/inc/dml_top_soc_parameter_types.h" +#include "dml2_0/dml21/inc/dml_top_soc_parameter_types.h" void dcn401_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator); -- cgit v1.2.3 From dbf58a9dad4d80286c5c17e199f595eb0dd3be5a Mon Sep 17 00:00:00 2001 From: Karen Chen Date: Wed, 15 Oct 2025 11:13:07 -0400 Subject: drm/amd/display: Add more DC HW state info to underflow logging [Why] Debugging underflow issues frequently requires knowing the HW state at the time of underflow. To enable capturing this HW state information, interface functions are needed for the various DC HW blocks. 
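As a rough illustration of how the per-block hooks compose (a hypothetical
sketch, not part of this change: the helper name
dc_capture_underflow_reg_state() is invented here and the caller is assumed
to have allocated the per-pipe reg-state structs; only the
dpp_read_reg_state, dsc_read_reg_state and dccg_read_reg_state function
pointers introduced by this patch are real):

	static void dc_capture_underflow_reg_state(struct dc *dc,
			struct dc_underflow_debug_data *data)
	{
		struct resource_pool *pool = dc->res_pool;
		int i;

		for (i = 0; i < pool->pipe_count; i++) {
			struct dpp *dpp = pool->dpps[i];
			struct display_stream_compressor *dsc = pool->dscs[i];

			/* snapshot each block through its new read_reg_state hook */
			if (dpp && dpp->funcs->dpp_read_reg_state && data->dpp_reg_state[i])
				dpp->funcs->dpp_read_reg_state(dpp, data->dpp_reg_state[i]);
			if (dsc && dsc->funcs->dsc_read_reg_state && data->dsc_reg_state[i])
				dsc->funcs->dsc_read_reg_state(dsc, data->dsc_reg_state[i]);
			if (pool->dccg && pool->dccg->funcs->dccg_read_reg_state && data->dccg_reg_state[i])
				pool->dccg->funcs->dccg_read_reg_state(pool->dccg, data->dccg_reg_state[i]);
		}
	}

Keeping the snapshot behind per-block function pointers lets each DCN
generation expose only the registers it actually implements.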
[How] This change adds the interface functions to read HW state for the following DC HW blocks: - HUBBUB - HUBP - DPP - MPC - OPP - DSC - OPTC - DCCG Reviewed-by: George Shen Signed-off-by: Karen Chen Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 30 +++-- .../gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h | 64 ++++++++- .../gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c | 123 +++++++++++++++++ .../gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h | 2 + .../drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c | 3 +- .../drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h | 3 +- .../gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c | 1 + .../gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h | 5 +- .../drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c | 1 + .../gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h | 4 +- .../gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c | 16 +++ .../gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h | 2 + .../gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.c | 1 + .../gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c | 1 + .../gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c | 1 + .../gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c | 8 ++ .../gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h | 1 + .../gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c | 1 + .../gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c | 1 + drivers/gpu/drm/amd/display/dc/dsc/dsc.h | 5 + .../drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c | 33 +---- .../drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h | 6 +- .../drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c | 3 +- .../drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c | 3 +- .../drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c | 3 +- .../amd/display/dc/hubbub/dcn401/dcn401_hubbub.c | 3 +- .../gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h | 136 ++++++++++++++++++- .../gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h | 8 +- .../gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c | 147 +++++++++++++++++---- .../gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h | 2 + .../gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c | 4 +- .../gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c | 4 +- .../gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c | 4 +- .../drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c | 4 +- .../drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c | 76 ++++++----- drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h | 121 ++++++++++++++++- drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h | 12 +- drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h | 16 ++- drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 1 + drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 27 ++++ drivers/gpu/drm/amd/display/dc/inc/hw/opp.h | 13 ++ .../drm/amd/display/dc/inc/hw/timing_generator.h | 130 ++++++++++++++++++ .../gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c | 16 +++ .../gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h | 5 + .../gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c | 1 + .../gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c | 1 + .../gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c | 14 +- .../gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h | 6 +- .../gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c | 13 ++ .../gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h | 6 +- .../gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.c | 13 ++ .../gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.h | 4 +- .../gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h | 38 +++++- .../gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c | 131 ++++++++++++++++++ .../gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h | 2 + .../drm/amd/display/dc/optc/dcn314/dcn314_optc.c | 1 + 
.../gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c | 1 + .../gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c | 1 + .../drm/amd/display/dc/optc/dcn401/dcn401_optc.c | 1 + 59 files changed, 1133 insertions(+), 150 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 182a35f6b439..93b39710db53 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -54,6 +54,14 @@ struct abm_save_restore; struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; +struct dcn_hubbub_reg_state; +struct dcn_hubp_reg_state; +struct dcn_dpp_reg_state; +struct dcn_mpc_reg_state; +struct dcn_opp_reg_state; +struct dcn_dsc_reg_state; +struct dcn_optc_reg_state; +struct dcn_dccg_reg_state; #define DC_VER "3.2.355" @@ -1831,20 +1839,14 @@ struct dc_surface_update { }; struct dc_underflow_debug_data { - uint32_t otg_inst; - uint32_t otg_underflow; - uint32_t h_position; - uint32_t v_position; - uint32_t otg_frame_count; - struct dc_underflow_per_hubp_debug_data { - uint32_t hubp_underflow; - uint32_t hubp_in_blank; - uint32_t hubp_readline; - uint32_t det_config_error; - } hubps[MAX_PIPES]; - uint32_t curr_det_sizes[MAX_PIPES]; - uint32_t target_det_sizes[MAX_PIPES]; - uint32_t compbuf_config_error; + struct dcn_hubbub_reg_state *hubbub_reg_state; + struct dcn_hubp_reg_state *hubp_reg_state[MAX_PIPES]; + struct dcn_dpp_reg_state *dpp_reg_state[MAX_PIPES]; + struct dcn_mpc_reg_state *mpc_reg_state[MAX_PIPES]; + struct dcn_opp_reg_state *opp_reg_state[MAX_PIPES]; + struct dcn_dsc_reg_state *dsc_reg_state[MAX_PIPES]; + struct dcn_optc_reg_state *optc_reg_state[MAX_PIPES]; + struct dcn_dccg_reg_state *dccg_reg_state[MAX_PIPES]; }; /* diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h index a9b88f5e0c04..8bdffd9ff31b 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h @@ -425,7 +425,69 @@ struct dccg_mask { uint32_t SYMCLKD_CLOCK_ENABLE; \ uint32_t SYMCLKE_CLOCK_ENABLE; \ uint32_t DP_DTO_MODULO[MAX_PIPES]; \ - uint32_t DP_DTO_PHASE[MAX_PIPES] + uint32_t DP_DTO_PHASE[MAX_PIPES]; \ + uint32_t DC_MEM_GLOBAL_PWR_REQ_CNTL; \ + uint32_t DCCG_AUDIO_DTO0_MODULE; \ + uint32_t DCCG_AUDIO_DTO0_PHASE; \ + uint32_t DCCG_AUDIO_DTO1_MODULE; \ + uint32_t DCCG_AUDIO_DTO1_PHASE; \ + uint32_t DCCG_CAC_STATUS; \ + uint32_t DCCG_CAC_STATUS2; \ + uint32_t DCCG_DISP_CNTL_REG; \ + uint32_t DCCG_DS_CNTL; \ + uint32_t DCCG_DS_DTO_INCR; \ + uint32_t DCCG_DS_DTO_MODULO; \ + uint32_t DCCG_DS_HW_CAL_INTERVAL; \ + uint32_t DCCG_GTC_CNTL; \ + uint32_t DCCG_GTC_CURRENT; \ + uint32_t DCCG_GTC_DTO_INCR; \ + uint32_t DCCG_GTC_DTO_MODULO; \ + uint32_t DCCG_PERFMON_CNTL; \ + uint32_t DCCG_PERFMON_CNTL2; \ + uint32_t DCCG_SOFT_RESET; \ + uint32_t DCCG_TEST_CLK_SEL; \ + uint32_t DCCG_VSYNC_CNT_CTRL; \ + uint32_t DCCG_VSYNC_CNT_INT_CTRL; \ + uint32_t DCCG_VSYNC_OTG0_LATCH_VALUE; \ + uint32_t DCCG_VSYNC_OTG1_LATCH_VALUE; \ + uint32_t DCCG_VSYNC_OTG2_LATCH_VALUE; \ + uint32_t DCCG_VSYNC_OTG3_LATCH_VALUE; \ + uint32_t DCCG_VSYNC_OTG4_LATCH_VALUE; \ + uint32_t DCCG_VSYNC_OTG5_LATCH_VALUE; \ + uint32_t DISPCLK_CGTT_BLK_CTRL_REG; \ + uint32_t DP_DTO_DBUF_EN; \ + uint32_t DPIACLK_540M_DTO_MODULO; \ + uint32_t DPIACLK_540M_DTO_PHASE; \ + uint32_t DPIACLK_810M_DTO_MODULO; \ + uint32_t DPIACLK_810M_DTO_PHASE; \ + uint32_t DPIACLK_DTO_CNTL; \ + uint32_t DPIASYMCLK_CNTL; \ + uint32_t DPPCLK_CGTT_BLK_CTRL_REG; \ + uint32_t 
DPREFCLK_CGTT_BLK_CTRL_REG; \ + uint32_t DPREFCLK_CNTL; \ + uint32_t DTBCLK_DTO_DBUF_EN; \ + uint32_t FORCE_SYMCLK_DISABLE; \ + uint32_t HDMICHARCLK0_CLOCK_CNTL; \ + uint32_t MICROSECOND_TIME_BASE_DIV; \ + uint32_t MILLISECOND_TIME_BASE_DIV; \ + uint32_t OTG0_PHYPLL_PIXEL_RATE_CNTL; \ + uint32_t OTG0_PIXEL_RATE_CNTL; \ + uint32_t OTG1_PHYPLL_PIXEL_RATE_CNTL; \ + uint32_t OTG1_PIXEL_RATE_CNTL; \ + uint32_t OTG2_PHYPLL_PIXEL_RATE_CNTL; \ + uint32_t OTG2_PIXEL_RATE_CNTL; \ + uint32_t OTG3_PHYPLL_PIXEL_RATE_CNTL; \ + uint32_t OTG3_PIXEL_RATE_CNTL; \ + uint32_t PHYPLLA_PIXCLK_RESYNC_CNTL; \ + uint32_t PHYPLLB_PIXCLK_RESYNC_CNTL; \ + uint32_t PHYPLLC_PIXCLK_RESYNC_CNTL; \ + uint32_t PHYPLLD_PIXCLK_RESYNC_CNTL; \ + uint32_t PHYPLLE_PIXCLK_RESYNC_CNTL; \ + uint32_t REFCLK_CGTT_BLK_CTRL_REG; \ + uint32_t SOCCLK_CGTT_BLK_CTRL_REG; \ + uint32_t SYMCLK_CGTT_BLK_CTRL_REG; \ + uint32_t SYMCLK_PSP_CNTL + struct dccg_registers { DCCG_REG_VARIABLE_LIST; }; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c index 8664f0c4c9b7..97df04b7e39d 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c @@ -709,6 +709,128 @@ void dccg31_otg_drop_pixel(struct dccg *dccg, OTG_DROP_PIXEL[otg_inst], 1); } +void dccg31_read_reg_state(struct dccg *dccg, struct dcn_dccg_reg_state *dccg_reg_state) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + dccg_reg_state->dc_mem_global_pwr_req_cntl = REG_READ(DC_MEM_GLOBAL_PWR_REQ_CNTL); + dccg_reg_state->dccg_audio_dtbclk_dto_modulo = REG_READ(DCCG_AUDIO_DTBCLK_DTO_MODULO); + dccg_reg_state->dccg_audio_dtbclk_dto_phase = REG_READ(DCCG_AUDIO_DTBCLK_DTO_PHASE); + dccg_reg_state->dccg_audio_dto_source = REG_READ(DCCG_AUDIO_DTO_SOURCE); + dccg_reg_state->dccg_audio_dto0_module = REG_READ(DCCG_AUDIO_DTO0_MODULE); + dccg_reg_state->dccg_audio_dto0_phase = REG_READ(DCCG_AUDIO_DTO0_PHASE); + dccg_reg_state->dccg_audio_dto1_module = REG_READ(DCCG_AUDIO_DTO1_MODULE); + dccg_reg_state->dccg_audio_dto1_phase = REG_READ(DCCG_AUDIO_DTO1_PHASE); + dccg_reg_state->dccg_cac_status = REG_READ(DCCG_CAC_STATUS); + dccg_reg_state->dccg_cac_status2 = REG_READ(DCCG_CAC_STATUS2); + dccg_reg_state->dccg_disp_cntl_reg = REG_READ(DCCG_DISP_CNTL_REG); + dccg_reg_state->dccg_ds_cntl = REG_READ(DCCG_DS_CNTL); + dccg_reg_state->dccg_ds_dto_incr = REG_READ(DCCG_DS_DTO_INCR); + dccg_reg_state->dccg_ds_dto_modulo = REG_READ(DCCG_DS_DTO_MODULO); + dccg_reg_state->dccg_ds_hw_cal_interval = REG_READ(DCCG_DS_HW_CAL_INTERVAL); + dccg_reg_state->dccg_gate_disable_cntl = REG_READ(DCCG_GATE_DISABLE_CNTL); + dccg_reg_state->dccg_gate_disable_cntl2 = REG_READ(DCCG_GATE_DISABLE_CNTL2); + dccg_reg_state->dccg_gate_disable_cntl3 = REG_READ(DCCG_GATE_DISABLE_CNTL3); + dccg_reg_state->dccg_gate_disable_cntl4 = REG_READ(DCCG_GATE_DISABLE_CNTL4); + dccg_reg_state->dccg_gate_disable_cntl5 = REG_READ(DCCG_GATE_DISABLE_CNTL5); + dccg_reg_state->dccg_gate_disable_cntl6 = REG_READ(DCCG_GATE_DISABLE_CNTL6); + dccg_reg_state->dccg_global_fgcg_rep_cntl = REG_READ(DCCG_GLOBAL_FGCG_REP_CNTL); + dccg_reg_state->dccg_gtc_cntl = REG_READ(DCCG_GTC_CNTL); + dccg_reg_state->dccg_gtc_current = REG_READ(DCCG_GTC_CURRENT); + dccg_reg_state->dccg_gtc_dto_incr = REG_READ(DCCG_GTC_DTO_INCR); + dccg_reg_state->dccg_gtc_dto_modulo = REG_READ(DCCG_GTC_DTO_MODULO); + dccg_reg_state->dccg_perfmon_cntl = REG_READ(DCCG_PERFMON_CNTL); + dccg_reg_state->dccg_perfmon_cntl2 = REG_READ(DCCG_PERFMON_CNTL2); + 
dccg_reg_state->dccg_soft_reset = REG_READ(DCCG_SOFT_RESET); + dccg_reg_state->dccg_test_clk_sel = REG_READ(DCCG_TEST_CLK_SEL); + dccg_reg_state->dccg_vsync_cnt_ctrl = REG_READ(DCCG_VSYNC_CNT_CTRL); + dccg_reg_state->dccg_vsync_cnt_int_ctrl = REG_READ(DCCG_VSYNC_CNT_INT_CTRL); + dccg_reg_state->dccg_vsync_otg0_latch_value = REG_READ(DCCG_VSYNC_OTG0_LATCH_VALUE); + dccg_reg_state->dccg_vsync_otg1_latch_value = REG_READ(DCCG_VSYNC_OTG1_LATCH_VALUE); + dccg_reg_state->dccg_vsync_otg2_latch_value = REG_READ(DCCG_VSYNC_OTG2_LATCH_VALUE); + dccg_reg_state->dccg_vsync_otg3_latch_value = REG_READ(DCCG_VSYNC_OTG3_LATCH_VALUE); + dccg_reg_state->dccg_vsync_otg4_latch_value = REG_READ(DCCG_VSYNC_OTG4_LATCH_VALUE); + dccg_reg_state->dccg_vsync_otg5_latch_value = REG_READ(DCCG_VSYNC_OTG5_LATCH_VALUE); + dccg_reg_state->dispclk_cgtt_blk_ctrl_reg = REG_READ(DISPCLK_CGTT_BLK_CTRL_REG); + dccg_reg_state->dispclk_freq_change_cntl = REG_READ(DISPCLK_FREQ_CHANGE_CNTL); + dccg_reg_state->dp_dto_dbuf_en = REG_READ(DP_DTO_DBUF_EN); + dccg_reg_state->dp_dto0_modulo = REG_READ(DP_DTO_MODULO[0]); + dccg_reg_state->dp_dto0_phase = REG_READ(DP_DTO_PHASE[0]); + dccg_reg_state->dp_dto1_modulo = REG_READ(DP_DTO_MODULO[1]); + dccg_reg_state->dp_dto1_phase = REG_READ(DP_DTO_PHASE[1]); + dccg_reg_state->dp_dto2_modulo = REG_READ(DP_DTO_MODULO[2]); + dccg_reg_state->dp_dto2_phase = REG_READ(DP_DTO_PHASE[2]); + dccg_reg_state->dp_dto3_modulo = REG_READ(DP_DTO_MODULO[3]); + dccg_reg_state->dp_dto3_phase = REG_READ(DP_DTO_PHASE[3]); + dccg_reg_state->dpiaclk_540m_dto_modulo = REG_READ(DPIACLK_540M_DTO_MODULO); + dccg_reg_state->dpiaclk_540m_dto_phase = REG_READ(DPIACLK_540M_DTO_PHASE); + dccg_reg_state->dpiaclk_810m_dto_modulo = REG_READ(DPIACLK_810M_DTO_MODULO); + dccg_reg_state->dpiaclk_810m_dto_phase = REG_READ(DPIACLK_810M_DTO_PHASE); + dccg_reg_state->dpiaclk_dto_cntl = REG_READ(DPIACLK_DTO_CNTL); + dccg_reg_state->dpiasymclk_cntl = REG_READ(DPIASYMCLK_CNTL); + dccg_reg_state->dppclk_cgtt_blk_ctrl_reg = REG_READ(DPPCLK_CGTT_BLK_CTRL_REG); + dccg_reg_state->dppclk_ctrl = REG_READ(DPPCLK_CTRL); + dccg_reg_state->dppclk_dto_ctrl = REG_READ(DPPCLK_DTO_CTRL); + dccg_reg_state->dppclk0_dto_param = REG_READ(DPPCLK_DTO_PARAM[0]); + dccg_reg_state->dppclk1_dto_param = REG_READ(DPPCLK_DTO_PARAM[1]); + dccg_reg_state->dppclk2_dto_param = REG_READ(DPPCLK_DTO_PARAM[2]); + dccg_reg_state->dppclk3_dto_param = REG_READ(DPPCLK_DTO_PARAM[3]); + dccg_reg_state->dprefclk_cgtt_blk_ctrl_reg = REG_READ(DPREFCLK_CGTT_BLK_CTRL_REG); + dccg_reg_state->dprefclk_cntl = REG_READ(DPREFCLK_CNTL); + dccg_reg_state->dpstreamclk_cntl = REG_READ(DPSTREAMCLK_CNTL); + dccg_reg_state->dscclk_dto_ctrl = REG_READ(DSCCLK_DTO_CTRL); + dccg_reg_state->dscclk0_dto_param = REG_READ(DSCCLK0_DTO_PARAM); + dccg_reg_state->dscclk1_dto_param = REG_READ(DSCCLK1_DTO_PARAM); + dccg_reg_state->dscclk2_dto_param = REG_READ(DSCCLK2_DTO_PARAM); + dccg_reg_state->dscclk3_dto_param = REG_READ(DSCCLK3_DTO_PARAM); + dccg_reg_state->dtbclk_dto_dbuf_en = REG_READ(DTBCLK_DTO_DBUF_EN); + dccg_reg_state->dtbclk_dto0_modulo = REG_READ(DTBCLK_DTO_MODULO[0]); + dccg_reg_state->dtbclk_dto0_phase = REG_READ(DTBCLK_DTO_PHASE[0]); + dccg_reg_state->dtbclk_dto1_modulo = REG_READ(DTBCLK_DTO_MODULO[1]); + dccg_reg_state->dtbclk_dto1_phase = REG_READ(DTBCLK_DTO_PHASE[1]); + dccg_reg_state->dtbclk_dto2_modulo = REG_READ(DTBCLK_DTO_MODULO[2]); + dccg_reg_state->dtbclk_dto2_phase = REG_READ(DTBCLK_DTO_PHASE[2]); + dccg_reg_state->dtbclk_dto3_modulo = REG_READ(DTBCLK_DTO_MODULO[3]); + 
dccg_reg_state->dtbclk_dto3_phase = REG_READ(DTBCLK_DTO_PHASE[3]); + dccg_reg_state->dtbclk_p_cntl = REG_READ(DTBCLK_P_CNTL); + dccg_reg_state->force_symclk_disable = REG_READ(FORCE_SYMCLK_DISABLE); + dccg_reg_state->hdmicharclk0_clock_cntl = REG_READ(HDMICHARCLK0_CLOCK_CNTL); + dccg_reg_state->hdmistreamclk_cntl = REG_READ(HDMISTREAMCLK_CNTL); + dccg_reg_state->hdmistreamclk0_dto_param = REG_READ(HDMISTREAMCLK0_DTO_PARAM); + dccg_reg_state->microsecond_time_base_div = REG_READ(MICROSECOND_TIME_BASE_DIV); + dccg_reg_state->millisecond_time_base_div = REG_READ(MILLISECOND_TIME_BASE_DIV); + dccg_reg_state->otg_pixel_rate_div = REG_READ(OTG_PIXEL_RATE_DIV); + dccg_reg_state->otg0_phypll_pixel_rate_cntl = REG_READ(OTG0_PHYPLL_PIXEL_RATE_CNTL); + dccg_reg_state->otg0_pixel_rate_cntl = REG_READ(OTG0_PIXEL_RATE_CNTL); + dccg_reg_state->otg1_phypll_pixel_rate_cntl = REG_READ(OTG1_PHYPLL_PIXEL_RATE_CNTL); + dccg_reg_state->otg1_pixel_rate_cntl = REG_READ(OTG1_PIXEL_RATE_CNTL); + dccg_reg_state->otg2_phypll_pixel_rate_cntl = REG_READ(OTG2_PHYPLL_PIXEL_RATE_CNTL); + dccg_reg_state->otg2_pixel_rate_cntl = REG_READ(OTG2_PIXEL_RATE_CNTL); + dccg_reg_state->otg3_phypll_pixel_rate_cntl = REG_READ(OTG3_PHYPLL_PIXEL_RATE_CNTL); + dccg_reg_state->otg3_pixel_rate_cntl = REG_READ(OTG3_PIXEL_RATE_CNTL); + dccg_reg_state->phyasymclk_clock_cntl = REG_READ(PHYASYMCLK_CLOCK_CNTL); + dccg_reg_state->phybsymclk_clock_cntl = REG_READ(PHYBSYMCLK_CLOCK_CNTL); + dccg_reg_state->phycsymclk_clock_cntl = REG_READ(PHYCSYMCLK_CLOCK_CNTL); + dccg_reg_state->phydsymclk_clock_cntl = REG_READ(PHYDSYMCLK_CLOCK_CNTL); + dccg_reg_state->phyesymclk_clock_cntl = REG_READ(PHYESYMCLK_CLOCK_CNTL); + dccg_reg_state->phyplla_pixclk_resync_cntl = REG_READ(PHYPLLA_PIXCLK_RESYNC_CNTL); + dccg_reg_state->phypllb_pixclk_resync_cntl = REG_READ(PHYPLLB_PIXCLK_RESYNC_CNTL); + dccg_reg_state->phypllc_pixclk_resync_cntl = REG_READ(PHYPLLC_PIXCLK_RESYNC_CNTL); + dccg_reg_state->phyplld_pixclk_resync_cntl = REG_READ(PHYPLLD_PIXCLK_RESYNC_CNTL); + dccg_reg_state->phyplle_pixclk_resync_cntl = REG_READ(PHYPLLE_PIXCLK_RESYNC_CNTL); + dccg_reg_state->refclk_cgtt_blk_ctrl_reg = REG_READ(REFCLK_CGTT_BLK_CTRL_REG); + dccg_reg_state->socclk_cgtt_blk_ctrl_reg = REG_READ(SOCCLK_CGTT_BLK_CTRL_REG); + dccg_reg_state->symclk_cgtt_blk_ctrl_reg = REG_READ(SYMCLK_CGTT_BLK_CTRL_REG); + dccg_reg_state->symclk_psp_cntl = REG_READ(SYMCLK_PSP_CNTL); + dccg_reg_state->symclk32_le_cntl = REG_READ(SYMCLK32_LE_CNTL); + dccg_reg_state->symclk32_se_cntl = REG_READ(SYMCLK32_SE_CNTL); + dccg_reg_state->symclka_clock_enable = REG_READ(SYMCLKA_CLOCK_ENABLE); + dccg_reg_state->symclkb_clock_enable = REG_READ(SYMCLKB_CLOCK_ENABLE); + dccg_reg_state->symclkc_clock_enable = REG_READ(SYMCLKC_CLOCK_ENABLE); + dccg_reg_state->symclkd_clock_enable = REG_READ(SYMCLKD_CLOCK_ENABLE); + dccg_reg_state->symclke_clock_enable = REG_READ(SYMCLKE_CLOCK_ENABLE); +} + static const struct dccg_funcs dccg31_funcs = { .update_dpp_dto = dccg31_update_dpp_dto, .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, @@ -727,6 +849,7 @@ static const struct dccg_funcs dccg31_funcs = { .set_dispclk_change_mode = dccg31_set_dispclk_change_mode, .disable_dsc = dccg31_disable_dscclk, .enable_dsc = dccg31_enable_dscclk, + .dccg_read_reg_state = dccg31_read_reg_state, }; struct dccg *dccg31_create( diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h index cd261051dc2c..bf659920d4cc 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h +++ 
b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h
@@ -236,4 +236,6 @@ void dccg31_disable_dscclk(struct dccg *dccg, int inst);
 
 void dccg31_enable_dscclk(struct dccg *dccg, int inst);
 
+void dccg31_read_reg_state(struct dccg *dccg, struct dcn_dccg_reg_state *dccg_reg_state);
+
 #endif //__DCN31_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c
index 8f6edd8e9beb..ef3db6beba25 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c
@@ -377,7 +377,8 @@ static const struct dccg_funcs dccg314_funcs = {
 	.get_pixel_rate_div = dccg314_get_pixel_rate_div,
 	.trigger_dio_fifo_resync = dccg314_trigger_dio_fifo_resync,
 	.set_valid_pixel_rate = dccg314_set_valid_pixel_rate,
-	.set_dtbclk_p_src = dccg314_set_dtbclk_p_src
+	.set_dtbclk_p_src = dccg314_set_dtbclk_p_src,
+	.dccg_read_reg_state = dccg31_read_reg_state
 };
 
 struct dccg *dccg314_create(
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h
index 60ea1d248deb..a609635f35db 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h
@@ -74,8 +74,7 @@
 	SR(DCCG_GATE_DISABLE_CNTL3),\
 	SR(HDMISTREAMCLK0_DTO_PARAM),\
 	SR(OTG_PIXEL_RATE_DIV),\
-	SR(DTBCLK_P_CNTL),\
-	SR(DCCG_AUDIO_DTO_SOURCE)
+	SR(DTBCLK_P_CNTL)
 
 #define DCCG_MASK_SH_LIST_DCN314_COMMON(mask_sh) \
 	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
index 0aa41759f79d..bd2f528137b2 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
@@ -2453,6 +2453,7 @@ static const struct dccg_funcs dccg35_funcs = {
 	.disable_symclk_se = dccg35_disable_symclk_se,
 	.set_dtbclk_p_src = dccg35_set_dtbclk_p_src,
 	.dccg_root_gate_disable_control = dccg35_root_gate_disable_control,
+	.dccg_read_reg_state = dccg31_read_reg_state,
 };
 
 struct dccg *dccg35_create(
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h
index ad7a5850d31b..7b9c36456cd9 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h
@@ -41,8 +41,9 @@
 	SR(SYMCLKA_CLOCK_ENABLE),\
 	SR(SYMCLKB_CLOCK_ENABLE),\
 	SR(SYMCLKC_CLOCK_ENABLE),\
-	SR(SYMCLKD_CLOCK_ENABLE),\
-	SR(SYMCLKE_CLOCK_ENABLE)
+	SR(SYMCLKD_CLOCK_ENABLE), \
+	SR(SYMCLKE_CLOCK_ENABLE),\
+	SR(SYMCLK_PSP_CNTL)
 
 #define DCCG_MASK_SH_LIST_DCN35(mask_sh) \
 	DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c
index 0b8ed9b94d3c..663a18ee5162 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c
@@ -886,6 +886,7 @@ static const struct dccg_funcs dccg401_funcs = {
 	.enable_symclk_se = dccg401_enable_symclk_se,
 	.disable_symclk_se = dccg401_disable_symclk_se,
 	.set_dtbclk_p_src = dccg401_set_dtbclk_p_src,
+	.dccg_read_reg_state = dccg31_read_reg_state
 };
 
 struct dccg *dccg401_create(
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h
index f466182963f7..b12f34345a58 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h
@@ -1348,7 +1348,8 @@ struct dcn_dpp_mask {
 	uint32_t CURSOR0_COLOR1; \
 	uint32_t DPP_CONTROL; \
 	uint32_t CM_HDR_MULT_COEF; \
-	uint32_t CURSOR0_FP_SCALE_BIAS;
+	uint32_t CURSOR0_FP_SCALE_BIAS; \
+	uint32_t OBUF_CONTROL;
 
 struct dcn_dpp_registers {
 	DPP_COMMON_REG_VARIABLE_LIST
@@ -1450,7 +1451,6 @@ void dpp1_set_degamma(
 void dpp1_set_degamma_pwl(struct dpp *dpp_base,
 		const struct pwl_params *params);
-
 void dpp_read_state(struct dpp *dpp_base,
 		struct dcn_dpp_state *s);
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c
index fe52df01caf7..ef4a16117181 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c
@@ -84,6 +84,22 @@ void dpp30_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s)
 	}
 }
 
+void dpp30_read_reg_state(struct dpp *dpp_base, struct dcn_dpp_reg_state *dpp_reg_state)
+{
+	struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base);
+
+	dpp_reg_state->recout_start = REG_READ(RECOUT_START);
+	dpp_reg_state->recout_size = REG_READ(RECOUT_SIZE);
+	dpp_reg_state->scl_horz_filter_scale_ratio = REG_READ(SCL_HORZ_FILTER_SCALE_RATIO);
+	dpp_reg_state->scl_vert_filter_scale_ratio = REG_READ(SCL_VERT_FILTER_SCALE_RATIO);
+	dpp_reg_state->scl_mode = REG_READ(SCL_MODE);
+	dpp_reg_state->cm_control = REG_READ(CM_CONTROL);
+	dpp_reg_state->dpp_control = REG_READ(DPP_CONTROL);
+	dpp_reg_state->dscl_control = REG_READ(DSCL_CONTROL);
+	dpp_reg_state->obuf_control = REG_READ(OBUF_CONTROL);
+	dpp_reg_state->mpc_size = REG_READ(MPC_SIZE);
+}
+
 /*program post scaler scs block in dpp CM*/
 void dpp3_program_post_csc(
 	struct dpp *dpp_base,
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h
index f236824126e9..d4a70b4379ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h
@@ -594,6 +594,8 @@ void dpp3_program_CM_dealpha(
 
 void dpp30_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s);
 
+void dpp30_read_reg_state(struct dpp *dpp_base, struct dcn_dpp_reg_state *dpp_reg_state);
+
 bool dpp3_get_optimal_number_of_taps(
 	struct dpp *dpp,
 	struct scaler_data *scl_data,
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.c
index fa67e54bf94e..8a5aa5e86850 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.c
@@ -134,6 +134,7 @@ static struct dpp_funcs dcn32_dpp_funcs = {
 	.dpp_dppclk_control = dpp1_dppclk_control,
 	.dpp_set_hdr_multiplier = dpp3_set_hdr_multiplier,
 	.dpp_get_gamut_remap = dpp3_cm_get_gamut_remap,
+	.dpp_read_reg_state = dpp30_read_reg_state,
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c
index f7a373a3d70a..977d83bf7741 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c
@@ -95,6 +95,7 @@ void dpp35_program_bias_and_scale_fcnv(
 static struct dpp_funcs dcn35_dpp_funcs = {
 	.dpp_program_gamcor_lut = dpp3_program_gamcor_lut,
 	.dpp_read_state = dpp30_read_state,
+	.dpp_read_reg_state = dpp30_read_reg_state,
 	.dpp_reset = dpp_reset,
 	.dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale,
 	.dpp_get_optimal_number_of_taps = dpp3_get_optimal_number_of_taps,
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c
index 36187f890d5d..96c2c853de42 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c
@@ -248,6 +248,7 @@ static struct dpp_funcs dcn401_dpp_funcs = {
 	.set_optional_cursor_attributes = dpp401_set_optional_cursor_attributes,
 	.dpp_dppclk_control = dpp1_dppclk_control,
 	.dpp_set_hdr_multiplier = dpp3_set_hdr_multiplier,
+	.dpp_read_reg_state = dpp30_read_reg_state,
 	.set_cursor_matrix = dpp401_set_cursor_matrix,
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
index ee8c11e085cd..242f1e6f0d8f 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
@@ -35,6 +35,7 @@ static void dsc_write_to_registers(struct display_stream_compressor *dsc, const
 static const struct dsc_funcs dcn20_dsc_funcs = {
 	.dsc_get_enc_caps = dsc2_get_enc_caps,
 	.dsc_read_state = dsc2_read_state,
+	.dsc_read_reg_state = dsc2_read_reg_state,
 	.dsc_validate_stream = dsc2_validate_stream,
 	.dsc_set_config = dsc2_set_config,
 	.dsc_get_packed_pps = dsc2_get_packed_pps,
@@ -155,6 +156,13 @@ void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state
 			DSCRM_DSC_OPP_PIPE_SOURCE, &s->dsc_opp_source);
 }
 
+void dsc2_read_reg_state(struct display_stream_compressor *dsc, struct dcn_dsc_reg_state *dccg_reg_state)
+{
+	struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc);
+
+	dccg_reg_state->dsc_top_control = REG_READ(DSC_TOP_CONTROL);
+	dccg_reg_state->dscc_interrupt_control_status = REG_READ(DSCC_INTERRUPT_CONTROL_STATUS);
+}
 
 bool dsc2_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h
index a9c04fc95bd1..2337c3a97235 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h
@@ -606,6 +606,7 @@ bool dsc2_get_packed_pps(struct display_stream_compressor *dsc,
 		uint8_t *dsc_packed_pps);
 
 void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s);
+void dsc2_read_reg_state(struct display_stream_compressor *dsc, struct dcn_dsc_reg_state *dccg_reg_state);
 
 bool dsc2_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg);
 
 void dsc2_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg,
 		struct dsc_optc_config *dsc_optc_cfg);
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c
index 6f4f5a3c4861..f9c6377ac66c 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c
@@ -32,6 +32,7 @@ static void dsc35_enable(struct display_stream_compressor *dsc, int opp_pipe);
 static const struct dsc_funcs dcn35_dsc_funcs = {
 	.dsc_get_enc_caps = dsc2_get_enc_caps,
 	.dsc_read_state = dsc2_read_state,
+	.dsc_read_reg_state = dsc2_read_reg_state,
 	.dsc_validate_stream = dsc2_validate_stream,
 	.dsc_set_config = dsc2_set_config,
 	.dsc_get_packed_pps = dsc2_get_packed_pps,
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c
index 7bd92ae8b13e..c1bdbb38c690 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c
@@ -26,6 +26,7 @@ static const struct dsc_funcs dcn401_dsc_funcs = {
 	.dsc_disconnect = dsc401_disconnect,
 	.dsc_wait_disconnect_pending_clear = dsc401_wait_disconnect_pending_clear,
 	.dsc_get_single_enc_caps = dsc401_get_single_enc_caps,
+	.dsc_read_reg_state = dsc2_read_reg_state
 };
 
 /* Macro definitios for REG_SET macros*/
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h
index b433e16842bf..81c83d5fe042 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h
@@ -66,6 +66,10 @@ struct dcn_dsc_state {
 	uint32_t dsc_opp_source;
 };
 
+struct dcn_dsc_reg_state {
+	uint32_t dsc_top_control;
+	uint32_t dscc_interrupt_control_status;
+};
 
 /* DSC encoder capabilities
  * They differ from the DPCD DSC caps because they are based on AMD DSC encoder caps.
@@ -100,6 +104,7 @@ struct dsc_enc_caps {
 struct dsc_funcs {
 	void (*dsc_get_enc_caps)(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz);
 	void (*dsc_read_state)(struct display_stream_compressor *dsc, struct dcn_dsc_state *s);
+	void (*dsc_read_reg_state)(struct display_stream_compressor *dsc, struct dcn_dsc_reg_state *dccg_reg_state);
 	bool (*dsc_validate_stream)(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg);
 	void (*dsc_set_config)(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg,
 			struct dsc_optc_config *dsc_optc_cfg);
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c
index e7e5f6d4778e..181a93dc46e6 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c
@@ -440,33 +440,15 @@ void hubbub3_init_watermarks(struct hubbub *hubbub)
 	REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, reg);
 }
 
-void hubbub3_get_det_sizes(struct hubbub *hubbub, uint32_t *curr_det_sizes, uint32_t *target_det_sizes)
+void hubbub3_read_reg_state(struct hubbub *hubbub, struct dcn_hubbub_reg_state *hubbub_reg_state)
 {
 	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
 
-	REG_GET_2(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, &curr_det_sizes[0],
-		DET0_SIZE, &target_det_sizes[0]);
-
-	REG_GET_2(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, &curr_det_sizes[1],
-		DET1_SIZE, &target_det_sizes[1]);
-
-	REG_GET_2(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, &curr_det_sizes[2],
-		DET2_SIZE, &target_det_sizes[2]);
-
-	REG_GET_2(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, &curr_det_sizes[3],
-		DET3_SIZE, &target_det_sizes[3]);
-
-}
-
-uint32_t hubbub3_compbuf_config_error(struct hubbub *hubbub)
-{
-	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
-	uint32_t compbuf_config_error = 0;
-
-	REG_GET(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR,
-		&compbuf_config_error);
-
-	return compbuf_config_error;
+	hubbub_reg_state->det0_ctrl = REG_READ(DCHUBBUB_DET0_CTRL);
+	hubbub_reg_state->det1_ctrl = REG_READ(DCHUBBUB_DET1_CTRL);
+	hubbub_reg_state->det2_ctrl = REG_READ(DCHUBBUB_DET2_CTRL);
+	hubbub_reg_state->det3_ctrl = REG_READ(DCHUBBUB_DET3_CTRL);
+	hubbub_reg_state->compbuf_ctrl = REG_READ(DCHUBBUB_COMPBUF_CTRL);
 }
 
 static const struct hubbub_funcs hubbub30_funcs = {
@@ -486,8 +468,7 @@ static const struct hubbub_funcs hubbub30_funcs = {
 	.force_pstate_change_control = hubbub3_force_pstate_change_control,
 	.init_watermarks = hubbub3_init_watermarks,
 	.hubbub_read_state = hubbub2_read_state,
-	.get_det_sizes = hubbub3_get_det_sizes,
-	.compbuf_config_error = hubbub3_compbuf_config_error,
+	.hubbub_read_reg_state = hubbub3_read_reg_state
 };
 
 void hubbub3_construct(struct dcn20_hubbub *hubbub3,
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h
index 49a469969d36..9e14de3ccaee 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h
@@ -133,10 +133,6 @@ void hubbub3_force_pstate_change_control(struct hubbub *hubbub,
 
 void hubbub3_init_watermarks(struct hubbub *hubbub);
 
-void hubbub3_get_det_sizes(struct hubbub *hubbub,
-		uint32_t *curr_det_sizes,
-		uint32_t *target_det_sizes);
-
-uint32_t hubbub3_compbuf_config_error(struct hubbub *hubbub);
+void hubbub3_read_reg_state(struct hubbub *hubbub, struct dcn_hubbub_reg_state *hubbub_reg_state);
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c
index cdb20251a154..d1aaa58b7db3 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c
@@ -1071,8 +1071,7 @@ static const struct hubbub_funcs hubbub31_funcs = {
 	.program_compbuf_size = dcn31_program_compbuf_size,
 	.init_crb = dcn31_init_crb,
 	.hubbub_read_state = hubbub2_read_state,
-	.get_det_sizes = hubbub3_get_det_sizes,
-	.compbuf_config_error = hubbub3_compbuf_config_error,
+	.hubbub_read_reg_state = hubbub3_read_reg_state
 };
 
 void hubbub31_construct(struct dcn20_hubbub *hubbub31,
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c
index 4d4ca6d77bbd..237331b35378 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c
@@ -1037,8 +1037,7 @@ static const struct hubbub_funcs hubbub32_funcs = {
 	.force_usr_retraining_allow = hubbub32_force_usr_retraining_allow,
 	.set_request_limit = hubbub32_set_request_limit,
 	.get_mall_en = hubbub32_get_mall_en,
-	.get_det_sizes = hubbub3_get_det_sizes,
-	.compbuf_config_error = hubbub3_compbuf_config_error,
+	.hubbub_read_reg_state = hubbub3_read_reg_state
 };
 
 void hubbub32_construct(struct dcn20_hubbub *hubbub2,
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c
index a443722a8632..1b7746a6549a 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c
@@ -589,8 +589,7 @@ static const struct hubbub_funcs hubbub35_funcs = {
 	.hubbub_read_state = hubbub2_read_state,
 	.force_usr_retraining_allow = hubbub32_force_usr_retraining_allow,
 	.dchubbub_init = hubbub35_init,
-	.get_det_sizes = hubbub3_get_det_sizes,
-	.compbuf_config_error = hubbub3_compbuf_config_error,
+	.hubbub_read_reg_state = hubbub3_read_reg_state
 };
 
 void hubbub35_construct(struct dcn20_hubbub *hubbub2,
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c
index a36273a52880..d11afd1ce72a 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c
@@ -1247,8 +1247,7 @@ static const struct hubbub_funcs hubbub4_01_funcs = {
 	.program_compbuf_segments = dcn401_program_compbuf_segments,
 	.wait_for_det_update = dcn401_wait_for_det_update,
 	.program_arbiter = dcn401_program_arbiter,
-	.get_det_sizes = hubbub3_get_det_sizes,
-	.compbuf_config_error = hubbub3_compbuf_config_error,
+	.hubbub_read_reg_state = hubbub3_read_reg_state
 };
 
 void hubbub401_construct(struct dcn20_hubbub *hubbub2,
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h
index cf2eb9793008..f2571076fc50 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h
@@ -105,7 +105,9 @@
 	SRI(DCN_CUR0_TTU_CNTL0, HUBPREQ, id),\
 	SRI(DCN_CUR0_TTU_CNTL1, HUBPREQ, id),\
 	SRI(HUBP_CLK_CNTL, HUBP, id),\
-	SRI(HUBPRET_READ_LINE_VALUE, HUBPRET, id)
+	SRI(HUBPRET_READ_LINE_VALUE, HUBPRET, id),\
+	SRI(HUBP_MEASURE_WIN_CTRL_DCFCLK, HUBP, id),\
+	SRI(HUBP_MEASURE_WIN_CTRL_DPPCLK, HUBP, id)
 
 /* Register address initialization macro for ASICs with VM */
 #define HUBP_REG_LIST_DCN_VM(id)\
@@ -251,7 +253,19 @@
 	uint32_t CURSOR_HOT_SPOT; \
 	uint32_t CURSOR_DST_OFFSET; \
 	uint32_t HUBP_CLK_CNTL; \
-	uint32_t HUBPRET_READ_LINE_VALUE
+	uint32_t HUBPRET_READ_LINE_VALUE; \
+	uint32_t HUBP_MEASURE_WIN_CTRL_DCFCLK; \
+	uint32_t HUBP_MEASURE_WIN_CTRL_DPPCLK; \
+	uint32_t HUBPRET_INTERRUPT; \
+	uint32_t HUBPRET_MEM_PWR_CTRL; \
+	uint32_t HUBPRET_MEM_PWR_STATUS; \
+	uint32_t HUBPRET_READ_LINE_CTRL0; \
+	uint32_t HUBPRET_READ_LINE_CTRL1; \
+	uint32_t HUBPRET_READ_LINE0; \
+	uint32_t HUBPRET_READ_LINE1; \
+	uint32_t HUBPREQ_MEM_PWR_CTRL; \
+	uint32_t HUBPREQ_MEM_PWR_STATUS
+
 
 #define HUBP_SF(reg_name, field_name, post_fix)\
 	.field_name = reg_name ## __ ## field_name ## post_fix
@@ -688,6 +702,123 @@ struct dcn_fl_regs_st {
 	uint32_t lut_fl_mode;
 	uint32_t lut_fl_format;
 };
+struct dcn_hubp_reg_state {
+	uint32_t hubp_cntl;
+	uint32_t mall_config;
+	uint32_t mall_sub_vp;
+	uint32_t hubp_req_size_config;
+	uint32_t hubp_req_size_config_c;
+	uint32_t vmpg_config;
+	uint32_t addr_config;
+	uint32_t pri_viewport_dimension;
+	uint32_t pri_viewport_dimension_c;
+	uint32_t pri_viewport_start;
+	uint32_t pri_viewport_start_c;
+	uint32_t sec_viewport_dimension;
+	uint32_t sec_viewport_dimension_c;
+	uint32_t sec_viewport_start;
+	uint32_t sec_viewport_start_c;
+	uint32_t surface_config;
+	uint32_t tiling_config;
+	uint32_t clk_cntl;
+	uint32_t mall_status;
+	uint32_t measure_win_ctrl_dcfclk;
+	uint32_t measure_win_ctrl_dppclk;
+
+	uint32_t blank_offset_0;
+	uint32_t blank_offset_1;
+	uint32_t cursor_settings;
+	uint32_t dcn_cur0_ttu_cntl0;
+	uint32_t dcn_cur0_ttu_cntl1;
+	uint32_t dcn_cur1_ttu_cntl0;
+	uint32_t dcn_cur1_ttu_cntl1;
+	uint32_t dcn_dmdat_vm_cntl;
+	uint32_t dcn_expansion_mode;
+	uint32_t dcn_global_ttu_cntl;
+	uint32_t dcn_surf0_ttu_cntl0;
+	uint32_t dcn_surf0_ttu_cntl1;
+	uint32_t dcn_surf1_ttu_cntl0;
+	uint32_t dcn_surf1_ttu_cntl1;
+	uint32_t dcn_ttu_qos_wm;
+	uint32_t dcn_vm_mx_l1_tlb_cntl;
+	uint32_t dcn_vm_system_aperture_high_addr;
+	uint32_t dcn_vm_system_aperture_low_addr;
+	uint32_t dcsurf_flip_control;
+	uint32_t dcsurf_flip_control2;
+	uint32_t dcsurf_primary_meta_surface_address;
+	uint32_t dcsurf_primary_meta_surface_address_c;
+	uint32_t dcsurf_primary_meta_surface_address_high;
+	uint32_t dcsurf_primary_meta_surface_address_high_c;
+	uint32_t dcsurf_primary_surface_address;
+	uint32_t dcsurf_primary_surface_address_c;
+	uint32_t dcsurf_primary_surface_address_high;
+	uint32_t dcsurf_primary_surface_address_high_c;
+	uint32_t dcsurf_secondary_meta_surface_address;
+	uint32_t dcsurf_secondary_meta_surface_address_c;
+	uint32_t dcsurf_secondary_meta_surface_address_high;
+	uint32_t dcsurf_secondary_meta_surface_address_high_c;
+	uint32_t dcsurf_secondary_surface_address;
+	uint32_t dcsurf_secondary_surface_address_c;
+	uint32_t dcsurf_secondary_surface_address_high;
+	uint32_t dcsurf_secondary_surface_address_high_c;
+	uint32_t dcsurf_surface_control;
+	uint32_t dcsurf_surface_earliest_inuse;
+	uint32_t dcsurf_surface_earliest_inuse_c;
+	uint32_t dcsurf_surface_earliest_inuse_high;
+	uint32_t dcsurf_surface_earliest_inuse_high_c;
+	uint32_t dcsurf_surface_flip_interrupt;
+	uint32_t dcsurf_surface_inuse;
+	uint32_t dcsurf_surface_inuse_c;
+	uint32_t dcsurf_surface_inuse_high;
+	uint32_t dcsurf_surface_inuse_high_c;
+	uint32_t dcsurf_surface_pitch;
+	uint32_t dcsurf_surface_pitch_c;
+	uint32_t dst_after_scaler;
+	uint32_t dst_dimensions;
+	uint32_t dst_y_delta_drq_limit;
+	uint32_t flip_parameters_0;
+	uint32_t flip_parameters_1;
+	uint32_t flip_parameters_2;
+	uint32_t flip_parameters_3;
+	uint32_t flip_parameters_4;
+	uint32_t flip_parameters_5;
+	uint32_t flip_parameters_6;
+	uint32_t hubpreq_mem_pwr_ctrl;
+	uint32_t hubpreq_mem_pwr_status;
+	uint32_t nom_parameters_0;
+	uint32_t nom_parameters_1;
+	uint32_t nom_parameters_2;
+	uint32_t nom_parameters_3;
+	uint32_t nom_parameters_4;
+	uint32_t nom_parameters_5;
+	uint32_t nom_parameters_6;
+	uint32_t nom_parameters_7;
+	uint32_t per_line_delivery;
+	uint32_t per_line_delivery_pre;
+	uint32_t prefetch_settings;
+	uint32_t prefetch_settings_c;
+	uint32_t ref_freq_to_pix_freq;
+	uint32_t uclk_pstate_force;
+	uint32_t vblank_parameters_0;
+	uint32_t vblank_parameters_1;
+	uint32_t vblank_parameters_2;
+	uint32_t vblank_parameters_3;
+	uint32_t vblank_parameters_4;
+	uint32_t vblank_parameters_5;
+	uint32_t vblank_parameters_6;
+	uint32_t vmid_settings_0;
+
+	uint32_t hubpret_control;
+	uint32_t hubpret_interrupt;
+	uint32_t hubpret_mem_pwr_ctrl;
+	uint32_t hubpret_mem_pwr_status;
+	uint32_t hubpret_read_line_ctrl0;
+	uint32_t hubpret_read_line_ctrl1;
+	uint32_t hubpret_read_line_status;
+	uint32_t hubpret_read_line_value;
+	uint32_t hubpret_read_line0;
+	uint32_t hubpret_read_line1;
+};
 
 struct dcn_hubp_state {
 	struct _vcs_dpi_display_dlg_regs_st dlg_attr;
@@ -718,7 +849,6 @@ struct dcn_hubp_state {
 	uint32_t hubp_cntl;
 	uint32_t flip_control;
 };
-
 struct dcn10_hubp {
 	struct hubp base;
 	struct dcn_hubp_state state;
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h
index f325db555102..7062e6653062 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h
@@ -145,7 +145,8 @@
 	uint32_t FLIP_PARAMETERS_2;\
 	uint32_t DCN_CUR1_TTU_CNTL0;\
 	uint32_t DCN_CUR1_TTU_CNTL1;\
-	uint32_t VMID_SETTINGS_0
+	uint32_t VMID_SETTINGS_0;\
+	uint32_t DST_Y_DELTA_DRQ_LIMIT
 
 /*shared with dcn3.x*/
 #define DCN21_HUBP_REG_COMMON_VARIABLE_LIST \
@@ -176,7 +177,10 @@
 	uint32_t HUBP_3DLUT_CONTROL;\
 	uint32_t HUBP_3DLUT_DLG_PARAM;\
 	uint32_t DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE;\
-	uint32_t DCHUBP_MCACHEID_CONFIG
+	uint32_t DCHUBP_MCACHEID_CONFIG;\
+	uint32_t DCHUBP_MALL_SUB_VP;\
+	uint32_t DCHUBP_ADDR_CONFIG;\
+	uint32_t HUBP_MALL_STATUS
 
 #define DCN2_HUBP_REG_FIELD_VARIABLE_LIST(type) \
 	DCN_HUBP_REG_FIELD_BASE_LIST(type); \
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c
index 556214b2227d..0cc6f4558989 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c
@@ -476,6 +476,126 @@ void hubp3_read_state(struct hubp *hubp)
 	}
 }
 
+void hubp3_read_reg_state(struct hubp *hubp, struct dcn_hubp_reg_state *reg_state)
+{
+	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+	reg_state->hubp_cntl = REG_READ(DCHUBP_CNTL);
+	reg_state->mall_config = REG_READ(DCHUBP_MALL_CONFIG);
+	reg_state->mall_sub_vp = REG_READ(DCHUBP_MALL_SUB_VP);
+	reg_state->hubp_req_size_config = REG_READ(DCHUBP_REQ_SIZE_CONFIG);
+	reg_state->hubp_req_size_config_c = REG_READ(DCHUBP_REQ_SIZE_CONFIG_C);
+	reg_state->vmpg_config = REG_READ(DCHUBP_VMPG_CONFIG);
+	reg_state->addr_config = REG_READ(DCSURF_ADDR_CONFIG);
+	reg_state->pri_viewport_dimension = REG_READ(DCSURF_PRI_VIEWPORT_DIMENSION);
+	reg_state->pri_viewport_dimension_c = REG_READ(DCSURF_PRI_VIEWPORT_DIMENSION_C);
+	reg_state->pri_viewport_start = REG_READ(DCSURF_PRI_VIEWPORT_START);
+	reg_state->pri_viewport_start_c = REG_READ(DCSURF_PRI_VIEWPORT_START_C);
+	reg_state->sec_viewport_dimension = REG_READ(DCSURF_SEC_VIEWPORT_DIMENSION);
+	reg_state->sec_viewport_dimension_c = REG_READ(DCSURF_SEC_VIEWPORT_DIMENSION_C);
+	reg_state->sec_viewport_start = REG_READ(DCSURF_SEC_VIEWPORT_START);
+	reg_state->sec_viewport_start_c = REG_READ(DCSURF_SEC_VIEWPORT_START_C);
+	reg_state->surface_config = REG_READ(DCSURF_SURFACE_CONFIG);
+	reg_state->tiling_config = REG_READ(DCSURF_TILING_CONFIG);
+	reg_state->clk_cntl = REG_READ(HUBP_CLK_CNTL);
+	reg_state->mall_status = REG_READ(HUBP_MALL_STATUS);
+	reg_state->measure_win_ctrl_dcfclk = REG_READ(HUBP_MEASURE_WIN_CTRL_DCFCLK);
+	reg_state->measure_win_ctrl_dppclk = REG_READ(HUBP_MEASURE_WIN_CTRL_DPPCLK);
+
+	reg_state->blank_offset_0 = REG_READ(BLANK_OFFSET_0);
+	reg_state->blank_offset_1 = REG_READ(BLANK_OFFSET_1);
+	reg_state->cursor_settings = REG_READ(CURSOR_SETTINGS);
+	reg_state->dcn_cur0_ttu_cntl0 = REG_READ(DCN_CUR0_TTU_CNTL0);
+	reg_state->dcn_cur0_ttu_cntl1 = REG_READ(DCN_CUR0_TTU_CNTL1);
+	reg_state->dcn_cur1_ttu_cntl0 = REG_READ(DCN_CUR1_TTU_CNTL0);
+	reg_state->dcn_cur1_ttu_cntl1 = REG_READ(DCN_CUR1_TTU_CNTL1);
+	reg_state->dcn_dmdat_vm_cntl = REG_READ(DCN_DMDATA_VM_CNTL);
+	reg_state->dcn_expansion_mode = REG_READ(DCN_EXPANSION_MODE);
+	reg_state->dcn_global_ttu_cntl = REG_READ(DCN_GLOBAL_TTU_CNTL);
+	reg_state->dcn_surf0_ttu_cntl0 = REG_READ(DCN_SURF0_TTU_CNTL0);
+	reg_state->dcn_surf0_ttu_cntl1 = REG_READ(DCN_SURF0_TTU_CNTL1);
+	reg_state->dcn_surf1_ttu_cntl0 = REG_READ(DCN_SURF1_TTU_CNTL0);
+	reg_state->dcn_surf1_ttu_cntl1 = REG_READ(DCN_SURF1_TTU_CNTL1);
+	reg_state->dcn_ttu_qos_wm = REG_READ(DCN_TTU_QOS_WM);
+	reg_state->dcn_vm_mx_l1_tlb_cntl = REG_READ(DCN_VM_MX_L1_TLB_CNTL);
+	reg_state->dcn_vm_system_aperture_high_addr = REG_READ(DCN_VM_SYSTEM_APERTURE_HIGH_ADDR);
+	reg_state->dcn_vm_system_aperture_low_addr = REG_READ(DCN_VM_SYSTEM_APERTURE_LOW_ADDR);
+	reg_state->dcsurf_flip_control = REG_READ(DCSURF_FLIP_CONTROL);
+	reg_state->dcsurf_flip_control2 = REG_READ(DCSURF_FLIP_CONTROL2);
+	reg_state->dcsurf_primary_meta_surface_address = REG_READ(DCSURF_PRIMARY_META_SURFACE_ADDRESS);
+	reg_state->dcsurf_primary_meta_surface_address_c = REG_READ(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C);
+	reg_state->dcsurf_primary_meta_surface_address_high = REG_READ(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH);
+	reg_state->dcsurf_primary_meta_surface_address_high_c = REG_READ(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C);
+	reg_state->dcsurf_primary_surface_address = REG_READ(DCSURF_PRIMARY_SURFACE_ADDRESS);
+	reg_state->dcsurf_primary_surface_address_c = REG_READ(DCSURF_PRIMARY_SURFACE_ADDRESS_C);
+	reg_state->dcsurf_primary_surface_address_high = REG_READ(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH);
+	reg_state->dcsurf_primary_surface_address_high_c = REG_READ(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C);
+	reg_state->dcsurf_secondary_meta_surface_address = REG_READ(DCSURF_SECONDARY_META_SURFACE_ADDRESS);
+	reg_state->dcsurf_secondary_meta_surface_address_c = REG_READ(DCSURF_SECONDARY_META_SURFACE_ADDRESS_C);
+	reg_state->dcsurf_secondary_meta_surface_address_high = REG_READ(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH);
+	reg_state->dcsurf_secondary_meta_surface_address_high_c = REG_READ(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH_C);
+	reg_state->dcsurf_secondary_surface_address = REG_READ(DCSURF_SECONDARY_SURFACE_ADDRESS);
+	reg_state->dcsurf_secondary_surface_address_c = REG_READ(DCSURF_SECONDARY_SURFACE_ADDRESS_C);
+	reg_state->dcsurf_secondary_surface_address_high = REG_READ(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH);
+	reg_state->dcsurf_secondary_surface_address_high_c = REG_READ(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C);
+	reg_state->dcsurf_surface_control = REG_READ(DCSURF_SURFACE_CONTROL);
+	reg_state->dcsurf_surface_earliest_inuse = REG_READ(DCSURF_SURFACE_EARLIEST_INUSE);
+	reg_state->dcsurf_surface_earliest_inuse_c = REG_READ(DCSURF_SURFACE_EARLIEST_INUSE_C);
+	reg_state->dcsurf_surface_earliest_inuse_high = REG_READ(DCSURF_SURFACE_EARLIEST_INUSE_HIGH);
+	reg_state->dcsurf_surface_earliest_inuse_high_c = REG_READ(DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C);
+	reg_state->dcsurf_surface_flip_interrupt = REG_READ(DCSURF_SURFACE_FLIP_INTERRUPT);
+	reg_state->dcsurf_surface_inuse = REG_READ(DCSURF_SURFACE_INUSE);
+	reg_state->dcsurf_surface_inuse_c = REG_READ(DCSURF_SURFACE_INUSE_C);
+	reg_state->dcsurf_surface_inuse_high = REG_READ(DCSURF_SURFACE_INUSE_HIGH);
+	reg_state->dcsurf_surface_inuse_high_c = REG_READ(DCSURF_SURFACE_INUSE_HIGH_C);
+	reg_state->dcsurf_surface_pitch = REG_READ(DCSURF_SURFACE_PITCH);
+	reg_state->dcsurf_surface_pitch_c = REG_READ(DCSURF_SURFACE_PITCH_C);
+	reg_state->dst_after_scaler = REG_READ(DST_AFTER_SCALER);
+	reg_state->dst_dimensions = REG_READ(DST_DIMENSIONS);
+	reg_state->dst_y_delta_drq_limit = REG_READ(DST_Y_DELTA_DRQ_LIMIT);
+	reg_state->flip_parameters_0 = REG_READ(FLIP_PARAMETERS_0);
+	reg_state->flip_parameters_1 = REG_READ(FLIP_PARAMETERS_1);
+	reg_state->flip_parameters_2 = REG_READ(FLIP_PARAMETERS_2);
+	reg_state->flip_parameters_3 = REG_READ(FLIP_PARAMETERS_3);
+	reg_state->flip_parameters_4 = REG_READ(FLIP_PARAMETERS_4);
+	reg_state->flip_parameters_5 = REG_READ(FLIP_PARAMETERS_5);
+	reg_state->flip_parameters_6 = REG_READ(FLIP_PARAMETERS_6);
+	reg_state->hubpreq_mem_pwr_ctrl = REG_READ(HUBPREQ_MEM_PWR_CTRL);
+	reg_state->hubpreq_mem_pwr_status = REG_READ(HUBPREQ_MEM_PWR_STATUS);
+	reg_state->nom_parameters_0 = REG_READ(NOM_PARAMETERS_0);
+	reg_state->nom_parameters_1 = REG_READ(NOM_PARAMETERS_1);
+	reg_state->nom_parameters_2 = REG_READ(NOM_PARAMETERS_2);
+	reg_state->nom_parameters_3 = REG_READ(NOM_PARAMETERS_3);
+	reg_state->nom_parameters_4 = REG_READ(NOM_PARAMETERS_4);
+	reg_state->nom_parameters_5 = REG_READ(NOM_PARAMETERS_5);
+	reg_state->nom_parameters_6 = REG_READ(NOM_PARAMETERS_6);
+	reg_state->nom_parameters_7 = REG_READ(NOM_PARAMETERS_7);
+	reg_state->per_line_delivery = REG_READ(PER_LINE_DELIVERY);
+	reg_state->per_line_delivery_pre = REG_READ(PER_LINE_DELIVERY_PRE);
+	reg_state->prefetch_settings = REG_READ(PREFETCH_SETTINGS);
+	reg_state->prefetch_settings_c = REG_READ(PREFETCH_SETTINGS_C);
+	reg_state->ref_freq_to_pix_freq = REG_READ(REF_FREQ_TO_PIX_FREQ);
+	reg_state->uclk_pstate_force = REG_READ(UCLK_PSTATE_FORCE);
+	reg_state->vblank_parameters_0 = REG_READ(VBLANK_PARAMETERS_0);
+	reg_state->vblank_parameters_1 = REG_READ(VBLANK_PARAMETERS_1);
+	reg_state->vblank_parameters_2 = REG_READ(VBLANK_PARAMETERS_2);
+	reg_state->vblank_parameters_3 = REG_READ(VBLANK_PARAMETERS_3);
+	reg_state->vblank_parameters_4 = REG_READ(VBLANK_PARAMETERS_4);
+	reg_state->vblank_parameters_5 = REG_READ(VBLANK_PARAMETERS_5);
+	reg_state->vblank_parameters_6 = REG_READ(VBLANK_PARAMETERS_6);
+	reg_state->vmid_settings_0 = REG_READ(VMID_SETTINGS_0);
+	reg_state->hubpret_control = REG_READ(HUBPRET_CONTROL);
+	reg_state->hubpret_interrupt = REG_READ(HUBPRET_INTERRUPT);
+	reg_state->hubpret_mem_pwr_ctrl = REG_READ(HUBPRET_MEM_PWR_CTRL);
+	reg_state->hubpret_mem_pwr_status = REG_READ(HUBPRET_MEM_PWR_STATUS);
+	reg_state->hubpret_read_line_ctrl0 = REG_READ(HUBPRET_READ_LINE_CTRL0);
+	reg_state->hubpret_read_line_ctrl1 = REG_READ(HUBPRET_READ_LINE_CTRL1);
+	reg_state->hubpret_read_line_status = REG_READ(HUBPRET_READ_LINE_STATUS);
+	reg_state->hubpret_read_line_value = REG_READ(HUBPRET_READ_LINE_VALUE);
+	reg_state->hubpret_read_line0 = REG_READ(HUBPRET_READ_LINE0);
+	reg_state->hubpret_read_line1 = REG_READ(HUBPRET_READ_LINE1);
+}
+
 void hubp3_setup(
 	struct hubp *hubp,
 	struct _vcs_dpi_display_dlg_regs_st *dlg_attr,
@@ -505,30 +625,6 @@ void hubp3_init(struct hubp *hubp)
 	hubp_reset(hubp);
 }
 
-uint32_t hubp3_get_current_read_line(struct hubp *hubp)
-{
-	uint32_t read_line = 0;
-	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
-
-	REG_GET(HUBPRET_READ_LINE_VALUE,
-		PIPE_READ_LINE,
-		&read_line);
-
-	return read_line;
-}
-
-unsigned int hubp3_get_underflow_status(struct hubp *hubp)
-{
-	uint32_t hubp_underflow = 0;
-	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
-
-	REG_GET(DCHUBP_CNTL,
-		HUBP_UNDERFLOW_STATUS,
-		&hubp_underflow);
-
-	return hubp_underflow;
-}
-
 static struct hubp_funcs dcn30_hubp_funcs = {
 	.hubp_enable_tripleBuffer = hubp2_enable_triplebuffer,
 	.hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled,
@@ -558,8 +654,7 @@ static struct hubp_funcs dcn30_hubp_funcs = {
 	.hubp_soft_reset = hubp1_soft_reset,
 	.hubp_set_flip_int = hubp1_set_flip_int,
 	.hubp_clear_tiling = hubp3_clear_tiling,
-	.hubp_get_underflow_status = hubp3_get_underflow_status,
-	.hubp_get_current_read_line = hubp3_get_current_read_line,
+	.hubp_read_reg_state = hubp3_read_reg_state
 };
 
 bool hubp3_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h
index 842f4eb72cc8..c767e9f4f9b3 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h
@@ -296,6 +296,8 @@ void hubp3_dmdata_set_attributes(
 
 void hubp3_read_state(struct hubp *hubp);
 
+void hubp3_read_reg_state(struct hubp *hubp, struct dcn_hubp_reg_state *reg_state);
+
 void hubp3_init(struct hubp *hubp);
 
 void hubp3_clear_tiling(struct hubp *hubp);
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c
index 47101847c2b7..189045f85039 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c
@@ -110,9 +110,7 @@ static struct hubp_funcs dcn31_hubp_funcs = {
 	.hubp_in_blank = hubp1_in_blank,
 	.program_extended_blank = hubp31_program_extended_blank,
 	.hubp_clear_tiling = hubp3_clear_tiling,
-	.hubp_get_underflow_status = hubp3_get_underflow_status,
-	.hubp_get_current_read_line = hubp3_get_current_read_line,
-	.hubp_get_det_config_error = hubp31_get_det_config_error,
+	.hubp_read_reg_state = hubp3_read_reg_state,
 };
 
 bool hubp31_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c
index 41c8f78efdc3..a781085b046b 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c
@@ -222,9 +222,7 @@ static struct hubp_funcs dcn32_hubp_funcs = {
 	.hubp_update_mall_sel = hubp32_update_mall_sel,
 	.hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering,
 	.hubp_clear_tiling = hubp3_clear_tiling,
-	.hubp_get_underflow_status = hubp3_get_underflow_status,
-	.hubp_get_current_read_line = hubp3_get_current_read_line,
-	.hubp_get_det_config_error = hubp31_get_det_config_error,
+	.hubp_read_reg_state = hubp3_read_reg_state
 };
 
 bool hubp32_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c
index b140808f21af..79c583e258c7 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c
@@ -209,6 +209,7 @@ static struct hubp_funcs dcn35_hubp_funcs = {
 	.dmdata_load = hubp2_dmdata_load,
 	.dmdata_status_done = hubp2_dmdata_status_done,
 	.hubp_read_state = hubp3_read_state,
+	.hubp_read_reg_state = hubp3_read_reg_state,
 	.hubp_clear_underflow = hubp2_clear_underflow,
 	.hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl,
 	.hubp_init = hubp35_init,
@@ -218,9 +219,6 @@ static struct hubp_funcs dcn35_hubp_funcs = {
 	.hubp_in_blank = hubp1_in_blank,
 	.program_extended_blank = hubp31_program_extended_blank_value,
 	.hubp_clear_tiling = hubp3_clear_tiling,
-	.hubp_get_underflow_status = hubp3_get_underflow_status,
-	.hubp_get_current_read_line = hubp3_get_current_read_line,
-	.hubp_get_det_config_error = hubp31_get_det_config_error,
 };
 
 bool hubp35_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
index 0fcbc6a35be6..3a2e0848173e 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
@@ -1071,9 +1071,7 @@ static struct hubp_funcs dcn401_hubp_funcs = {
 	.hubp_get_3dlut_fl_done = hubp401_get_3dlut_fl_done,
 	.hubp_clear_tiling = hubp401_clear_tiling,
 	.hubp_program_3dlut_fl_config = hubp401_program_3dlut_fl_config,
-	.hubp_get_underflow_status = hubp3_get_underflow_status,
-	.hubp_get_current_read_line = hubp3_get_current_read_line,
-	.hubp_get_det_config_error = hubp31_get_det_config_error,
+	.hubp_read_reg_state = hubp3_read_reg_state
 };
 
 bool hubp401_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
index e47ed5571dfd..81bcadf5e57e 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
@@ -53,7 +53,8 @@
 #include "link_service.h"
 #include "dc_state_priv.h"
 
-
+#define TO_DCN_DCCG(dccg)\
+	container_of(dccg, struct dcn_dccg, base)
 
 #define DC_LOGGER_INIT(logger)
 
@@ -1235,44 +1236,47 @@ void dcn30_get_underflow_debug_data(const struct dc *dc,
 {
 	struct hubbub *hubbub = dc->res_pool->hubbub;
 
-	if (tg) {
-		uint32_t v_blank_start = 0, v_blank_end = 0;
-
-		out_data->otg_inst = tg->inst;
-
-		tg->funcs->get_scanoutpos(tg,
-				&v_blank_start,
-				&v_blank_end,
-				&out_data->h_position,
-				&out_data->v_position);
-
-		out_data->otg_frame_count = tg->funcs->get_frame_count(tg);
-
-		out_data->otg_underflow = tg->funcs->is_optc_underflow_occurred(tg);
+	if (hubbub) {
+		if (hubbub->funcs->hubbub_read_reg_state) {
+			hubbub->funcs->hubbub_read_reg_state(hubbub, out_data->hubbub_reg_state);
+		}
 	}
 
 	for (int i = 0; i < MAX_PIPES; i++) {
 		struct hubp *hubp = dc->res_pool->hubps[i];
-
-		if (hubp) {
-			if (hubp->funcs->hubp_get_underflow_status)
-				out_data->hubps[i].hubp_underflow = hubp->funcs->hubp_get_underflow_status(hubp);
-
-			if (hubp->funcs->hubp_in_blank)
-				out_data->hubps[i].hubp_in_blank = hubp->funcs->hubp_in_blank(hubp);
-
-			if (hubp->funcs->hubp_get_current_read_line)
-				out_data->hubps[i].hubp_readline = hubp->funcs->hubp_get_current_read_line(hubp);
-
-			if (hubp->funcs->hubp_get_det_config_error)
-				out_data->hubps[i].det_config_error = hubp->funcs->hubp_get_det_config_error(hubp);
-		}
+		struct dpp *dpp = dc->res_pool->dpps[i];
+		struct output_pixel_processor *opp = dc->res_pool->opps[i];
+		struct display_stream_compressor *dsc = dc->res_pool->dscs[i];
+		struct mpc *mpc = dc->res_pool->mpc;
+		struct timing_generator *optc = dc->res_pool->timing_generators[i];
+		struct dccg *dccg = dc->res_pool->dccg;
+
+		if (hubp)
+			if (hubp->funcs->hubp_read_reg_state)
+				hubp->funcs->hubp_read_reg_state(hubp, out_data->hubp_reg_state[i]);
+
+		if (dpp)
+			if (dpp->funcs->dpp_read_reg_state)
+				dpp->funcs->dpp_read_reg_state(dpp, out_data->dpp_reg_state[i]);
+
+		if (opp)
+			if (opp->funcs->opp_read_reg_state)
+				opp->funcs->opp_read_reg_state(opp, out_data->opp_reg_state[i]);
+
+		if (dsc)
+			if (dsc->funcs->dsc_read_reg_state)
+				dsc->funcs->dsc_read_reg_state(dsc, out_data->dsc_reg_state[i]);
+
+		if (mpc)
+			if (mpc->funcs->mpc_read_reg_state)
+				mpc->funcs->mpc_read_reg_state(mpc, i, out_data->mpc_reg_state[i]);
+
+		if (optc)
+			if (optc->funcs->optc_read_reg_state)
+				optc->funcs->optc_read_reg_state(optc, out_data->optc_reg_state[i]);
+
+		if (dccg)
+			if (dccg->funcs->dccg_read_reg_state)
+				dccg->funcs->dccg_read_reg_state(dccg, out_data->dccg_reg_state[i]);
 	}
-
-	if (hubbub->funcs->get_det_sizes)
-		hubbub->funcs->get_det_sizes(hubbub, out_data->curr_det_sizes, out_data->target_det_sizes);
-
-	if (hubbub->funcs->compbuf_config_error)
-		out_data->compbuf_config_error = hubbub->funcs->compbuf_config_error(hubbub);
-
 }
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
index 61c4d2a7db1c..500a601e99b5 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
@@ -71,6 +71,125 @@ enum pixel_rate_div {
 	PIXEL_RATE_DIV_NA = 0xF
 };
 
+struct dcn_dccg_reg_state {
+	uint32_t dc_mem_global_pwr_req_cntl;
+	uint32_t dccg_audio_dtbclk_dto_modulo;
+	uint32_t dccg_audio_dtbclk_dto_phase;
+	uint32_t dccg_audio_dto_source;
+	uint32_t dccg_audio_dto0_module;
+	uint32_t dccg_audio_dto0_phase;
+	uint32_t dccg_audio_dto1_module;
+	uint32_t dccg_audio_dto1_phase;
+	uint32_t dccg_cac_status;
+	uint32_t dccg_cac_status2;
+	uint32_t dccg_disp_cntl_reg;
+	uint32_t dccg_ds_cntl;
+	uint32_t dccg_ds_dto_incr;
+	uint32_t dccg_ds_dto_modulo;
+	uint32_t dccg_ds_hw_cal_interval;
+	uint32_t dccg_gate_disable_cntl;
+	uint32_t dccg_gate_disable_cntl2;
+	uint32_t dccg_gate_disable_cntl3;
+	uint32_t dccg_gate_disable_cntl4;
+	uint32_t dccg_gate_disable_cntl5;
+	uint32_t dccg_gate_disable_cntl6;
+	uint32_t dccg_global_fgcg_rep_cntl;
+	uint32_t dccg_gtc_cntl;
+	uint32_t dccg_gtc_current;
+	uint32_t dccg_gtc_dto_incr;
+	uint32_t dccg_gtc_dto_modulo;
+	uint32_t dccg_perfmon_cntl;
+	uint32_t dccg_perfmon_cntl2;
+	uint32_t dccg_soft_reset;
+	uint32_t dccg_test_clk_sel;
+	uint32_t dccg_vsync_cnt_ctrl;
+	uint32_t dccg_vsync_cnt_int_ctrl;
+	uint32_t dccg_vsync_otg0_latch_value;
+	uint32_t dccg_vsync_otg1_latch_value;
+	uint32_t dccg_vsync_otg2_latch_value;
+	uint32_t dccg_vsync_otg3_latch_value;
+	uint32_t dccg_vsync_otg4_latch_value;
+	uint32_t dccg_vsync_otg5_latch_value;
+	uint32_t dispclk_cgtt_blk_ctrl_reg;
+	uint32_t dispclk_freq_change_cntl;
+	uint32_t dp_dto_dbuf_en;
+	uint32_t dp_dto0_modulo;
+	uint32_t dp_dto0_phase;
+	uint32_t dp_dto1_modulo;
+	uint32_t dp_dto1_phase;
+	uint32_t dp_dto2_modulo;
+	uint32_t dp_dto2_phase;
+	uint32_t dp_dto3_modulo;
+	uint32_t dp_dto3_phase;
+	uint32_t dpiaclk_540m_dto_modulo;
+	uint32_t dpiaclk_540m_dto_phase;
+	uint32_t dpiaclk_810m_dto_modulo;
+	uint32_t dpiaclk_810m_dto_phase;
+	uint32_t dpiaclk_dto_cntl;
+	uint32_t dpiasymclk_cntl;
+	uint32_t dppclk_cgtt_blk_ctrl_reg;
+	uint32_t dppclk_ctrl;
+	uint32_t dppclk_dto_ctrl;
+	uint32_t dppclk0_dto_param;
+	uint32_t dppclk1_dto_param;
+	uint32_t dppclk2_dto_param;
+	uint32_t dppclk3_dto_param;
+	uint32_t dprefclk_cgtt_blk_ctrl_reg;
+	uint32_t dprefclk_cntl;
+	uint32_t dpstreamclk_cntl;
+	uint32_t dscclk_dto_ctrl;
+	uint32_t dscclk0_dto_param;
+	uint32_t dscclk1_dto_param;
+	uint32_t dscclk2_dto_param;
+	uint32_t dscclk3_dto_param;
+	uint32_t dtbclk_dto_dbuf_en;
+	uint32_t dtbclk_dto0_modulo;
+	uint32_t dtbclk_dto0_phase;
+	uint32_t dtbclk_dto1_modulo;
+	uint32_t dtbclk_dto1_phase;
+	uint32_t dtbclk_dto2_modulo;
+	uint32_t dtbclk_dto2_phase;
+	uint32_t dtbclk_dto3_modulo;
+	uint32_t dtbclk_dto3_phase;
+	uint32_t dtbclk_p_cntl;
+	uint32_t force_symclk_disable;
+	uint32_t hdmicharclk0_clock_cntl;
+	uint32_t hdmistreamclk_cntl;
+	uint32_t hdmistreamclk0_dto_param;
+	uint32_t microsecond_time_base_div;
+	uint32_t millisecond_time_base_div;
+	uint32_t otg_pixel_rate_div;
+	uint32_t otg0_phypll_pixel_rate_cntl;
+	uint32_t otg0_pixel_rate_cntl;
+	uint32_t otg1_phypll_pixel_rate_cntl;
+	uint32_t otg1_pixel_rate_cntl;
+	uint32_t otg2_phypll_pixel_rate_cntl;
+	uint32_t otg2_pixel_rate_cntl;
+	uint32_t otg3_phypll_pixel_rate_cntl;
+	uint32_t otg3_pixel_rate_cntl;
+	uint32_t phyasymclk_clock_cntl;
+	uint32_t phybsymclk_clock_cntl;
+	uint32_t phycsymclk_clock_cntl;
+	uint32_t phydsymclk_clock_cntl;
+	uint32_t phyesymclk_clock_cntl;
+	uint32_t phyplla_pixclk_resync_cntl;
+	uint32_t phypllb_pixclk_resync_cntl;
+	uint32_t phypllc_pixclk_resync_cntl;
+	uint32_t phyplld_pixclk_resync_cntl;
+	uint32_t phyplle_pixclk_resync_cntl;
+	uint32_t refclk_cgtt_blk_ctrl_reg;
+	uint32_t socclk_cgtt_blk_ctrl_reg;
+	uint32_t symclk_cgtt_blk_ctrl_reg;
+	uint32_t symclk_psp_cntl;
+	uint32_t symclk32_le_cntl;
+	uint32_t symclk32_se_cntl;
+	uint32_t symclka_clock_enable;
+	uint32_t symclkb_clock_enable;
+	uint32_t symclkc_clock_enable;
+	uint32_t symclkd_clock_enable;
+	uint32_t symclke_clock_enable;
+};
+
 struct dccg {
 	struct dc_context *ctx;
 	const struct dccg_funcs *funcs;
@@ -81,7 +200,6 @@ struct dccg {
 	//int audio_dtbclk_khz;/* TODO needs to be removed */
 	//int ref_dtbclk_khz;/* TODO needs to be removed */
 };
-
 struct dtbclk_dto_params {
 	const struct dc_crtc_timing *timing;
 	int otg_inst;
@@ -214,6 +332,7 @@ struct dccg_funcs {
 	void (*set_dto_dscclk)(struct dccg *dccg, uint32_t dsc_inst, uint32_t num_slices_h);
 	void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst);
 	void (*dccg_root_gate_disable_control)(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating);
+	void (*dccg_read_reg_state)(struct dccg *dccg, struct dcn_dccg_reg_state *dccg_reg_state);
 };
 
 #endif //__DAL_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
index 2ce47c403840..dafc8490efb5 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
@@ -137,6 +137,14 @@ struct dcn_hubbub_state {
 	uint32_t dram_state_cntl;
 };
 
+struct dcn_hubbub_reg_state {
+	uint32_t det0_ctrl;
+	uint32_t det1_ctrl;
+	uint32_t det2_ctrl;
+	uint32_t det3_ctrl;
+	uint32_t compbuf_ctrl;
+};
+
 struct hubbub_system_latencies {
 	uint32_t max_latency_ns;
 	uint32_t avg_latency_ns;
@@ -216,6 +224,8 @@ struct hubbub_funcs {
 
 	void (*init_watermarks)(struct hubbub *hubbub);
 
+	void (*hubbub_read_reg_state)(struct hubbub *hubbub, struct dcn_hubbub_reg_state *hubbub_reg_state);
+
 	/**
 	 * @program_det_size:
 	 *
@@ -242,8 +252,6 @@ struct hubbub_funcs {
 	void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase);
 	void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst);
 	bool (*program_arbiter)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower);
-	void (*get_det_sizes)(struct hubbub *hubbub, uint32_t *curr_det_sizes, uint32_t *target_det_sizes);
-	uint32_t (*compbuf_config_error)(struct hubbub *hubbub);
 
 	struct hubbub_perfmon_funcs {
 		void (*reset)(struct hubbub *hubbub);
 		void (*start_measuring_max_memory_latency_ns)(
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
index 09c224691618..d88b57d4f512 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
@@ -65,7 +65,6 @@ union defer_reg_writes {
 	} bits;
 	uint32_t raw;
 };
-
 struct dpp {
 	const struct dpp_funcs *funcs;
 	struct dc_context *ctx;
@@ -203,6 +202,19 @@ struct dcn_dpp_state {
 	uint32_t gamcor_mode;
 };
 
+struct dcn_dpp_reg_state {
+	uint32_t recout_start;
+	uint32_t recout_size;
+	uint32_t scl_horz_filter_scale_ratio;
+	uint32_t scl_vert_filter_scale_ratio;
+	uint32_t scl_mode;
+	uint32_t cm_control;
+	uint32_t dpp_control;
+	uint32_t dscl_control;
+	uint32_t obuf_control;
+	uint32_t mpc_size;
+};
+
 struct CM_bias_params {
 	uint32_t cm_bias_cr_r;
 	uint32_t cm_bias_y_g;
@@ -226,6 +238,8 @@ struct dpp_funcs {
 
 	void (*dpp_read_state)(struct dpp *dpp, struct dcn_dpp_state *s);
 
+	void (*dpp_read_reg_state)(struct dpp *dpp, struct dcn_dpp_reg_state *dpp_reg_state);
+
 	void (*dpp_reset)(struct dpp *dpp);
 
 	void (*dpp_set_scaler)(struct dpp *dpp,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
index c62c4c1ed7c3..a79019365af8 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
@@ -238,6 +238,7 @@ struct hubp_funcs {
 	void (*hubp_clk_cntl)(struct hubp *hubp, bool enable);
 	void (*hubp_vtg_sel)(struct hubp *hubp, uint32_t otg_inst);
 	void (*hubp_read_state)(struct hubp *hubp);
+	void (*hubp_read_reg_state)(struct hubp *hubp, struct dcn_hubp_reg_state *reg_state);
 	void (*hubp_clear_underflow)(struct hubp *hubp);
 	void (*hubp_disable_control)(struct hubp *hubp, bool disable_hubp);
 	unsigned int (*hubp_get_underflow_status)(struct hubp *hubp);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
index 22960ee03dee..a8d1abe20f62 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
@@ -350,6 +350,15 @@ struct mpcc_state {
 	struct mpc_rmcm_regs rmcm_regs;
 };
 
+struct dcn_mpc_reg_state {
+	uint32_t mpcc_bot_sel;
+	uint32_t mpcc_control;
+	uint32_t mpcc_status;
+	uint32_t mpcc_top_sel;
+	uint32_t mpcc_opp_id;
+	uint32_t mpcc_ogam_control;
+};
+
 /**
  * struct mpc_funcs - funcs
  */
@@ -373,6 +382,24 @@ struct mpc_funcs {
 		struct mpc *mpc,
 		int mpcc_inst,
 		struct mpcc_state *s);
+	/**
+	 * @mpc_read_reg_state:
+	 *
+	 * Read MPC register state for debugging underflow purposes.
+	 *
+	 * Parameters:
+	 *
+	 * - [in] mpc - MPC context
+	 * - [out] reg_state - MPC register state structure
+	 *
+	 * Return:
+	 *
+	 * void
+	 */
+	void (*mpc_read_reg_state)(
+		struct mpc *mpc,
+		int mpcc_inst,
+		struct dcn_mpc_reg_state *mpc_reg_state);
 
 	/**
 	 * @insert_plane:
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
index 747679cb4944..e1428a83ecbc 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
@@ -297,6 +297,16 @@ struct oppbuf_params {
 	uint32_t num_segment_padded_pixels;
 };
 
+struct dcn_opp_reg_state {
+	uint32_t dpg_control;
+	uint32_t fmt_control;
+	uint32_t oppbuf_control;
+	uint32_t opp_pipe_control;
+	uint32_t opp_pipe_crc_control;
+	uint32_t opp_abm_control;
+	uint32_t dscrm_dsc_forward_config;
+};
+
 struct opp_funcs {
 
 
@@ -368,6 +378,9 @@ struct opp_funcs {
 		struct output_pixel_processor *opp,
 		enum dc_pixel_encoding pixel_encoding,
 		bool is_primary);
+
+	void (*opp_read_reg_state)(
+		struct output_pixel_processor *opp, struct dcn_opp_reg_state *opp_reg_state);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index f2de2cf23859..da7bf59c4b9d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -175,6 +175,135 @@ struct dcn_otg_state {
 	uint32_t otg_double_buffer_control;
 };
 
+struct dcn_optc_reg_state {
+	uint32_t optc_bytes_per_pixel;
+	uint32_t optc_data_format_control;
+	uint32_t optc_data_source_select;
+	uint32_t optc_input_clock_control;
+	uint32_t optc_input_global_control;
+	uint32_t optc_input_spare_register;
+	uint32_t optc_memory_config;
+	uint32_t optc_rsmu_underflow;
+	uint32_t optc_underflow_threshold;
+	uint32_t optc_width_control;
+
+	uint32_t otg_3d_structure_control;
+	uint32_t otg_clock_control;
+	uint32_t otg_control;
+	uint32_t otg_count_control;
+	uint32_t otg_count_reset;
+	uint32_t otg_crc_cntl;
+	uint32_t otg_crc_sig_blue_control_mask;
+	uint32_t otg_crc_sig_red_green_mask;
+	uint32_t otg_crc0_data_b;
+	uint32_t otg_crc0_data_rg;
+	uint32_t otg_crc0_windowa_x_control;
+	uint32_t otg_crc0_windowa_x_control_readback;
+	uint32_t otg_crc0_windowa_y_control;
+	uint32_t otg_crc0_windowa_y_control_readback;
+	uint32_t otg_crc0_windowb_x_control;
+	uint32_t otg_crc0_windowb_x_control_readback;
+	uint32_t otg_crc0_windowb_y_control;
+	uint32_t otg_crc0_windowb_y_control_readback;
+	uint32_t otg_crc1_data_b;
+	uint32_t otg_crc1_data_rg;
+	uint32_t otg_crc1_windowa_x_control;
+	uint32_t otg_crc1_windowa_x_control_readback;
+	uint32_t otg_crc1_windowa_y_control;
+	uint32_t otg_crc1_windowa_y_control_readback;
+	uint32_t otg_crc1_windowb_x_control;
+	uint32_t otg_crc1_windowb_x_control_readback;
+	uint32_t otg_crc1_windowb_y_control;
+	uint32_t otg_crc1_windowb_y_control_readback;
+	uint32_t otg_crc2_data_b;
+	uint32_t otg_crc2_data_rg;
+	uint32_t otg_crc3_data_b;
+	uint32_t otg_crc3_data_rg;
+	uint32_t otg_dlpc_control;
+	uint32_t otg_double_buffer_control;
+	uint32_t otg_drr_control2;
+	uint32_t otg_drr_control;
+	uint32_t otg_drr_timing_int_status;
+	uint32_t otg_drr_trigger_window;
+	uint32_t otg_drr_v_total_change;
+	uint32_t otg_drr_v_total_reach_range;
+	uint32_t otg_dsc_start_position;
+	uint32_t otg_force_count_now_cntl;
+	uint32_t otg_global_control0;
+	uint32_t otg_global_control1;
+	uint32_t otg_global_control2;
+	uint32_t otg_global_control3;
+	uint32_t otg_global_control4;
+	uint32_t otg_global_sync_status;
+	uint32_t otg_gsl_control;
+	uint32_t otg_gsl_vsync_gap;
+	uint32_t otg_gsl_window_x;
+	uint32_t otg_gsl_window_y;
+	uint32_t otg_h_blank_start_end;
+	uint32_t otg_h_sync_a;
+	uint32_t otg_h_sync_a_cntl;
+	uint32_t otg_h_timing_cntl;
+	uint32_t otg_h_total;
+	uint32_t otg_interlace_control;
+	uint32_t otg_interlace_status;
+	uint32_t otg_interrupt_control;
+	uint32_t otg_long_vblank_status;
+	uint32_t otg_m_const_dto0;
+	uint32_t otg_m_const_dto1;
+	uint32_t otg_manual_force_vsync_next_line;
+	uint32_t otg_master_en;
+	uint32_t otg_master_update_lock;
+	uint32_t otg_master_update_mode;
+	uint32_t otg_nom_vert_position;
+	uint32_t otg_pipe_update_status;
+	uint32_t otg_pixel_data_readback0;
+	uint32_t otg_pixel_data_readback1;
+	uint32_t otg_request_control;
+	uint32_t otg_snapshot_control;
+	uint32_t otg_snapshot_frame;
+	uint32_t otg_snapshot_position;
+	uint32_t otg_snapshot_status;
+	uint32_t otg_spare_register;
+	uint32_t otg_static_screen_control;
+	uint32_t otg_status;
+	uint32_t otg_status_frame_count;
+	uint32_t otg_status_hv_count;
+	uint32_t otg_status_position;
+	uint32_t otg_status_vf_count;
+	uint32_t otg_stereo_control;
+	uint32_t otg_stereo_force_next_eye;
+	uint32_t otg_stereo_status;
+	uint32_t otg_trig_manual_control;
+	uint32_t otg_triga_cntl;
+	uint32_t otg_triga_manual_trig;
+	uint32_t otg_trigb_cntl;
+	uint32_t otg_trigb_manual_trig;
+	uint32_t otg_update_lock;
+	uint32_t otg_v_blank_start_end;
+	uint32_t otg_v_count_stop_control;
+	uint32_t otg_v_count_stop_control2;
+	uint32_t otg_v_sync_a;
+	uint32_t otg_v_sync_a_cntl;
+	uint32_t otg_v_total;
+	uint32_t otg_v_total_control;
+	uint32_t otg_v_total_int_status;
+	uint32_t otg_v_total_max;
+	uint32_t otg_v_total_mid;
+	uint32_t otg_v_total_min;
+	uint32_t otg_vert_sync_control;
+	uint32_t otg_vertical_interrupt0_control;
+	uint32_t otg_vertical_interrupt0_position;
+	uint32_t otg_vertical_interrupt1_control;
+	uint32_t otg_vertical_interrupt1_position;
+	uint32_t otg_vertical_interrupt2_control;
+	uint32_t otg_vertical_interrupt2_position;
+	uint32_t otg_vready_param;
+	uint32_t otg_vstartup_param;
+	uint32_t otg_vsync_nom_int_status;
+	uint32_t otg_vupdate_keepout;
+	uint32_t otg_vupdate_param;
+};
+
 /**
  * struct timing_generator - Entry point to Output Timing Generator feature.
  */
@@ -381,6 +510,7 @@ struct timing_generator_funcs {
 	void (*set_vupdate_keepout)(struct timing_generator *tg, bool enable);
 	bool (*wait_update_lock_status)(struct timing_generator *tg, bool locked);
 	void (*read_otg_state)(struct timing_generator *tg, struct dcn_otg_state *s);
+	void (*optc_read_reg_state)(struct timing_generator *tg, struct dcn_optc_reg_state *optc_reg_state);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c
index 85298b8a1b5e..6bfd2c1294e5 100644
--- a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c
@@ -1514,6 +1514,21 @@ static void mpc3_read_mpcc_state(
 			MPCC_OGAM_SELECT_CURRENT, &s->rgam_lut);
 }
 
+void mpc3_read_reg_state(
+	struct mpc *mpc,
+	int mpcc_inst, struct dcn_mpc_reg_state *mpc_reg_state)
+{
+	struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+
+	mpc_reg_state->mpcc_bot_sel = REG_READ(MPCC_BOT_SEL[mpcc_inst]);
+	mpc_reg_state->mpcc_control = REG_READ(MPCC_CONTROL[mpcc_inst]);
+	mpc_reg_state->mpcc_ogam_control = REG_READ(MPCC_OGAM_CONTROL[mpcc_inst]);
+	mpc_reg_state->mpcc_opp_id = REG_READ(MPCC_OPP_ID[mpcc_inst]);
+	mpc_reg_state->mpcc_status = REG_READ(MPCC_STATUS[mpcc_inst]);
+	mpc_reg_state->mpcc_top_sel = REG_READ(MPCC_TOP_SEL[mpcc_inst]);
+
+}
+
 static const struct mpc_funcs dcn30_mpc_funcs = {
 	.read_mpcc_state = mpc3_read_mpcc_state,
 	.insert_plane = mpc1_insert_plane,
@@ -1544,6 +1559,7 @@ static const struct mpc_funcs dcn30_mpc_funcs = {
 	.release_rmu = mpcc3_release_rmu,
 	.power_on_mpc_mem_pwr = mpc3_power_on_ogam_lut,
 	.get_mpc_out_mux = mpc1_get_mpc_out_mux,
+	.mpc_read_reg_state = mpc3_read_reg_state,
 	.set_bg_color = mpc1_set_bg_color,
 	.set_mpc_mem_lp_mode = mpc3_set_mpc_mem_lp_mode,
 };
diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h
index 103f29900a2c..e2f147d17178 100644
--- a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h
@@ -1096,6 +1096,11 @@ void mpc3_power_on_ogam_lut(
 	struct mpc *mpc, int mpcc_id,
 	bool power_on);
 
+void mpc3_read_reg_state(
+	struct mpc *mpc,
+	int mpcc_inst,
+	struct dcn_mpc_reg_state *mpc_reg_state);
+
 void mpc3_init_mpcc(struct mpcc *mpcc, int mpcc_inst);
 
 enum dc_lut_mode mpc3_get_ogam_current(
diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c
index 6f0e017a8ae2..83bbbf34bcac 100644
--- a/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c
@@ -1020,6 +1020,7 @@ static const struct mpc_funcs dcn32_mpc_funcs = {
 	.release_rmu = NULL,
 	.power_on_mpc_mem_pwr = mpc3_power_on_ogam_lut,
 	.get_mpc_out_mux = mpc1_get_mpc_out_mux,
+	.mpc_read_reg_state = mpc3_read_reg_state,
 	.set_bg_color = mpc1_set_bg_color,
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c
index e1a0308dee57..eeac13fdd6f5 100644
--- a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c
@@ -598,6 +598,7 @@ static const struct mpc_funcs dcn401_mpc_funcs = {
 	.release_rmu = NULL,
 	.power_on_mpc_mem_pwr = mpc3_power_on_ogam_lut,
 	.get_mpc_out_mux = mpc1_get_mpc_out_mux,
+	.mpc_read_reg_state = mpc3_read_reg_state,
 	.set_bg_color = mpc1_set_bg_color,
 	.set_movable_cm_location = mpc401_set_movable_cm_location,
 	.update_3dlut_fast_load_select = mpc401_update_3dlut_fast_load_select,
diff --git a/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c
index 71e9288d60ed..45d418636d0c 100644
--- a/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c
@@ -372,6 +372,17 @@ void opp1_pipe_clock_control(struct output_pixel_processor *opp, bool enable)
 	REG_UPDATE(OPP_PIPE_CONTROL, OPP_PIPE_CLOCK_EN, regval);
 }
 
+
+void opp1_read_reg_state(struct output_pixel_processor *opp, struct dcn_opp_reg_state *opp_reg_state)
+{
+	struct dcn10_opp *oppn10 = TO_DCN10_OPP(opp);
+
+	opp_reg_state->fmt_control = REG_READ(FMT_CONTROL);
+	opp_reg_state->opp_pipe_control = REG_READ(OPP_PIPE_CONTROL);
+	opp_reg_state->opp_pipe_crc_control = REG_READ(OPP_PIPE_CRC_CONTROL);
+	opp_reg_state->oppbuf_control = REG_READ(OPPBUF_CONTROL);
+}
+
 /*****************************************/
 /* Constructor, Destructor               */
 /*****************************************/
@@ -392,7 +403,8 @@ static const struct opp_funcs dcn10_opp_funcs = {
 	.opp_program_dpg_dimensions = NULL,
 	.dpg_is_blanked = NULL,
 	.dpg_is_pending = NULL,
-	.opp_destroy = opp1_destroy
+	.opp_destroy = opp1_destroy,
+	.opp_read_reg_state = opp1_read_reg_state
 };
 
 void dcn10_opp_construct(struct dcn10_opp *oppn10,
diff --git a/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h
index c87de68a509e..38d0d530a9b7 100644
--- a/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h
+++ b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h
@@ -63,7 +63,8 @@
 	uint32_t OPPBUF_CONTROL1; \
 	uint32_t OPPBUF_3D_PARAMETERS_0; \
 	uint32_t OPPBUF_3D_PARAMETERS_1; \
-	uint32_t OPP_PIPE_CONTROL
+	uint32_t OPP_PIPE_CONTROL; \
+	uint32_t OPP_PIPE_CRC_CONTROL
 
 #define OPP_MASK_SH_LIST_DCN(mask_sh) \
 	OPP_SF(FMT0_FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, mask_sh), \
@@ -153,7 +154,6 @@ struct dcn10_opp {
 	const struct dcn10_opp_registers *regs;
 	const struct dcn10_opp_shift *opp_shift;
 	const struct dcn10_opp_mask *opp_mask;
-
 	bool is_write_to_ram_a_safe;
 };
 
@@ -188,4 +188,6 @@ void opp1_pipe_clock_control(struct output_pixel_processor *opp, bool enable);
 
 void opp1_destroy(struct output_pixel_processor **opp);
 
+void opp1_read_reg_state(struct output_pixel_processor *opp, struct dcn_opp_reg_state *opp_reg_state);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c
index f5fe0cac7cb0..ce826a5be4c7 100644
--- a/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c
@@ -377,6 +377,18 @@ uint32_t opp2_get_left_edge_extra_pixel_count(struct output_pixel_processor *opp
 	return 0;
 }
 
+void opp2_read_reg_state(struct output_pixel_processor *opp, struct dcn_opp_reg_state *opp_reg_state)
+{
+	struct dcn20_opp *oppn20 = TO_DCN20_OPP(opp);
+
+	opp_reg_state->dpg_control = REG_READ(DPG_CONTROL);
+	opp_reg_state->fmt_control = REG_READ(FMT_CONTROL);
+	opp_reg_state->opp_pipe_control = REG_READ(OPP_PIPE_CONTROL);
+	opp_reg_state->opp_pipe_crc_control = REG_READ(OPP_PIPE_CRC_CONTROL);
+	opp_reg_state->oppbuf_control = REG_READ(OPPBUF_CONTROL);
+	opp_reg_state->dscrm_dsc_forward_config = REG_READ(DSCRM_DSC_FORWARD_CONFIG);
+}
+
 /*****************************************/
 /* Constructor, Destructor               */
 /*****************************************/
@@ -395,6 +407,7 @@ static struct opp_funcs dcn20_opp_funcs = {
 	.opp_destroy = opp1_destroy,
 	.opp_program_left_edge_extra_pixel = opp2_program_left_edge_extra_pixel,
 	.opp_get_left_edge_extra_pixel_count = opp2_get_left_edge_extra_pixel_count,
+	.opp_read_reg_state = opp2_read_reg_state
 };
 
 void dcn20_opp_construct(struct dcn20_opp *oppn20,
diff --git a/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h
index 34936e6c49f3..fb0c047c1788 100644
--- a/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h
+++ b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h
@@ -59,7 +59,8 @@
 	uint32_t DPG_COLOUR_G_Y; \
 	uint32_t DPG_COLOUR_R_CR; \
 	uint32_t DPG_RAMP_CONTROL; \
-	uint32_t DPG_STATUS
+	uint32_t DPG_STATUS; \
+	uint32_t DSCRM_DSC_FORWARD_CONFIG
 
 #define OPP_DPG_MASK_SH_LIST(mask_sh) \
 	OPP_SF(DPG0_DPG_CONTROL, DPG_EN, mask_sh), \
@@ -171,4 +172,7 @@ void opp2_program_left_edge_extra_pixel (
 uint32_t opp2_get_left_edge_extra_pixel_count(struct output_pixel_processor *opp,
 	enum dc_pixel_encoding pixel_encoding, bool is_primary);
+
+void opp2_read_reg_state(struct output_pixel_processor *opp, struct dcn_opp_reg_state *opp_reg_state);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.c
index 3542b51c9aac..e11c4e16402f 100644
--- a/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.c
@@ -51,3 +51,16 @@ void dcn35_opp_set_fgcg(struct dcn20_opp *oppn20, bool enable)
 {
 	REG_UPDATE(OPP_TOP_CLK_CONTROL, OPP_FGCG_REP_DIS, !enable);
 }
+
+void dcn35_opp_read_reg_state(struct output_pixel_processor *opp, struct dcn_opp_reg_state *opp_reg_state)
+{
+	struct dcn20_opp *oppn20 = TO_DCN20_OPP(opp);
+
+	opp_reg_state->dpg_control = REG_READ(DPG_CONTROL);
+	opp_reg_state->fmt_control = REG_READ(FMT_CONTROL);
+	opp_reg_state->opp_abm_control = REG_READ(OPP_ABM_CONTROL);
+	opp_reg_state->opp_pipe_control = REG_READ(OPP_PIPE_CONTROL);
+	opp_reg_state->opp_pipe_crc_control = REG_READ(OPP_PIPE_CRC_CONTROL);
+	opp_reg_state->oppbuf_control = REG_READ(OPPBUF_CONTROL);
+	opp_reg_state->dscrm_dsc_forward_config = REG_READ(DSCRM_DSC_FORWARD_CONFIG);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.h
index a9a413527801..c6cace90e8f2 100644
--- a/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.h
+++ b/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.h
@@ -31,7 +31,8 @@
 
 #define OPP_REG_VARIABLE_LIST_DCN3_5 \
 	OPP_REG_VARIABLE_LIST_DCN2_0; \
-	uint32_t OPP_TOP_CLK_CONTROL
+	uint32_t OPP_TOP_CLK_CONTROL; \
+	uint32_t OPP_ABM_CONTROL
 
 #define OPP_MASK_SH_LIST_DCN35(mask_sh) \
 	OPP_MASK_SH_LIST_DCN20(mask_sh), \
@@ -64,4 +65,5 @@ void dcn35_opp_construct(struct dcn20_opp *oppn20,
 
 void dcn35_opp_set_fgcg(struct dcn20_opp *oppn20, bool enable);
 
+void dcn35_opp_read_reg_state(struct output_pixel_processor *opp, struct dcn_opp_reg_state *opp_reg_state);
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
index 8b2a8455eb56..803bcc25601c 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
@@ -209,7 +209,43 @@
 	uint32_t OPTC_WIDTH_CONTROL2; \
 	uint32_t OTG_PSTATE_REGISTER; \
 	uint32_t OTG_PIPE_UPDATE_STATUS; \
-	uint32_t INTERRUPT_DEST
+	uint32_t INTERRUPT_DEST; \
+	uint32_t OPTC_INPUT_SPARE_REGISTER; \
+	uint32_t OPTC_RSMU_UNDERFLOW; \
+	uint32_t OPTC_UNDERFLOW_THRESHOLD; \
+	uint32_t OTG_COUNT_CONTROL; \
+	uint32_t OTG_COUNT_RESET; \
+	uint32_t OTG_CRC_SIG_BLUE_CONTROL_MASK; \
+	uint32_t OTG_CRC_SIG_RED_GREEN_MASK; \
+	uint32_t OTG_DLPC_CONTROL; \
+	uint32_t OTG_DRR_CONTROL2; \
+	uint32_t OTG_DRR_TIMING_INT_STATUS; \
+	uint32_t OTG_GLOBAL_CONTROL3; \
+	uint32_t OTG_GLOBAL_SYNC_STATUS; \
+	uint32_t OTG_GSL_VSYNC_GAP; \
+	uint32_t OTG_INTERLACE_STATUS; \
+	uint32_t OTG_INTERRUPT_CONTROL; \
+	uint32_t OTG_LONG_VBLANK_STATUS; \
+	uint32_t OTG_MANUAL_FORCE_VSYNC_NEXT_LINE; \
+	uint32_t OTG_MASTER_EN; \
+	uint32_t OTG_PIXEL_DATA_READBACK0; \
+	uint32_t OTG_PIXEL_DATA_READBACK1; \
+	uint32_t OTG_REQUEST_CONTROL; \
+	uint32_t OTG_SNAPSHOT_CONTROL; \
+	uint32_t OTG_SNAPSHOT_FRAME; \
+	uint32_t OTG_SNAPSHOT_POSITION; \
+	uint32_t OTG_SNAPSHOT_STATUS; \
+	uint32_t OTG_SPARE_REGISTER; \
+	uint32_t OTG_STATUS_HV_COUNT; \
+	uint32_t OTG_STATUS_VF_COUNT; \
+	uint32_t OTG_STEREO_FORCE_NEXT_EYE; \
+	uint32_t OTG_TRIG_MANUAL_CONTROL; \
+	uint32_t OTG_TRIGB_CNTL; \
+	uint32_t OTG_TRIGB_MANUAL_TRIG; \
+	uint32_t OTG_UPDATE_LOCK; \
+	uint32_t OTG_V_TOTAL_INT_STATUS; \
+	uint32_t OTG_VSYNC_NOM_INT_STATUS
+
 
 struct dcn_optc_registers {
 	OPTC_REG_VARIABLE_LIST_DCN;
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c
index 4f1830ba619f..c6417538090f 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c
@@ -315,6 +315,136 @@ void optc31_read_otg_state(struct timing_generator *optc,
 	s->otg_double_buffer_control = REG_READ(OTG_DOUBLE_BUFFER_CONTROL);
 }
 
+void optc31_read_reg_state(struct timing_generator *optc, struct dcn_optc_reg_state *optc_reg_state)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+	optc_reg_state->optc_bytes_per_pixel = REG_READ(OPTC_BYTES_PER_PIXEL);
+	optc_reg_state->optc_data_format_control = REG_READ(OPTC_DATA_FORMAT_CONTROL);
+	optc_reg_state->optc_data_source_select = REG_READ(OPTC_DATA_SOURCE_SELECT);
+	optc_reg_state->optc_input_clock_control = REG_READ(OPTC_INPUT_CLOCK_CONTROL);
+	optc_reg_state->optc_input_global_control = REG_READ(OPTC_INPUT_GLOBAL_CONTROL);
+	optc_reg_state->optc_input_spare_register = REG_READ(OPTC_INPUT_SPARE_REGISTER);
+	optc_reg_state->optc_memory_config = REG_READ(OPTC_MEMORY_CONFIG);
+	optc_reg_state->optc_rsmu_underflow = REG_READ(OPTC_RSMU_UNDERFLOW);
+	optc_reg_state->optc_underflow_threshold = REG_READ(OPTC_UNDERFLOW_THRESHOLD);
+	optc_reg_state->optc_width_control = REG_READ(OPTC_WIDTH_CONTROL);
+	optc_reg_state->otg_3d_structure_control = REG_READ(OTG_3D_STRUCTURE_CONTROL);
+	optc_reg_state->otg_clock_control = REG_READ(OTG_CLOCK_CONTROL);
+	optc_reg_state->otg_control = REG_READ(OTG_CONTROL);
+	optc_reg_state->otg_count_control = REG_READ(OTG_COUNT_CONTROL);
+	optc_reg_state->otg_count_reset = REG_READ(OTG_COUNT_RESET);
+	optc_reg_state->otg_crc_cntl = REG_READ(OTG_CRC_CNTL);
+	optc_reg_state->otg_crc_sig_blue_control_mask = REG_READ(OTG_CRC_SIG_BLUE_CONTROL_MASK);
+	optc_reg_state->otg_crc_sig_red_green_mask = REG_READ(OTG_CRC_SIG_RED_GREEN_MASK);
+	optc_reg_state->otg_crc0_data_b = REG_READ(OTG_CRC0_DATA_B);
+	optc_reg_state->otg_crc0_data_rg = REG_READ(OTG_CRC0_DATA_RG);
+	optc_reg_state->otg_crc0_windowa_x_control = REG_READ(OTG_CRC0_WINDOWA_X_CONTROL);
+	optc_reg_state->otg_crc0_windowa_x_control_readback = REG_READ(OTG_CRC0_WINDOWA_X_CONTROL_READBACK);
+	optc_reg_state->otg_crc0_windowa_y_control = REG_READ(OTG_CRC0_WINDOWA_Y_CONTROL);
+	optc_reg_state->otg_crc0_windowa_y_control_readback = REG_READ(OTG_CRC0_WINDOWA_Y_CONTROL_READBACK);
+	optc_reg_state->otg_crc0_windowb_x_control = REG_READ(OTG_CRC0_WINDOWB_X_CONTROL);
+	optc_reg_state->otg_crc0_windowb_x_control_readback = REG_READ(OTG_CRC0_WINDOWB_X_CONTROL_READBACK);
+	optc_reg_state->otg_crc0_windowb_y_control = REG_READ(OTG_CRC0_WINDOWB_Y_CONTROL);
+	optc_reg_state->otg_crc0_windowb_y_control_readback = REG_READ(OTG_CRC0_WINDOWB_Y_CONTROL_READBACK);
+	optc_reg_state->otg_crc1_data_b = REG_READ(OTG_CRC1_DATA_B);
+	optc_reg_state->otg_crc1_data_rg = REG_READ(OTG_CRC1_DATA_RG);
+	optc_reg_state->otg_crc1_windowa_x_control = REG_READ(OTG_CRC1_WINDOWA_X_CONTROL);
+	optc_reg_state->otg_crc1_windowa_x_control_readback = REG_READ(OTG_CRC1_WINDOWA_X_CONTROL_READBACK);
+	optc_reg_state->otg_crc1_windowa_y_control = REG_READ(OTG_CRC1_WINDOWA_Y_CONTROL);
+	optc_reg_state->otg_crc1_windowa_y_control_readback = REG_READ(OTG_CRC1_WINDOWA_Y_CONTROL_READBACK);
+	optc_reg_state->otg_crc1_windowb_x_control = REG_READ(OTG_CRC1_WINDOWB_X_CONTROL);
+	optc_reg_state->otg_crc1_windowb_x_control_readback = REG_READ(OTG_CRC1_WINDOWB_X_CONTROL_READBACK);
+	optc_reg_state->otg_crc1_windowb_y_control = REG_READ(OTG_CRC1_WINDOWB_Y_CONTROL);
+	optc_reg_state->otg_crc1_windowb_y_control_readback = REG_READ(OTG_CRC1_WINDOWB_Y_CONTROL_READBACK);
+	optc_reg_state->otg_crc2_data_b = REG_READ(OTG_CRC2_DATA_B);
+	optc_reg_state->otg_crc2_data_rg = REG_READ(OTG_CRC2_DATA_RG);
+	optc_reg_state->otg_crc3_data_b = REG_READ(OTG_CRC3_DATA_B);
+	optc_reg_state->otg_crc3_data_rg = REG_READ(OTG_CRC3_DATA_RG);
+	optc_reg_state->otg_dlpc_control = REG_READ(OTG_DLPC_CONTROL);
+	optc_reg_state->otg_double_buffer_control = REG_READ(OTG_DOUBLE_BUFFER_CONTROL);
+	optc_reg_state->otg_drr_control2 = REG_READ(OTG_DRR_CONTROL2);
+	optc_reg_state->otg_drr_control = REG_READ(OTG_DRR_CONTROL);
+	optc_reg_state->otg_drr_timing_int_status = REG_READ(OTG_DRR_TIMING_INT_STATUS);
+	optc_reg_state->otg_drr_trigger_window = REG_READ(OTG_DRR_TRIGGER_WINDOW);
+	optc_reg_state->otg_drr_v_total_change = REG_READ(OTG_DRR_V_TOTAL_CHANGE);
+	optc_reg_state->otg_dsc_start_position = REG_READ(OTG_DSC_START_POSITION);
+	optc_reg_state->otg_force_count_now_cntl = REG_READ(OTG_FORCE_COUNT_NOW_CNTL);
+	optc_reg_state->otg_global_control0 = REG_READ(OTG_GLOBAL_CONTROL0);
+	optc_reg_state->otg_global_control1 = REG_READ(OTG_GLOBAL_CONTROL1);
+	optc_reg_state->otg_global_control2 = REG_READ(OTG_GLOBAL_CONTROL2);
+	optc_reg_state->otg_global_control3 = REG_READ(OTG_GLOBAL_CONTROL3);
+	optc_reg_state->otg_global_control4 = REG_READ(OTG_GLOBAL_CONTROL4);
+	optc_reg_state->otg_global_sync_status = REG_READ(OTG_GLOBAL_SYNC_STATUS);
+	optc_reg_state->otg_gsl_control = REG_READ(OTG_GSL_CONTROL);
+	optc_reg_state->otg_gsl_vsync_gap = REG_READ(OTG_GSL_VSYNC_GAP);
+	optc_reg_state->otg_gsl_window_x = REG_READ(OTG_GSL_WINDOW_X);
+	optc_reg_state->otg_gsl_window_y = REG_READ(OTG_GSL_WINDOW_Y);
+	optc_reg_state->otg_h_blank_start_end = REG_READ(OTG_H_BLANK_START_END);
+	optc_reg_state->otg_h_sync_a = REG_READ(OTG_H_SYNC_A);
+	optc_reg_state->otg_h_sync_a_cntl = REG_READ(OTG_H_SYNC_A_CNTL);
+	optc_reg_state->otg_h_timing_cntl = REG_READ(OTG_H_TIMING_CNTL);
+	optc_reg_state->otg_h_total = REG_READ(OTG_H_TOTAL);
+	optc_reg_state->otg_interlace_control = REG_READ(OTG_INTERLACE_CONTROL);
+	optc_reg_state->otg_interlace_status = REG_READ(OTG_INTERLACE_STATUS);
+	optc_reg_state->otg_interrupt_control = REG_READ(OTG_INTERRUPT_CONTROL);
+	optc_reg_state->otg_long_vblank_status = REG_READ(OTG_LONG_VBLANK_STATUS);
+
optc_reg_state->otg_m_const_dto0 = REG_READ(OTG_M_CONST_DTO0); + optc_reg_state->otg_m_const_dto1 = REG_READ(OTG_M_CONST_DTO1); + optc_reg_state->otg_manual_force_vsync_next_line = REG_READ(OTG_MANUAL_FORCE_VSYNC_NEXT_LINE); + optc_reg_state->otg_master_en = REG_READ(OTG_MASTER_EN); + optc_reg_state->otg_master_update_lock = REG_READ(OTG_MASTER_UPDATE_LOCK); + optc_reg_state->otg_master_update_mode = REG_READ(OTG_MASTER_UPDATE_MODE); + optc_reg_state->otg_nom_vert_position = REG_READ(OTG_NOM_VERT_POSITION); + optc_reg_state->otg_pipe_update_status = REG_READ(OTG_PIPE_UPDATE_STATUS); + optc_reg_state->otg_pixel_data_readback0 = REG_READ(OTG_PIXEL_DATA_READBACK0); + optc_reg_state->otg_pixel_data_readback1 = REG_READ(OTG_PIXEL_DATA_READBACK1); + optc_reg_state->otg_request_control = REG_READ(OTG_REQUEST_CONTROL); + optc_reg_state->otg_snapshot_control = REG_READ(OTG_SNAPSHOT_CONTROL); + optc_reg_state->otg_snapshot_frame = REG_READ(OTG_SNAPSHOT_FRAME); + optc_reg_state->otg_snapshot_position = REG_READ(OTG_SNAPSHOT_POSITION); + optc_reg_state->otg_snapshot_status = REG_READ(OTG_SNAPSHOT_STATUS); + optc_reg_state->otg_spare_register = REG_READ(OTG_SPARE_REGISTER); + optc_reg_state->otg_static_screen_control = REG_READ(OTG_STATIC_SCREEN_CONTROL); + optc_reg_state->otg_status = REG_READ(OTG_STATUS); + optc_reg_state->otg_status_frame_count = REG_READ(OTG_STATUS_FRAME_COUNT); + optc_reg_state->otg_status_hv_count = REG_READ(OTG_STATUS_HV_COUNT); + optc_reg_state->otg_status_position = REG_READ(OTG_STATUS_POSITION); + optc_reg_state->otg_status_vf_count = REG_READ(OTG_STATUS_VF_COUNT); + optc_reg_state->otg_stereo_control = REG_READ(OTG_STEREO_CONTROL); + optc_reg_state->otg_stereo_force_next_eye = REG_READ(OTG_STEREO_FORCE_NEXT_EYE); + optc_reg_state->otg_stereo_status = REG_READ(OTG_STEREO_STATUS); + optc_reg_state->otg_trig_manual_control = REG_READ(OTG_TRIG_MANUAL_CONTROL); + optc_reg_state->otg_triga_cntl = REG_READ(OTG_TRIGA_CNTL); + optc_reg_state->otg_triga_manual_trig = REG_READ(OTG_TRIGA_MANUAL_TRIG); + optc_reg_state->otg_trigb_cntl = REG_READ(OTG_TRIGB_CNTL); + optc_reg_state->otg_trigb_manual_trig = REG_READ(OTG_TRIGB_MANUAL_TRIG); + optc_reg_state->otg_update_lock = REG_READ(OTG_UPDATE_LOCK); + optc_reg_state->otg_v_blank_start_end = REG_READ(OTG_V_BLANK_START_END); + optc_reg_state->otg_v_count_stop_control = REG_READ(OTG_V_COUNT_STOP_CONTROL); + optc_reg_state->otg_v_count_stop_control2 = REG_READ(OTG_V_COUNT_STOP_CONTROL2); + optc_reg_state->otg_v_sync_a = REG_READ(OTG_V_SYNC_A); + optc_reg_state->otg_v_sync_a_cntl = REG_READ(OTG_V_SYNC_A_CNTL); + optc_reg_state->otg_v_total = REG_READ(OTG_V_TOTAL); + optc_reg_state->otg_v_total_control = REG_READ(OTG_V_TOTAL_CONTROL); + optc_reg_state->otg_v_total_int_status = REG_READ(OTG_V_TOTAL_INT_STATUS); + optc_reg_state->otg_v_total_max = REG_READ(OTG_V_TOTAL_MAX); + optc_reg_state->otg_v_total_mid = REG_READ(OTG_V_TOTAL_MID); + optc_reg_state->otg_v_total_min = REG_READ(OTG_V_TOTAL_MIN); + optc_reg_state->otg_vert_sync_control = REG_READ(OTG_VERT_SYNC_CONTROL); + optc_reg_state->otg_vertical_interrupt0_control = REG_READ(OTG_VERTICAL_INTERRUPT0_CONTROL); + optc_reg_state->otg_vertical_interrupt0_position = REG_READ(OTG_VERTICAL_INTERRUPT0_POSITION); + optc_reg_state->otg_vertical_interrupt1_control = REG_READ(OTG_VERTICAL_INTERRUPT1_CONTROL); + optc_reg_state->otg_vertical_interrupt1_position = REG_READ(OTG_VERTICAL_INTERRUPT1_POSITION); + optc_reg_state->otg_vertical_interrupt2_control = REG_READ(OTG_VERTICAL_INTERRUPT2_CONTROL); + 
optc_reg_state->otg_vertical_interrupt2_position = REG_READ(OTG_VERTICAL_INTERRUPT2_POSITION); + optc_reg_state->otg_vready_param = REG_READ(OTG_VREADY_PARAM); + optc_reg_state->otg_vstartup_param = REG_READ(OTG_VSTARTUP_PARAM); + optc_reg_state->otg_vsync_nom_int_status = REG_READ(OTG_VSYNC_NOM_INT_STATUS); + optc_reg_state->otg_vupdate_keepout = REG_READ(OTG_VUPDATE_KEEPOUT); + optc_reg_state->otg_vupdate_param = REG_READ(OTG_VUPDATE_PARAM); +} + static const struct timing_generator_funcs dcn31_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, @@ -377,6 +507,7 @@ static const struct timing_generator_funcs dcn31_tg_funcs = { .init_odm = optc3_init_odm, .is_two_pixels_per_container = optc1_is_two_pixels_per_container, .read_otg_state = optc31_read_otg_state, + .optc_read_reg_state = optc31_read_reg_state, }; void dcn31_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h index 0f72c274f40b..98f7d2e299c5 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h @@ -274,4 +274,6 @@ void optc3_init_odm(struct timing_generator *optc); void optc31_read_otg_state(struct timing_generator *optc, struct dcn_otg_state *s); +void optc31_read_reg_state(struct timing_generator *optc, struct dcn_optc_reg_state *optc_reg_state); + #endif /* __DC_OPTC_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c index 4a2caca37255..43ff957288b2 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c @@ -256,6 +256,7 @@ static const struct timing_generator_funcs dcn314_tg_funcs = { .set_h_timing_div_manual_mode = optc314_set_h_timing_div_manual_mode, .is_two_pixels_per_container = optc1_is_two_pixels_per_container, .read_otg_state = optc31_read_otg_state, + .optc_read_reg_state = optc31_read_reg_state, }; void dcn314_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c index b2b226bcd871..3dcb0d0c931c 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c @@ -365,6 +365,7 @@ static const struct timing_generator_funcs dcn32_tg_funcs = { .get_otg_double_buffer_pending = optc3_get_otg_update_pending, .get_pipe_update_pending = optc3_get_pipe_update_pending, .read_otg_state = optc31_read_otg_state, + .optc_read_reg_state = optc31_read_reg_state, }; void dcn32_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index 52d5ea98c86b..f699e95059f3 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -511,6 +511,7 @@ static const struct timing_generator_funcs dcn35_tg_funcs = { .set_long_vtotal = optc35_set_long_vtotal, .is_two_pixels_per_container = optc1_is_two_pixels_per_container, .read_otg_state = optc31_read_otg_state, + .optc_read_reg_state = optc31_read_reg_state, }; void dcn35_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c index 
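The read_reg_state hooks added above give debug code a uniform way to snapshot OPP and OPTC registers through the existing function tables. A minimal, hypothetical consumer (not from this series; it assumes a timing_generator initialized by one of the dcn*_timing_generator_init() variants) could look like:

/* Hypothetical debug helper: snapshot OTG/OPTC registers via the new hook.
 * Sketch only -- field names come from struct dcn_optc_reg_state as added
 * in this series; the guard handles funcs tables without the hook.
 */
static void debug_snapshot_optc(struct timing_generator *tg,
				struct dcn_optc_reg_state *s)
{
	memset(s, 0, sizeof(*s));

	if (tg->funcs && tg->funcs->optc_read_reg_state)
		tg->funcs->optc_read_reg_state(tg, s);

	/* s->otg_control, s->otg_status_position, ... now hold raw values */
}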
5af13706e601..a8e978d1fae8 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c @@ -533,6 +533,7 @@ static const struct timing_generator_funcs dcn401_tg_funcs = { .set_vupdate_keepout = optc401_set_vupdate_keepout, .wait_update_lock_status = optc401_wait_update_lock_status, .read_otg_state = optc31_read_otg_state, + .optc_read_reg_state = optc31_read_reg_state, }; void dcn401_timing_generator_init(struct optc *optc1) -- cgit v1.2.3 From 11cc0bdc5bf7ebd8a14a137ee63e1a4bfd94f5d1 Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Thu, 9 Oct 2025 09:13:49 -0400 Subject: drm/amd/display: update link encoder assignment [Why] Map a link encoder instance matching stream encoder instance if possible. [How] Get the stream encoder instance and assign the same link encoder instance if available. Reviewed-by: PeiChen Huang Signed-off-by: Meenakshikumar Somasundaram Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 43 +++++++++++++++++------ 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 8d0cdbf14e17..89fe72820f6a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2690,17 +2690,40 @@ static inline int find_fixed_dio_link_enc(const struct dc_link *link) } static inline int find_free_dio_link_enc(const struct resource_context *res_ctx, - const struct dc_link *link, const struct resource_pool *pool) + const struct dc_link *link, const struct resource_pool *pool, struct dc_stream_state *stream) { - int i; + int i, j = -1; + int stream_enc_inst = -1; int enc_count = pool->dig_link_enc_count; - /* for dpia, check preferred encoder first and then the next one */ - for (i = 0; i < enc_count; i++) - if (res_ctx->dio_link_enc_ref_cnts[(link->dpia_preferred_eng_id + i) % enc_count] == 0) - break; + /* Find stream encoder instance for the stream */ + if (stream) { + for (i = 0; i < pool->pipe_count; i++) { + if ((res_ctx->pipe_ctx[i].stream == stream) && + (res_ctx->pipe_ctx[i].stream_res.stream_enc != NULL)) { + stream_enc_inst = res_ctx->pipe_ctx[i].stream_res.stream_enc->id; + break; + } + } + } - return (i >= 0 && i < enc_count) ? (link->dpia_preferred_eng_id + i) % enc_count : -1; + /* Assign dpia preferred > stream enc instance > available */ + for (i = 0; i < enc_count; i++) { + if (res_ctx->dio_link_enc_ref_cnts[i] == 0) { + if (j == -1) + j = i; + + if (link->dpia_preferred_eng_id == i) { + j = i; + break; + } + + if (stream_enc_inst == i) { + j = stream_enc_inst; + } + } + } + return j; } static inline void acquire_dio_link_enc( @@ -2781,7 +2804,7 @@ static bool add_dio_link_enc_to_ctx(const struct dc *dc, retain_dio_link_enc(res_ctx, enc_index); } else { if (stream->link->is_dig_mapping_flexible) - enc_index = find_free_dio_link_enc(res_ctx, stream->link, pool); + enc_index = find_free_dio_link_enc(res_ctx, stream->link, pool, stream); else { int link_index = 0; @@ -2791,7 +2814,7 @@ static bool add_dio_link_enc_to_ctx(const struct dc *dc, * one into the acquiring link. 
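To make the new selection order concrete, a dry run of the loop in find_free_dio_link_enc() with hypothetical values:

	enc_count = 4, dio_link_enc_ref_cnts = {0, 1, 0, 0},
	link->dpia_preferred_eng_id = 1 (busy), stream_enc_inst = 3

	i = 0: free, j == -1          -> j = 0 (first-free fallback)
	i = 1: ref_cnt != 0           -> skipped; preferred engine unavailable
	i = 2: free, no match         -> j stays 0
	i = 3: free, matches stream   -> j = 3

	returns 3; only a free dpia-preferred engine breaks out of the loop
	early, which is what gives it top priority over the stream match.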
*/ if (enc_index >= 0 && is_dio_enc_acquired_by_other_link(stream->link, enc_index, &link_index)) { - int new_enc_index = find_free_dio_link_enc(res_ctx, dc->links[link_index], pool); + int new_enc_index = find_free_dio_link_enc(res_ctx, dc->links[link_index], pool, stream); if (new_enc_index >= 0) swap_dio_link_enc_to_muxable_ctx(context, pool, new_enc_index, enc_index); @@ -5201,7 +5224,7 @@ struct link_encoder *get_temp_dio_link_enc( enc_index = link->eng_id; if (enc_index < 0) - enc_index = find_free_dio_link_enc(res_ctx, link, pool); + enc_index = find_free_dio_link_enc(res_ctx, link, pool, NULL); if (enc_index >= 0) link_enc = pool->link_encoders[enc_index]; -- cgit v1.2.3 From 9ec11bb842b6dab109f64d3aa96e53d8243c546b Mon Sep 17 00:00:00 2001 From: Dominik Kaszewski Date: Thu, 16 Oct 2025 08:44:08 +0200 Subject: drm/amd/display: Remove dc param from check_update [Why] dc_check_update_surfaces_for_stream should not have access to entire DC, especially not a mutable one. Concurrent checks should be able to run independently of one another, without risk of changing state. [How] * Replace dc and stream_status structs with new dc_check_config. * Move required fields from dc_debug and dc_caps to dc_check_config. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Dominik Kaszewski Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 51 ++++++++++------------ drivers/gpu/drm/amd/display/dc/dc.h | 17 +++++--- drivers/gpu/drm/amd/display/dc/dc_stream.h | 5 +-- .../display/dc/resource/dce100/dce100_resource.c | 7 ++- .../display/dc/resource/dce110/dce110_resource.c | 5 ++- .../display/dc/resource/dce112/dce112_resource.c | 7 ++- .../display/dc/resource/dce120/dce120_resource.c | 8 +++- .../amd/display/dc/resource/dce80/dce80_resource.c | 8 +++- .../amd/display/dc/resource/dcn10/dcn10_resource.c | 7 ++- .../amd/display/dc/resource/dcn20/dcn20_resource.c | 6 ++- .../display/dc/resource/dcn201/dcn201_resource.c | 6 ++- .../amd/display/dc/resource/dcn21/dcn21_resource.c | 6 ++- .../amd/display/dc/resource/dcn30/dcn30_resource.c | 6 ++- .../display/dc/resource/dcn301/dcn301_resource.c | 6 ++- .../display/dc/resource/dcn302/dcn302_resource.c | 6 ++- .../display/dc/resource/dcn303/dcn303_resource.c | 6 ++- .../amd/display/dc/resource/dcn31/dcn31_resource.c | 6 ++- .../display/dc/resource/dcn314/dcn314_resource.c | 6 ++- .../display/dc/resource/dcn315/dcn315_resource.c | 6 ++- .../display/dc/resource/dcn316/dcn316_resource.c | 6 ++- .../amd/display/dc/resource/dcn32/dcn32_resource.c | 6 ++- .../display/dc/resource/dcn321/dcn321_resource.c | 6 ++- .../amd/display/dc/resource/dcn35/dcn35_resource.c | 6 ++- .../display/dc/resource/dcn351/dcn351_resource.c | 6 ++- .../amd/display/dc/resource/dcn36/dcn36_resource.c | 6 ++- .../display/dc/resource/dcn401/dcn401_resource.c | 6 ++- 26 files changed, 152 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 246ea9754810..2eb02345dadd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1148,8 +1148,8 @@ static bool dc_construct(struct dc *dc, /* set i2c speed if not done by the respective dcnxxx__resource.c */ if (dc->caps.i2c_speed_in_khz_hdcp == 0) dc->caps.i2c_speed_in_khz_hdcp = dc->caps.i2c_speed_in_khz; - if (dc->caps.max_optimizable_video_width == 0) - dc->caps.max_optimizable_video_width = 5120; + if (dc->check_config.max_optimizable_video_width == 0) + 
dc->check_config.max_optimizable_video_width = 5120; dc->clk_mgr = dc_clk_mgr_create(dc->ctx, dc->res_pool->pp_smu, dc->res_pool->dccg); if (!dc->clk_mgr) goto fail; @@ -2656,7 +2656,7 @@ static bool is_surface_in_context( return false; } -static enum surface_update_type get_plane_info_update_type(const struct dc *dc, const struct dc_surface_update *u) +static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u) { union surface_update_flags *update_flags = &u->surface->update_flags; enum surface_update_type update_type = UPDATE_TYPE_FAST; @@ -2760,7 +2760,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc *dc, } static enum surface_update_type get_scaling_info_update_type( - const struct dc *dc, + const struct dc_check_config *check_config, const struct dc_surface_update *u) { union surface_update_flags *update_flags = &u->surface->update_flags; @@ -2790,7 +2790,7 @@ static enum surface_update_type get_scaling_info_update_type( /* Making dst rect smaller requires a bandwidth change */ update_flags->bits.bandwidth_change = 1; - if (u->scaling_info->src_rect.width > dc->caps.max_optimizable_video_width && + if (u->scaling_info->src_rect.width > check_config->max_optimizable_video_width && (u->scaling_info->clip_rect.width > u->surface->clip_rect.width || u->scaling_info->clip_rect.height > u->surface->clip_rect.height)) /* Changing clip size of a large surface may result in MPC slice count change */ @@ -2817,7 +2817,8 @@ static enum surface_update_type get_scaling_info_update_type( return UPDATE_TYPE_FAST; } -static enum surface_update_type det_surface_update(const struct dc *dc, +static enum surface_update_type det_surface_update( + const struct dc_check_config *check_config, const struct dc_surface_update *u) { enum surface_update_type type; @@ -2831,10 +2832,10 @@ static enum surface_update_type det_surface_update(const struct dc *dc, update_flags->raw = 0; // Reset all flags - type = get_plane_info_update_type(dc, u); + type = get_plane_info_update_type(u); elevate_update_type(&overall_type, type); - type = get_scaling_info_update_type(dc, u); + type = get_scaling_info_update_type(check_config, u); elevate_update_type(&overall_type, type); if (u->flip_addr) { @@ -2911,7 +2912,7 @@ static enum surface_update_type det_surface_update(const struct dc *dc, elevate_update_type(&overall_type, type); } - if (dc->debug.enable_legacy_fast_update && + if (check_config->enable_legacy_fast_update && (update_flags->bits.gamma_change || update_flags->bits.gamut_remap_change || update_flags->bits.input_csc_change || @@ -2946,11 +2947,11 @@ static void force_immediate_gsl_plane_flip(struct dc *dc, struct dc_surface_upda } static enum surface_update_type check_update_surfaces_for_stream( - struct dc *dc, + const struct dc_check_config *check_config, struct dc_surface_update *updates, int surface_count, - struct dc_stream_update *stream_update, - const struct dc_stream_status *stream_status) + struct dc_stream_update *stream_update +) { int i; enum surface_update_type overall_type = UPDATE_TYPE_FAST; @@ -2972,7 +2973,7 @@ static enum surface_update_type check_update_surfaces_for_stream( stream_update->integer_scaling_update) su_flags->bits.scaling = 1; - if (dc->debug.enable_legacy_fast_update && stream_update->out_transfer_func) + if (check_config->enable_legacy_fast_update && stream_update->out_transfer_func) su_flags->bits.out_tf = 1; if (stream_update->abm_level) @@ -3016,13 +3017,13 @@ static enum surface_update_type 
check_update_surfaces_for_stream( /* Output transfer function changes do not require bandwidth recalculation, * so don't trigger a full update */ - if (!dc->debug.enable_legacy_fast_update && stream_update->out_transfer_func) + if (!check_config->enable_legacy_fast_update && stream_update->out_transfer_func) su_flags->bits.out_tf = 1; } for (i = 0 ; i < surface_count; i++) { enum surface_update_type type = - det_surface_update(dc, &updates[i]); + det_surface_update(check_config, &updates[i]); elevate_update_type(&overall_type, type); } @@ -3036,22 +3037,17 @@ static enum surface_update_type check_update_surfaces_for_stream( * See :c:type:`enum surface_update_type ` for explanation of update types */ enum surface_update_type dc_check_update_surfaces_for_stream( - struct dc *dc, + const struct dc_check_config *check_config, struct dc_surface_update *updates, int surface_count, - struct dc_stream_update *stream_update, - const struct dc_stream_status *stream_status) + struct dc_stream_update *stream_update) { - int i; - enum surface_update_type type; - if (stream_update) stream_update->stream->update_flags.raw = 0; - for (i = 0; i < surface_count; i++) + for (size_t i = 0; i < surface_count; i++) updates[i].surface->update_flags.raw = 0; - type = check_update_surfaces_for_stream(dc, updates, surface_count, stream_update, stream_status); - return type; + return check_update_surfaces_for_stream(check_config, updates, surface_count, stream_update); } static struct dc_stream_status *stream_get_status( @@ -3472,8 +3468,7 @@ static bool update_planes_and_stream_state(struct dc *dc, context = dc->current_state; update_type = dc_check_update_surfaces_for_stream( - dc, srf_updates, surface_count, stream_update, stream_status); - + &dc->check_config, srf_updates, surface_count, stream_update); if (full_update_required_weak(dc, srf_updates, surface_count, stream_update, stream)) update_type = UPDATE_TYPE_FULL; @@ -5208,7 +5203,7 @@ static bool update_planes_and_stream_v2(struct dc *dc, commit_minimal_transition_state_in_dc_update(dc, context, stream, srf_updates, surface_count); - if (is_fast_update_only && !dc->debug.enable_legacy_fast_update) { + if (is_fast_update_only && !dc->check_config.enable_legacy_fast_update) { commit_planes_for_stream_fast(dc, srf_updates, surface_count, @@ -5251,7 +5246,7 @@ static void commit_planes_and_stream_update_on_current_context(struct dc *dc, stream_update); if (fast_update_only(dc, fast_update, srf_updates, surface_count, stream_update, stream) && - !dc->debug.enable_legacy_fast_update) + !dc->check_config.enable_legacy_fast_update) commit_planes_for_stream_fast(dc, srf_updates, surface_count, diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 93b39710db53..0fcecf1c5deb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -286,6 +286,15 @@ struct dc_scl_caps { bool sharpener_support; }; +struct dc_check_config { + /** + * max video plane width that can be safely assumed to be always + * supported by single DPP pipe. + */ + unsigned int max_optimizable_video_width; + bool enable_legacy_fast_update; +}; + struct dc_caps { uint32_t max_streams; uint32_t max_links; @@ -301,11 +310,6 @@ struct dc_caps { unsigned int max_cursor_size; unsigned int max_buffered_cursor_size; unsigned int max_video_width; - /* - * max video plane width that can be safely assumed to be always - * supported by single DPP pipe. 
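Since the check now takes only an immutable configuration, callers can classify pending updates without handing over a mutable struct dc. A sketch mirroring the converted call site in update_planes_and_stream_state() (the wrapper itself is hypothetical):

/* Classify pending surface/stream updates against a read-only snapshot of
 * the check configuration; concurrent callers cannot mutate driver state
 * through this path.
 */
static enum surface_update_type
classify_updates(const struct dc *dc,
		 struct dc_surface_update *srf_updates,
		 int surface_count,
		 struct dc_stream_update *stream_update)
{
	return dc_check_update_surfaces_for_stream(&dc->check_config,
						   srf_updates,
						   surface_count,
						   stream_update);
}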
- */ - unsigned int max_optimizable_video_width; unsigned int min_horizontal_blanking_period; int linear_pitch_alignment; bool dcc_const_color; @@ -1128,7 +1132,6 @@ struct dc_debug_options { uint32_t fpo_vactive_min_active_margin_us; uint32_t fpo_vactive_max_blank_us; bool enable_hpo_pg_support; - bool enable_legacy_fast_update; bool disable_dc_mode_overwrite; bool replay_skip_crtc_disabled; bool ignore_pg;/*do nothing, let pmfw control it*/ @@ -1160,7 +1163,6 @@ struct dc_debug_options { bool enable_ips_visual_confirm; unsigned int sharpen_policy; unsigned int scale_to_sharpness_policy; - bool skip_full_updated_if_possible; unsigned int enable_oled_edp_power_up_opt; bool enable_hblank_borrow; bool force_subvp_df_throttle; @@ -1711,6 +1713,7 @@ struct dc { struct dc_debug_options debug; struct dc_versions versions; struct dc_caps caps; + struct dc_check_config check_config; struct dc_cap_funcs cap_funcs; struct dc_config config; struct dc_bounding_box_overrides bb_overrides; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 76cf9fdedab0..c68153c77972 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -474,11 +474,10 @@ void dc_enable_stereo( void dc_trigger_sync(struct dc *dc, struct dc_state *context); enum surface_update_type dc_check_update_surfaces_for_stream( - struct dc *dc, + const struct dc_check_config *check_config, struct dc_surface_update *updates, int surface_count, - struct dc_stream_update *stream_update, - const struct dc_stream_status *stream_status); + struct dc_stream_update *stream_update); /** * Create a new default stream for the requested sink diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index c4b4dc3ad8c9..550466de1a66 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -402,8 +402,10 @@ static const struct dc_plane_cap plane_cap = { } }; -static const struct dc_debug_options debug_defaults = { - .enable_legacy_fast_update = true, +static const struct dc_debug_options debug_defaults = { 0 }; + +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, }; #define CTX ctx @@ -1093,6 +1095,7 @@ static bool dce100_resource_construct( dc->caps.disable_dp_clk_share = true; dc->caps.extended_aux_timeout_support = false; dc->debug = debug_defaults; + dc->check_config = config_defaults; for (i = 0; i < pool->base.pipe_count; i++) { pool->base.timing_generators[i] = diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index cccde5a6f3cd..9e14ffb3c942 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c @@ -424,7 +424,9 @@ static const struct dc_plane_cap plane_cap = { 64 }; -static const struct dc_debug_options debug_defaults = { +static const struct dc_debug_options debug_defaults = { 0 }; + +static const struct dc_check_config config_defaults = { .enable_legacy_fast_update = true, }; @@ -1376,6 +1378,7 @@ static bool dce110_resource_construct( dc->caps.is_apu = true; dc->caps.extended_aux_timeout_support = false; dc->debug = debug_defaults; + dc->check_config = config_defaults; /************************************************* * Create resources 
* diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 869a8e515fc0..62977bcdeaa0 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -429,8 +429,10 @@ static const struct dc_plane_cap plane_cap = { 64 }; -static const struct dc_debug_options debug_defaults = { - .enable_legacy_fast_update = true, +static const struct dc_debug_options debug_defaults = { 0 }; + +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, }; #define CTX ctx @@ -1247,6 +1249,7 @@ static bool dce112_resource_construct( dc->caps.dual_link_dvi = true; dc->caps.extended_aux_timeout_support = false; dc->debug = debug_defaults; + dc->check_config = config_defaults; /************************************************* * Create resources * diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c index 540e04ec1e2d..0770ea37183f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c @@ -526,8 +526,11 @@ static const struct dc_plane_cap plane_cap = { }; static const struct dc_debug_options debug_defaults = { - .disable_clock_gate = true, - .enable_legacy_fast_update = true, + .disable_clock_gate = true, +}; + +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, }; static struct clock_source *dce120_clock_source_create( @@ -1089,6 +1092,7 @@ static bool dce120_resource_construct( dc->caps.psp_setup_panel_mode = true; dc->caps.extended_aux_timeout_support = false; dc->debug = debug_defaults; + dc->check_config = config_defaults; /************************************************* * Create resources * diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index 5b7769745202..c4a56ec60b82 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -418,8 +418,10 @@ static const struct dc_plane_cap plane_cap = { } }; -static const struct dc_debug_options debug_defaults = { - .enable_legacy_fast_update = true, +static const struct dc_debug_options debug_defaults = { 0 }; + +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, }; static const struct dce_dmcu_registers dmcu_regs = { @@ -919,6 +921,7 @@ static bool dce80_construct( dc->caps.dual_link_dvi = true; dc->caps.extended_aux_timeout_support = false; dc->debug = debug_defaults; + dc->check_config = config_defaults; /************************************************* * Create resources * @@ -1320,6 +1323,7 @@ static bool dce83_construct( dc->caps.min_horizontal_blanking_period = 80; dc->caps.is_apu = true; dc->debug = debug_defaults; + dc->check_config = config_defaults; /************************************************* * Create resources * diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c index 652c05c35494..f12367adf145 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c @@ -556,10 +556,13 @@ static const struct dc_debug_options 
debug_defaults_drv = { .recovery_enabled = false, /*enable this by default after testing.*/ .max_downscale_src_width = 3840, .underflow_assert_delay_us = 0xFFFFFFFF, - .enable_legacy_fast_update = true, .using_dml2 = false, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, +}; + static void dcn10_dpp_destroy(struct dpp **dpp) { kfree(TO_DCN10_DPP(*dpp)); @@ -1395,6 +1398,8 @@ static bool dcn10_resource_construct( dc->caps.color.mpc.ogam_rom_caps.pq = 0; dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 0; + dc->debug = debug_defaults_drv; + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 30f155dc54e5..6679c1a14f2f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -718,10 +718,13 @@ static const struct dc_debug_options debug_defaults_drv = { .scl_reset_length10 = true, .sanity_checks = false, .underflow_assert_delay_us = 0xFFFFFFFF, - .enable_legacy_fast_update = true, .using_dml2 = false, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, +}; + void dcn20_dpp_destroy(struct dpp **dpp) { kfree(TO_DCN20_DPP(*dpp)); @@ -2471,6 +2474,7 @@ static bool dcn20_resource_construct( dc->caps.color.mpc.ocsc = 1; dc->caps.dp_hdmi21_pcon_support = true; + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index 9a80bebcee48..055107843a70 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -614,10 +614,13 @@ static const struct dc_debug_options debug_defaults_drv = { .sanity_checks = false, .underflow_assert_delay_us = 0xFFFFFFFF, .enable_tri_buf = true, - .enable_legacy_fast_update = true, .using_dml2 = false, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, +}; + static void dcn201_dpp_destroy(struct dpp **dpp) { kfree(TO_DCN201_DPP(*dpp)); @@ -1151,6 +1154,7 @@ static bool dcn201_resource_construct( dc->caps.color.mpc.ocsc = 1; dc->debug = debug_defaults_drv; + dc->check_config = config_defaults; /*a0 only, remove later*/ dc->work_arounds.no_connect_phy_config = true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 918742a42ded..2060acd5ae09 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -626,10 +626,13 @@ static const struct dc_debug_options debug_defaults_drv = { .usbc_combo_phy_reset_wa = true, .dmub_command_table = true, .use_max_lb = true, - .enable_legacy_fast_update = true, .using_dml2 = false, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, +}; + static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, @@ -1458,6 +1461,7 @@ static bool dcn21_resource_construct( dc->caps.color.mpc.ocsc = 1; dc->caps.dp_hdmi21_pcon_support = true; + 
dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index ff63f59ff928..d0ebb733e802 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -727,10 +727,13 @@ static const struct dc_debug_options debug_defaults_drv = { .dmub_command_table = true, .use_max_lb = true, .exit_idle_opt_for_cursor_updates = true, - .enable_legacy_fast_update = false, .using_dml2 = false, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = false, +}; + static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, @@ -2374,6 +2377,7 @@ static bool dcn30_resource_construct( dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; } } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index 82a205a7c25c..3ad6a3d4858e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -701,10 +701,13 @@ static const struct dc_debug_options debug_defaults_drv = { .dmub_command_table = true, .use_max_lb = false, .exit_idle_opt_for_cursor_updates = true, - .enable_legacy_fast_update = true, .using_dml2 = false, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, +}; + static void dcn301_dpp_destroy(struct dpp **dpp) { kfree(TO_DCN20_DPP(*dpp)); @@ -1498,6 +1501,7 @@ static bool dcn301_resource_construct( bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, &is_vbios_interop_enabled); dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index 61623cb518d9..c0d4a1dc94f8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -98,10 +98,13 @@ static const struct dc_debug_options debug_defaults_drv = { .dmub_command_table = true, .use_max_lb = true, .exit_idle_opt_for_cursor_updates = true, - .enable_legacy_fast_update = false, .using_dml2 = false, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = false, +}; + static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, @@ -1290,6 +1293,7 @@ static bool dcn302_resource_construct( &is_vbios_interop_enabled); dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index 02b9a84f2db3..75e09c2c283e 100644 --- 
a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -98,10 +98,13 @@ static const struct dc_debug_options debug_defaults_drv = { .dmub_command_table = true, .use_max_lb = true, .exit_idle_opt_for_cursor_updates = true, - .enable_legacy_fast_update = false, .using_dml2 = false, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = false, +}; + static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, @@ -1234,6 +1237,7 @@ static bool dcn303_resource_construct( bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, &is_vbios_interop_enabled); dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 3ed7f50554e2..0d667b54ccf8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -888,12 +888,15 @@ static const struct dc_debug_options debug_defaults_drv = { } }, .disable_z10 = true, - .enable_legacy_fast_update = true, .enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/ .dml_hostvm_override = DML_HOSTVM_OVERRIDE_FALSE, .using_dml2 = false, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, +}; + static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, @@ -1978,6 +1981,7 @@ static bool dcn31_resource_construct( dc->caps.vbios_lttpr_aware = true; } } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index d4917a35b991..3ccde75a4ecb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -924,12 +924,15 @@ static const struct dc_debug_options debug_defaults_drv = { }, .seamless_boot_odm_combine = true, - .enable_legacy_fast_update = true, .using_dml2 = false, .disable_dsc_power_gate = true, .min_disp_clk_khz = 100000, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, +}; + static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, @@ -1910,6 +1913,7 @@ static bool dcn314_resource_construct( dc->caps.vbios_lttpr_aware = true; } } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index 82cc78c291d8..38f6328cda92 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -887,11 +887,14 @@ static const struct dc_debug_options debug_defaults_drv = { .afmt = true, } }, - .enable_legacy_fast_update = true, .psr_power_use_phy_fsm = 0, .using_dml2 = false, }; +static const struct dc_check_config config_defaults = 
{ + .enable_legacy_fast_update = true, +}; + static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, @@ -1939,6 +1942,7 @@ static bool dcn315_resource_construct( dc->caps.vbios_lttpr_aware = true; } } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index 636110e48d01..5a95dd54cb42 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c @@ -882,10 +882,13 @@ static const struct dc_debug_options debug_defaults_drv = { .afmt = true, } }, - .enable_legacy_fast_update = true, .using_dml2 = false, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, +}; + static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, @@ -1815,6 +1818,7 @@ static bool dcn316_resource_construct( dc->caps.vbios_lttpr_aware = true; } } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 4c2e8aa96d1f..81e64e17d0cb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -738,10 +738,13 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dp_plus_plus_wa = true, .fpo_vactive_min_active_margin_us = 200, .fpo_vactive_max_blank_us = 1000, - .enable_legacy_fast_update = false, .disable_stutter_for_wm_program = true }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = false, +}; + static struct dce_aux *dcn32_aux_engine_create( struct dc_context *ctx, uint32_t inst) @@ -2294,6 +2297,7 @@ static bool dcn32_resource_construct( dc->caps.vbios_lttpr_aware = true; } } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index ad214986f7ac..3466ca34c93f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -731,11 +731,14 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_subvp_high_refresh = false, .fpo_vactive_min_active_margin_us = 200, .fpo_vactive_max_blank_us = 1000, - .enable_legacy_fast_update = false, .disable_dc_mode_overwrite = true, .using_dml2 = false, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = false, +}; + static struct dce_aux *dcn321_aux_engine_create( struct dc_context *ctx, uint32_t inst) @@ -1797,6 +1800,7 @@ static bool dcn321_resource_construct( dc->caps.vbios_lttpr_aware = true; } } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 3a83a63d2116..ef69898d2cc5 100644 --- 
a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -767,7 +767,6 @@ static const struct dc_debug_options debug_defaults_drv = { .using_dml2 = true, .support_eDP1_5 = true, .enable_hpo_pg_support = false, - .enable_legacy_fast_update = true, .enable_single_display_2to1_odm_policy = true, .disable_idle_power_optimizations = false, .dmcub_emulation = false, @@ -788,6 +787,10 @@ static const struct dc_debug_options debug_defaults_drv = { .min_disp_clk_khz = 50000, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, +}; + static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, @@ -1946,6 +1949,7 @@ static bool dcn35_resource_construct( dc->caps.vbios_lttpr_aware = true; } } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index d00cc4030be3..f3c614c4490c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -747,7 +747,6 @@ static const struct dc_debug_options debug_defaults_drv = { .using_dml2 = true, .support_eDP1_5 = true, .enable_hpo_pg_support = false, - .enable_legacy_fast_update = true, .enable_single_display_2to1_odm_policy = true, .disable_idle_power_optimizations = false, .dmcub_emulation = false, @@ -768,6 +767,10 @@ static const struct dc_debug_options debug_defaults_drv = { .min_disp_clk_khz = 50000, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, +}; + static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, @@ -1917,6 +1920,7 @@ static bool dcn351_resource_construct( dc->caps.vbios_lttpr_aware = true; } } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c index f4b28617b2de..6469d5fe2e6d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c @@ -748,7 +748,6 @@ static const struct dc_debug_options debug_defaults_drv = { .using_dml2 = true, .support_eDP1_5 = true, .enable_hpo_pg_support = false, - .enable_legacy_fast_update = true, .enable_single_display_2to1_odm_policy = true, .disable_idle_power_optimizations = false, .dmcub_emulation = false, @@ -769,6 +768,10 @@ static const struct dc_debug_options debug_defaults_drv = { .min_disp_clk_khz = 50000, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = true, +}; + static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, @@ -1918,6 +1921,7 @@ static bool dcn36_resource_construct( dc->caps.vbios_lttpr_aware = true; } } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 8e2d3781752c..130058d7a70c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ 
b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -721,7 +721,6 @@ static const struct dc_debug_options debug_defaults_drv = { .alloc_extra_way_for_cursor = true, .min_prefetch_in_strobe_ns = 60000, // 60us .disable_unbounded_requesting = false, - .enable_legacy_fast_update = false, .dcc_meta_propagation_delay_us = 10, .fams_version = { .minor = 1, @@ -737,6 +736,10 @@ static const struct dc_debug_options debug_defaults_drv = { .force_cositing = CHROMA_COSITING_NONE + 1, }; +static const struct dc_check_config config_defaults = { + .enable_legacy_fast_update = false, +}; + static struct dce_aux *dcn401_aux_engine_create( struct dc_context *ctx, uint32_t inst) @@ -1995,6 +1998,7 @@ static bool dcn401_resource_construct( dc->caps.vbios_lttpr_aware = true; } } + dc->check_config = config_defaults; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; -- cgit v1.2.3 From 14bb17cc37e03e105c5e989381b1dd9769535417 Mon Sep 17 00:00:00 2001 From: Zhongwei Zhang Date: Tue, 14 Oct 2025 10:46:49 +0800 Subject: drm/amd/display: init dispclk from bootup clock for DCN315 [Why] The driver does not pick up and save the VBIOS clocks during clock init, so dispclk in clk_mgr stays 0. The OS may change the timing (lower the pixel clock) after boot; the driver then programs a lower dispclk with safe_to_lower false, and because dispclk in clk_mgr is zero, the programmed value is illegal and causes garbage. [How] Dump and save the VBIOS clocks, and initialize dispclk in dcn315_init_clocks. Reviewed-by: Charlene Liu Signed-off-by: Zhongwei Zhang Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 87 +++++++++++++++++++++- .../amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.h | 1 + .../display/dc/resource/dcn315/dcn315_resource.c | 1 + 3 files changed, 87 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index b315ed91e010..3a881451e9da 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -40,7 +40,7 @@ #include "dm_helpers.h" #include "dc_dmub_srv.h" - +#include "reg_helper.h" #include "logger_types.h" #undef DC_LOGGER #define DC_LOGGER \ @@ -48,9 +48,43 @@ #include "link_service.h" +#define MAX_INSTANCE 7 +#define MAX_SEGMENT 8 + +struct IP_BASE_INSTANCE { + unsigned int segment[MAX_SEGMENT]; +}; + +struct IP_BASE { + struct IP_BASE_INSTANCE instance[MAX_INSTANCE]; +}; + +static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, 0, 0, 0 } }, + { { 0x00016E00, 0x02401C00, 0, 0, 0, 0, 0, 0 } }, + { { 0x00017000, 0x02402000, 0, 0, 0, 0, 0, 0 } }, + { { 0x00017200, 0x02402400, 0, 0, 0, 0, 0, 0 } }, + { { 0x0001B000, 0x0242D800, 0, 0, 0, 0, 0, 0 } }, + { { 0x0001B200, 0x0242DC00, 0, 0, 0, 0, 0, 0 } } } }; + +#define regCLK1_CLK0_CURRENT_CNT 0x0314 +#define regCLK1_CLK0_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK1_CURRENT_CNT 0x0315 +#define regCLK1_CLK1_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK2_CURRENT_CNT 0x0316 +#define regCLK1_CLK2_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK3_CURRENT_CNT 0x0317 +#define regCLK1_CLK3_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK4_CURRENT_CNT 0x0318 +#define regCLK1_CLK4_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK5_CURRENT_CNT 0x0319 +#define regCLK1_CLK5_CURRENT_CNT_BASE_IDX 0 + #define TO_CLK_MGR_DCN315(clk_mgr)\ container_of(clk_mgr, struct
clk_mgr_dcn315, base) +#define REG(reg_name) \ + (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) + #define UNSUPPORTED_DCFCLK 10000000 #define MIN_DPP_DISP_CLK 100000 @@ -245,9 +279,38 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base, dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } +static void dcn315_dump_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + // read dtbclk + internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT); + + // read dcfclk + internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT); + + // read dppclk + internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT); + + // read dprefclk + internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT); + + // read dispclk + internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT); +} + static void dcn315_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) { + struct dcn35_clk_internal internal = {0}; + + dcn315_dump_clk_registers_internal(&internal, clk_mgr_base); + + regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10; + regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10; + regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10; + regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10; + regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10; return; } @@ -594,13 +657,32 @@ static struct clk_mgr_funcs dcn315_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, .update_clocks = dcn315_update_clocks, - .init_clocks = dcn31_init_clocks, + .init_clocks = dcn315_init_clocks, .enable_pme_wa = dcn315_enable_pme_wa, .are_clock_states_equal = dcn31_are_clock_states_equal, .notify_wm_ranges = dcn315_notify_wm_ranges }; extern struct clk_mgr_funcs dcn3_fpga_funcs; +void dcn315_init_clocks(struct clk_mgr *clk_mgr) +{ + struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); + uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; + struct clk_mgr_dcn315 *clk_mgr_dcn315 = TO_CLK_MGR_DCN315(clk_mgr_int); + struct clk_log_info log_info = {0}; + + memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); + // Assumption is that boot state always supports pstate + clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk + clk_mgr->clks.p_state_change_support = true; + clk_mgr->clks.prev_p_state_change_support = true; + clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; + clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; + + dcn315_dump_clk_registers(&clk_mgr->boot_snapshot, &clk_mgr_dcn315->base.base, &log_info); + clk_mgr->clks.dispclk_khz = clk_mgr->boot_snapshot.dispclk * 1000; +} + void dcn315_clk_mgr_construct( struct dc_context *ctx, struct clk_mgr_dcn315 *clk_mgr, @@ -661,6 +743,7 @@ void dcn315_clk_mgr_construct( /* Saved clocks configured at boot for debug purposes */ dcn315_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info); + clk_mgr->base.base.clks.dispclk_khz = clk_mgr->base.base.boot_snapshot.dispclk * 1000; clk_mgr->base.base.dprefclk_khz = 600000; clk_mgr->base.base.dprefclk_khz = dcn315_smu_get_dpref_clk(&clk_mgr->base); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.h 
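For orientation: the CLK1_CLKn_CURRENT_CNT readings above are divided by 10 to yield MHz, and the snapshot values are later multiplied by 1000 into kHz, so the counters appear to report in 100 kHz steps (inferred from the arithmetic in this patch, not from documented units). A condensed sketch of the seeding the commit performs (dcn315_dump_clk_registers is file-local, so such a helper would live in dcn315_clk_mgr.c):

/* Sketch of the DCN315 boot-clock seeding; names taken from this patch. */
static void seed_dispclk_from_boot_snapshot(struct clk_mgr *clk_mgr)
{
	struct clk_log_info log_info = {0};

	/* Capture the clocks VBIOS programmed before driver load */
	dcn315_dump_clk_registers(&clk_mgr->boot_snapshot, clk_mgr, &log_info);

	/* boot_snapshot.dispclk is in MHz (CURRENT_CNT / 10); store kHz
	 * so the first safe_to_lower == false update never programs 0.
	 */
	clk_mgr->clks.dispclk_khz = clk_mgr->boot_snapshot.dispclk * 1000;
}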
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.h index ac36ddf5dd1a..642ae3d4a790 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.h @@ -44,6 +44,7 @@ void dcn315_clk_mgr_construct(struct dc_context *ctx, struct pp_smu_funcs *pp_smu, struct dccg *dccg); +void dcn315_init_clocks(struct clk_mgr *clk_mgr); void dcn315_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int); #endif //__DCN315_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index 38f6328cda92..4e962f522f1b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -889,6 +889,7 @@ static const struct dc_debug_options debug_defaults_drv = { }, .psr_power_use_phy_fsm = 0, .using_dml2 = false, + .min_disp_clk_khz = 100000, }; static const struct dc_check_config config_defaults = { -- cgit v1.2.3 From fb3896c18b224f2aa9dd8a4f73e6ba23c52b9691 Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Wed, 15 Oct 2025 14:45:43 -0400 Subject: drm/amd/display: Add dc interface to log pre os firmware information [Why] Pre os firmware information is useful to debug pre os to post os fw transition issues. [How] Add dc interface dc_log_preos_dmcub_info() to log pre os firmware information. Reviewed-by: Cruise Hung Signed-off-by: Meenakshikumar Somasundaram Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 5 +++ drivers/gpu/drm/amd/display/dc/dc.h | 2 ++ drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 21 ++++++++++++ drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 7 ++++ drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 25 +++++++++++++++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c | 39 +++++++++++++++++++++++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h | 2 ++ drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 9 ++++++ 8 files changed, 110 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 2eb02345dadd..6ef9dd9ab7a3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -6401,3 +6401,8 @@ void dc_get_underflow_debug_data_for_otg(struct dc *dc, int primary_otg_inst, if (dc->hwss.get_underflow_debug_data) dc->hwss.get_underflow_debug_data(dc, tg, out_data); } + +void dc_log_preos_dmcub_info(const struct dc *dc) +{ + dc_dmub_srv_log_preos_dmcub_info(dc->ctx->dmub_srv); +} diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 0fcecf1c5deb..4a5aa0bd3bc4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -2727,6 +2727,8 @@ unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context); bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index); +void dc_log_preos_dmcub_info(const struct dc *dc); + /* DSC Interfaces */ #include "dc_dsc.h" diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 4b20c01bf646..fffbf1983143 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -2344,3 +2344,24 @@ void dc_dmub_srv_release_hw(const struct dc *dc) dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } + +void 
dc_dmub_srv_log_preos_dmcub_info(struct dc_dmub_srv *dc_dmub_srv) +{ + struct dmub_srv *dmub; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return; + + dmub = dc_dmub_srv->dmub; + + if (dmub_srv_get_preos_info(dmub)) { + DC_LOG_DEBUG("%s: PreOS DMCUB Info", __func__); + DC_LOG_DEBUG("fw_version : 0x%08x", dmub->preos_info.fw_version); + DC_LOG_DEBUG("boot_options : 0x%08x", dmub->preos_info.boot_options); + DC_LOG_DEBUG("boot_status : 0x%08x", dmub->preos_info.boot_status); + DC_LOG_DEBUG("trace_buffer_phy_addr : 0x%016llx", dmub->preos_info.trace_buffer_phy_addr); + DC_LOG_DEBUG("trace_buffer_size_bytes : 0x%08x", dmub->preos_info.trace_buffer_size); + DC_LOG_DEBUG("fb_base : 0x%016llx", dmub->preos_info.fb_base); + DC_LOG_DEBUG("fb_offset : 0x%016llx", dmub->preos_info.fb_offset); + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 9bb00d48fd5e..72e0a41f39f0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -367,4 +367,11 @@ bool dc_dmub_srv_is_cursor_offload_enabled(const struct dc *dc); * @dc - pointer to DC object */ void dc_dmub_srv_release_hw(const struct dc *dc); + +/** + * dc_dmub_srv_log_preos_dmcub_info() - Logs preos dmcub fw info. + * + * @dc - pointer to DC object + */ +void dc_dmub_srv_log_preos_dmcub_info(struct dc_dmub_srv *dc_dmub_srv); #endif /* _DMUB_DC_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index f25c2fc2f98f..9d0168986fe7 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -363,6 +363,19 @@ struct dmub_diagnostic_data { uint8_t is_pwait : 1; }; +/** + * struct dmub_preos_info - preos fw info before loading post os fw. 
+ */ +struct dmub_preos_info { + uint64_t fb_base; + uint64_t fb_offset; + uint64_t trace_buffer_phy_addr; + uint32_t trace_buffer_size; + uint32_t fw_version; + uint32_t boot_status; + uint32_t boot_options; +}; + struct dmub_srv_inbox { /* generic status */ uint64_t num_submitted; @@ -488,6 +501,7 @@ struct dmub_srv_hw_funcs { uint32_t (*get_current_time)(struct dmub_srv *dmub); void (*get_diagnostic_data)(struct dmub_srv *dmub); + bool (*get_preos_fw_info)(struct dmub_srv *dmub); bool (*should_detect)(struct dmub_srv *dmub); void (*init_reg_offsets)(struct dmub_srv *dmub, struct dc_context *ctx); @@ -588,6 +602,7 @@ struct dmub_srv { enum dmub_srv_power_state_type power_state; struct dmub_diagnostic_data debug; struct dmub_fb lsdma_rb_fb; + struct dmub_preos_info preos_info; }; /** @@ -1073,4 +1088,14 @@ enum dmub_status dmub_srv_wait_for_inbox_free(struct dmub_srv *dmub, */ enum dmub_status dmub_srv_update_inbox_status(struct dmub_srv *dmub); +/** + * dmub_srv_get_preos_info() - retrieves preos fw info + * @dmub: the dmub service + * + * Return: + * true - preos fw info retrieved successfully + * false - preos fw info not retrieved successfully + */ +bool dmub_srv_get_preos_info(struct dmub_srv *dmub); + #endif /* _DMUB_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index b40482dbd6ad..e13557ed97be 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -521,6 +521,45 @@ void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub) dmub->debug.gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0); } + +bool dmub_dcn35_get_preos_fw_info(struct dmub_srv *dmub) +{ + uint64_t region3_cw5_offset; + uint32_t top_addr, top_addr_enable, offset_low; + uint32_t offset_high, base_addr, fw_version; + bool is_vbios_fw = false; + + memset(&dmub->preos_info, 0, sizeof(dmub->preos_info)); + + fw_version = REG_READ(DMCUB_SCRATCH1); + is_vbios_fw = ((fw_version >> 6) & 0x01) ? true : false; + if (!is_vbios_fw) + return false; + + dmub->preos_info.boot_status = REG_READ(DMCUB_SCRATCH0); + dmub->preos_info.fw_version = REG_READ(DMCUB_SCRATCH1); + dmub->preos_info.boot_options = REG_READ(DMCUB_SCRATCH14); + REG_GET(DMCUB_REGION3_CW5_TOP_ADDRESS, + DMCUB_REGION3_CW5_ENABLE, &top_addr_enable); + if (top_addr_enable) { + dmub_dcn35_get_fb_base_offset(dmub, + &dmub->preos_info.fb_base, &dmub->preos_info.fb_offset); + offset_low = REG_READ(DMCUB_REGION3_CW5_OFFSET); + offset_high = REG_READ(DMCUB_REGION3_CW5_OFFSET_HIGH); + region3_cw5_offset = ((uint64_t)offset_high << 32) | offset_low; + dmub->preos_info.trace_buffer_phy_addr = region3_cw5_offset + - dmub->preos_info.fb_base + dmub->preos_info.fb_offset; + + REG_GET(DMCUB_REGION3_CW5_TOP_ADDRESS, + DMCUB_REGION3_CW5_TOP_ADDRESS, &top_addr); + base_addr = REG_READ(DMCUB_REGION3_CW5_BASE_ADDRESS) & 0x1FFFFFFF; + dmub->preos_info.trace_buffer_size = + (top_addr > base_addr) ? 
(top_addr - base_addr + 1) : 0; + } + + return true; +} + void dmub_dcn35_configure_dmub_in_system_memory(struct dmub_srv *dmub) { /* DMCUB_REGION3_TMR_AXI_SPACE values: diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h index 39fcb7275da5..92e6695a2c9b 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h @@ -285,4 +285,6 @@ bool dmub_dcn35_is_hw_powered_up(struct dmub_srv *dmub); void dmub_srv_dcn35_regs_init(struct dmub_srv *dmub, struct dc_context *ctx); +bool dmub_dcn35_get_preos_fw_info(struct dmub_srv *dmub); + #endif /* _DMUB_DCN35_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 0244c9b44ecc..a657efda89ce 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -359,6 +359,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) funcs->get_current_time = dmub_dcn35_get_current_time; funcs->get_diagnostic_data = dmub_dcn35_get_diagnostic_data; + funcs->get_preos_fw_info = dmub_dcn35_get_preos_fw_info; funcs->init_reg_offsets = dmub_srv_dcn35_regs_init; if (asic == DMUB_ASIC_DCN351) @@ -1372,3 +1373,11 @@ enum dmub_status dmub_srv_update_inbox_status(struct dmub_srv *dmub) return DMUB_STATUS_OK; } + +bool dmub_srv_get_preos_info(struct dmub_srv *dmub) +{ + if (!dmub || !dmub->hw_funcs.get_preos_fw_info) + return false; + + return dmub->hw_funcs.get_preos_fw_info(dmub); +} -- cgit v1.2.3 From 518a368c57a0e63f531e471baa9499c45ab87f7c Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 2 Oct 2025 12:44:19 -0400 Subject: drm/amd/display: Update cursor offload assignments [Why & How] - Cursor lines per chunk must be assigned from hubp->att and not hubp->pos (the one in hubp->pos is unassigned) - In DCN401 DPP, cur0_enable in attribute struct must be assigned as this is the field passed to DMU - DCN401 should not program position in driver if offload is enabled Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alvin Lee Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c | 1 + .../drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c | 24 ++++++++++++---------- .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 2 +- .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 2 +- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 3adc17f2fc35..62bf7cea21d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -141,6 +141,7 @@ void dpp401_set_cursor_position( } dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; + dpp_base->att.cur0_ctl.bits.cur0_enable = cur_en; } void dpp401_set_optional_cursor_attributes( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index 3a2e0848173e..f01eae50d02f 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -783,21 +783,23 @@ void hubp401_cursor_set_position( if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); - REG_UPDATE(CURSOR_CONTROL, - CURSOR_ENABLE, cur_en); + if 
(!hubp->cursor_offload) + REG_UPDATE(CURSOR_CONTROL, + CURSOR_ENABLE, cur_en); } - REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, x_pos, - CURSOR_Y_POSITION, y_pos); + if (!hubp->cursor_offload) { + REG_SET_2(CURSOR_POSITION, 0, + CURSOR_X_POSITION, x_pos, + CURSOR_Y_POSITION, y_pos); - REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, pos->x_hotspot, - CURSOR_HOT_SPOT_Y, pos->y_hotspot); - - REG_SET(CURSOR_DST_OFFSET, 0, - CURSOR_DST_X_OFFSET, dst_x_offset); + REG_SET_2(CURSOR_HOT_SPOT, 0, + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); + REG_SET(CURSOR_DST_OFFSET, 0, + CURSOR_DST_X_OFFSET, dst_x_offset); + } /* Cursor Position Register Config */ hubp->pos.cur_ctl.bits.cur_enable = cur_en; hubp->pos.position.bits.x_pos = pos->x; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 9333b7fde3bc..7aa0f452e8f7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1699,7 +1699,7 @@ void dcn35_update_cursor_offload_pipe(struct dc *dc, const struct pipe_ctx *pipe p->CURSOR0_0_CURSOR_CONTROL__CURSOR_MODE = hubp->att.cur_ctl.bits.mode; p->CURSOR0_0_CURSOR_CONTROL__CURSOR_2X_MAGNIFY = hubp->pos.cur_ctl.bits.cur_2x_magnify; p->CURSOR0_0_CURSOR_CONTROL__CURSOR_PITCH = hubp->att.cur_ctl.bits.pitch; - p->CURSOR0_0_CURSOR_CONTROL__CURSOR_LINES_PER_CHUNK = hubp->pos.cur_ctl.bits.line_per_chunk; + p->CURSOR0_0_CURSOR_CONTROL__CURSOR_LINES_PER_CHUNK = hubp->att.cur_ctl.bits.line_per_chunk; p->CNVC_CUR0_CURSOR0_CONTROL__CUR0_ENABLE = dpp->att.cur0_ctl.bits.cur0_enable; p->CNVC_CUR0_CURSOR0_CONTROL__CUR0_MODE = dpp->att.cur0_ctl.bits.mode; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index b9c357a40707..f02edc9371b0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -2996,7 +2996,7 @@ void dcn401_update_cursor_offload_pipe(struct dc *dc, const struct pipe_ctx *pip p->CURSOR0_0_CURSOR_CONTROL__CURSOR_MODE = hubp->att.cur_ctl.bits.mode; p->CURSOR0_0_CURSOR_CONTROL__CURSOR_2X_MAGNIFY = hubp->pos.cur_ctl.bits.cur_2x_magnify; p->CURSOR0_0_CURSOR_CONTROL__CURSOR_PITCH = hubp->att.cur_ctl.bits.pitch; - p->CURSOR0_0_CURSOR_CONTROL__CURSOR_LINES_PER_CHUNK = hubp->pos.cur_ctl.bits.line_per_chunk; + p->CURSOR0_0_CURSOR_CONTROL__CURSOR_LINES_PER_CHUNK = hubp->att.cur_ctl.bits.line_per_chunk; p->CM_CUR0_CURSOR0_CONTROL__CUR0_ENABLE = dpp->att.cur0_ctl.bits.cur0_enable; p->CM_CUR0_CURSOR0_CONTROL__CUR0_MODE = dpp->att.cur0_ctl.bits.mode; -- cgit v1.2.3 From 8e6361a9e09846ff1fd3a908345862b8dc2632fd Mon Sep 17 00:00:00 2001 From: Dominik Kaszewski Date: Wed, 30 Jul 2025 15:16:15 +0200 Subject: drm/amd/display: Add lock descriptor to check_update [Why] DM locks the global DC lock during all updates, even if multiple updates touch different resources and could be run in parallel. [How] Add extra enum specifying which kind of resources should be locked. 
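For illustration (not part of this patch), a hedged sketch of how a DM-side caller could consume the returned bitmask, assuming hypothetical dm_lock_state()/dm_lock_streams() helpers:

	struct surface_update_descriptor desc =
		dc_check_update_surfaces_for_stream(&dc->check_config,
						    srf_updates, surface_count,
						    stream_update);

	/* Take only the locks this update actually needs. */
	if (desc.lock_descriptor & LOCK_DESCRIPTOR_STATE)
		dm_lock_state();	/* hypothetical: global DC state lock */
	else if (desc.lock_descriptor & LOCK_DESCRIPTOR_STREAM)
		dm_lock_streams();	/* hypothetical: per-stream locks only */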
Reviewed-by: Aric Cyr Signed-off-by: Dominik Kaszewski Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 125 ++++++++++++++++------------- drivers/gpu/drm/amd/display/dc/dc.h | 13 +++ drivers/gpu/drm/amd/display/dc/dc_stream.h | 2 +- 3 files changed, 82 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 6ef9dd9ab7a3..1851a35f629f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -148,10 +148,16 @@ static const char DC_BUILD_ID[] = "production-build"; /* Private functions */ -static inline void elevate_update_type(enum surface_update_type *original, enum surface_update_type new) +static inline void elevate_update_type( + struct surface_update_descriptor *descriptor, + enum surface_update_type new_type, + enum dc_lock_descriptor new_locks +) { - if (new > *original) - *original = new; + if (new_type > descriptor->update_type) + descriptor->update_type = new_type; + + descriptor->lock_descriptor |= new_locks; } static void destroy_links(struct dc *dc) @@ -2656,47 +2662,49 @@ static bool is_surface_in_context( return false; } -static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u) +static struct surface_update_descriptor get_plane_info_update_type(const struct dc_surface_update *u) { union surface_update_flags *update_flags = &u->surface->update_flags; - enum surface_update_type update_type = UPDATE_TYPE_FAST; + struct surface_update_descriptor update_type = { UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_NONE }; if (!u->plane_info) - return UPDATE_TYPE_FAST; + return update_type; + + elevate_update_type(&update_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_PLANE); if (u->plane_info->color_space != u->surface->color_space) { update_flags->bits.color_space_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_MED); + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); } if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) { update_flags->bits.horizontal_mirror_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_MED); + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); } if (u->plane_info->rotation != u->surface->rotation) { update_flags->bits.rotation_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_FULL); + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); } if (u->plane_info->format != u->surface->format) { update_flags->bits.pixel_format_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_FULL); + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); } if (u->plane_info->stereo_format != u->surface->stereo_format) { update_flags->bits.stereo_format_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_FULL); + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); } if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) { update_flags->bits.per_pixel_alpha_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_MED); + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); } if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) { update_flags->bits.global_alpha_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_MED); + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); } if (u->plane_info->dcc.enable != 
u->surface->dcc.enable @@ -2708,7 +2716,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa * recalculate stutter period. */ update_flags->bits.dcc_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_FULL); + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); } if (resource_pixel_format_to_bpp(u->plane_info->format) != @@ -2717,33 +2725,33 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa * and DML calculation */ update_flags->bits.bpp_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_FULL); + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); } if (u->plane_info->plane_size.surface_pitch != u->surface->plane_size.surface_pitch || u->plane_info->plane_size.chroma_pitch != u->surface->plane_size.chroma_pitch) { update_flags->bits.plane_size_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_MED); + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); } const struct dc_tiling_info *tiling = &u->plane_info->tiling_info; if (memcmp(tiling, &u->surface->tiling_info, sizeof(*tiling)) != 0) { update_flags->bits.swizzle_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_MED); + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); switch (tiling->gfxversion) { case DcGfxVersion9: case DcGfxVersion10: case DcGfxVersion11: if (tiling->gfx9.swizzle != DC_SW_LINEAR) { - elevate_update_type(&update_type, UPDATE_TYPE_FULL); + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); update_flags->bits.bandwidth_change = 1; } break; case DcGfxAddr3: if (tiling->gfx_addr3.swizzle != DC_ADDR3_SW_LINEAR) { - elevate_update_type(&update_type, UPDATE_TYPE_FULL); + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); update_flags->bits.bandwidth_change = 1; } break; @@ -2759,14 +2767,17 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa return update_type; } -static enum surface_update_type get_scaling_info_update_type( +static struct surface_update_descriptor get_scaling_info_update_type( const struct dc_check_config *check_config, const struct dc_surface_update *u) { union surface_update_flags *update_flags = &u->surface->update_flags; + struct surface_update_descriptor update_type = { UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_NONE }; if (!u->scaling_info) - return UPDATE_TYPE_FAST; + return update_type; + + elevate_update_type(&update_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_PLANE); if (u->scaling_info->src_rect.width != u->surface->src_rect.width || u->scaling_info->src_rect.height != u->surface->src_rect.height @@ -2809,40 +2820,41 @@ static enum surface_update_type get_scaling_info_update_type( if (update_flags->bits.clock_change || update_flags->bits.bandwidth_change || update_flags->bits.scaling_change) - return UPDATE_TYPE_FULL; + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); if (update_flags->bits.position_change) - return UPDATE_TYPE_MED; + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); - return UPDATE_TYPE_FAST; + return update_type; } -static enum surface_update_type det_surface_update( +static struct surface_update_descriptor det_surface_update( const struct dc_check_config *check_config, - const struct dc_surface_update *u) + struct dc_surface_update *u) { - enum surface_update_type type; - enum surface_update_type overall_type = UPDATE_TYPE_FAST; + struct surface_update_descriptor 
overall_type = { UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_NONE }; union surface_update_flags *update_flags = &u->surface->update_flags; if (u->surface->force_full_update) { update_flags->raw = 0xFFFFFFFF; - return UPDATE_TYPE_FULL; + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + return overall_type; } update_flags->raw = 0; // Reset all flags - type = get_plane_info_update_type(u); - elevate_update_type(&overall_type, type); + struct surface_update_descriptor inner_type = get_plane_info_update_type(u); - type = get_scaling_info_update_type(check_config, u); - elevate_update_type(&overall_type, type); + elevate_update_type(&overall_type, inner_type.update_type, inner_type.lock_descriptor); + + inner_type = get_scaling_info_update_type(check_config, u); + elevate_update_type(&overall_type, inner_type.update_type, inner_type.lock_descriptor); if (u->flip_addr) { update_flags->bits.addr_update = 1; if (u->flip_addr->address.tmz_surface != u->surface->address.tmz_surface) { update_flags->bits.tmz_changed = 1; - elevate_update_type(&overall_type, UPDATE_TYPE_FULL); + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); } } if (u->in_transfer_func) @@ -2878,13 +2890,15 @@ static enum surface_update_type det_surface_update( if (u->hdr_mult.value) if (u->hdr_mult.value != u->surface->hdr_mult.value) { update_flags->bits.hdr_mult = 1; - elevate_update_type(&overall_type, UPDATE_TYPE_MED); + // TODO: Should be fast? + elevate_update_type(&overall_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); } if (u->sdr_white_level_nits) if (u->sdr_white_level_nits != u->surface->sdr_white_level_nits) { update_flags->bits.sdr_white_level_nits = 1; - elevate_update_type(&overall_type, UPDATE_TYPE_FULL); + // TODO: Should be fast? + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); } if (u->cm2_params) { @@ -2898,18 +2912,16 @@ static enum surface_update_type det_surface_update( update_flags->bits.mcm_transfer_function_enable_change = 1; } if (update_flags->bits.in_transfer_func_change) { - type = UPDATE_TYPE_MED; - elevate_update_type(&overall_type, type); + // TODO: Fast? 
+ elevate_update_type(&overall_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); } if (update_flags->bits.lut_3d && u->surface->mcm_luts.lut3d_data.lut3d_src != DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) { - type = UPDATE_TYPE_FULL; - elevate_update_type(&overall_type, type); + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); } if (update_flags->bits.mcm_transfer_function_enable_change) { - type = UPDATE_TYPE_FULL; - elevate_update_type(&overall_type, type); + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); } if (check_config->enable_legacy_fast_update && @@ -2917,8 +2929,7 @@ static enum surface_update_type det_surface_update( update_flags->bits.gamut_remap_change || update_flags->bits.input_csc_change || update_flags->bits.coeff_reduction_change)) { - type = UPDATE_TYPE_FULL; - elevate_update_type(&overall_type, type); + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); } return overall_type; } @@ -2946,28 +2957,28 @@ static void force_immediate_gsl_plane_flip(struct dc *dc, struct dc_surface_upda } } -static enum surface_update_type check_update_surfaces_for_stream( +static struct surface_update_descriptor check_update_surfaces_for_stream( const struct dc_check_config *check_config, struct dc_surface_update *updates, int surface_count, - struct dc_stream_update *stream_update -) + struct dc_stream_update *stream_update) { - int i; - enum surface_update_type overall_type = UPDATE_TYPE_FAST; + struct surface_update_descriptor overall_type = { UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_NONE }; if (stream_update && stream_update->pending_test_pattern) { - overall_type = UPDATE_TYPE_FULL; + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); } if (stream_update && stream_update->hw_cursor_req) { - overall_type = UPDATE_TYPE_FULL; + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); } /* some stream updates require passive update */ if (stream_update) { union stream_update_flags *su_flags = &stream_update->stream->update_flags; + elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); + if ((stream_update->src.height != 0 && stream_update->src.width != 0) || (stream_update->dst.height != 0 && stream_update->dst.width != 0) || stream_update->integer_scaling_update) @@ -3009,7 +3020,7 @@ static enum surface_update_type check_update_surfaces_for_stream( su_flags->bits.out_csc = 1; if (su_flags->raw != 0) - overall_type = UPDATE_TYPE_FULL; + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); if (stream_update->output_csc_transform) su_flags->bits.out_csc = 1; @@ -3021,11 +3032,11 @@ static enum surface_update_type check_update_surfaces_for_stream( su_flags->bits.out_tf = 1; } - for (i = 0 ; i < surface_count; i++) { - enum surface_update_type type = + for (int i = 0 ; i < surface_count; i++) { + struct surface_update_descriptor inner_type = det_surface_update(check_config, &updates[i]); - elevate_update_type(&overall_type, type); + elevate_update_type(&overall_type, inner_type.update_type, inner_type.lock_descriptor); } return overall_type; @@ -3036,7 +3047,7 @@ static enum surface_update_type check_update_surfaces_for_stream( * * See :c:type:`enum surface_update_type ` for explanation of update types */ -enum surface_update_type dc_check_update_surfaces_for_stream( +struct surface_update_descriptor dc_check_update_surfaces_for_stream( const struct dc_check_config *check_config, struct dc_surface_update *updates, int surface_count, @@ 
-3468,7 +3479,7 @@ static bool update_planes_and_stream_state(struct dc *dc, context = dc->current_state; update_type = dc_check_update_surfaces_for_stream( - &dc->check_config, srf_updates, surface_count, stream_update); + &dc->check_config, srf_updates, surface_count, stream_update).update_type; if (full_update_required_weak(dc, srf_updates, surface_count, stream_update, stream)) update_type = UPDATE_TYPE_FULL; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4a5aa0bd3bc4..142cbb7c77cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -467,6 +467,19 @@ enum surface_update_type { UPDATE_TYPE_FULL, /* may need to shuffle resources */ }; +enum dc_lock_descriptor { + LOCK_DESCRIPTOR_NONE = 0x0, + LOCK_DESCRIPTOR_STATE = 0x1, + LOCK_DESCRIPTOR_LINK = 0x2, + LOCK_DESCRIPTOR_STREAM = 0x4, + LOCK_DESCRIPTOR_PLANE = 0x8, +}; + +struct surface_update_descriptor { + enum surface_update_type update_type; + enum dc_lock_descriptor lock_descriptor; +}; + /* Forward declaration*/ struct dc; struct dc_plane_state; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index c68153c77972..75869eed1d3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -473,7 +473,7 @@ void dc_enable_stereo( /* Triggers multi-stream synchronization. */ void dc_trigger_sync(struct dc *dc, struct dc_state *context); -enum surface_update_type dc_check_update_surfaces_for_stream( +struct surface_update_descriptor dc_check_update_surfaces_for_stream( const struct dc_check_config *check_config, struct dc_surface_update *updates, int surface_count, -- cgit v1.2.3 From 48d6be05f540233a72c954777ede1c60b9094386 Mon Sep 17 00:00:00 2001 From: Dominik Kaszewski Date: Thu, 16 Oct 2025 13:46:25 +0200 Subject: drm/amd/display: Make observers const-correct [Why] Observers which do not modify their pointer arguments should take them as const. This clearly signals their intent to the caller, making it clear that the function is safe to call multiple times, and that the call can be removed if the result is no longer necessary. 
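For illustration, a minimal sketch of the contract (hypothetical helper, not code from this patch), built on the dc_stream_get_status_const() overload added below:

	/* An observer: takes const and promises not to mutate the stream,
	 * so callers may invoke it repeatedly or drop the call entirely. */
	static bool stream_has_planes(const struct dc_stream_state *stream)
	{
		const struct dc_stream_status *status =
			dc_stream_get_status_const(stream);

		return status && status->plane_count > 0;
	}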
[How] Made const-correct all of the functions below: * full_update_required[_weak] * fast_updates_exist * fast_update_only * dc_can_clear_cursor_limit * dc_stream_get_status (added const named overload) Reviewed-by: Nicholas Kazlauskas Signed-off-by: Dominik Kaszewski Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 44 ++++++++++++++----------- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 8 +++++ drivers/gpu/drm/amd/display/dc/dc.h | 2 +- drivers/gpu/drm/amd/display/dc/dc_stream.h | 4 +-- 4 files changed, 35 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 1851a35f629f..b720e007c654 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3427,11 +3427,12 @@ static void update_seamless_boot_flags(struct dc *dc, } } -static bool full_update_required_weak(struct dc *dc, - struct dc_surface_update *srf_updates, +static bool full_update_required_weak( + const struct dc *dc, + const struct dc_surface_update *srf_updates, int surface_count, - struct dc_stream_update *stream_update, - struct dc_stream_state *stream); + const struct dc_stream_update *stream_update, + const struct dc_stream_state *stream); /** * update_planes_and_stream_state() - The function takes planes and stream @@ -5015,7 +5016,7 @@ void populate_fast_updates(struct dc_fast_update *fast_update, } } -static bool fast_updates_exist(struct dc_fast_update *fast_update, int surface_count) +static bool fast_updates_exist(const struct dc_fast_update *fast_update, int surface_count) { int i; @@ -5056,11 +5057,12 @@ bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_ return false; } -static bool full_update_required_weak(struct dc *dc, - struct dc_surface_update *srf_updates, +static bool full_update_required_weak( + const struct dc *dc, + const struct dc_surface_update *srf_updates, int surface_count, - struct dc_stream_update *stream_update, - struct dc_stream_state *stream) + const struct dc_stream_update *stream_update, + const struct dc_stream_state *stream) { const struct dc_state *context = dc->current_state; if (srf_updates) @@ -5069,7 +5071,7 @@ static bool full_update_required_weak(struct dc *dc, return true; if (stream) { - const struct dc_stream_status *stream_status = dc_stream_get_status(stream); + const struct dc_stream_status *stream_status = dc_stream_get_status_const(stream); if (stream_status == NULL || stream_status->plane_count != surface_count) return true; } @@ -5082,11 +5084,12 @@ static bool full_update_required_weak(struct dc *dc, return false; } -static bool full_update_required(struct dc *dc, - struct dc_surface_update *srf_updates, +static bool full_update_required( + const struct dc *dc, + const struct dc_surface_update *srf_updates, int surface_count, - struct dc_stream_update *stream_update, - struct dc_stream_state *stream) + const struct dc_stream_update *stream_update, + const struct dc_stream_state *stream) { if (full_update_required_weak(dc, srf_updates, surface_count, stream_update, stream)) return true; @@ -5146,12 +5149,13 @@ static bool full_update_required(struct dc *dc, return false; } -static bool fast_update_only(struct dc *dc, - struct dc_fast_update *fast_update, - struct dc_surface_update *srf_updates, +static bool fast_update_only( + const struct dc *dc, + const struct dc_fast_update *fast_update, + const struct dc_surface_update *srf_updates, int 
surface_count, - struct dc_stream_update *stream_update, - struct dc_stream_state *stream) + const struct dc_stream_update *stream_update, + const struct dc_stream_state *stream) { return fast_updates_exist(fast_update, surface_count) && !full_update_required(dc, srf_updates, surface_count, stream_update, stream); @@ -6383,7 +6387,7 @@ bool dc_is_cursor_limit_pending(struct dc *dc) return false; } -bool dc_can_clear_cursor_limit(struct dc *dc) +bool dc_can_clear_cursor_limit(const struct dc *dc) { uint32_t i; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 096fbb8ad24c..6d309c320253 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -224,6 +224,14 @@ struct dc_stream_status *dc_stream_get_status( return dc_state_get_stream_status(dc->current_state, stream); } +const struct dc_stream_status *dc_stream_get_status_const( + const struct dc_stream_state *stream) +{ + struct dc *dc = stream->ctx->dc; + + return dc_state_get_stream_status(dc->current_state, stream); +} + void program_cursor_attributes( struct dc *dc, struct dc_stream_state *stream) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 142cbb7c77cd..5bfd764467a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -2757,7 +2757,7 @@ bool dc_is_timing_changed(struct dc_stream_state *cur_stream, struct dc_stream_state *new_stream); bool dc_is_cursor_limit_pending(struct dc *dc); -bool dc_can_clear_cursor_limit(struct dc *dc); +bool dc_can_clear_cursor_limit(const struct dc *dc); /** * dc_get_underflow_debug_data_for_otg() - Retrieve underflow debug data. diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 75869eed1d3a..321cfe92d799 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -491,8 +491,8 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink); void dc_stream_retain(struct dc_stream_state *dc_stream); void dc_stream_release(struct dc_stream_state *dc_stream); -struct dc_stream_status *dc_stream_get_status( - struct dc_stream_state *dc_stream); +struct dc_stream_status *dc_stream_get_status(struct dc_stream_state *dc_stream); +const struct dc_stream_status *dc_stream_get_status_const(const struct dc_stream_state *dc_stream); /******************************************************************************* * Cursor interfaces - To manages the cursor within a stream -- cgit v1.2.3 From 99441824bec63549a076cd86631d138ec9a0c71c Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 16 Oct 2025 20:08:10 -0600 Subject: drm/amd/display: Add HDR workaround for a specific eDP [WHY & HOW] Some eDP panels suffer from flickering when HDR is enabled in KDE or GNOME. This adds another quirk to work around the issue by skipping VSC, which is incompatible with the affected eDP panel. 
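As background (illustrative, not part of the patch), the quirk matches on the packed EDID panel id: 'S', 'D', 'C' is the three-letter PNP vendor id and 0x4171 the product code.

	/* drm_edid_encode_panel_id() packs the vendor letters and the
	 * product code into a single u32, so the id can be matched in a
	 * switch statement as in apply_edid_quirks() below. */
	u32 panel_id = drm_edid_encode_panel_id('S', 'D', 'C', 0x4171);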
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/4452 Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Hung Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index fe100e4c9801..cc21337a182f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -83,6 +83,7 @@ static void apply_edid_quirks(struct drm_device *dev, struct edid *edid, struct edid_caps->panel_patch.remove_sink_ext_caps = true; break; case drm_edid_encode_panel_id('S', 'D', 'C', 0x4154): + case drm_edid_encode_panel_id('S', 'D', 'C', 0x4171): drm_dbg_driver(dev, "Disabling VSC on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.disable_colorimetry = true; break; -- cgit v1.2.3 From cb57b8cdb072dc37723b6906da1c37ff9cbc2da4 Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Wed, 17 Sep 2025 11:00:02 -0400 Subject: drm/amd/display: Fix incorrect return of vblank enable on unconfigured crtc [Why&How] Return -EINVAL when userspace asks us to enable vblank on a crtc that is not yet enabled. Suggested-by: Aurabindo Pillai Reviewed-by: Aurabindo Pillai Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1856 Signed-off-by: Ivan Lipski Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index f08121a2b838..38f9ea313dcb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -308,8 +308,12 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) int irq_type; int rc = 0; - if (acrtc->otg_inst == -1) - goto skip; + if (enable && !acrtc->base.enabled) { + drm_dbg_vbl(crtc->dev, + "Reject vblank enable on unconfigured CRTC %d (enabled=%d)\n", + acrtc->crtc_id, acrtc->base.enabled); + return -EINVAL; + } irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id); @@ -394,7 +398,7 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) return rc; } #endif -skip: + if (amdgpu_in_reset(adev)) return 0; -- cgit v1.2.3 From 020ee49ce9a839285806cc430ca8e059fc95aade Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 17 Oct 2025 17:20:46 -0400 Subject: drm/amd/display: [FW Promotion] Release 0.1.33.0 [Why & How] - Extend replay debug flags, define a new bit as debug_log_enabled - Replace the padding to frame_skip_number in struct dmub_cmd_replay_set_coasting_vtotal_data Acked-by: Tom Chung Signed-off-by: Taimur Hassan Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index b2d5154d1402..772e07a1a959 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -485,7 +485,13 @@ union replay_debug_flags { */ uint32_t enable_visual_confirm_debug : 1; - uint32_t reserved : 18; + /** + * 
0x4000 (bit 14) + * @debug_log_enabled: Debug Log Enabled + */ + uint32_t debug_log_enabled : 1; + + uint32_t reserved : 17; } bitfields; uint32_t u32All; @@ -4608,9 +4614,9 @@ struct dmub_cmd_replay_set_coasting_vtotal_data { */ uint16_t coasting_vtotal_high; /** - * Explicit padding to 4 byte boundary. + * frame skip number. */ - uint8_t pad[2]; + uint16_t frame_skip_number; }; /** -- cgit v1.2.3 From a981cb0f37eb20f47b8fc4072602ba728783082f Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 17 Oct 2025 19:22:02 -0500 Subject: drm/amd/display: Promote DC to 3.2.356 This version brings along the following updates: - Fix incorrect return of vblank enable on unconfigured crtc - Add HDR workaround for a specific eDP - Make observers const-correct - Add lock descriptor to check_update - Update cursor offload assignments - Add dc interface to log pre os firmware information - Init dispclk from bootup clock for DCN315 - Remove dc param from check_update - Update link encoder assignment - Add more DC HW state info to underflow logging - Rename dml2 to dml2_0 folder - Fix notification of vtotal to DMU for cursor offload - Fix wrong index for DCN401 cursor offload - Add opp count validation to dml2.1 - Fix DMUB reset sequence for DCN32 - Bump minimum for frame_warn_limit Acked-by: Tom Chung Signed-off-by: Taimur Hassan Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 5bfd764467a1..010d9315b96b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -63,7 +63,7 @@ struct dcn_dsc_reg_state; struct dcn_optc_reg_state; struct dcn_dccg_reg_state; -#define DC_VER "3.2.355" +#define DC_VER "3.2.356" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC -- cgit v1.2.3 From f18719ef4bb7b0e48dee92703b01e16f6c0d6318 Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Tue, 21 Oct 2025 13:01:06 +0800 Subject: drm/amdgpu: Convert amdgpu userqueue management from IDR to XArray MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit refactors the AMDGPU userqueue management subsystem to replace IDR (ID Allocation) with XArray for improved performance, scalability, and maintainability. The changes address several issues with the previous IDR implementation and provide better locking semantics. Key changes: 1. **Global XArray Introduction**: - Added `userq_doorbell_xa` to `struct amdgpu_device` for global queue tracking - Uses doorbell_index as key for efficient global lookup - Replaces the previous `userq_mgr_list` linked list approach 2. **Per-process XArray Conversion**: - Replaced `userq_idr` with `userq_mgr_xa` in `struct amdgpu_userq_mgr` - Maintains per-process queue tracking with queue_id as key - Uses XA_FLAGS_ALLOC for automatic ID allocation 3. **Locking Improvements**: - Removed global `userq_mutex` from `struct amdgpu_device` - Replaced with fine-grained XArray locking using XArray's internal spinlocks 4. **Runtime Idle Check Optimization**: - Updated `amdgpu_runtime_idle_check_userq()` to use xa_empty 5. 
**Queue Management Functions**: - Converted all IDR operations to equivalent XArray functions: - `idr_alloc()` → `xa_alloc()` - `idr_find()` → `xa_load()` - `idr_remove()` → `xa_erase()` - `idr_for_each()` → `xa_for_each()` Benefits: - **Performance**: XArray provides better scalability for large numbers of queues - **Memory Efficiency**: Reduced memory overhead compared to IDR - **Thread Safety**: Improved locking semantics with XArray's internal spinlocks v2: rename userq_global_xa/userq_xa to userq_doorbell_xa/userq_mgr_xa Remove xa_lock and use its own lock. v3: Set queue->userq_mgr = uq_mgr in amdgpu_userq_create() v4: use xa_store_irq (Christian) hold the read side of the reset lock while creating/destroying queues and the manager data structure. (Christian) Acked-by: Alex Deucher Suggested-by: Christian König Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 16 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 151 ++++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 4 +- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 22 ++-- 7 files changed, 101 insertions(+), 111 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a5574e84694b..50079209c472 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1176,6 +1176,12 @@ struct amdgpu_device { * queue fence. */ struct xarray userq_xa; + /** + * @userq_doorbell_xa: Global user queue map (doorbell index → queue) + * Key: doorbell_index (unique global identifier for the queue) + * Value: struct amdgpu_usermode_queue + */ + struct xarray userq_doorbell_xa; /* df */ struct amdgpu_df df; @@ -1309,8 +1315,6 @@ struct amdgpu_device { */ bool apu_prefer_gtt; - struct list_head userq_mgr_list; - struct mutex userq_mutex; bool userq_halt_for_enforce_isolation; struct amdgpu_uid *uid_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3ffb9bb1ec0b..b8d91247f51a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4558,7 +4558,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->gfx.userq_sch_mutex); mutex_init(&adev->gfx.workload_profile_mutex); mutex_init(&adev->vcn.workload_profile_mutex); - mutex_init(&adev->userq_mutex); amdgpu_device_init_apu_flags(adev); @@ -4586,7 +4585,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->pm.od_kobj_list); - INIT_LIST_HEAD(&adev->userq_mgr_list); + xa_init(&adev->userq_doorbell_xa); INIT_DELAYED_WORK(&adev->delayed_init_work, amdgpu_device_delayed_init_work_handler); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index a36e15beafeb..cee90f9e58a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2772,22 +2772,8 @@ static int amdgpu_runtime_idle_check_userq(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct drm_device *drm_dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - struct amdgpu_usermode_queue *queue; - struct amdgpu_userq_mgr *uqm, *tmp; - int queue_id; - int ret = 0; - - mutex_lock(&adev->userq_mutex); - list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { 
idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { - ret = -EBUSY; - goto done; - } - } -done: - mutex_unlock(&adev->userq_mutex); - return ret; + return xa_empty(&adev->userq_doorbell_xa) ? 0 : -EBUSY; } static int amdgpu_pmops_runtime_suspend(struct device *dev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 2200e0bbf040..3b1811b110a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -30,6 +30,7 @@ #include "amdgpu_vm.h" #include "amdgpu_userq.h" #include "amdgpu_hmm.h" +#include "amdgpu_reset.h" #include "amdgpu_userq_fence.h" u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev) @@ -278,19 +279,27 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; + /* Wait for mode-1 reset to complete */ + down_read(&adev->reset_domain->sem); + /* Drop the userq reference. */ amdgpu_userq_buffer_vas_list_cleanup(adev, queue); uq_funcs->mqd_destroy(uq_mgr, queue); amdgpu_userq_fence_driver_free(queue); - idr_remove(&uq_mgr->userq_idr, queue_id); + /* Use interrupt-safe locking since IRQ handlers may access these XArrays */ + xa_erase_irq(&uq_mgr->userq_mgr_xa, (unsigned long)queue_id); + xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index); + queue->userq_mgr = NULL; list_del(&queue->userq_va_list); kfree(queue); + + up_read(&adev->reset_domain->sem); } static struct amdgpu_usermode_queue * amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) { - return idr_find(&uq_mgr->userq_idr, qid); + return xa_load(&uq_mgr->userq_mgr_xa, qid); } void @@ -551,8 +560,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) struct amdgpu_db_info db_info; char *queue_name; bool skip_map_queue; + u32 qid; uint64_t index; - int qid, r = 0; + int r = 0; int priority = (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; @@ -575,7 +585,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) * * This will also make sure we have a valid eviction fence ready to be used. 
*/ - mutex_lock(&adev->userq_mutex); amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); uq_funcs = adev->userq_funcs[args->in.ip_type]; @@ -638,15 +647,27 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } - qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL); - if (qid < 0) { + /* Wait for mode-1 reset to complete */ + down_read(&adev->reset_domain->sem); + r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL)); + if (r) { + kfree(queue); + up_read(&adev->reset_domain->sem); + goto unlock; + } + + r = xa_alloc(&uq_mgr->userq_mgr_xa, &qid, queue, XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL); + if (r) { drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n"); amdgpu_userq_fence_driver_free(queue); uq_funcs->mqd_destroy(uq_mgr, queue); kfree(queue); r = -ENOMEM; + up_read(&adev->reset_domain->sem); goto unlock; } + up_read(&adev->reset_domain->sem); + queue->userq_mgr = uq_mgr; /* don't map the queue if scheduling is halted */ if (adev->userq_halt_for_enforce_isolation && @@ -659,7 +680,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) r = amdgpu_userq_map_helper(uq_mgr, queue); if (r) { drm_file_err(uq_mgr->file, "Failed to map Queue\n"); - idr_remove(&uq_mgr->userq_idr, qid); + xa_erase(&uq_mgr->userq_mgr_xa, qid); amdgpu_userq_fence_driver_free(queue); uq_funcs->mqd_destroy(uq_mgr, queue); kfree(queue); @@ -684,7 +705,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) unlock: mutex_unlock(&uq_mgr->userq_mutex); - mutex_unlock(&adev->userq_mutex); return r; } @@ -782,11 +802,11 @@ static int amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) { struct amdgpu_usermode_queue *queue; - int queue_id; + unsigned long queue_id; int ret = 0, r; /* Resume all the queues for this process */ - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) { if (!amdgpu_userq_buffer_vas_mapped(queue)) { drm_file_err(uq_mgr->file, @@ -1023,11 +1043,11 @@ static int amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) { struct amdgpu_usermode_queue *queue; - int queue_id; + unsigned long queue_id; int ret = 0, r; /* Try to unmap all the queues in this process ctx */ - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) { r = amdgpu_userq_preempt_helper(uq_mgr, queue); if (r) ret = r; @@ -1042,9 +1062,10 @@ static int amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) { struct amdgpu_usermode_queue *queue; - int queue_id, ret; + unsigned long queue_id; + int ret; - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) { struct dma_fence *f = queue->last_fence; if (!f || dma_fence_is_signaled(f)) @@ -1097,44 +1118,30 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f struct amdgpu_device *adev) { mutex_init(&userq_mgr->userq_mutex); - idr_init_base(&userq_mgr->userq_idr, 1); + xa_init_flags(&userq_mgr->userq_mgr_xa, XA_FLAGS_ALLOC); userq_mgr->adev = adev; userq_mgr->file = file_priv; - mutex_lock(&adev->userq_mutex); - list_add(&userq_mgr->list, &adev->userq_mgr_list); - mutex_unlock(&adev->userq_mutex); - INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); return 0; } void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) { - struct amdgpu_device *adev = userq_mgr->adev; struct 
amdgpu_usermode_queue *queue;
-	struct amdgpu_userq_mgr *uqm, *tmp;
-	uint32_t queue_id;
+	unsigned long queue_id;

 	cancel_delayed_work_sync(&userq_mgr->resume_work);

-	mutex_lock(&adev->userq_mutex);
 	mutex_lock(&userq_mgr->userq_mutex);
-	idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) {
+	xa_for_each(&userq_mgr->userq_mgr_xa, queue_id, queue) {
 		amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
 		amdgpu_userq_unmap_helper(userq_mgr, queue);
 		amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
 	}
-	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
-		if (uqm == userq_mgr) {
-			list_del(&uqm->list);
-			break;
-		}
-	}
-	idr_destroy(&userq_mgr->userq_idr);
+	xa_destroy(&userq_mgr->userq_mgr_xa);
 	mutex_unlock(&userq_mgr->userq_mutex);
-	mutex_unlock(&adev->userq_mutex);
 	mutex_destroy(&userq_mgr->userq_mutex);
 }
@@ -1142,25 +1149,23 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
 {
 	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
 	struct amdgpu_usermode_queue *queue;
-	struct amdgpu_userq_mgr *uqm, *tmp;
-	int queue_id;
+	struct amdgpu_userq_mgr *uqm;
+	unsigned long queue_id;
 	int r;

 	if (!ip_mask)
 		return 0;

-	guard(mutex)(&adev->userq_mutex);
-	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+	xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+		uqm = queue->userq_mgr;
 		cancel_delayed_work_sync(&uqm->resume_work);
 		guard(mutex)(&uqm->userq_mutex);
-		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
-			if (adev->in_s0ix)
-				r = amdgpu_userq_preempt_helper(uqm, queue);
-			else
-				r = amdgpu_userq_unmap_helper(uqm, queue);
-			if (r)
-				return r;
-		}
+		if (adev->in_s0ix)
+			r = amdgpu_userq_preempt_helper(uqm, queue);
+		else
+			r = amdgpu_userq_unmap_helper(uqm, queue);
+		if (r)
+			return r;
 	}

 	return 0;
 }
@@ -1169,24 +1174,22 @@ int amdgpu_userq_resume(struct amdgpu_device *adev)
 {
 	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
 	struct amdgpu_usermode_queue *queue;
-	struct amdgpu_userq_mgr *uqm, *tmp;
-	int queue_id;
+	struct amdgpu_userq_mgr *uqm;
+	unsigned long queue_id;
 	int r;

 	if (!ip_mask)
 		return 0;

-	guard(mutex)(&adev->userq_mutex);
-	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+	xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+		uqm = queue->userq_mgr;
 		guard(mutex)(&uqm->userq_mutex);
-		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
-			if (adev->in_s0ix)
-				r = amdgpu_userq_restore_helper(uqm, queue);
-			else
-				r = amdgpu_userq_map_helper(uqm, queue);
-			if (r)
-				return r;
-		}
+		if (adev->in_s0ix)
+			r = amdgpu_userq_restore_helper(uqm, queue);
+		else
+			r = amdgpu_userq_map_helper(uqm, queue);
+		if (r)
+			return r;
 	}

 	return 0;
@@ -1197,33 +1200,31 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
 {
 	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
 	struct amdgpu_usermode_queue *queue;
-	struct amdgpu_userq_mgr *uqm, *tmp;
-	int queue_id;
+	struct amdgpu_userq_mgr *uqm;
+	unsigned long queue_id;
 	int ret = 0, r;

 	/* only need to stop gfx/compute */
 	if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
 		return 0;

-	mutex_lock(&adev->userq_mutex);
 	if (adev->userq_halt_for_enforce_isolation)
 		dev_warn(adev->dev, "userq scheduling already stopped!\n");
 	adev->userq_halt_for_enforce_isolation = true;
-	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+	xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+		uqm = queue->userq_mgr;
 		cancel_delayed_work_sync(&uqm->resume_work);
 		mutex_lock(&uqm->userq_mutex);
-		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
-			if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
-			     (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
-			    (queue->xcp_id == idx)) {
-				r = amdgpu_userq_preempt_helper(uqm, queue);
-				if (r)
-					ret = r;
-			}
+		if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+		     (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+		    (queue->xcp_id == idx)) {
+			r = amdgpu_userq_preempt_helper(uqm, queue);
+			if (r)
+				ret = r;
 		}
 		mutex_unlock(&uqm->userq_mutex);
 	}
-	mutex_unlock(&adev->userq_mutex);
+
 	return ret;
 }
@@ -1232,21 +1233,20 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
 {
 	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
 	struct amdgpu_usermode_queue *queue;
-	struct amdgpu_userq_mgr *uqm, *tmp;
-	int queue_id;
+	struct amdgpu_userq_mgr *uqm;
+	unsigned long queue_id;
 	int ret = 0, r;

 	/* only need to stop gfx/compute */
 	if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
 		return 0;

-	mutex_lock(&adev->userq_mutex);
 	if (!adev->userq_halt_for_enforce_isolation)
 		dev_warn(adev->dev, "userq scheduling already started!\n");
 	adev->userq_halt_for_enforce_isolation = false;
-	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+	xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+		uqm = queue->userq_mgr;
 		mutex_lock(&uqm->userq_mutex);
-		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
 			if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
 			     (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
 			    (queue->xcp_id == idx)) {
@@ -1254,10 +1254,9 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
 				if (r)
 					ret = r;
 			}
-		}
 		mutex_unlock(&uqm->userq_mutex);
 	}
-	mutex_unlock(&adev->userq_mutex);
+
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 036d8dd585cd..09da0617bfa2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -96,11 +96,15 @@ struct amdgpu_userq_funcs {

 /* Usermode queues for gfx */
 struct amdgpu_userq_mgr {
-	struct idr			userq_idr;
+	/**
+	 * @userq_mgr_xa: Per-process user queue map (queue ID → queue)
+	 * Key: queue_id (unique ID within the process's userq manager)
+	 * Value: struct amdgpu_usermode_queue
+	 */
+	struct xarray			userq_mgr_xa;
 	struct mutex			userq_mutex;
 	struct amdgpu_device		*adev;
 	struct delayed_work		resume_work;
-	struct list_head		list;
 	struct drm_file			*file;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 761bad98da3e..2aeeaa954882 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -537,7 +537,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 	}

 	/* Retrieve the user queue */
-	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
+	queue = xa_load(&userq_mgr->userq_mgr_xa, args->queue_id);
 	if (!queue) {
 		r = -ENOENT;
 		goto put_gobj_write;
@@ -899,7 +899,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
 	 */
 	num_fences = dma_fence_dedup_array(fences, num_fences);

-	waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id);
+	waitq = xa_load(&userq_mgr->userq_mgr_xa, wait_info->waitq_id);
 	if (!waitq) {
 		r = -EINVAL;
 		goto free_fences;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 5c63480dda9c..9894a3eed215 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -205,10 +205,10 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
 	int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev);
 	struct mes_detect_and_reset_queue_input input;
 	struct amdgpu_usermode_queue *queue;
-	struct amdgpu_userq_mgr *uqm, *tmp;
 	unsigned int hung_db_num = 0;
-	int queue_id, r, i;
+	unsigned long queue_id;
 	u32 db_array[8];
+	int r, i;

 	if (db_array_size > 8) {
 		dev_err(adev->dev, "DB array size (%d vs 8) too small\n",
@@ -227,16 +227,14 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
 	if (r) {
 		dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n", r);
 	} else if (hung_db_num) {
-		list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
-			idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
-				if (queue->queue_type == queue_type) {
-					for (i = 0; i < hung_db_num; i++) {
-						if (queue->doorbell_index == db_array[i]) {
-							queue->state = AMDGPU_USERQ_STATE_HUNG;
-							atomic_inc(&adev->gpu_reset_counter);
-							amdgpu_userq_fence_driver_force_completion(queue);
-							drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
-						}
+		xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+			if (queue->queue_type == queue_type) {
+				for (i = 0; i < hung_db_num; i++) {
+					if (queue->doorbell_index == db_array[i]) {
+						queue->state = AMDGPU_USERQ_STATE_HUNG;
+						atomic_inc(&adev->gpu_reset_counter);
+						amdgpu_userq_fence_driver_force_completion(queue);
+						drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
 					}
 				}
 			}
-- cgit v1.2.3

From 97010d773208c4b800e4af1063e58a97c056f40b Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:01:41 +0200
Subject: drm/amd/display: Determine DVI-I connector type (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DC determines the DRM connector type based on the signal type,
which becomes problematic when a connector may support different
signal types, such as DVI-I. With this patch, it is now determined
according to the actual connector type for DVI-D and DVI-I connectors.

Also set the HPD (hotplug detection) flag for DVI-I connectors to
prevent regressing their digital functionality, which has already
been working. A subsequent commit will also implement polling for
DVI-I.

v2: Only use connector type for DVI to prevent regressions
for other signal types.
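To illustrate the decision this introduces (a minimal sketch only; the
authoritative change is in the diff below, and the CONNECTOR_ID_* values
come from the VBIOS object table):

	/* Sketch: a DVI signal type alone cannot tell DVI-D from DVI-I,
	 * so the connector id reported by the VBIOS breaks the tie. */
	static int dvi_connector_type(uint32_t connector_id)
	{
		if (connector_id == CONNECTOR_ID_SINGLE_LINK_DVII ||
		    connector_id == CONNECTOR_ID_DUAL_LINK_DVII)
			return DRM_MODE_CONNECTOR_DVII;	/* analog + digital */
		return DRM_MODE_CONNECTOR_DVID;		/* digital only */
	}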
Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 5b446e4000ef..9e260ca04e1e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8263,7 +8263,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
 	return 0;
 }

-static int to_drm_connector_type(enum signal_type st)
+static int to_drm_connector_type(enum signal_type st, uint32_t connector_id)
 {
 	switch (st) {
 	case SIGNAL_TYPE_HDMI_TYPE_A:
@@ -8279,6 +8279,10 @@ static int to_drm_connector_type(enum signal_type st)
 		return DRM_MODE_CONNECTOR_DisplayPort;
 	case SIGNAL_TYPE_DVI_DUAL_LINK:
 	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+		if (connector_id == CONNECTOR_ID_SINGLE_LINK_DVII ||
+		    connector_id == CONNECTOR_ID_DUAL_LINK_DVII)
+			return DRM_MODE_CONNECTOR_DVII;
+
 		return DRM_MODE_CONNECTOR_DVID;
 	case SIGNAL_TYPE_VIRTUAL:
 		return DRM_MODE_CONNECTOR_VIRTUAL;
@@ -8668,6 +8672,7 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
 			link->link_enc->features.dp_ycbcr420_supported ? true : false;
 		break;
 	case DRM_MODE_CONNECTOR_DVID:
+	case DRM_MODE_CONNECTOR_DVII:
 		aconnector->base.polled = DRM_CONNECTOR_POLL_HPD;
 		break;
 	default:
@@ -8871,7 +8876,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
 		goto out_free;
 	}

-	connector_type = to_drm_connector_type(link->connector_signal);
+	connector_type = to_drm_connector_type(link->connector_signal, link->link_id.id);

 	res = drm_connector_init_with_ddc(
 			dm->ddev,
-- cgit v1.2.3

From 7bb02a34c2ba69bbe0329fc3049c795f7ba66b9a Mon Sep 17 00:00:00 2001
From: Sunil Khatri
Date: Fri, 24 Oct 2025 10:14:56 +0530
Subject: drm/amdkfd: add missing return value check for range

amdgpu_hmm_range_alloc() could fail in case of a low memory
condition, hence we should check its return value.

Signed-off-by: Sunil Khatri
Reviewed-by: Shirish S
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f041643308ca..7f0ab73e2396 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1738,6 +1738,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		WRITE_ONCE(p->svms.faulting_task, current);

 		range = amdgpu_hmm_range_alloc(NULL);
+		if (unlikely(!range)) {
+			r = -ENOMEM;
+			goto free_ctx;
+		}
+
 		r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
 					       readonly, owner, range);
-- cgit v1.2.3

From 22d9e071e5028815accf26956ed0b9634a4c0b8d Mon Sep 17 00:00:00 2001
From: Philip Yang
Date: Wed, 15 Oct 2025 15:17:54 -0400
Subject: drm/amdkfd: Dequeue user queues when process mm released

Move dequeuing and destroying user queues from kfd_process_wq_release
to the mmu notifier release callback, to ensure there is no system
memory access from the GPU, because the process memory is going to be
freed by the CPU after the mmu release notifier callback returns.

Destroying the queues releases the svm prange queue_refcount; this
also removes the false positive warning message "Freeing queue vital
buffer" if the application crashes or is killed.
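The ordering constraint can be sketched as follows (illustrative only;
the real callback is kfd_process_notifier_release(), and the helper
names are taken from the diff below):

	/* Sketch: the mmu notifier .release callback runs before exit_mmap()
	 * frees the address space, so queue teardown must happen here. */
	static void sketch_notifier_release(struct mmu_notifier *mn,
					    struct mm_struct *mm)
	{
		struct kfd_process *p = container_of(mn, struct kfd_process, mmu_notifier);

		kfd_process_dequeue_from_all_devices(p);	/* stop GPU access first */
		pqm_uninit(&p->pqm);				/* then destroy the queues */
		/* only after this returns may the CPU free the process memory */
	}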
Suggested-by: Felix Kuehling
Signed-off-by: Philip Yang
Reviewed-by: Felix Kuehling
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ddfe30c13e9d..f5d173f1ca3b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1162,9 +1162,6 @@ static void kfd_process_wq_release(struct work_struct *work)
 					     release_work);
 	struct dma_fence *ef;

-	kfd_process_dequeue_from_all_devices(p);
-	pqm_uninit(&p->pqm);
-
 	/*
 	 * If GPU in reset, user queues may still running, wait for reset complete.
 	 */
@@ -1226,6 +1223,14 @@ static void kfd_process_notifier_release_internal(struct kfd_process *p)
 	cancel_delayed_work_sync(&p->eviction_work);
 	cancel_delayed_work_sync(&p->restore_work);

+	/*
+	 * Dequeue and destroy user queues, it is not safe for GPU to access
+	 * system memory after mmu release notifier callback returns because
+	 * exit_mmap free process memory afterwards.
+	 */
+	kfd_process_dequeue_from_all_devices(p);
+	pqm_uninit(&p->pqm);
+
 	for (i = 0; i < p->n_pdds; i++) {
 		struct kfd_process_device *pdd = p->pdds[i];
-- cgit v1.2.3

From a0559012a18a5a6ad87516e982892765a403b8ab Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Fri, 10 Oct 2025 15:21:02 -0400
Subject: drm/amdgpu/userq: fix SDMA and compute validation

The CSA and EOP buffers have different alignment requirements.
Hardcode them for now as a bug fix. A proper query will be added
in a subsequent patch.

v2: verify gfx shadow helper callback (Prike)

Fixes: 9e46b8bb0539 ("drm/amdgpu: validate userq buffer virtual address and size")
Reviewed-by: Prike Liang
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 9894a3eed215..b1ee9473d628 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -252,7 +252,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 	struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
 	struct drm_amdgpu_userq_in *mqd_user = args_in;
 	struct amdgpu_mqd_prop *userq_props;
-	struct amdgpu_gfx_shadow_info shadow_info;
 	int r;

 	/* Structure to initialize MQD for userqueue using generic MQD init function */
@@ -278,8 +277,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 	userq_props->doorbell_index = queue->doorbell_index;
 	userq_props->fence_address = queue->fence_drv->gpu_addr;

-	if (adev->gfx.funcs->get_gfx_shadow_info)
-		adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true);
 	if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
 		struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;

@@ -297,7 +294,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 		}

 		r = amdgpu_userq_input_va_validate(queue, compute_mqd->eop_va,
-						   max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE));
+						   2048);
 		if (r)
 			goto free_mqd;

@@ -310,6 +307,14 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 		kfree(compute_mqd);
 	} else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
 		struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
+		struct amdgpu_gfx_shadow_info shadow_info;
+
+		if (adev->gfx.funcs->get_gfx_shadow_info) {
+			adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true);
+		} else {
+			r = -EINVAL;
+			goto free_mqd;
+		}

 		if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) {
 			DRM_ERROR("Invalid GFX MQD\n");
@@ -333,6 +338,10 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 					   shadow_info.shadow_size);
 		if (r)
 			goto free_mqd;
+		r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->csa_va,
+						   shadow_info.csa_size);
+		if (r)
+			goto free_mqd;

 		kfree(mqd_gfx_v11);
 	} else if (queue->queue_type == AMDGPU_HW_IP_DMA) {
@@ -351,7 +360,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 			goto free_mqd;
 		}
 		r = amdgpu_userq_input_va_validate(queue, mqd_sdma_v11->csa_va,
-						   shadow_info.csa_size);
+						   32);
 		if (r)
 			goto free_mqd;
-- cgit v1.2.3

From 1017e393ad6d61756f7f35a6eb059a42a40818e6 Mon Sep 17 00:00:00 2001
From: Sunil Khatri
Date: Fri, 24 Oct 2025 22:29:00 +0530
Subject: drm/amdkfd: fix the clean up when amdgpu_hmm_range_alloc fails

We need to unreserve the BOs too during clean up, along with
freeing the memory of the context.

Fixes: 7bb02a34c2ba ("drm/amdkfd: add missing return value check for range")
Signed-off-by: Sunil Khatri
Reviewed-by: Felix Kuehling
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 7f0ab73e2396..729aac81563c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1738,14 +1738,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		WRITE_ONCE(p->svms.faulting_task, current);

 		range = amdgpu_hmm_range_alloc(NULL);
-		if (unlikely(!range)) {
+		if (likely(range))
+			r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
+						       readonly, owner, range);
+		else
 			r = -ENOMEM;
-			goto free_ctx;
-		}
-
-		r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
-					       readonly, owner,
-					       range);
 		WRITE_ONCE(p->svms.faulting_task, NULL);
 		if (r) {
 			amdgpu_hmm_range_free(range);
-- cgit v1.2.3

From dfc74e37bdb487eed5ad90d0eac4055f60217fb0 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam
Date: Thu, 23 Oct 2025 19:54:16 +0530
Subject: drm/amdkfd: Fix use-after-free of HMM range in svm_range_validate_and_map()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function svm_range_validate_and_map() was freeing `range` when
amdgpu_hmm_range_get_pages() failed. But later, the code still used
the same `range` pointer and freed it again. This could cause a
use-after-free and double-free issue.

The fix sets `range = NULL` right after it is freed and checks for
`range` before using or freeing it again.

v2: Removed duplicate !r check in the condition for clarity.

v3: In amdgpu_hmm_range_get_pages(), when hmm_range_fault() fails, we
kvfree(pfns) but leave the pointer in hmm_range->hmm_pfns still
pointing to freed memory. The caller (or amdgpu_hmm_range_free(range))
may try to free range->hmm_range.hmm_pfns again, causing a double
free. Setting hmm_range->hmm_pfns = NULL immediately after
kvfree(pfns) prevents the double free. (Philip)

In svm_range_validate_and_map(), when r == 0, it means success →
range is not NULL. When r != 0, it means failure → we already made
range = NULL. So checking both (!r && range) is unnecessary because
the moment r == 0, we automatically know range exists and is safe
to use.
(Philip)

Fixes: 737da5363cc0 ("drm/amdgpu: update the functions to use amdgpu version of hmm")
Reported-by: Dan Carpenter
Cc: Philip Yang
Cc: Sunil Khatri
Cc: Christian König
Cc: Alex Deucher
Signed-off-by: Srinivasan Shanmugam
Reviewed-by: Philip Yang
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 1 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c    | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index 7e5a09b0bc78..518ca3f4db2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -221,6 +221,7 @@ retry:

 out_free_pfns:
 	kvfree(pfns);
+	hmm_range->hmm_pfns = NULL;
 out_free_range:
 	if (r == -EBUSY)
 		r = -EAGAIN;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 729aac81563c..ffb7b36e577c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1746,6 +1746,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		WRITE_ONCE(p->svms.faulting_task, NULL);
 		if (r) {
 			amdgpu_hmm_range_free(range);
+			range = NULL;
 			pr_debug("failed %d to get svm range pages\n", r);
 		}
 	} else {
@@ -1763,7 +1764,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 	svm_range_lock(prange);

 	/* Free backing memory of hmm_range if it was initialized
-	 * Overrride return value to TRY AGAIN only if prior returns
+	 * Override return value to TRY AGAIN only if prior returns
 	 * were successful
 	 */
 	if (range && !amdgpu_hmm_range_valid(range) && !r) {
@@ -1771,7 +1772,8 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		r = -EAGAIN;
 	}

 	/* Free the hmm range */
-	amdgpu_hmm_range_free(range);
+	if (range)
+		amdgpu_hmm_range_free(range);

 	if (!r && !list_empty(&prange->child_list)) {
-- cgit v1.2.3

From d64fd9da7ee95a344dc7e076e11e3e4def715510 Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:01:42 +0200
Subject: drm/amd/display: Add analog bit to edid_caps (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The new analog bit will be used with DVI-I connectors.
DVI-I connectors can connect to both digital and analog monitors,
and this bit will help distinguish between those.

v2: Sanitize analog bit based on connector type.
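For context, the bit comes from the EDID video input definition byte,
where bit 7 set means a digital sink (a minimal sketch; the driver uses
DRM_EDID_INPUT_DIGITAL exactly as in the diff below):

	/* Sketch: EDID byte 20 (input definition); bit 7 clear => analog sink. */
	static bool edid_sink_is_analog(const struct edid *edid)
	{
		return !(edid->input & DRM_EDID_INPUT_DIGITAL);
	}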
Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 1 +
 drivers/gpu/drm/amd/display/dc/dc_types.h                 | 5 +++++
 drivers/gpu/drm/amd/display/dc/link/link_detection.c      | 2 ++
 drivers/gpu/drm/amd/display/include/grph_object_id.h      | 7 +++++++
 4 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index cc21337a182f..eb2c587b0b9b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -131,6 +131,7 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
 	edid_caps->serial_number = edid_buf->serial;
 	edid_caps->manufacture_week = edid_buf->mfg_week;
 	edid_caps->manufacture_year = edid_buf->mfg_year;
+	edid_caps->analog = !(edid_buf->input & DRM_EDID_INPUT_DIGITAL);

 	drm_edid_get_monitor_name(edid_buf,
 				  edid_caps->display_name,
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 8f8ccde7ad94..9989c8af5174 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -185,6 +185,10 @@ struct dc_panel_patch {
 	unsigned int wait_after_dpcd_poweroff_ms;
 };

+/**
+ * struct dc_edid_caps - Capabilities read from EDID.
+ * @analog: Whether the monitor is analog. Used by DVI-I handling.
+ */
 struct dc_edid_caps {
 	/* sink identification */
 	uint16_t manufacturer_id;
@@ -213,6 +217,7 @@ struct dc_edid_caps {
 	bool hdr_supported;
 	bool rr_capable;
 	bool scdc_present;
+	bool analog;

 	struct dc_panel_patch panel_patch;
 };
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
index 82a9e52d5ae5..801c94efa7fc 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
@@ -1107,6 +1107,8 @@ static bool detect_link_and_local_sink(struct dc_link *link,
 			break;
 		}

+		sink->edid_caps.analog &= dc_connector_supports_analog(link->link_id.id);
+
 		// Check if edid is the same
 		if ((prev_sink) &&
 		    (edid_status == EDID_THE_SAME || edid_status == EDID_OK))
diff --git a/drivers/gpu/drm/amd/display/include/grph_object_id.h b/drivers/gpu/drm/amd/display/include/grph_object_id.h
index 54e33062b3c0..1386fa124e85 100644
--- a/drivers/gpu/drm/amd/display/include/grph_object_id.h
+++ b/drivers/gpu/drm/amd/display/include/grph_object_id.h
@@ -310,4 +310,11 @@ static inline bool dal_graphics_object_id_equal(
 	}
 	return false;
 }
+
+static inline bool dc_connector_supports_analog(const enum connector_id conn)
+{
+	return conn == CONNECTOR_ID_VGA ||
+	       conn == CONNECTOR_ID_SINGLE_LINK_DVII ||
+	       conn == CONNECTOR_ID_DUAL_LINK_DVII;
+}
 #endif
-- cgit v1.2.3

From 2394736e70665117ad78f208e794ea7df03c73f4 Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:01:43 +0200
Subject: drm/amd/display: Introduce MAX_LINK_ENCODERS (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are going to support analog encoders as well, not just digital,
so we need to make space for them in various arrays.

v2: Fix typo.
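The sizing is plain arithmetic over the engine ranges (sketch; the
constants are introduced in the diff below):

	/* DIGA..DIGG are 7 digital engines, DACA/DACB are 2 analog ones,
	 * so arrays indexed per link encoder need 7 + 2 = 9 entries. */
	#define MAX_DIG_LINK_ENCODERS 7
	#define MAX_DAC_LINK_ENCODERS 2
	#define MAX_LINK_ENCODERS (MAX_DIG_LINK_ENCODERS + MAX_DAC_LINK_ENCODERS)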
Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 .../gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c |  4 ++--
 drivers/gpu/drm/amd/display/dc/inc/core_types.h   |  8 ++++----
 drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 24 ++++++++++++++++++++++
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
index a180f68f711c..deb23d20bca6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
@@ -522,10 +522,10 @@ struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(
 struct link_encoder *link_enc_cfg_get_next_avail_link_enc(struct dc *dc)
 {
 	struct link_encoder *link_enc = NULL;
-	enum engine_id encs_assigned[MAX_DIG_LINK_ENCODERS];
+	enum engine_id encs_assigned[MAX_LINK_ENCODERS];
 	int i;

-	for (i = 0; i < MAX_DIG_LINK_ENCODERS; i++)
+	for (i = 0; i < MAX_LINK_ENCODERS; i++)
 		encs_assigned[i] = ENGINE_ID_UNKNOWN;

 	/* Add assigned encoders to list. */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 3fe590436e74..5ed2cd344804 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -274,7 +274,7 @@ struct resource_pool {
 	/* An array for accessing the link encoder objects that have been created.
 	 * Index in array corresponds to engine ID - viz. 0: ENGINE_ID_DIGA
 	 */
-	struct link_encoder *link_encoders[MAX_DIG_LINK_ENCODERS];
+	struct link_encoder *link_encoders[MAX_LINK_ENCODERS];
 	/* Number of DIG link encoder objects created - i.e. number of valid
 	 * entries in link_encoders array.
 	 */
@@ -514,7 +514,7 @@ struct pipe_ctx {
 struct link_enc_cfg_context {
 	enum link_enc_cfg_mode mode;
 	struct link_enc_assignment link_enc_assignments[MAX_PIPES];
-	enum engine_id link_enc_avail[MAX_DIG_LINK_ENCODERS];
+	enum engine_id link_enc_avail[MAX_LINK_ENCODERS];
 	struct link_enc_assignment transient_assignments[MAX_PIPES];
 };

@@ -526,8 +526,8 @@ struct resource_context {
 	uint8_t dp_clock_source_ref_count;
 	bool is_dsc_acquired[MAX_PIPES];
 	struct link_enc_cfg_context link_enc_cfg_ctx;
-	unsigned int dio_link_enc_to_link_idx[MAX_DIG_LINK_ENCODERS];
-	int dio_link_enc_ref_cnts[MAX_DIG_LINK_ENCODERS];
+	unsigned int dio_link_enc_to_link_idx[MAX_LINK_ENCODERS];
+	int dio_link_enc_ref_cnts[MAX_LINK_ENCODERS];
 	bool is_hpo_dp_stream_enc_acquired[MAX_HPO_DP2_ENCODERS];
 	unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS];
 	int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS];
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
index 62a39204fe0b..5e2813e9ae2f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
@@ -51,6 +51,30 @@
 #define MAX_LINKS (MAX_DPIA + MAX_CONNECTOR + MAX_VIRTUAL_LINKS)

+/**
+ * define MAX_DIG_LINK_ENCODERS - maximum number of digital encoders
+ *
+ * Digital encoders are ENGINE_ID_DIGA...G, there are at most 7,
+ * although not every GPU may have that many.
+ */
+#define MAX_DIG_LINK_ENCODERS 7
+
+/**
+ * define MAX_DAC_LINK_ENCODERS - maximum number of analog link encoders
+ *
+ * Analog encoders are ENGINE_ID_DACA/B, there are at most 2,
+ * although not every GPU may have that many. Modern GPUs typically
+ * don't have analog encoders.
+ */
+#define MAX_DAC_LINK_ENCODERS 2
+
+/**
+ * define MAX_LINK_ENCODERS - maximum number link encoders in total
+ *
+ * This includes both analog and digital encoders.
+ */
+#define MAX_LINK_ENCODERS (MAX_DIG_LINK_ENCODERS + MAX_DAC_LINK_ENCODERS)
+
 #define MAX_DIG_LINK_ENCODERS 7
 #define MAX_DWB_PIPES 1
 #define MAX_HPO_DP2_ENCODERS 4
-- cgit v1.2.3

From 69b29b894660429280dfff0daed7d183c2fae7a7 Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:01:44 +0200
Subject: drm/amd/display: Hook up DAC to bios_parser_encoder_control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable the codebase to use encoder_control() when the encoder
engine is one of the DACs.

The BIOS parser already supports calling the DAC1EncoderControl
function from the VBIOS, but it was not exposed anywhere.

Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index 154fd2c18e88..318be0bb5549 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -742,6 +742,22 @@ static enum bp_result bios_parser_encoder_control(
 {
 	struct bios_parser *bp = BP_FROM_DCB(dcb);

+	if (cntl->engine_id == ENGINE_ID_DACA) {
+		if (!bp->cmd_tbl.dac1_encoder_control)
+			return BP_RESULT_FAILURE;
+
+		return bp->cmd_tbl.dac1_encoder_control(
+			bp, cntl->action == ENCODER_CONTROL_ENABLE,
+			cntl->pixel_clock, ATOM_DAC1_PS2);
+	} else if (cntl->engine_id == ENGINE_ID_DACB) {
+		if (!bp->cmd_tbl.dac2_encoder_control)
+			return BP_RESULT_FAILURE;
+
+		return bp->cmd_tbl.dac2_encoder_control(
+			bp, cntl->action == ENCODER_CONTROL_ENABLE,
+			cntl->pixel_clock, ATOM_DAC1_PS2);
+	}
+
 	if (!bp->cmd_tbl.dig_encoder_control)
 		return BP_RESULT_FAILURE;
-- cgit v1.2.3

From 7fb4f254c8ebcd89e32c13c2a5758b0664f9fcc4 Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:01:45 +0200
Subject: drm/amd/display: Add SelectCRTC_Source to BIOS parser
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SelectCRTC_Source command will be used to change which CRTC
should be connected to which encoder.
For reference, see the legacy display code:
amdgpu_atombios_encoder_set_crtc_source

Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/bios/bios_parser.c  |  14 ++
 .../gpu/drm/amd/display/dc/bios/command_table.c    | 194 +++++++++++++++++++++
 .../gpu/drm/amd/display/dc/bios/command_table.h    |   3 +
 drivers/gpu/drm/amd/display/dc/dc_bios_types.h     |   3 +
 .../drm/amd/display/include/bios_parser_types.h    |   6 +-
 5 files changed, 215 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index 318be0bb5549..c7875edea321 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -736,6 +736,18 @@ static enum bp_result bios_parser_transmitter_control(
 	return bp->cmd_tbl.transmitter_control(bp, cntl);
 }

+static enum bp_result bios_parser_select_crtc_source(
+	struct dc_bios *dcb,
+	struct bp_crtc_source_select *bp_params)
+{
+	struct bios_parser *bp = BP_FROM_DCB(dcb);
+
+	if (!bp->cmd_tbl.select_crtc_source)
+		return BP_RESULT_FAILURE;
+
+	return bp->cmd_tbl.select_crtc_source(bp, bp_params);
+}
+
 static enum bp_result bios_parser_encoder_control(
 	struct dc_bios *dcb,
 	struct bp_encoder_control *cntl)
@@ -2845,6 +2857,8 @@ static const struct dc_vbios_funcs vbios_funcs = {
 	.is_device_id_supported = bios_parser_is_device_id_supported,

 	/* COMMANDS */
+	.select_crtc_source = bios_parser_select_crtc_source,
+
 	.encoder_control = bios_parser_encoder_control,

 	.transmitter_control = bios_parser_transmitter_control,
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
index 58e88778da7f..dbd84477ceb7 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
@@ -52,6 +52,7 @@ static void init_transmitter_control(struct bios_parser *bp);
 static void init_set_pixel_clock(struct bios_parser *bp);
 static void init_enable_spread_spectrum_on_ppll(struct bios_parser *bp);
 static void init_adjust_display_pll(struct bios_parser *bp);
+static void init_select_crtc_source(struct bios_parser *bp);
 static void init_dac_encoder_control(struct bios_parser *bp);
 static void init_dac_output_control(struct bios_parser *bp);
 static void init_set_crtc_timing(struct bios_parser *bp);
@@ -69,6 +70,7 @@ void dal_bios_parser_init_cmd_tbl(struct bios_parser *bp)
 	init_set_pixel_clock(bp);
 	init_enable_spread_spectrum_on_ppll(bp);
 	init_adjust_display_pll(bp);
+	init_select_crtc_source(bp);
 	init_dac_encoder_control(bp);
 	init_dac_output_control(bp);
 	init_set_crtc_timing(bp);
@@ -1609,6 +1611,198 @@ static enum bp_result adjust_display_pll_v3(
 	return result;
 }

+/*******************************************************************************
+ ********************************************************************************
+ **
+ **                  SELECT CRTC SOURCE
+ **
+ ********************************************************************************
+ *******************************************************************************/
+
+static enum bp_result select_crtc_source_v1(
+	struct bios_parser *bp,
+	struct bp_crtc_source_select *bp_params);
+static enum bp_result select_crtc_source_v2(
+	struct bios_parser *bp,
+	struct bp_crtc_source_select *bp_params);
+static enum bp_result select_crtc_source_v3(
+	struct bios_parser *bp,
+	struct bp_crtc_source_select *bp_params);
+
+static void init_select_crtc_source(struct bios_parser *bp)
+{
+	switch (BIOS_CMD_TABLE_PARA_REVISION(SelectCRTC_Source)) {
+	case 1:
+		bp->cmd_tbl.select_crtc_source = select_crtc_source_v1;
+		break;
+	case 2:
+		bp->cmd_tbl.select_crtc_source = select_crtc_source_v2;
+		break;
+	case 3:
+		bp->cmd_tbl.select_crtc_source = select_crtc_source_v3;
+		break;
+	default:
+		bp->cmd_tbl.select_crtc_source = NULL;
+		break;
+	}
+}
+
+static enum bp_result select_crtc_source_v1(
+	struct bios_parser *bp,
+	struct bp_crtc_source_select *bp_params)
+{
+	enum bp_result result = BP_RESULT_FAILURE;
+	SELECT_CRTC_SOURCE_PS_ALLOCATION params;
+
+	if (!bp->cmd_helper->controller_id_to_atom(bp_params->controller_id, &params.ucCRTC))
+		return BP_RESULT_BADINPUT;
+
+	switch (bp_params->engine_id) {
+	case ENGINE_ID_DACA:
+		params.ucDevice = ATOM_DEVICE_CRT1_INDEX;
+		break;
+	case ENGINE_ID_DACB:
+		params.ucDevice = ATOM_DEVICE_CRT2_INDEX;
+		break;
+	default:
+		return BP_RESULT_BADINPUT;
+	}
+
+	if (EXEC_BIOS_CMD_TABLE(SelectCRTC_Source, params))
+		result = BP_RESULT_OK;
+
+	return result;
+}
+
+static bool select_crtc_source_v2_encoder_id(
+	enum engine_id engine_id, uint8_t *out_encoder_id)
+{
+	uint8_t encoder_id = 0;
+
+	switch (engine_id) {
+	case ENGINE_ID_DIGA:
+		encoder_id = ASIC_INT_DIG1_ENCODER_ID;
+		break;
+	case ENGINE_ID_DIGB:
+		encoder_id = ASIC_INT_DIG2_ENCODER_ID;
+		break;
+	case ENGINE_ID_DIGC:
+		encoder_id = ASIC_INT_DIG3_ENCODER_ID;
+		break;
+	case ENGINE_ID_DIGD:
+		encoder_id = ASIC_INT_DIG4_ENCODER_ID;
+		break;
+	case ENGINE_ID_DIGE:
+		encoder_id = ASIC_INT_DIG5_ENCODER_ID;
+		break;
+	case ENGINE_ID_DIGF:
+		encoder_id = ASIC_INT_DIG6_ENCODER_ID;
+		break;
+	case ENGINE_ID_DIGG:
+		encoder_id = ASIC_INT_DIG7_ENCODER_ID;
+		break;
+	case ENGINE_ID_DACA:
+		encoder_id = ASIC_INT_DAC1_ENCODER_ID;
+		break;
+	case ENGINE_ID_DACB:
+		encoder_id = ASIC_INT_DAC2_ENCODER_ID;
+		break;
+	default:
+		return false;
+	}
+
+	*out_encoder_id = encoder_id;
+	return true;
+}
+
+static bool select_crtc_source_v2_encoder_mode(
+	enum signal_type signal_type, uint8_t *out_encoder_mode)
+{
+	uint8_t encoder_mode = 0;
+
+	switch (signal_type) {
+	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+	case SIGNAL_TYPE_DVI_DUAL_LINK:
+		encoder_mode = ATOM_ENCODER_MODE_DVI;
+		break;
+	case SIGNAL_TYPE_HDMI_TYPE_A:
+		encoder_mode = ATOM_ENCODER_MODE_HDMI;
+		break;
+	case SIGNAL_TYPE_LVDS:
+		encoder_mode = ATOM_ENCODER_MODE_LVDS;
+		break;
+	case SIGNAL_TYPE_RGB:
+		encoder_mode = ATOM_ENCODER_MODE_CRT;
+		break;
+	case SIGNAL_TYPE_DISPLAY_PORT:
+		encoder_mode = ATOM_ENCODER_MODE_DP;
+		break;
+	case SIGNAL_TYPE_DISPLAY_PORT_MST:
+		encoder_mode = ATOM_ENCODER_MODE_DP_MST;
+		break;
+	case SIGNAL_TYPE_EDP:
+		encoder_mode = ATOM_ENCODER_MODE_DP;
+		break;
+	default:
+		return false;
+	}
+
+	*out_encoder_mode = encoder_mode;
+	return true;
+}
+
+static enum bp_result select_crtc_source_v2(
+	struct bios_parser *bp,
+	struct bp_crtc_source_select *bp_params)
+{
+	enum bp_result result = BP_RESULT_FAILURE;
+	SELECT_CRTC_SOURCE_PARAMETERS_V3 params;
+
+	if (!bp->cmd_helper->controller_id_to_atom(bp_params->controller_id, &params.ucCRTC))
+		return BP_RESULT_BADINPUT;
+
+	if (!select_crtc_source_v2_encoder_id(
+			bp_params->engine_id,
+			&params.ucEncoderID))
+		return BP_RESULT_BADINPUT;
+	if (!select_crtc_source_v2_encoder_mode(
+			bp_params->sink_signal,
+			&params.ucEncodeMode))
+		return BP_RESULT_BADINPUT;
+
+	if (EXEC_BIOS_CMD_TABLE(SelectCRTC_Source, params))
+		result = BP_RESULT_OK;
+
+	return result;
+}
+
+static enum bp_result select_crtc_source_v3(
+	struct bios_parser *bp,
+	struct bp_crtc_source_select *bp_params)
+{
+	enum bp_result result = BP_RESULT_FAILURE;
+	SELECT_CRTC_SOURCE_PARAMETERS_V3 params;
+
+	if (!bp->cmd_helper->controller_id_to_atom(bp_params->controller_id, &params.ucCRTC))
+		return BP_RESULT_BADINPUT;
+
+	if (!select_crtc_source_v2_encoder_id(
+			bp_params->engine_id,
+			&params.ucEncoderID))
+		return BP_RESULT_BADINPUT;
+	if (!select_crtc_source_v2_encoder_mode(
+			bp_params->sink_signal,
+			&params.ucEncodeMode))
+		return BP_RESULT_BADINPUT;
+
+	params.ucDstBpc = bp_params->bit_depth;
+
+	if (EXEC_BIOS_CMD_TABLE(SelectCRTC_Source, params))
+		result = BP_RESULT_OK;
+
+	return result;
+}
+
 /*******************************************************************************
 ********************************************************************************
 **
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.h b/drivers/gpu/drm/amd/display/dc/bios/command_table.h
index ad533775e724..8b04b903e93d 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.h
@@ -52,6 +52,9 @@ struct cmd_tbl {
 	enum bp_result (*adjust_display_pll)(
 		struct bios_parser *bp,
 		struct bp_adjust_pixel_clock_parameters *bp_params);
+	enum bp_result (*select_crtc_source)(
+		struct bios_parser *bp,
+		struct bp_crtc_source_select *bp_params);
 	enum bp_result (*dac1_encoder_control)(
 		struct bios_parser *bp,
 		bool enable,
diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
index 5fa5e2b63fb7..545ce1e15eae 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
@@ -91,6 +91,9 @@ struct dc_vbios_funcs {
 		struct device_id id);

 	/* COMMANDS */
+	enum bp_result (*select_crtc_source)(
+		struct dc_bios *bios,
+		struct bp_crtc_source_select *bp_params);
 	enum bp_result (*encoder_control)(
 		struct dc_bios *bios,
 		struct bp_encoder_control *cntl);
diff --git a/drivers/gpu/drm/amd/display/include/bios_parser_types.h b/drivers/gpu/drm/amd/display/include/bios_parser_types.h
index 812377d9e48f..d9e58a6a0d36 100644
--- a/drivers/gpu/drm/amd/display/include/bios_parser_types.h
+++ b/drivers/gpu/drm/amd/display/include/bios_parser_types.h
@@ -135,12 +135,8 @@ struct bp_external_encoder_control {
 struct bp_crtc_source_select {
 	enum engine_id engine_id;
 	enum controller_id controller_id;
-	/* from GPU Tx aka asic_signal */
-	enum signal_type signal;
-	/* sink_signal may differ from asicSignal if Translator encoder */
 	enum signal_type sink_signal;
-	enum display_output_bit_depth display_output_bit_depth;
-	bool enable_dp_audio;
+	uint8_t bit_depth;
 };

 struct bp_transmitter_control {
-- cgit v1.2.3

From 727b179a8032c256b02b816661b2d96713f11e41 Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:01:46 +0200
Subject: drm/amd/display: Get maximum pixel clock from VBIOS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We will use this for validating the pixel clock when an analog
monitor is connected to VGA or DVI-I connectors.
For reference, see the legacy display code:
amdgpu_connector_vga_mode_valid

Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/bios/bios_parser.c           | 2 ++
 drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index c7875edea321..33d0ec38ded7 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -441,6 +441,7 @@ static enum bp_result get_firmware_info_v1_4(
 		le32_to_cpu(firmware_info->ulMinPixelClockPLL_Output) * 10;
 	info->pll_info.max_output_pxl_clk_pll_frequency =
 		le32_to_cpu(firmware_info->ulMaxPixelClockPLL_Output) * 10;
+	info->max_pixel_clock = le16_to_cpu(firmware_info->usMaxPixelClock) * 10;

 	if (firmware_info->usFirmwareCapability.sbfAccess.MemoryClockSS_Support)
 		/* Since there is no information on the SS, report conservative
@@ -497,6 +498,7 @@ static enum bp_result get_firmware_info_v2_1(
 	info->external_clock_source_frequency_for_dp =
 		le16_to_cpu(firmwareInfo->usUniphyDPModeExtClkFreq) * 10;
 	info->min_allowed_bl_level = firmwareInfo->ucMinAllowedBL_Level;
+	info->max_pixel_clock = le16_to_cpu(firmwareInfo->usMaxPixelClock) * 10;

 	/* There should be only one entry in the SS info table for Memory Clock
 	 */
diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
index cc467031651d..38a77fa9b4af 100644
--- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
+++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
@@ -169,6 +169,7 @@ struct dc_firmware_info {
 		uint32_t engine_clk_ss_percentage;
 	} feature;

+	uint32_t max_pixel_clock; /* in KHz */
 	uint32_t default_display_engine_pll_frequency; /* in KHz */
 	uint32_t external_clock_source_frequency_for_dp; /* in KHz */
 	uint32_t smu_gpu_pll_output_freq; /* in KHz */
-- cgit v1.2.3

From 8c8df54a2b4b6b3a229fa40ab7a1b0ce3edb5896 Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:01:47 +0200
Subject: drm/amd/display: Don't use stereo sync and audio on RGB signals (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Analog video signals on VGA or DVI-A (analog part of DVI-I)
don't support audio, so avoid calling any audio related
functions on analog signals.

Stereo sync was not set up for analog signals in the legacy
display code either, so there is no loss of functionality
if we omit it from DC for now.

Also add a dc_is_rgb_signal similar to other dc_is_*_signal.

v2: Added comment to clarify what we mean by RGB in this context.
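The resulting pattern is a simple signal-type guard at each audio call
site (sketch of the usage; dc_is_rgb_signal() is added in the diff below):

	/* Analog RGB streams carry no audio packets, so skip the audio hooks. */
	if (!dc_is_rgb_signal(pipe_ctx->stream->signal))
		dc->hwss.enable_audio_stream(pipe_ctx);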
Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c |  3 ++-
 drivers/gpu/drm/amd/display/dc/link/link_dpms.c          |  6 ++++--
 drivers/gpu/drm/amd/display/include/signal_types.h       | 12 ++++++++++++
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c
index 892907991f91..80344cbd1f99 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c
@@ -115,7 +115,8 @@ void setup_dio_stream_attribute(struct pipe_ctx *pipe_ctx)
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct dc_link *link = stream->link;

-	if (!dc_is_virtual_signal(stream->signal))
+	if (!dc_is_virtual_signal(stream->signal) &&
+	    !dc_is_rgb_signal(stream->signal))
 		stream_encoder->funcs->setup_stereo_sync(
 			stream_encoder,
 			pipe_ctx->stream_res.tg->inst,
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index dba8ec0988a1..fdefe27f4ba3 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -2371,7 +2371,8 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
 		set_avmute(pipe_ctx, true);
 	}

-	dc->hwss.disable_audio_stream(pipe_ctx);
+	if (!dc_is_rgb_signal(pipe_ctx->stream->signal))
+		dc->hwss.disable_audio_stream(pipe_ctx);

 	update_psp_stream_config(pipe_ctx, true);
 	dc->hwss.blank_stream(pipe_ctx);
@@ -2656,7 +2657,8 @@ void link_set_dpms_on(
 		enable_stream_features(pipe_ctx);
 	update_psp_stream_config(pipe_ctx, false);

-	dc->hwss.enable_audio_stream(pipe_ctx);
+	if (!dc_is_rgb_signal(pipe_ctx->stream->signal))
+		dc->hwss.enable_audio_stream(pipe_ctx);

 	if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) {
 		set_avmute(pipe_ctx, false);
diff --git a/drivers/gpu/drm/amd/display/include/signal_types.h b/drivers/gpu/drm/amd/display/include/signal_types.h
index a10d6b988aab..3a2c2d2fb629 100644
--- a/drivers/gpu/drm/amd/display/include/signal_types.h
+++ b/drivers/gpu/drm/amd/display/include/signal_types.h
@@ -118,6 +118,18 @@ static inline bool dc_is_dvi_signal(enum signal_type signal)
 	}
 }

+/**
+ * dc_is_rgb_signal() - Whether the signal is analog RGB.
+ *
+ * Returns whether the given signal type is an analog RGB signal
+ * that is used with a DAC on VGA or DVI-I connectors.
+ * Not to be confused with other uses of "RGB", such as RGB color space.
+ */
+static inline bool dc_is_rgb_signal(enum signal_type signal)
+{
+	return (signal == SIGNAL_TYPE_RGB);
+}
+
 static inline bool dc_is_tmds_signal(enum signal_type signal)
 {
 	switch (signal) {
-- cgit v1.2.3

From e3ea56e995a425a44870dfa357b8608101a67034 Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:01:48 +0200
Subject: drm/amd/display: Don't try to enable/disable HPD when unavailable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

VGA connectors don't have HPD (hotplug detection), so don't touch
any HPD related registers for VGA. Determine whether hotplug
detection is available by checking whether the link's interrupt
source is valid.
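The availability test is a single comparison against the link's HPD
interrupt source (sketch; mirrors the debugfs change below):

	/* A connector without a hotplug pin (e.g. VGA) never gets a
	 * valid HPD interrupt source assigned. */
	bool supports_hpd = link->irq_source_hpd != DC_IRQ_SOURCE_INVALID;

	if (supports_hpd && !disable_hpd)
		dc_link_enable_hpd(link);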
Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index f263e1a4537e..cb4bb67289a4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -759,6 +759,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
 	int max_param_num = 11;
 	enum dp_test_pattern test_pattern = DP_TEST_PATTERN_UNSUPPORTED;
 	bool disable_hpd = false;
+	bool supports_hpd = link->irq_source_hpd != DC_IRQ_SOURCE_INVALID;
 	bool valid_test_pattern = false;
 	uint8_t param_nums = 0;
 	/* init with default 80bit custom pattern */
@@ -850,7 +851,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
 	 * because it might have been disabled after a test pattern was set.
 	 * AUX depends on HPD * sequence dependent, do not move!
 	 */
-	if (!disable_hpd)
+	if (supports_hpd && !disable_hpd)
 		dc_link_enable_hpd(link);

 	prefer_link_settings.lane_count = link->verified_link_cap.lane_count;
@@ -888,7 +889,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
 	 * Need disable interrupt to avoid SW driver disable DP output. This is
 	 * done after the test pattern is set.
 	 */
-	if (valid_test_pattern && disable_hpd)
+	if (valid_test_pattern && supports_hpd && disable_hpd)
 		dc_link_disable_hpd(link);

 	kfree(wr_buf);
-- cgit v1.2.3

From c2192872d99b8bc7d1ac2494e850fb6fb6c915eb Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:01:49 +0200
Subject: drm/amd/display: Determine early if a link has supported encoders (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid initializing DDC, HPD, etc. when we know that the link is not
going to be constructed because it has no supported encoders.

This is mainly useful for old GPUs which may have encoders such as
TRAVIS and NUTMEG that are not yet supported by DC.

Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/link/link_factory.c | 23 ++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
index f06af98d46ee..8899797f64a9 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -451,6 +451,14 @@ static enum channel_id get_ddc_line(struct dc_link *link)
 	return channel;
 }

+static bool transmitter_supported(const enum transmitter transmitter)
+{
+	return transmitter != TRANSMITTER_UNKNOWN &&
+	       transmitter != TRANSMITTER_NUTMEG_CRT &&
+	       transmitter != TRANSMITTER_TRAVIS_CRT &&
+	       transmitter != TRANSMITTER_TRAVIS_LCD;
+}
+
 static bool construct_phy(struct dc_link *link,
 			  const struct link_init_data *init_params)
 {
@@ -482,6 +490,17 @@ static bool construct_phy(struct dc_link *link,
 	link->link_id =
 		bios->funcs->get_connector_id(bios, init_params->connector_index);

+	/* Determine early if the link has any supported encoders,
+	 * so that we avoid initializing DDC and HPD, etc.
+	 */
+	bp_funcs->get_src_obj(bios, link->link_id, 0, &enc_init_data.encoder);
+	enc_init_data.transmitter = translate_encoder_to_transmitter(enc_init_data.encoder);
+
+	if (!transmitter_supported(enc_init_data.transmitter)) {
+		DC_LOG_WARNING("link_id %d has unsupported encoder\n", link->link_id.id);
+		return false;
+	}
+
 	link->ep_type = DISPLAY_ENDPOINT_PHY;

 	DC_LOG_DC("BIOS object table - link_id: %d", link->link_id.id);
@@ -611,16 +630,12 @@ static bool construct_phy(struct dc_link *link,
 		dal_ddc_get_line(get_ddc_pin(link->ddc));

 	enc_init_data.ctx = dc_ctx;
-	bp_funcs->get_src_obj(dc_ctx->dc_bios, link->link_id, 0,
-			      &enc_init_data.encoder);
 	enc_init_data.connector = link->link_id;
 	enc_init_data.channel = get_ddc_line(link);
 	enc_init_data.hpd_source = get_hpd_line(link);

 	link->hpd_src = enc_init_data.hpd_source;

-	enc_init_data.transmitter =
-		translate_encoder_to_transmitter(enc_init_data.encoder);
 	link->link_enc =
 		link->dc->res_pool->funcs->link_enc_create(dc_ctx, &enc_init_data);
-- cgit v1.2.3

From 5834c33fd3f6f2a26dd4d6d4bbc7b8ed1c2ac4aa Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:01:50 +0200
Subject: drm/amd/display: Add concept of analog encoders (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a num_analog_stream_encoders field to indicate how many
analog stream encoders are present. When analog stream encoders
are present, create them.

Additionally, add an analog_engine field to link encoders and
search for supported analog encoders in the BIOS for each link.
When connecting an RGB signal, search for analog stream encoders.

The actual DCE analog link and stream encoder is going to be
added in a subsequent commit.

v2: Add check to see if an analog engine is really supported.
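Conceptually, each link's VBIOS source objects are scanned for a DAC
encoder id (condensed sketch of find_analog_engine() from the diff below):

	/* Sketch: walk up to three source objects; a DAC id maps to a DAC engine. */
	for (i = 0; i < 3; i++) {
		if (bp->funcs->get_src_obj(bp, link->link_id, i, &encoder) != BP_RESULT_OK)
			return ENGINE_ID_UNKNOWN;
		if (encoder.id == ENCODER_ID_INTERNAL_DAC1 ||
		    encoder.id == ENCODER_ID_INTERNAL_KLDSCP_DAC1)
			return ENGINE_ID_DACA;
	}
	return ENGINE_ID_UNKNOWN;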
Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/core/dc_resource.c  |  8 +++++
 .../gpu/drm/amd/display/dc/inc/hw/link_encoder.h   |  2 ++
 drivers/gpu/drm/amd/display/dc/inc/resource.h      |  1 +
 drivers/gpu/drm/amd/display/dc/link/link_factory.c | 36 +++++++++++++++++++++-
 .../display/dc/resource/dce100/dce100_resource.c   |  7 +++--
 5 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 89fe72820f6a..dc0c4065a92c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -446,6 +446,14 @@ bool resource_construct(
 				DC_ERR("DC: failed to create stream_encoder!\n");
 			pool->stream_enc_count++;
 		}
+
+		for (i = 0; i < caps->num_analog_stream_encoder; i++) {
+			pool->stream_enc[caps->num_stream_encoder + i] =
+				create_funcs->create_stream_encoder(ENGINE_ID_DACA + i, ctx);
+			if (pool->stream_enc[caps->num_stream_encoder + i] == NULL)
+				DC_ERR("DC: failed to create analog stream_encoder %d!\n", i);
+			pool->stream_enc_count++;
+		}
 	}

 	pool->hpo_dp_stream_enc_count = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
index 08c16ba52a51..df512920a9fa 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
@@ -47,6 +47,7 @@ struct encoder_init_data {
 	enum hpd_source_id hpd_source;
 	/* TODO: in DAL2, here was pointer to EventManagerInterface */
 	struct graphics_object_id encoder;
+	enum engine_id analog_engine;
 	struct dc_context *ctx;
 	enum transmitter transmitter;
 };
@@ -83,6 +84,7 @@ struct link_encoder {
 	struct graphics_object_id connector;
 	uint32_t output_signals;
 	enum engine_id preferred_engine;
+	enum engine_id analog_engine;
 	struct encoder_feature_support features;
 	enum transmitter transmitter;
 	enum hpd_source_id hpd_source;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 4e26a16a8743..79746d931471 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -49,6 +49,7 @@ struct resource_caps {
 	int num_video_plane;
 	int num_audio;
 	int num_stream_encoder;
+	int num_analog_stream_encoder;
 	int num_pll;
 	int num_dwb;
 	int num_ddc;
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
index 8899797f64a9..ebc3798e7d25 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -451,6 +451,32 @@ static enum channel_id get_ddc_line(struct dc_link *link)
 	return channel;
 }

+static enum engine_id find_analog_engine(struct dc_link *link)
+{
+	struct dc_bios *bp = link->ctx->dc_bios;
+	struct graphics_object_id encoder = {0};
+	enum bp_result bp_result = BP_RESULT_OK;
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		bp_result = bp->funcs->get_src_obj(bp, link->link_id, i, &encoder);
+
+		if (bp_result != BP_RESULT_OK)
+			return ENGINE_ID_UNKNOWN;
+
+		switch (encoder.id) {
+		case ENCODER_ID_INTERNAL_DAC1:
+		case ENCODER_ID_INTERNAL_KLDSCP_DAC1:
+			return ENGINE_ID_DACA;
+		case ENCODER_ID_INTERNAL_DAC2:
+		case ENCODER_ID_INTERNAL_KLDSCP_DAC2:
+			return ENGINE_ID_DACB;
+		}
+	}
+
+	return ENGINE_ID_UNKNOWN;
+}
+
 static bool transmitter_supported(const enum transmitter transmitter)
 {
 	return transmitter != TRANSMITTER_UNKNOWN &&
@@ -459,6 +485,12 @@ static bool transmitter_supported(const enum transmitter transmitter)
 	       transmitter != TRANSMITTER_TRAVIS_LCD;
 }

+static bool analog_engine_supported(const enum engine_id engine_id)
+{
+	return engine_id == ENGINE_ID_DACA ||
+	       engine_id == ENGINE_ID_DACB;
+}
+
 static bool construct_phy(struct dc_link *link,
 			  const struct link_init_data *init_params)
 {
@@ -495,8 +527,10 @@ static bool construct_phy(struct dc_link *link,
 	 */
 	bp_funcs->get_src_obj(bios, link->link_id, 0, &enc_init_data.encoder);
 	enc_init_data.transmitter = translate_encoder_to_transmitter(enc_init_data.encoder);
+	enc_init_data.analog_engine = find_analog_engine(link);

-	if (!transmitter_supported(enc_init_data.transmitter)) {
+	if (!transmitter_supported(enc_init_data.transmitter) &&
+	    !analog_engine_supported(enc_init_data.analog_engine)) {
 		DC_LOG_WARNING("link_id %d has unsupported encoder\n", link->link_id.id);
 		return false;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
index 550466de1a66..7573f5130ea9 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
@@ -954,6 +954,10 @@ struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link(
 	int i;
 	int j = -1;
 	struct dc_link *link = stream->link;
+	enum engine_id preferred_engine = link->link_enc->preferred_engine;
+
+	if (dc_is_rgb_signal(stream->signal))
+		preferred_engine = link->link_enc->analog_engine;

 	for (i = 0; i < pool->stream_enc_count; i++) {
 		if (!res_ctx->is_stream_enc_acquired[i] &&
@@ -962,8 +966,7 @@ struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link(
 			 * in daisy chain use case
 			 */
 			j = i;
-			if (pool->stream_enc[i]->id ==
-					link->link_enc->preferred_engine)
+			if (pool->stream_enc[i]->id == preferred_engine)
 				return pool->stream_enc[i];
 		}
 	}
-- cgit v1.2.3

From deb072d241c906ae378bc88ff91dfcdc34ebe71a Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:01:51 +0200
Subject: drm/amd/display: Implement DCE analog stream encoders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add analog stream encoders for DCE which will be used when
connecting an analog display through VGA or DVI-I.

Considering that all stream encoder functions currently deal with
digital streams, there is nothing for an analog stream encoder to
do, making them basically a no-op. That being said, we still need
some kind of stream encoder to represent an analog stream, and it
is beneficial to split them from digital stream encoders in the
code to make sure they don't accidentally write any DIG* registers.

On supported chips there is currently up to 1 analog encoder,
which is DACA. There are references to DACB in some code such as
VBIOS commands and register files but it seems to be not present
on DCE 6 and newer.

Set num_analog_stream_encoder = 1 so that we can support the
analog connectors on DCE 6-10, for now.
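Because there is nothing to program for an analog stream, the encoder
reduces to an all-NULL ops table plus identity data (sketch; matches the
constructor added below):

	static const struct stream_encoder_funcs dce110_an_str_enc_funcs = {0};

	/* Sketch: the analog constructor only fills in identity, never DIG state. */
	enc110->base.funcs = &dce110_an_str_enc_funcs;	/* every hook stays NULL */
	enc110->base.id = eng_id;			/* e.g. ENGINE_ID_DACA */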
Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c    | 14 ++++++++++++++
 drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h    |  5 +++++
 .../drm/amd/display/dc/resource/dce100/dce100_resource.c   |  6 ++++++
 .../gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c |  8 ++++++++
 .../gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c |  8 ++++++++
 5 files changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
index 1130d7619b26..f8996ee2856b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
@@ -1567,3 +1567,17 @@ void dce110_stream_encoder_construct(
 	enc110->se_shift = se_shift;
 	enc110->se_mask = se_mask;
 }
+
+static const struct stream_encoder_funcs dce110_an_str_enc_funcs = {0};
+
+void dce110_analog_stream_encoder_construct(
+	struct dce110_stream_encoder *enc110,
+	struct dc_context *ctx,
+	struct dc_bios *bp,
+	enum engine_id eng_id)
+{
+	enc110->base.funcs = &dce110_an_str_enc_funcs;
+	enc110->base.ctx = ctx;
+	enc110->base.id = eng_id;
+	enc110->base.bp = bp;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h
index cc5020a8e1e1..068de1392121 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h
@@ -708,6 +708,11 @@ void dce110_stream_encoder_construct(
 	const struct dce_stream_encoder_shift *se_shift,
 	const struct dce_stream_encoder_mask *se_mask);

+void dce110_analog_stream_encoder_construct(
+	struct dce110_stream_encoder *enc110,
+	struct dc_context *ctx,
+	struct dc_bios *bp,
+	enum engine_id eng_id);

 void dce110_se_audio_mute_control(
 	struct stream_encoder *enc, bool mute);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
index 7573f5130ea9..3856477981c6 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
@@ -375,6 +375,7 @@ static const struct bios_registers bios_regs = {
 static const struct resource_caps res_cap = {
 		.num_timing_generator = 6,
 		.num_audio = 6,
+		.num_analog_stream_encoder = 1,
 		.num_stream_encoder = 6,
 		.num_pll = 3,
 		.num_ddc = 6,
@@ -486,6 +487,11 @@ static struct stream_encoder *dce100_stream_encoder_create(
 	if (!enc110)
 		return NULL;

+	if (eng_id == ENGINE_ID_DACA || eng_id == ENGINE_ID_DACB) {
+		dce110_analog_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id);
+		return &enc110->base;
+	}
+
 	dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id,
 					&stream_enc_regs[eng_id], &se_shift, &se_mask);
 	return &enc110->base;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
index b75be6ad64f6..7195f435e3f7 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
@@ -373,6 +373,7 @@ static const struct bios_registers bios_regs = {
 static const struct resource_caps res_cap = {
 		.num_timing_generator = 6,
 		.num_audio = 6,
+		.num_analog_stream_encoder = 1,
 		.num_stream_encoder = 6,
 		.num_pll = 3,
 		.num_ddc = 6,
@@ -382,6 +383,7 @@ static const struct resource_caps res_cap_61 = {
 		.num_timing_generator = 4,
 		.num_audio = 6,
 		.num_stream_encoder = 6,
+		.num_analog_stream_encoder = 1,
 		.num_pll = 3,
 		.num_ddc = 6,
 };
@@ -389,6 +391,7 @@ static const struct resource_caps res_cap_64 = {
 		.num_timing_generator = 2,
 		.num_audio = 2,
+		.num_analog_stream_encoder = 1,
 		.num_stream_encoder = 2,
 		.num_pll = 3,
 		.num_ddc = 2,
@@ -599,6 +602,11 @@ static struct stream_encoder *dce60_stream_encoder_create(
 	if (!enc110)
 		return NULL;

+	if (eng_id == ENGINE_ID_DACA || eng_id == ENGINE_ID_DACB) {
+		dce110_analog_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id);
+		return &enc110->base;
+	}
+
 	dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id,
 					&stream_enc_regs[eng_id],
 					&se_shift, &se_mask);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
index c4a56ec60b82..e9b79db37d1b 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
@@ -375,6 +375,7 @@ static const struct bios_registers bios_regs = {
 static const struct resource_caps res_cap = {
 		.num_timing_generator = 6,
 		.num_audio = 6,
+		.num_analog_stream_encoder = 1,
 		.num_stream_encoder = 6,
 		.num_pll = 3,
 		.num_ddc = 6,
@@ -383,6 +384,7 @@ static const struct resource_caps res_cap_81 = {
 		.num_timing_generator = 4,
 		.num_audio = 7,
+		.num_analog_stream_encoder = 1,
 		.num_stream_encoder = 7,
 		.num_pll = 3,
 		.num_ddc = 6,
@@ -391,6 +393,7 @@ static const struct resource_caps res_cap_83 = {
 		.num_timing_generator = 2,
 		.num_audio = 6,
+		.num_analog_stream_encoder = 1,
 		.num_stream_encoder = 6,
 		.num_pll = 2,
 		.num_ddc = 2,
@@ -607,6 +610,11 @@ static struct stream_encoder *dce80_stream_encoder_create(
 	if (!enc110)
 		return NULL;

+	if (eng_id == ENGINE_ID_DACA || eng_id == ENGINE_ID_DACB) {
+		dce110_analog_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id);
+		return &enc110->base;
+	}
+
 	dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id,
 					&stream_enc_regs[eng_id],
 					&se_shift, &se_mask);
-- cgit v1.2.3

From 0fbe321a93ce995da90483f716f9372e8eaa6797 Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:01:52 +0200
Subject: drm/amd/display: Implement DCE analog link encoders (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We support two kinds of analog connections:

1. DVI-I, which allows both digital and analog signals:
The DC code base only allows 1 encoder per connector, and the
preferred engine type is still going to be digital. So, for DVI-I
to work, we need to make sure the pre-existing link encoder can
also work with analog signals.

2. VGA, which only supports analog signals:
For VGA, we need to create a link encoder that only works with
the DAC without perturbing any digital transmitter functionality.
Since dce110_link_encoder already supports analog DVI-I, just
reuse that code for VGA as well.

v2: Reduce code churn by reusing same link encoder for VGA and DVI-I.
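The two cases reduce to how the encoder advertises its signals (sketch;
the field names follow dce110_link_encoder_construct() in the diff below):

	/* DVI-I: the existing digital encoder additionally advertises analog. */
	if ((enc->connector.id == CONNECTOR_ID_SINGLE_LINK_DVII ||
	     enc->connector.id == CONNECTOR_ID_DUAL_LINK_DVII) &&
	    enc->analog_engine != ENGINE_ID_UNKNOWN)
		enc->output_signals |= SIGNAL_TYPE_RGB;

	/* VGA: analog-only encoder with no digital transmitter at all. */
	if (enc->preferred_engine == ENGINE_ID_UNKNOWN &&
	    enc->analog_engine != ENGINE_ID_UNKNOWN) {
		enc->preferred_engine = enc->analog_engine;
		enc->output_signals = SIGNAL_TYPE_RGB;
		enc->transmitter = TRANSMITTER_UNKNOWN;
	}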
Signed-off-by: Timur Kristóf Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dce/dce_link_encoder.c | 85 ++++++++++++++++++++++ .../gpu/drm/amd/display/dc/dce/dce_link_encoder.h | 16 ++-- .../display/dc/resource/dce100/dce100_resource.c | 16 +++- .../amd/display/dc/resource/dce60/dce60_resource.c | 19 ++++- .../amd/display/dc/resource/dce80/dce80_resource.c | 16 +++- 5 files changed, 141 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 0c50fe266c8a..87dbb8d7ed27 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -302,6 +302,10 @@ static void setup_panel_mode( if (ctx->dc->caps.psp_setup_panel_mode) return; + /* The code below is only applicable to encoders with a digital transmitter. */ + if (enc110->base.transmitter == TRANSMITTER_UNKNOWN) + return; + ASSERT(REG(DP_DPHY_INTERNAL_CTRL)); value = REG_READ(DP_DPHY_INTERNAL_CTRL); @@ -804,6 +808,33 @@ bool dce110_link_encoder_validate_dp_output( return true; } +static bool dce110_link_encoder_validate_rgb_output( + const struct dce110_link_encoder *enc110, + const struct dc_crtc_timing *crtc_timing) +{ + /* When the VBIOS doesn't specify any limits, use 400 MHz. + * The value comes from amdgpu_atombios_get_clock_info. + */ + uint32_t max_pixel_clock_khz = 400000; + + if (enc110->base.ctx->dc_bios->fw_info_valid && + enc110->base.ctx->dc_bios->fw_info.max_pixel_clock) { + max_pixel_clock_khz = + enc110->base.ctx->dc_bios->fw_info.max_pixel_clock; + } + + if (crtc_timing->pix_clk_100hz > max_pixel_clock_khz * 10) + return false; + + if (crtc_timing->display_color_depth != COLOR_DEPTH_888) + return false; + + if (crtc_timing->pixel_encoding != PIXEL_ENCODING_RGB) + return false; + + return true; +} + void dce110_link_encoder_construct( struct dce110_link_encoder *enc110, const struct encoder_init_data *init_data, @@ -824,6 +855,7 @@ void dce110_link_encoder_construct( enc110->base.connector = init_data->connector; enc110->base.preferred_engine = ENGINE_ID_UNKNOWN; + enc110->base.analog_engine = init_data->analog_engine; enc110->base.features = *enc_features; @@ -847,6 +879,11 @@ void dce110_link_encoder_construct( SIGNAL_TYPE_EDP | SIGNAL_TYPE_HDMI_TYPE_A; + if ((enc110->base.connector.id == CONNECTOR_ID_DUAL_LINK_DVII || + enc110->base.connector.id == CONNECTOR_ID_SINGLE_LINK_DVII) && + enc110->base.analog_engine != ENGINE_ID_UNKNOWN) + enc110->base.output_signals |= SIGNAL_TYPE_RGB; + /* For DCE 8.0 and 8.1, by design, UNIPHY is hardwired to DIG_BE. * SW always assign DIG_FE 1:1 mapped to DIG_FE for non-MST UNIPHY. * SW assign DIG_FE to non-MST UNIPHY first and MST last. So prefer @@ -885,6 +922,13 @@ void dce110_link_encoder_construct( enc110->base.preferred_engine = ENGINE_ID_DIGG; break; default: + if (init_data->analog_engine != ENGINE_ID_UNKNOWN) { + /* The connector is analog-only, ie. 
VGA */ + enc110->base.preferred_engine = init_data->analog_engine; + enc110->base.output_signals = SIGNAL_TYPE_RGB; + enc110->base.transmitter = TRANSMITTER_UNKNOWN; + break; + } ASSERT_CRITICAL(false); enc110->base.preferred_engine = ENGINE_ID_UNKNOWN; } @@ -939,6 +983,10 @@ bool dce110_link_encoder_validate_output_with_stream( is_valid = dce110_link_encoder_validate_dp_output( enc110, &stream->timing); break; + case SIGNAL_TYPE_RGB: + is_valid = dce110_link_encoder_validate_rgb_output( + enc110, &stream->timing); + break; case SIGNAL_TYPE_EDP: case SIGNAL_TYPE_LVDS: is_valid = stream->timing.pixel_encoding == PIXEL_ENCODING_RGB; @@ -969,6 +1017,10 @@ void dce110_link_encoder_hw_init( cntl.coherent = false; cntl.hpd_sel = enc110->base.hpd_source; + /* The code below is only applicable to encoders with a digital transmitter. */ + if (enc110->base.transmitter == TRANSMITTER_UNKNOWN) + return; + if (enc110->base.connector.id == CONNECTOR_ID_EDP) cntl.signal = SIGNAL_TYPE_EDP; @@ -1034,6 +1086,8 @@ void dce110_link_encoder_setup( /* DP MST */ REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 5); break; + case SIGNAL_TYPE_RGB: + break; default: ASSERT_CRITICAL(false); /* invalid mode ! */ @@ -1282,6 +1336,24 @@ void dce110_link_encoder_disable_output( struct bp_transmitter_control cntl = { 0 }; enum bp_result result; + switch (enc->analog_engine) { + case ENGINE_ID_DACA: + REG_UPDATE(DAC_ENABLE, DAC_ENABLE, 0); + break; + case ENGINE_ID_DACB: + /* DACB doesn't seem to be present on DCE6+, + * although there are references to it in the register file. + */ + DC_LOG_ERROR("%s DACB is unsupported\n", __func__); + break; + default: + break; + } + + /* The code below only applies to connectors that support digital signals. */ + if (enc->transmitter == TRANSMITTER_UNKNOWN) + return; + if (!dce110_is_dig_enabled(enc)) { /* OF_SKIP_POWER_DOWN_INACTIVE_ENCODER */ return; @@ -1726,6 +1798,7 @@ void dce60_link_encoder_construct( enc110->base.connector = init_data->connector; enc110->base.preferred_engine = ENGINE_ID_UNKNOWN; + enc110->base.analog_engine = init_data->analog_engine; enc110->base.features = *enc_features; @@ -1749,6 +1822,11 @@ void dce60_link_encoder_construct( SIGNAL_TYPE_EDP | SIGNAL_TYPE_HDMI_TYPE_A; + if ((enc110->base.connector.id == CONNECTOR_ID_DUAL_LINK_DVII || + enc110->base.connector.id == CONNECTOR_ID_SINGLE_LINK_DVII) && + enc110->base.analog_engine != ENGINE_ID_UNKNOWN) + enc110->base.output_signals |= SIGNAL_TYPE_RGB; + /* For DCE 8.0 and 8.1, by design, UNIPHY is hardwired to DIG_BE. * SW always assign DIG_FE 1:1 mapped to DIG_FE for non-MST UNIPHY. * SW assign DIG_FE to non-MST UNIPHY first and MST last. So prefer @@ -1787,6 +1865,13 @@ void dce60_link_encoder_construct( enc110->base.preferred_engine = ENGINE_ID_DIGG; break; default: + if (init_data->analog_engine != ENGINE_ID_UNKNOWN) { + /* The connector is analog-only, ie. 
VGA */ + enc110->base.preferred_engine = init_data->analog_engine; + enc110->base.output_signals = SIGNAL_TYPE_RGB; + enc110->base.transmitter = TRANSMITTER_UNKNOWN; + break; + } ASSERT_CRITICAL(false); enc110->base.preferred_engine = ENGINE_ID_UNKNOWN; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h index 261c70e01e33..c58b69bc319b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h @@ -101,18 +101,21 @@ SRI(DP_SEC_CNTL, DP, id), \ SRI(DP_VID_STREAM_CNTL, DP, id), \ SRI(DP_DPHY_FAST_TRAINING, DP, id), \ - SRI(DP_SEC_CNTL1, DP, id) + SRI(DP_SEC_CNTL1, DP, id), \ + SR(DAC_ENABLE) #endif #define LE_DCE80_REG_LIST(id)\ SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \ - LE_COMMON_REG_LIST_BASE(id) + LE_COMMON_REG_LIST_BASE(id), \ + SR(DAC_ENABLE) #define LE_DCE100_REG_LIST(id)\ LE_COMMON_REG_LIST_BASE(id), \ SRI(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \ SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \ - SR(DCI_MEM_PWR_STATUS) + SR(DCI_MEM_PWR_STATUS), \ + SR(DAC_ENABLE) #define LE_DCE110_REG_LIST(id)\ LE_COMMON_REG_LIST_BASE(id), \ @@ -181,6 +184,9 @@ struct dce110_link_enc_registers { uint32_t DP_DPHY_BS_SR_SWAP_CNTL; uint32_t DP_DPHY_HBR2_PATTERN_CONTROL; uint32_t DP_SEC_CNTL1; + + /* DAC registers */ + uint32_t DAC_ENABLE; }; struct dce110_link_encoder { @@ -215,10 +221,6 @@ bool dce110_link_encoder_validate_dvi_output( enum signal_type signal, const struct dc_crtc_timing *crtc_timing); -bool dce110_link_encoder_validate_rgb_output( - const struct dce110_link_encoder *enc110, - const struct dc_crtc_timing *crtc_timing); - bool dce110_link_encoder_validate_dp_output( const struct dce110_link_encoder *enc110, const struct dc_crtc_timing *crtc_timing); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index 3856477981c6..85ea219e4fd4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -225,6 +225,7 @@ static const struct dce110_link_enc_registers link_enc_regs[] = { link_regs(4), link_regs(5), link_regs(6), + { .DAC_ENABLE = mmDAC_ENABLE }, }; #define stream_enc_regs(id)\ @@ -632,7 +633,20 @@ static struct link_encoder *dce100_link_encoder_create( kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); int link_regs_id; - if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) + if (!enc110) + return NULL; + + if (enc_init_data->connector.id == CONNECTOR_ID_VGA) { + dce110_link_encoder_construct(enc110, + enc_init_data, + &link_enc_feature, + &link_enc_regs[ENGINE_ID_DACA], + NULL, + NULL); + return &enc110->base; + } + + if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) return NULL; link_regs_id = diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index 7195f435e3f7..6e89b7020b61 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -240,7 +240,9 @@ static const struct dce110_link_enc_registers link_enc_regs[] = { link_regs(2), link_regs(3), link_regs(4), - link_regs(5) + link_regs(5), + {0}, + { .DAC_ENABLE = mmDAC_ENABLE }, }; #define stream_enc_regs(id)\ @@ -726,7 +728,20 @@ static struct link_encoder *dce60_link_encoder_create( kzalloc(sizeof(struct 
dce110_link_encoder), GFP_KERNEL); int link_regs_id; - if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) + if (!enc110) + return NULL; + + if (enc_init_data->connector.id == CONNECTOR_ID_VGA) { + dce110_link_encoder_construct(enc110, + enc_init_data, + &link_enc_feature, + &link_enc_regs[ENGINE_ID_DACA], + NULL, + NULL); + return &enc110->base; + } + + if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) return NULL; link_regs_id = diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index e9b79db37d1b..a1ad7398e39f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -241,6 +241,7 @@ static const struct dce110_link_enc_registers link_enc_regs[] = { link_regs(4), link_regs(5), link_regs(6), + { .DAC_ENABLE = mmDAC_ENABLE }, }; #define stream_enc_regs(id)\ @@ -734,7 +735,20 @@ static struct link_encoder *dce80_link_encoder_create( kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); int link_regs_id; - if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) + if (!enc110) + return NULL; + + if (enc_init_data->connector.id == CONNECTOR_ID_VGA) { + dce110_link_encoder_construct(enc110, + enc_init_data, + &link_enc_feature, + &link_enc_regs[ENGINE_ID_DACA], + NULL, + NULL); + return &enc110->base; + } + + if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) return NULL; link_regs_id = -- cgit v1.2.3 From 3be26d81b150c8bd4ee24f58b5bb558d9baaf064 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Fri, 26 Sep 2025 20:01:53 +0200 Subject: drm/amd/display: Support DAC in dce110_hwseq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dce110_hwseq is used by all DCE hardware, so add the DAC support here. When enabling/disabling a stream for a RGB signal, this will call the VBIOS to enable/disable the DAC. Additionally, when applying the controller context, call SelectCRTC_Source from VBIOS in order to direct the CRTC output to the DAC. Signed-off-by: Timur Kristóf Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 75 +++++++++++++++++++++- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 6b0566baa2f2..3005115c8505 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -659,6 +659,20 @@ void dce110_update_info_frame(struct pipe_ctx *pipe_ctx) } } +static void +dce110_dac_encoder_control(struct pipe_ctx *pipe_ctx, bool enable) +{ + struct dc_link *link = pipe_ctx->stream->link; + struct dc_bios *bios = link->ctx->dc_bios; + struct bp_encoder_control encoder_control = {0}; + + encoder_control.action = enable ? 
ENCODER_CONTROL_ENABLE : ENCODER_CONTROL_DISABLE; + encoder_control.engine_id = link->link_enc->analog_engine; + encoder_control.pixel_clock = pipe_ctx->stream->timing.pix_clk_100hz / 10; + + bios->funcs->encoder_control(bios, &encoder_control); +} + void dce110_enable_stream(struct pipe_ctx *pipe_ctx) { enum dc_lane_count lane_count = @@ -689,6 +703,9 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx) early_control = lane_count; tg->funcs->set_early_control(tg, early_control); + + if (dc_is_rgb_signal(pipe_ctx->stream->signal)) + dce110_dac_encoder_control(pipe_ctx, true); } static enum bp_result link_transmitter_control( @@ -1176,7 +1193,8 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) pipe_ctx->stream_res.stream_enc->funcs->stop_dp_info_packets( pipe_ctx->stream_res.stream_enc); - dc->hwss.disable_audio_stream(pipe_ctx); + if (!dc_is_rgb_signal(pipe_ctx->stream->signal)) + dc->hwss.disable_audio_stream(pipe_ctx); link_hwss->reset_stream_encoder(pipe_ctx); @@ -1196,6 +1214,9 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, link_enc->transmitter - TRANSMITTER_UNIPHY_A); } + + if (dc_is_rgb_signal(pipe_ctx->stream->signal)) + dce110_dac_encoder_control(pipe_ctx, false); } void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, @@ -1581,6 +1602,51 @@ static enum dc_status dce110_enable_stream_timing( return DC_OK; } +static void +dce110_select_crtc_source(struct pipe_ctx *pipe_ctx) +{ + struct dc_link *link = pipe_ctx->stream->link; + struct dc_bios *bios = link->ctx->dc_bios; + struct bp_crtc_source_select crtc_source_select = {0}; + enum engine_id engine_id = link->link_enc->preferred_engine; + uint8_t bit_depth; + + if (dc_is_rgb_signal(pipe_ctx->stream->signal)) + engine_id = link->link_enc->analog_engine; + + switch (pipe_ctx->stream->timing.display_color_depth) { + case COLOR_DEPTH_UNDEFINED: + bit_depth = 0; + break; + case COLOR_DEPTH_666: + bit_depth = 6; + break; + default: + case COLOR_DEPTH_888: + bit_depth = 8; + break; + case COLOR_DEPTH_101010: + bit_depth = 10; + break; + case COLOR_DEPTH_121212: + bit_depth = 12; + break; + case COLOR_DEPTH_141414: + bit_depth = 14; + break; + case COLOR_DEPTH_161616: + bit_depth = 16; + break; + } + + crtc_source_select.controller_id = CONTROLLER_ID_D0 + pipe_ctx->stream_res.tg->inst; + crtc_source_select.bit_depth = bit_depth; + crtc_source_select.engine_id = engine_id; + crtc_source_select.sink_signal = pipe_ctx->stream->signal; + + bios->funcs->select_crtc_source(bios, &crtc_source_select); +} + enum dc_status dce110_apply_single_controller_ctx_to_hw( struct pipe_ctx *pipe_ctx, struct dc_state *context, @@ -1600,6 +1666,10 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( hws->funcs.disable_stream_gating(dc, pipe_ctx); } + if (pipe_ctx->stream->signal == SIGNAL_TYPE_RGB) { + dce110_select_crtc_source(pipe_ctx); + } + if (pipe_ctx->stream_res.audio != NULL) { struct audio_output audio_output = {0}; @@ -1679,7 +1749,8 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( pipe_ctx->stream_res.tg->funcs->set_static_screen_control( pipe_ctx->stream_res.tg, event_triggers, 2); - if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) + if (!dc_is_virtual_signal(pipe_ctx->stream->signal) && + !dc_is_rgb_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg( pipe_ctx->stream_res.stream_enc, pipe_ctx->stream_res.tg->inst); -- cgit v1.2.3 From e45d60a76999a2128bd1bd3d2e1597eec8b49c78 Mon Sep 17 00:00:00 2001 
From: Timur Kristóf Date: Fri, 26 Sep 2025 20:01:54 +0200 Subject: drm/amd/display: Add analog link detection (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Analog displays typically have a DDC connection which can be used by the GPU to read EDID. This commit adds the capability to probe analog displays using DDC, reading the EDID header and deciding whether the analog link is connected based on the data that was read. Note that VGA has no HPD (hotplug detection), so we need to do analog link detection for VGA before checking HPD. In the case of DVI-I, while the connector supports HPD, not all analog cables connect the HPD pins, so we can't rely on HPD either. For reference, see the legacy display code: amdgpu_connector_vga_detect amdgpu_display_ddc_probe DAC load detection will be implemented in a separate commit. v2: Fix crash / black screen on newer GPUs during link detection. Ignore HPD pin for analog connectors. Signed-off-by: Timur Kristóf Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/link/hwss/link_hwss_dio.c | 16 ++-- .../gpu/drm/amd/display/dc/link/link_detection.c | 86 +++++++++++++++++++++- drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 3 + drivers/gpu/drm/amd/display/dc/link/link_factory.c | 3 + 4 files changed, 101 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c index 80344cbd1f99..befa67b2b2ae 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c @@ -58,8 +58,9 @@ void setup_dio_stream_encoder(struct pipe_ctx *pipe_ctx) return; } - link_enc->funcs->connect_dig_be_to_fe(link_enc, - pipe_ctx->stream_res.stream_enc->id, true); + if (!dc_is_rgb_signal(pipe_ctx->stream->signal)) + link_enc->funcs->connect_dig_be_to_fe(link_enc, + pipe_ctx->stream_res.stream_enc->id, true); if (dc_is_dp_signal(pipe_ctx->stream->signal)) pipe_ctx->stream->ctx->dc->link_srv->dp_trace_source_sequence(pipe_ctx->stream->link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_BE); @@ -98,10 +99,13 @@ void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx) if (stream_enc->funcs->enable_stream) stream_enc->funcs->enable_stream(stream_enc, pipe_ctx->stream->signal, false); - link_enc->funcs->connect_dig_be_to_fe( - link_enc, - pipe_ctx->stream_res.stream_enc->id, - false); + + if (!dc_is_rgb_signal(pipe_ctx->stream->signal)) + link_enc->funcs->connect_dig_be_to_fe( + link_enc, + pipe_ctx->stream_res.stream_enc->id, + false); + if (dc_is_dp_signal(pipe_ctx->stream->signal)) pipe_ctx->stream->ctx->dc->link_srv->dp_trace_source_sequence( pipe_ctx->stream->link, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 801c94efa7fc..d83fd52cb1b0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -862,6 +862,48 @@ static void verify_link_capability(struct dc_link *link, struct dc_sink *sink, verify_link_capability_non_destructive(link); } +/** + * link_detect_evaluate_edid_header() - Evaluate if an EDID header is acceptable. + * + * Evaluates an 8-byte EDID header to check if it's good enough + * for the purpose of determining whether a display is connected + * without reading the full EDID.
+ */ +static bool link_detect_evaluate_edid_header(uint8_t edid_header[8]) +{ + int edid_header_score = 0; + int i; + + for (i = 0; i < 8; ++i) + edid_header_score += edid_header[i] == ((i == 0 || i == 7) ? 0x00 : 0xff); + + return edid_header_score >= 6; +} + +/** + * link_detect_ddc_probe() - Probe the DDC to see if a display is connected. + * + * Detect whether a display is connected to DDC without reading full EDID. + * Reads only the EDID header (the first 8 bytes of EDID) from DDC and + * evaluates whether that matches. + */ +static bool link_detect_ddc_probe(struct dc_link *link) +{ + if (!link->ddc) + return false; + + uint8_t edid_header[8] = {0}; + bool ddc_probed = i2c_read(link->ddc, 0x50, edid_header, sizeof(edid_header)); + + if (!ddc_probed) + return false; + + if (!link_detect_evaluate_edid_header(edid_header)) + return false; + + return true; +} + /* * detect_link_and_local_sink() - Detect if a sink is attached to a given link * @@ -946,6 +988,12 @@ static bool detect_link_and_local_sink(struct dc_link *link, break; } + case SIGNAL_TYPE_RGB: { + sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C; + sink_caps.signal = SIGNAL_TYPE_RGB; + break; + } + case SIGNAL_TYPE_LVDS: { sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C; sink_caps.signal = SIGNAL_TYPE_LVDS; @@ -1139,9 +1187,17 @@ static bool detect_link_and_local_sink(struct dc_link *link, sink = prev_sink; prev_sink = NULL; } - query_hdcp_capability(sink->sink_signal, link); + + if (!sink->edid_caps.analog) + query_hdcp_capability(sink->sink_signal, link); } + /* DVI-I connector connected to analog display. */ + if ((link->link_id.id == CONNECTOR_ID_DUAL_LINK_DVII || + link->link_id.id == CONNECTOR_ID_SINGLE_LINK_DVII) && + sink->edid_caps.analog) + sink->sink_signal = SIGNAL_TYPE_RGB; + /* HDMI-DVI Dongle */ if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A && !sink->edid_caps.edid_hdmi) @@ -1238,6 +1294,23 @@ static bool detect_link_and_local_sink(struct dc_link *link, return true; } +/** + * link_detect_analog() - Determines if an analog sink is connected. + */ +static bool link_detect_analog(struct dc_link *link, enum dc_connection_type *type) +{ + /* Don't care about connectors that don't support an analog signal. */ + ASSERT(dc_connector_supports_analog(link->link_id.id)); + + if (link_detect_ddc_probe(link)) { + *type = dc_connection_single; + return true; + } + + *type = dc_connection_none; + return true; +} + /* * link_detect_connection_type() - Determine if there is a sink connected * @@ -1254,6 +1327,17 @@ bool link_detect_connection_type(struct dc_link *link, enum dc_connection_type * return true; } + /* Ignore the HPD pin (if any) for analog connectors. + * Instead rely on DDC. + * + * - VGA connectors don't have any HPD at all. + * - Some DVI-A cables don't connect the HPD pin. + * - Some DVI-A cables pull up the HPD pin. + * (So it's high even when no display is connected.) 
+ */ + if (dc_connector_supports_analog(link->link_id.id)) + return link_detect_analog(link, type); + if (link->connector_signal == SIGNAL_TYPE_EDP) { /*in case it is not on*/ if (!link->dc->config.edp_no_power_sequencing) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index fdefe27f4ba3..4ddcdc222913 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2258,6 +2258,9 @@ static enum dc_status enable_link( enable_link_lvds(pipe_ctx); status = DC_OK; break; + case SIGNAL_TYPE_RGB: + status = DC_OK; + break; case SIGNAL_TYPE_VIRTUAL: status = enable_link_virtual(pipe_ctx); break; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index ebc3798e7d25..7989baf3843c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -583,6 +583,9 @@ static bool construct_phy(struct dc_link *link, case CONNECTOR_ID_DUAL_LINK_DVII: link->connector_signal = SIGNAL_TYPE_DVI_DUAL_LINK; break; + case CONNECTOR_ID_VGA: + link->connector_signal = SIGNAL_TYPE_RGB; + break; case CONNECTOR_ID_DISPLAY_PORT: case CONNECTOR_ID_MXM: case CONNECTOR_ID_USBC: -- cgit v1.2.3 From 8223a605744bb471f31018eac9075a539415b16f Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Fri, 26 Sep 2025 20:01:55 +0200 Subject: drm/amd/display: Refactor amdgpu_dm_connector_detect (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare for polling analog connectors. Document the function better. Signed-off-by: Timur Kristóf Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 35 ++++++++++++----------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9e260ca04e1e..3bd404b727b6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7226,29 +7226,32 @@ finish: return stream; } +/** + * amdgpu_dm_connector_detect() - Detect whether a DRM connector is connected to a display + * + * A connector is considered connected when it has a sink that is not NULL. + * For connectors that support HPD (hotplug detection), the connection is + * handled in the HPD interrupt. + * + * Notes: + * 1. This interface is NOT called in context of HPD irq. + * 2. This interface *is called* in context of user-mode ioctl. Which + * makes it a bad place for *any* MST-related activity. + */ static enum drm_connector_status amdgpu_dm_connector_detect(struct drm_connector *connector, bool force) { - bool connected; struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - /* - * Notes: - * 1. This interface is NOT called in context of HPD irq. - * 2. This interface *is called* in context of user-mode ioctl. Which - * makes it a bad place for *any* MST-related activity. - */ - - if (aconnector->base.force == DRM_FORCE_UNSPECIFIED && - !aconnector->fake_enable) - connected = (aconnector->dc_sink != NULL); - else - connected = (aconnector->base.force == DRM_FORCE_ON || - aconnector->base.force == DRM_FORCE_ON_DIGITAL); - update_subconnector_property(aconnector); - return (connected ? 
connector_status_connected : connector_status_disconnected); } -- cgit v1.2.3 From c4f3f114e73c331f02c7188af650901ee3bad2c1 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Fri, 26 Sep 2025 20:01:56 +0200 Subject: drm/amd/display: Poll analog connectors (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VGA connectors don't support any hotplug detection, so the kernel needs to periodically poll them to see if a display is connected. DVI-I connectors have hotplug detection for digital signals, and some analog DVI cables pull up that pin to work with that. However, in general not all DVI cables do this, so we can't rely on this feature; therefore we need to poll DVI-I connectors as well. v2: Call drm_kms_helper_poll_fini in amdgpu_dm_hpd_fini. Disable/enable polling on suspend/resume. Don't call full link detection when already connected. v3: Fix a CLANG build failure by removing an unused variable: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_irq.c:980:7: error: variable 'use_polling' set but not used [-Werror,-Wunused-but-set-variable] 980 | bool use_polling = false; Signed-off-by: Timur Kristóf Signed-off-by: Wayne Lin Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 73 +++++++++++++++++++++- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 17 +++++ 2 files changed, 88 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3bd404b727b6..d18d72a520f2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3853,7 +3853,9 @@ void amdgpu_dm_update_connector_after_detect( drm_dbg_kms(dev, "DCHPD: connector_id=%d: Old sink=%p New sink=%p\n", aconnector->connector_id, aconnector->dc_sink, sink); - guard(mutex)(&dev->mode_config.mutex); + /* When polling, DRM has already locked the mutex for us. */ + if (!drm_kms_helper_is_poll_worker()) + mutex_lock(&dev->mode_config.mutex); /* * 1. Update status of the drm connector @@ -3916,6 +3918,10 @@ void amdgpu_dm_update_connector_after_detect( } update_subconnector_property(aconnector); + + /* When polling, the mutex will be unlocked for us by DRM. */ + if (!drm_kms_helper_is_poll_worker()) + mutex_unlock(&dev->mode_config.mutex); } static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector) @@ -7226,12 +7232,63 @@ finish: return stream; } +/** + * amdgpu_dm_connector_poll() - Poll a connector to see if it's connected to a display + * + * Used for connectors that don't support HPD (hotplug detection) + * to periodically check whether the connector is connected to a display.
+ */ +static enum drm_connector_status +amdgpu_dm_connector_poll(struct amdgpu_dm_connector *aconnector, bool force) +{ + struct drm_connector *connector = &aconnector->base; + struct drm_device *dev = connector->dev; + struct amdgpu_device *adev = drm_to_adev(dev); + struct dc_link *link = aconnector->dc_link; + enum dc_connection_type conn_type = dc_connection_none; + enum drm_connector_status status = connector_status_disconnected; + + mutex_lock(&aconnector->hpd_lock); + + if (dc_link_detect_connection_type(aconnector->dc_link, &conn_type) && + conn_type != dc_connection_none) { + mutex_lock(&adev->dm.dc_lock); + + /* Only call full link detection when a sink isn't created yet, + * ie. just when the display is plugged in, otherwise we risk flickering. + */ + if (link->local_sink || + dc_link_detect(link, DETECT_REASON_HPD)) + status = connector_status_connected; + + mutex_unlock(&adev->dm.dc_lock); + } + + if (connector->status != status) { + if (status == connector_status_disconnected) { + if (link->local_sink) + dc_sink_release(link->local_sink); + + link->local_sink = NULL; + link->dpcd_sink_count = 0; + link->type = dc_connection_none; + } + + amdgpu_dm_update_connector_after_detect(aconnector); + } + + mutex_unlock(&aconnector->hpd_lock); + return status; +} + /** * amdgpu_dm_connector_detect() - Detect whether a DRM connector is connected to a display * * A connector is considered connected when it has a sink that is not NULL. * For connectors that support HPD (hotplug detection), the connection is * handled in the HPD interrupt. + * For connectors that may not support HPD, such as analog connectors, + * DRM will call this function repeatedly to poll them. * * Notes: * 1. This interface is NOT called in context of HPD irq. @@ -7251,6 +7308,14 @@ amdgpu_dm_connector_detect(struct drm_connector *connector, bool force) else if (aconnector->base.force == DRM_FORCE_OFF) return connector_status_disconnected; + /* Poll analog connectors and only when either + * disconnected or connected to an analog display. + */ + if (drm_kms_helper_is_poll_worker() && + dc_connector_supports_analog(aconnector->dc_link->link_id.id) && + (!aconnector->dc_sink || aconnector->dc_sink->edid_caps.analog)) + return amdgpu_dm_connector_poll(aconnector, force); + return (aconnector->dc_sink ? connector_status_connected : connector_status_disconnected); } @@ -8675,9 +8740,13 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, link->link_enc->features.dp_ycbcr420_supported ? 
true : false; break; case DRM_MODE_CONNECTOR_DVID: - case DRM_MODE_CONNECTOR_DVII: aconnector->base.polled = DRM_CONNECTOR_POLL_HPD; break; + case DRM_MODE_CONNECTOR_DVII: + case DRM_MODE_CONNECTOR_VGA: + aconnector->base.polled = + DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; + break; default: break; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index a1c722112c22..0a2a3f233a0e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -476,6 +476,7 @@ void amdgpu_dm_irq_fini(struct amdgpu_device *adev) void amdgpu_dm_irq_suspend(struct amdgpu_device *adev) { + struct drm_device *dev = adev_to_drm(adev); int src; struct list_head *hnd_list_h; struct list_head *hnd_list_l; @@ -512,6 +513,9 @@ void amdgpu_dm_irq_suspend(struct amdgpu_device *adev) } DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags); + + if (dev->mode_config.poll_enabled) + drm_kms_helper_poll_disable(dev); } void amdgpu_dm_irq_resume_early(struct amdgpu_device *adev) @@ -537,6 +541,7 @@ void amdgpu_dm_irq_resume_early(struct amdgpu_device *adev) void amdgpu_dm_irq_resume_late(struct amdgpu_device *adev) { + struct drm_device *dev = adev_to_drm(adev); int src; struct list_head *hnd_list_h, *hnd_list_l; unsigned long irq_table_flags; @@ -557,6 +562,9 @@ void amdgpu_dm_irq_resume_late(struct amdgpu_device *adev) } DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags); + + if (dev->mode_config.poll_enabled) + drm_kms_helper_poll_enable(dev); } /* @@ -893,6 +901,7 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) struct drm_connector_list_iter iter; int irq_type; int i; + bool use_polling = false; /* First, clear all hpd and hpdrx interrupts */ for (i = DC_IRQ_SOURCE_HPD1; i <= DC_IRQ_SOURCE_HPD6RX; i++) { @@ -906,6 +915,8 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) struct amdgpu_dm_connector *amdgpu_dm_connector; const struct dc_link *dc_link; + use_polling |= connector->polled != DRM_CONNECTOR_POLL_HPD; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) continue; @@ -947,6 +958,9 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); + + if (use_polling) + drm_kms_helper_poll_init(dev); } /** @@ -997,4 +1011,7 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); + + if (dev->mode_config.poll_enabled) + drm_kms_helper_poll_fini(dev); } -- cgit v1.2.3 From 2debe0ecb53d24b5150334fa87fc02351c030046 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Fri, 26 Sep 2025 20:01:57 +0200 Subject: drm/amd/display: Add DCE BIOS_SCRATCH_0 register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BIOS uses this register to write the results of the DAC_LoadDetection command, so we'll need to read this in order to make DAC load detection work. As a reference, I used the mmBIOS_SCRATCH_0 definition from the amdgpu legacy display code. 
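For context, the handshake that consumes this register works roughly as sketched below (hedged: reg_read()/reg_write() are placeholders for dm_read_reg()/dm_write_reg(), and the actual command-table call is added by a later patch in this series):

#include <stdbool.h>
#include <stdint.h>

extern uint32_t reg_read(uint32_t reg);            /* placeholder accessor */
extern void reg_write(uint32_t reg, uint32_t val); /* placeholder accessor */
extern void exec_vbios_dac_load_detection(void);   /* VBIOS command table */

static bool dac_load_present(uint32_t scratch_reg, uint32_t device_mask)
{
        /* Clear any stale result bit first. */
        reg_write(scratch_reg, reg_read(scratch_reg) & ~device_mask);

        /* The VBIOS runs detection and reports into the scratch register. */
        exec_vbios_dac_load_detection();

        /* A set bit means an analog load was found on that device. */
        return (reg_read(scratch_reg) & device_mask) != 0;
}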
Signed-off-by: Timur Kristóf Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_bios_types.h | 1 + drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c | 2 ++ drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c | 2 ++ drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c | 2 ++ drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c | 1 + drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c | 2 ++ drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c | 2 ++ 7 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index 545ce1e15eae..50c8906b74c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -168,6 +168,7 @@ struct dc_vbios_funcs { }; struct bios_registers { + uint32_t BIOS_SCRATCH_0; uint32_t BIOS_SCRATCH_3; uint32_t BIOS_SCRATCH_6; }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index 85ea219e4fd4..d40d91ec2035 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -78,6 +78,7 @@ #endif #ifndef mmBIOS_SCRATCH_2 + #define mmBIOS_SCRATCH_0 0x05C9 #define mmBIOS_SCRATCH_2 0x05CB #define mmBIOS_SCRATCH_3 0x05CC #define mmBIOS_SCRATCH_6 0x05CF @@ -369,6 +370,7 @@ static const struct dce_abm_mask abm_mask = { #define DCFE_MEM_PWR_CTRL_REG_BASE 0x1b03 static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_0 = mmBIOS_SCRATCH_0, .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index 9e14ffb3c942..cd54382c0af3 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c @@ -82,6 +82,7 @@ #endif #ifndef mmBIOS_SCRATCH_2 + #define mmBIOS_SCRATCH_0 0x05C9 #define mmBIOS_SCRATCH_2 0x05CB #define mmBIOS_SCRATCH_3 0x05CC #define mmBIOS_SCRATCH_6 0x05CF @@ -377,6 +378,7 @@ static const struct dce110_clk_src_mask cs_mask = { }; static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_0 = mmBIOS_SCRATCH_0, .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 62977bcdeaa0..3f0a6bc4dcc2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -76,6 +76,7 @@ #endif #ifndef mmBIOS_SCRATCH_2 + #define mmBIOS_SCRATCH_0 0x05C9 #define mmBIOS_SCRATCH_2 0x05CB #define mmBIOS_SCRATCH_3 0x05CC #define mmBIOS_SCRATCH_6 0x05CF @@ -385,6 +386,7 @@ static const struct dce110_clk_src_mask cs_mask = { }; static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_0 = mmBIOS_SCRATCH_0, .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c index 0770ea37183f..b1570b6b1af3 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +++ 
b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c @@ -491,6 +491,7 @@ static struct dce_i2c_hw *dce120_i2c_hw_create( return dce_i2c_hw; } static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_0 = mmBIOS_SCRATCH_0 + NBIO_BASE(mmBIOS_SCRATCH_0_BASE_IDX), .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3 + NBIO_BASE(mmBIOS_SCRATCH_3_BASE_IDX), .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 + NBIO_BASE(mmBIOS_SCRATCH_6_BASE_IDX) }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index 6e89b7020b61..f0152933bee2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -80,6 +80,7 @@ #ifndef mmBIOS_SCRATCH_2 + #define mmBIOS_SCRATCH_0 0x05C9 #define mmBIOS_SCRATCH_2 0x05CB #define mmBIOS_SCRATCH_3 0x05CC #define mmBIOS_SCRATCH_6 0x05CF @@ -368,6 +369,7 @@ static const struct dce110_clk_src_mask cs_mask = { }; static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_0 = mmBIOS_SCRATCH_0, .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index a1ad7398e39f..8687104cabb7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -78,6 +78,7 @@ #ifndef mmBIOS_SCRATCH_2 + #define mmBIOS_SCRATCH_0 0x05C9 #define mmBIOS_SCRATCH_2 0x05CB #define mmBIOS_SCRATCH_3 0x05CC #define mmBIOS_SCRATCH_6 0x05CF @@ -369,6 +370,7 @@ static const struct dce110_clk_src_mask cs_mask = { }; static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_0 = mmBIOS_SCRATCH_0, .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 }; -- cgit v1.2.3 From 7f1d1c2f4da0640d6e54496245cbac3ed1d2e8b9 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Fri, 26 Sep 2025 20:01:58 +0200 Subject: drm/amd/display: Make get_support_mask_for_device_id reusable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be reused by DAC load detection. 
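The call-site difference is small; roughly (illustrative values: DEVICE_TYPE_CRT with enum_id 1 is the CRT1 case that DAC load detection passes later in this series):

/* Before: callers had to wrap the two values in a struct device_id. */
struct device_id id = { .device_type = DEVICE_TYPE_CRT, .enum_id = 1 };
uint32_t mask = get_support_mask_for_device_id(id);

/* After: the pair is passed directly, with no struct to build. */
uint32_t mask = get_support_mask_for_device_id(DEVICE_TYPE_CRT, 1);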
Signed-off-by: Timur Kristóf Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 33d0ec38ded7..a126ca3a53fb 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -67,7 +67,9 @@ static ATOM_HPD_INT_RECORD *get_hpd_record(struct bios_parser *bp, ATOM_OBJECT *object); static struct device_id device_type_from_device_id(uint16_t device_id); static uint32_t signal_to_ss_id(enum as_signal_type signal); -static uint32_t get_support_mask_for_device_id(struct device_id device_id); +static uint32_t get_support_mask_for_device_id( + enum dal_device_type device_type, + uint32_t enum_id); static ATOM_ENCODER_CAP_RECORD_V2 *get_encoder_cap_record( struct bios_parser *bp, ATOM_OBJECT *object); @@ -888,7 +890,7 @@ static bool bios_parser_is_device_id_supported( { struct bios_parser *bp = BP_FROM_DCB(dcb); - uint32_t mask = get_support_mask_for_device_id(id); + uint32_t mask = get_support_mask_for_device_id(id.device_type, id.enum_id); return (le16_to_cpu(bp->object_info_tbl.v1_1->usDeviceSupport) & mask) != 0; } @@ -2179,11 +2181,10 @@ static uint32_t signal_to_ss_id(enum as_signal_type signal) return clk_id_ss; } -static uint32_t get_support_mask_for_device_id(struct device_id device_id) +static uint32_t get_support_mask_for_device_id( + enum dal_device_type device_type, + uint32_t enum_id) { - enum dal_device_type device_type = device_id.device_type; - uint32_t enum_id = device_id.enum_id; - switch (device_type) { case DEVICE_TYPE_LCD: switch (enum_id) { -- cgit v1.2.3 From d75e45b853630dbcd2b5c1a1aea7e2f3dc12effc Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Fri, 26 Sep 2025 20:01:59 +0200 Subject: drm/amd/display: Add DAC_LoadDetection to BIOS parser (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DAC_LoadDetection can be used to determine whether something is connected to an analog connector by determining if there is an analog load. This causes visible flickering on displays, so we only resort to using this when the connected display doesn't have an EDID. For reference, see the legacy display code: amdgpu_atombios_encoder_dac_load_detect v2: Only clear corresponding bit from BIOS_SCRATCH_0. 
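A later patch in this series consumes the new hook from link detection; as a usage sketch, the call shape is:

/* Probe CRT1 on DAC A; BP_RESULT_OK means the VBIOS reported an analog
 * load (mirrors link_detect_dac_load_detect() added later in the series). */
enum bp_result res = bios->funcs->dac_load_detection(bios, ENGINE_ID_DACA,
                                                     DEVICE_TYPE_CRT, 1);
bool display_present = (res == BP_RESULT_OK);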
Signed-off-by: Timur Kristóf Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 50 ++++++++++++ .../gpu/drm/amd/display/dc/bios/command_table.c | 92 ++++++++++++++++++++++ .../gpu/drm/amd/display/dc/bios/command_table.h | 3 + drivers/gpu/drm/amd/display/dc/dc_bios_types.h | 5 ++ .../drm/amd/display/include/bios_parser_types.h | 5 ++ 5 files changed, 155 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index a126ca3a53fb..4120d6c4c5e4 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -780,6 +780,54 @@ static enum bp_result bios_parser_encoder_control( return bp->cmd_tbl.dig_encoder_control(bp, cntl); } +static enum bp_result bios_parser_dac_load_detection( + struct dc_bios *dcb, + enum engine_id engine_id, + enum dal_device_type device_type, + uint32_t enum_id) +{ + struct bios_parser *bp = BP_FROM_DCB(dcb); + struct dc_context *ctx = dcb->ctx; + struct bp_load_detection_parameters bp_params = {0}; + enum bp_result bp_result; + uint32_t bios_0_scratch; + uint32_t device_id_mask = 0; + + bp_params.engine_id = engine_id; + bp_params.device_id = get_support_mask_for_device_id(device_type, enum_id); + + if (engine_id != ENGINE_ID_DACA && + engine_id != ENGINE_ID_DACB) + return BP_RESULT_UNSUPPORTED; + + if (!bp->cmd_tbl.dac_load_detection) + return BP_RESULT_UNSUPPORTED; + + if (bp_params.device_id == ATOM_DEVICE_CRT1_SUPPORT) + device_id_mask = ATOM_S0_CRT1_MASK; + else if (bp_params.device_id == ATOM_DEVICE_CRT2_SUPPORT) + device_id_mask = ATOM_S0_CRT2_MASK; + else + return BP_RESULT_UNSUPPORTED; + + /* BIOS will write the detected devices to BIOS_SCRATCH_0, clear corresponding bit */ + bios_0_scratch = dm_read_reg(ctx, bp->base.regs->BIOS_SCRATCH_0); + bios_0_scratch &= ~device_id_mask; + dm_write_reg(ctx, bp->base.regs->BIOS_SCRATCH_0, bios_0_scratch); + + bp_result = bp->cmd_tbl.dac_load_detection(bp, &bp_params); + + if (bp_result != BP_RESULT_OK) + return bp_result; + + bios_0_scratch = dm_read_reg(ctx, bp->base.regs->BIOS_SCRATCH_0); + + if (bios_0_scratch & device_id_mask) + return BP_RESULT_OK; + + return BP_RESULT_FAILURE; +} + static enum bp_result bios_parser_adjust_pixel_clock( struct dc_bios *dcb, struct bp_adjust_pixel_clock_parameters *bp_params) @@ -2864,6 +2912,8 @@ static const struct dc_vbios_funcs vbios_funcs = { .encoder_control = bios_parser_encoder_control, + .dac_load_detection = bios_parser_dac_load_detection, + .transmitter_control = bios_parser_transmitter_control, .enable_crtc = bios_parser_enable_crtc, diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index dbd84477ceb7..22457f417e65 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -54,6 +54,7 @@ static void init_enable_spread_spectrum_on_ppll(struct bios_parser *bp); static void init_adjust_display_pll(struct bios_parser *bp); static void init_select_crtc_source(struct bios_parser *bp); static void init_dac_encoder_control(struct bios_parser *bp); +static void init_dac_load_detection(struct bios_parser *bp); static void init_dac_output_control(struct bios_parser *bp); static void init_set_crtc_timing(struct bios_parser *bp); static void init_enable_crtc(struct bios_parser *bp); @@ -72,6 +73,7 @@ void dal_bios_parser_init_cmd_tbl(struct bios_parser *bp)
init_adjust_display_pll(bp); init_select_crtc_source(bp); init_dac_encoder_control(bp); + init_dac_load_detection(bp); init_dac_output_control(bp); init_set_crtc_timing(bp); init_enable_crtc(bp); @@ -1902,6 +1904,96 @@ static enum bp_result dac2_encoder_control_v1( return result; } +/******************************************************************************* + ******************************************************************************** + ** + ** DAC LOAD DETECTION + ** + ******************************************************************************** + *******************************************************************************/ + +static enum bp_result dac_load_detection_v1( + struct bios_parser *bp, + struct bp_load_detection_parameters *bp_params); + +static enum bp_result dac_load_detection_v3( + struct bios_parser *bp, + struct bp_load_detection_parameters *bp_params); + +static void init_dac_load_detection(struct bios_parser *bp) +{ + switch (BIOS_CMD_TABLE_PARA_REVISION(DAC_LoadDetection)) { + case 1: + case 2: + bp->cmd_tbl.dac_load_detection = dac_load_detection_v1; + break; + case 3: + default: + bp->cmd_tbl.dac_load_detection = dac_load_detection_v3; + break; + } +} + +static void dac_load_detect_prepare_params( + struct _DAC_LOAD_DETECTION_PS_ALLOCATION *params, + enum engine_id engine_id, + uint16_t device_id, + uint8_t misc) +{ + uint8_t dac_type = ATOM_DAC_A; + + if (engine_id == ENGINE_ID_DACB) + dac_type = ATOM_DAC_B; + + params->sDacload.usDeviceID = cpu_to_le16(device_id); + params->sDacload.ucDacType = dac_type; + params->sDacload.ucMisc = misc; +} + +static enum bp_result dac_load_detection_v1( + struct bios_parser *bp, + struct bp_load_detection_parameters *bp_params) +{ + enum bp_result result = BP_RESULT_FAILURE; + DAC_LOAD_DETECTION_PS_ALLOCATION params; + + dac_load_detect_prepare_params( + &params, + bp_params->engine_id, + bp_params->device_id, + 0); + + if (EXEC_BIOS_CMD_TABLE(DAC_LoadDetection, params)) + result = BP_RESULT_OK; + + return result; +} + +static enum bp_result dac_load_detection_v3( + struct bios_parser *bp, + struct bp_load_detection_parameters *bp_params) +{ + enum bp_result result = BP_RESULT_FAILURE; + DAC_LOAD_DETECTION_PS_ALLOCATION params; + + uint8_t misc = 0; + + if (bp_params->device_id == ATOM_DEVICE_CV_SUPPORT || + bp_params->device_id == ATOM_DEVICE_TV1_SUPPORT) + misc = DAC_LOAD_MISC_YPrPb; + + dac_load_detect_prepare_params( + &params, + bp_params->engine_id, + bp_params->device_id, + misc); + + if (EXEC_BIOS_CMD_TABLE(DAC_LoadDetection, params)) + result = BP_RESULT_OK; + + return result; +} + /******************************************************************************* ******************************************************************************** ** diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.h b/drivers/gpu/drm/amd/display/dc/bios/command_table.h index 8b04b903e93d..e89b1ba0048b 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.h +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.h @@ -71,6 +71,9 @@ struct cmd_tbl { enum bp_result (*dac2_output_control)( struct bios_parser *bp, bool enable); + enum bp_result (*dac_load_detection)( + struct bios_parser *bp, + struct bp_load_detection_parameters *bp_params); enum bp_result (*set_crtc_timing)( struct bios_parser *bp, struct bp_hw_crtc_timing_parameters *bp_params); diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index 50c8906b74c5..40d7a7d83c40 100644 ---
a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -97,6 +97,11 @@ struct dc_vbios_funcs { enum bp_result (*encoder_control)( struct dc_bios *bios, struct bp_encoder_control *cntl); + enum bp_result (*dac_load_detection)( + struct dc_bios *bios, + enum engine_id engine_id, + enum dal_device_type device_type, + uint32_t enum_id); enum bp_result (*transmitter_control)( struct dc_bios *bios, struct bp_transmitter_control *cntl); diff --git a/drivers/gpu/drm/amd/display/include/bios_parser_types.h b/drivers/gpu/drm/amd/display/include/bios_parser_types.h index d9e58a6a0d36..973b6bdbac63 100644 --- a/drivers/gpu/drm/amd/display/include/bios_parser_types.h +++ b/drivers/gpu/drm/amd/display/include/bios_parser_types.h @@ -162,6 +162,11 @@ struct bp_transmitter_control { bool single_pll_mode; }; +struct bp_load_detection_parameters { + enum engine_id engine_id; + uint16_t device_id; +}; + struct bp_hw_crtc_timing_parameters { enum controller_id controller_id; /* horizontal part */ -- cgit v1.2.3 From ac1bb49522676dfbd83c323376f8d5daa5b1ba84 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Fri, 26 Sep 2025 20:02:00 +0200 Subject: drm/amd/display: Use DAC load detection on analog connectors (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This feature is useful for analog connections without EDID: - Really old monitors with a VGA connector - Cheap DVI/VGA adapters that don't connect DDC pins When a connection is established through DAC load detection, the driver is supposed to fill in the supported modes for the display, which we already do in amdgpu_dm_connector_get_modes. Also, because the load detection causes visible glitches, do not attempt to poll the connector again after it was detected this way. Note that it will still be polled after sleep/resume or when force is enabled, which is okay. v2: Add dc_connection_dac_load connection type. Properly release sink when no display is connected. Don't print error when EDID isn't read from an analog display. Signed-off-by: Timur Kristóf Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++++ drivers/gpu/drm/amd/display/dc/dc_types.h | 3 +- .../gpu/drm/amd/display/dc/link/link_detection.c | 61 +++++++++++++++++++++- 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d18d72a520f2..a235a5b61186 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7248,6 +7248,16 @@ amdgpu_dm_connector_poll(struct amdgpu_dm_connector *aconnector, bool force) enum dc_connection_type conn_type = dc_connection_none; enum drm_connector_status status = connector_status_disconnected; + /* When we determined the connection using DAC load detection, + * do NOT poll the connector to detect disconnect, because + * that would run DAC load detection again, which can cause + * visible glitches. + * + * Only allow polling such a connector again when forcing.
+ */ + if (!force && link->local_sink && link->type == dc_connection_dac_load) + return connector->status; + mutex_lock(&aconnector->hpd_lock); if (dc_link_detect_connection_type(aconnector->dc_link, &conn_type) && diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 9989c8af5174..ea6b71c43d2c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -353,7 +353,8 @@ enum dc_connection_type { dc_connection_none, dc_connection_single, dc_connection_mst_branch, - dc_connection_sst_branch + dc_connection_sst_branch, + dc_connection_dac_load }; struct dc_csc_adjustments { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index d83fd52cb1b0..c417780f37bc 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -904,6 +904,37 @@ static bool link_detect_ddc_probe(struct dc_link *link) return true; } +/** + * link_detect_dac_load_detect() - Performs DAC load detection. + * + * Load detection can be used to detect the presence of an + * analog display when we can't read DDC. This causes a visible + * glitch, so it should be used sparingly. + */ +static bool link_detect_dac_load_detect(struct dc_link *link) +{ + struct dc_bios *bios = link->ctx->dc_bios; + struct link_encoder *link_enc = link->link_enc; + enum engine_id engine_id = link_enc->preferred_engine; + enum dal_device_type device_type = DEVICE_TYPE_CRT; + enum bp_result bp_result; + uint32_t enum_id; + + switch (engine_id) { + case ENGINE_ID_DACB: + enum_id = 2; + break; + case ENGINE_ID_DACA: + default: + engine_id = ENGINE_ID_DACA; + enum_id = 1; + break; + } + + bp_result = bios->funcs->dac_load_detection(bios, engine_id, device_type, enum_id); + return bp_result == BP_RESULT_OK; +} + /* * detect_link_and_local_sink() - Detect if a sink is attached to a given link * @@ -1118,7 +1149,30 @@ static bool detect_link_and_local_sink(struct dc_link *link, DC_LOG_ERROR("Partial EDID valid, abandon invalid blocks.\n"); break; case EDID_NO_RESPONSE: + /* Analog connectors without EDID: + * - old monitor that actually doesn't have EDID + * - cheap DVI-A cable or adapter that doesn't connect DDC + */ + if (dc_connector_supports_analog(link->link_id.id)) { + /* If we didn't do DAC load detection yet, do it now + * to verify there really is a display connected. + */ + if (link->type != dc_connection_dac_load && + !link_detect_dac_load_detect(link)) { + if (prev_sink) + dc_sink_release(prev_sink); + link_disconnect_sink(link); + return false; + } + + DC_LOG_INFO("%s detected analog display without EDID\n", __func__); + link->type = dc_connection_dac_load; + sink->edid_caps.analog = true; + break; + } + DC_LOG_ERROR("No EDID read.\n"); + /* * Abort detection for non-DP connectors if we have * no EDID @@ -1307,6 +1361,11 @@ static bool link_detect_analog(struct dc_link *link, enum dc_connection_type *ty return true; } + if (link_detect_dac_load_detect(link)) { + *type = dc_connection_dac_load; + return true; + } + *type = dc_connection_none; return true; } @@ -1328,7 +1387,7 @@ bool link_detect_connection_type(struct dc_link *link, enum dc_connection_type * } /* Ignore the HPD pin (if any) for analog connectors. - * Instead rely on DDC. + * Instead rely on DDC and DAC. * * - VGA connectors don't have any HPD at all. * - Some DVI-A cables don't connect the HPD pin.
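Condensed, the analog probe order introduced here (a sketch of link_detect_analog() plus the EDID_NO_RESPONSE fallback, not a verbatim excerpt) is:

/* DDC probe first: cheap and glitch-free. DAC load detection only as
 * the fallback for displays or cables that expose no EDID at all. */
static enum dc_connection_type sketch_probe_analog(struct dc_link *link)
{
        if (link_detect_ddc_probe(link))        /* EDID header answered */
                return dc_connection_single;
        if (link_detect_dac_load_detect(link))  /* glitchy, last resort */
                return dc_connection_dac_load;
        return dc_connection_none;
}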
-- cgit v1.2.3 From 70181ad96ec22c7ade7e373578b7a1a298035acd Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Fri, 26 Sep 2025 20:02:01 +0200 Subject: drm/amd/display: Add common modes to analog displays without EDID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the EDID of an analog display is not available, we can't know the possible modes supported by the display. However, we still need to offer the user a variety of common modes to select from. It will be up to the user to select the best one, though. This is how it works on other operating systems as well as the legacy display code path in amdgpu. Signed-off-by: Timur Kristóf Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 46 ++++++++++++++--------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a235a5b61186..dc091ccab46e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8412,7 +8412,7 @@ static void amdgpu_dm_get_native_mode(struct drm_connector *connector) static struct drm_display_mode * amdgpu_dm_create_common_mode(struct drm_encoder *encoder, - char *name, + const char *name, int hdisplay, int vdisplay) { struct drm_device *dev = encoder->dev; @@ -8434,6 +8434,24 @@ amdgpu_dm_create_common_mode(struct drm_encoder *encoder, } +static const struct amdgpu_dm_mode_size { + char name[DRM_DISPLAY_MODE_LEN]; + int w; + int h; +} common_modes[] = { + { "640x480", 640, 480}, + { "800x600", 800, 600}, + { "1024x768", 1024, 768}, + { "1280x720", 1280, 720}, + { "1280x800", 1280, 800}, + {"1280x1024", 1280, 1024}, + { "1440x900", 1440, 900}, + {"1680x1050", 1680, 1050}, + {"1600x1200", 1600, 1200}, + {"1920x1080", 1920, 1080}, + {"1920x1200", 1920, 1200} +}; + static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder, struct drm_connector *connector) { @@ -8444,23 +8462,6 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder, to_amdgpu_dm_connector(connector); int i; int n; - struct mode_size { - char name[DRM_DISPLAY_MODE_LEN]; - int w; - int h; - } common_modes[] = { - { "640x480", 640, 480}, - { "800x600", 800, 600}, - { "1024x768", 1024, 768}, - { "1280x720", 1280, 720}, - { "1280x800", 1280, 800}, - {"1280x1024", 1280, 1024}, - { "1440x900", 1440, 900}, - {"1680x1050", 1680, 1050}, - {"1600x1200", 1600, 1200}, - {"1920x1080", 1920, 1080}, - {"1920x1200", 1920, 1200} - }; if ((connector->connector_type != DRM_MODE_CONNECTOR_eDP) && (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)) @@ -8684,6 +8685,15 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector) if (dc->link_srv->dp_get_encoding_format(verified_link_cap) == DP_128b_132b_ENCODING) amdgpu_dm_connector->num_modes += drm_add_modes_noedid(connector, 1920, 1080); + + if (amdgpu_dm_connector->dc_sink->edid_caps.analog) { + /* Analog monitor connected by DAC load detection. + * Add common modes. It will be up to the user to select one that works.
+ */
+ for (int i = 0; i < ARRAY_SIZE(common_modes); i++)
+ amdgpu_dm_connector->num_modes += drm_add_modes_noedid(
+ connector, common_modes[i].w, common_modes[i].h);
+ }
 } else {
 amdgpu_dm_connector_ddc_get_modes(connector, drm_edid);
 if (encoder)
-- cgit v1.2.3


From a9466f63ef36aa42fa989cfe50e30c07992b7d25 Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:02:02 +0200
Subject: drm/amd/display: Don't add freesync modes to analog displays (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

VRR is not supported on analog signals. Don't add freesync modes to
analog displays or when VRR is unsupported by DC.

Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index dc091ccab46e..cce82deff51e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8662,6 +8662,10 @@ static void amdgpu_dm_connector_add_freesync_modes(struct drm_connector *connect
 if (!(amdgpu_freesync_vid_mode && drm_edid))
 return;
 
+ if (!amdgpu_dm_connector->dc_sink || amdgpu_dm_connector->dc_sink->edid_caps.analog ||
+ !dc_supports_vrr(amdgpu_dm_connector->dc_sink->ctx->dce_version))
+ return;
+
 if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10)
 amdgpu_dm_connector->num_modes += add_fs_modes(amdgpu_dm_connector);
-- cgit v1.2.3


From f44aad39b662eb9058dc5123168d2502d31d0ab3 Mon Sep 17 00:00:00 2001
From: Timur Kristóf
Date: Fri, 26 Sep 2025 20:02:03 +0200
Subject: drm/amdgpu: Use DC by default for Bonaire
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that DC supports analog connectors, there is nothing stopping us
from using it by default on Bonaire.

Signed-off-by: Timur Kristóf
Reviewed-by: Harry Wentland
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b8d91247f51a..b92234d63562 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4215,7 +4215,6 @@ bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
 #else
 return false;
 #endif
- case CHIP_BONAIRE:
 case CHIP_KAVERI:
 case CHIP_KABINI:
 case CHIP_MULLINS:
-- cgit v1.2.3


From fea8f13f4f9f1f824a998624d6ec20cab725b5f5 Mon Sep 17 00:00:00 2001
From: Sunday Clement
Date: Fri, 17 Oct 2025 10:15:50 -0400
Subject: drm/amdkfd: Fix Unchecked Return Value

Properly check the return values of unmap_queues_cpsch() and
remove_all_kfd_queues_mes() in stop_cpsch(), as done elsewhere.
Signed-off-by: Sunday Clement Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 6e7bc983fc0b..4fbe865ff279 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1897,6 +1897,8 @@ fail_packet_manager_init: static int stop_cpsch(struct device_queue_manager *dqm) { + int ret = 0; + dqm_lock(dqm); if (!dqm->sched_running) { dqm_unlock(dqm); @@ -1904,9 +1906,10 @@ static int stop_cpsch(struct device_queue_manager *dqm) } if (!dqm->dev->kfd->shared_resources.enable_mes) - unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false); + ret = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, + 0, USE_DEFAULT_GRACE_PERIOD, false); else - remove_all_kfd_queues_mes(dqm); + ret = remove_all_kfd_queues_mes(dqm); dqm->sched_running = false; @@ -1920,7 +1923,7 @@ static int stop_cpsch(struct device_queue_manager *dqm) dqm->detect_hang_info = NULL; dqm_unlock(dqm); - return 0; + return ret; } static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, -- cgit v1.2.3 From 849ad2a3008957a4e1d846507799bd169f0e5d9c Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 11 Aug 2025 17:30:40 +0530 Subject: drm/amdgpu/pm: Add definition for gpu_metrics v1.9 Add gpu metrics definition which is only a set of gpu metrics attributes. A field is encoded by its id, type and number of instances. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 117 +++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 87814c2b526e..f92f78d5d330 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -532,6 +532,110 @@ struct metrics_table_header { uint8_t content_revision; }; +enum amdgpu_metrics_attr_id { + AMDGPU_METRICS_ATTR_ID_TEMPERATURE_HOTSPOT, + AMDGPU_METRICS_ATTR_ID_TEMPERATURE_MEM, + AMDGPU_METRICS_ATTR_ID_TEMPERATURE_VRSOC, + AMDGPU_METRICS_ATTR_ID_CURR_SOCKET_POWER, + AMDGPU_METRICS_ATTR_ID_AVERAGE_GFX_ACTIVITY, + AMDGPU_METRICS_ATTR_ID_AVERAGE_UMC_ACTIVITY, + AMDGPU_METRICS_ATTR_ID_MEM_MAX_BANDWIDTH, + AMDGPU_METRICS_ATTR_ID_ENERGY_ACCUMULATOR, + AMDGPU_METRICS_ATTR_ID_SYSTEM_CLOCK_COUNTER, + AMDGPU_METRICS_ATTR_ID_ACCUMULATION_COUNTER, + AMDGPU_METRICS_ATTR_ID_PROCHOT_RESIDENCY_ACC, + AMDGPU_METRICS_ATTR_ID_PPT_RESIDENCY_ACC, + AMDGPU_METRICS_ATTR_ID_SOCKET_THM_RESIDENCY_ACC, + AMDGPU_METRICS_ATTR_ID_VR_THM_RESIDENCY_ACC, + AMDGPU_METRICS_ATTR_ID_HBM_THM_RESIDENCY_ACC, + AMDGPU_METRICS_ATTR_ID_GFXCLK_LOCK_STATUS, + AMDGPU_METRICS_ATTR_ID_PCIE_LINK_WIDTH, + AMDGPU_METRICS_ATTR_ID_PCIE_LINK_SPEED, + AMDGPU_METRICS_ATTR_ID_XGMI_LINK_WIDTH, + AMDGPU_METRICS_ATTR_ID_XGMI_LINK_SPEED, + AMDGPU_METRICS_ATTR_ID_GFX_ACTIVITY_ACC, + AMDGPU_METRICS_ATTR_ID_MEM_ACTIVITY_ACC, + AMDGPU_METRICS_ATTR_ID_PCIE_BANDWIDTH_ACC, + AMDGPU_METRICS_ATTR_ID_PCIE_BANDWIDTH_INST, + AMDGPU_METRICS_ATTR_ID_PCIE_L0_TO_RECOV_COUNT_ACC, + AMDGPU_METRICS_ATTR_ID_PCIE_REPLAY_COUNT_ACC, + AMDGPU_METRICS_ATTR_ID_PCIE_REPLAY_ROVER_COUNT_ACC, + AMDGPU_METRICS_ATTR_ID_PCIE_NAK_SENT_COUNT_ACC, + 
AMDGPU_METRICS_ATTR_ID_PCIE_NAK_RCVD_COUNT_ACC, + AMDGPU_METRICS_ATTR_ID_XGMI_READ_DATA_ACC, + AMDGPU_METRICS_ATTR_ID_XGMI_WRITE_DATA_ACC, + AMDGPU_METRICS_ATTR_ID_XGMI_LINK_STATUS, + AMDGPU_METRICS_ATTR_ID_FIRMWARE_TIMESTAMP, + AMDGPU_METRICS_ATTR_ID_CURRENT_GFXCLK, + AMDGPU_METRICS_ATTR_ID_CURRENT_SOCCLK, + AMDGPU_METRICS_ATTR_ID_CURRENT_VCLK0, + AMDGPU_METRICS_ATTR_ID_CURRENT_DCLK0, + AMDGPU_METRICS_ATTR_ID_CURRENT_UCLK, + AMDGPU_METRICS_ATTR_ID_NUM_PARTITION, + AMDGPU_METRICS_ATTR_ID_PCIE_LC_PERF_OTHER_END_RECOVERY, + AMDGPU_METRICS_ATTR_ID_GFX_BUSY_INST, + AMDGPU_METRICS_ATTR_ID_JPEG_BUSY, + AMDGPU_METRICS_ATTR_ID_VCN_BUSY, + AMDGPU_METRICS_ATTR_ID_GFX_BUSY_ACC, + AMDGPU_METRICS_ATTR_ID_GFX_BELOW_HOST_LIMIT_PPT_ACC, + AMDGPU_METRICS_ATTR_ID_GFX_BELOW_HOST_LIMIT_THM_ACC, + AMDGPU_METRICS_ATTR_ID_GFX_LOW_UTILIZATION_ACC, + AMDGPU_METRICS_ATTR_ID_GFX_BELOW_HOST_LIMIT_TOTAL_ACC, + AMDGPU_METRICS_ATTR_ID_MAX, +}; + +enum amdgpu_metrics_attr_type { + AMDGPU_METRICS_TYPE_U8, + AMDGPU_METRICS_TYPE_S8, + AMDGPU_METRICS_TYPE_U16, + AMDGPU_METRICS_TYPE_S16, + AMDGPU_METRICS_TYPE_U32, + AMDGPU_METRICS_TYPE_S32, + AMDGPU_METRICS_TYPE_U64, + AMDGPU_METRICS_TYPE_S64, + AMDGPU_METRICS_TYPE_MAX, +}; + +enum amdgpu_metrics_attr_unit { + /* None */ + AMDGPU_METRICS_UNIT_NONE, + /* MHz*/ + AMDGPU_METRICS_UNIT_CLOCK_1, + /* Degree Celsius*/ + AMDGPU_METRICS_UNIT_TEMP_1, + /* Watts*/ + AMDGPU_METRICS_UNIT_POWER_1, + /* In nanoseconds*/ + AMDGPU_METRICS_UNIT_TIME_1, + /* In 10 nanoseconds*/ + AMDGPU_METRICS_UNIT_TIME_2, + /* Speed in GT/s */ + AMDGPU_METRICS_UNIT_SPEED_1, + /* Speed in 0.1 GT/s */ + AMDGPU_METRICS_UNIT_SPEED_2, + /* Bandwidth GB/s */ + AMDGPU_METRICS_UNIT_BW_1, + /* Data in KB */ + AMDGPU_METRICS_UNIT_DATA_1, + /* Percentage */ + AMDGPU_METRICS_UNIT_PERCENT, + AMDGPU_METRICS_UNIT_MAX, +}; + +#define AMDGPU_METRICS_ATTR_UNIT_MASK 0xFF000000 +#define AMDGPU_METRICS_ATTR_UNIT_SHIFT 24 +#define AMDGPU_METRICS_ATTR_TYPE_MASK 0x00F00000 +#define AMDGPU_METRICS_ATTR_TYPE_SHIFT 20 +#define AMDGPU_METRICS_ATTR_ID_MASK 0x000FFC00 +#define AMDGPU_METRICS_ATTR_ID_SHIFT 10 +#define AMDGPU_METRICS_ATTR_INST_MASK 0x000003FF +#define AMDGPU_METRICS_ATTR_INST_SHIFT 0 + +#define AMDGPU_METRICS_ENC_ATTR(unit, type, id, inst) \ + (((u64)(unit) << AMDGPU_METRICS_ATTR_UNIT_SHIFT) | \ + ((u64)(type) << AMDGPU_METRICS_ATTR_TYPE_SHIFT) | \ + ((u64)(id) << AMDGPU_METRICS_ATTR_ID_SHIFT) | (inst)) + /* * gpu_metrics_v1_0 is not recommended as it's not naturally aligned. * Use gpu_metrics_v1_1 or later instead. @@ -1221,6 +1325,19 @@ struct gpu_metrics_v1_8 { uint32_t pcie_lc_perf_other_end_recovery; }; +struct gpu_metrics_attr { + /* Field type encoded with AMDGPU_METRICS_ENC_ATTR */ + uint64_t attr_encoding; + /* Attribute value, depends on attr_encoding */ + void *attr_value; +}; + +struct gpu_metrics_v1_9 { + struct metrics_table_header common_header; + int attr_count; + struct gpu_metrics_attr metrics_attrs[]; +}; + /* * gpu_metrics_v2_0 is not recommended as it's not naturally aligned. * Use gpu_metrics_v2_1 or later instead. -- cgit v1.2.3 From ef4a4b878124ffe6afac5c7b1716308c6964bbdd Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 27 Oct 2025 15:14:38 +0200 Subject: drm/amd: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy(). 
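
For illustration, the calling idiom before and after this change (a
minimal sketch, not taken from any one hunk below):

    /* before: callers marked the device busy by hand */
    pm_runtime_mark_last_busy(dev);
    pm_runtime_put_autosuspend(dev);

    /* after: the put marks the device busy internally */
    pm_runtime_put_autosuspend(dev);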
Acked-by: Alex Deucher Signed-off-by: Sakari Ailus Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 16 ++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 25 ----------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 1 - drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 - drivers/gpu/drm/amd/pm/amdgpu_pm.c | 1 - 14 files changed, 4 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 6c62e27b9800..d31460a9e958 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -507,7 +507,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, pm_runtime_get_sync(adev_to_drm(adev)->dev); /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(adev_to_drm(adev)); - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 47e9bfba0642..9f96d568acf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -734,10 +734,8 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force) amdgpu_connector_update_scratch_regs(connector, ret); - if (!drm_kms_helper_is_poll_worker()) { - pm_runtime_mark_last_busy(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) pm_runtime_put_autosuspend(connector->dev->dev); - } return ret; } @@ -919,10 +917,8 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force) amdgpu_connector_update_scratch_regs(connector, ret); out: - if (!drm_kms_helper_is_poll_worker()) { - pm_runtime_mark_last_busy(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) pm_runtime_put_autosuspend(connector->dev->dev); - } return ret; } @@ -1146,10 +1142,8 @@ out: amdgpu_connector_update_scratch_regs(connector, ret); exit: - if (!drm_kms_helper_is_poll_worker()) { - pm_runtime_mark_last_busy(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) pm_runtime_put_autosuspend(connector->dev->dev); - } return ret; } @@ -1486,10 +1480,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) amdgpu_connector_update_scratch_regs(connector, ret); out: - if (!drm_kms_helper_is_poll_worker()) { - pm_runtime_mark_last_busy(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) pm_runtime_put_autosuspend(connector->dev->dev); - } if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || connector->connector_type == DRM_MODE_CONNECTOR_eDP) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index d3a5189bd512..62d43b8cbe58 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -129,7 +129,6 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || 
(se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); amdgpu_virt_disable_access_debugfs(adev); return -EINVAL; @@ -179,7 +178,6 @@ end: if (pm_pg_lock) mutex_unlock(&adev->pm.mutex); - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); amdgpu_virt_disable_access_debugfs(adev); @@ -255,7 +253,6 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off if (rd->id.use_grbm) { if ((rd->id.grbm.sh != 0xFFFFFFFF && rd->id.grbm.sh >= adev->gfx.config.max_sh_per_se) || (rd->id.grbm.se != 0xFFFFFFFF && rd->id.grbm.se >= adev->gfx.config.max_shader_engines)) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); amdgpu_virt_disable_access_debugfs(adev); mutex_unlock(&rd->lock); @@ -310,7 +307,6 @@ end: mutex_unlock(&rd->lock); - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); amdgpu_virt_disable_access_debugfs(adev); @@ -446,7 +442,6 @@ static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, siz amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id); mutex_unlock(&adev->grbm_idx_mutex); - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); if (!x) { @@ -557,7 +552,6 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, r = result; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); amdgpu_virt_disable_access_debugfs(adev); return r; @@ -617,7 +611,6 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user r = result; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); amdgpu_virt_disable_access_debugfs(adev); return r; @@ -676,7 +669,6 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, r = result; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); amdgpu_virt_disable_access_debugfs(adev); return r; @@ -736,7 +728,6 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user r = result; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); amdgpu_virt_disable_access_debugfs(adev); return r; @@ -795,7 +786,6 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, r = result; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); amdgpu_virt_disable_access_debugfs(adev); return r; @@ -855,7 +845,6 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * r = result; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); amdgpu_virt_disable_access_debugfs(adev); return r; @@ -1003,7 +992,6 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); if (r) { @@ -1094,7 +1082,6 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0); 
mutex_unlock(&adev->grbm_idx_mutex); - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); if (!x) { @@ -1192,7 +1179,6 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0); mutex_unlock(&adev->grbm_idx_mutex); - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); while (size) { @@ -1266,7 +1252,6 @@ static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user r = result; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return r; @@ -1315,7 +1300,6 @@ static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char r = result; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return r; @@ -1365,7 +1349,6 @@ static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf r = result; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return r; @@ -1414,7 +1397,6 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu r = result; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return r; @@ -1460,7 +1442,6 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, r = result; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return r; @@ -1501,7 +1482,6 @@ static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *bu r = result; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return r; @@ -1701,7 +1681,6 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) up_write(&adev->reset_domain->sem); - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return 0; @@ -1721,7 +1700,6 @@ static int amdgpu_debugfs_evict_vram(void *data, u64 *val) *val = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return 0; @@ -1742,7 +1720,6 @@ static int amdgpu_debugfs_evict_gtt(void *data, u64 *val) *val = amdgpu_ttm_evict_resources(adev, TTM_PL_TT); - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return 0; @@ -1762,7 +1739,6 @@ static int amdgpu_debugfs_benchmark(void *data, u64 val) r = amdgpu_benchmark(adev, val); - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return r; @@ -2014,7 +1990,6 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val) ret = -EINVAL; out: - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index c74b95bd41b5..b5d34797d606 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -332,8 +332,6 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, if (crtc->enabled) active = true; - pm_runtime_mark_last_busy(dev->dev); - adev = drm_to_adev(dev); /* if we have active crtcs and we don't have a power ref, * take the current one diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
index cee90f9e58a9..3776901bbb1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2228,7 +2228,6 @@ static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev) adev->pdev->bus->number, i); if (p) { pm_runtime_get_sync(&p->dev); - pm_runtime_mark_last_busy(&p->dev); pm_runtime_put_autosuspend(&p->dev); pci_dev_put(p); } @@ -2474,7 +2473,6 @@ retry_init: pm_runtime_allow(ddev->dev); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); pci_wake_from_d3(pdev, TRUE); @@ -2920,7 +2918,6 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) ret = amdgpu_runtime_idle_check_userq(dev); done: - pm_runtime_mark_last_busy(dev); pm_runtime_autosuspend(dev); return ret; } @@ -2956,7 +2953,6 @@ long amdgpu_drm_ioctl(struct file *filp, ret = drm_ioctl(filp, cmd, arg); - pm_runtime_mark_last_busy(dev->dev); out: pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 1fe31d2f2706..c7843e336310 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -250,7 +250,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) drv->signalled_wptr = am_fence->wptr; dma_fence_signal(fence); dma_fence_put(fence); - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); } while (last_seq != seq); @@ -928,7 +927,6 @@ static int gpu_recover_get(void *data, u64 *val) *val = atomic_read(&adev->reset_domain->reset_res); - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 29c927f4d6df..8b118c53f351 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1670,7 +1670,6 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, ret = amdgpu_gfx_run_cleaner_shader(adev, value); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b3e6b3fcdf2c..6ee77f431d56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1471,7 +1471,6 @@ error_pasid: kfree(fpriv); out_suspend: - pm_runtime_mark_last_busy(dev->dev); pm_put: pm_runtime_put_autosuspend(dev->dev); @@ -1539,7 +1538,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, kfree(fpriv); file_priv->driver_priv = NULL; - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c index 123bcf5c2bb1..bacf888735db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c @@ -101,7 +101,6 @@ static ssize_t amdgpu_rap_debugfs_write(struct file *f, const char __user *buf, } amdgpu_gfx_off_ctrl(adev, true); - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 7f899d3e1bfa..c8b4dd3ea5c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -4151,7 +4151,6 @@ static void amdgpu_ras_counte_dw(struct work_struct *work) atomic_set(&con->ras_ue_count, ue_count); } - 
pm_runtime_mark_last_busy(dev->dev); Out: pm_runtime_put_autosuspend(dev->dev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c index 41ebe690eeff..3739be1b71e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c @@ -159,7 +159,6 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u dev_err(adev->dev, "Invalid input: %s\n", str); } - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 3b1811b110a4..13cc5a686dfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -488,7 +488,6 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id) amdgpu_userq_cleanup(uq_mgr, queue, queue_id); mutex_unlock(&uq_mgr->userq_mutex); - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index f5d173f1ca3b..a085faac9fe1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1083,7 +1083,6 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) * for auto suspend */ if (pdd->runtime_inuse) { - pm_runtime_mark_last_busy(adev_to_drm(pdd->dev->adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(pdd->dev->adev)->dev); pdd->runtime_inuse = false; } diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index c83d69994380..c88a76cce401 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -174,7 +174,6 @@ static int amdgpu_pm_get_access_if_active(struct amdgpu_device *adev) */ static inline void amdgpu_pm_put_access(struct amdgpu_device *adev) { - pm_runtime_mark_last_busy(adev->dev); pm_runtime_put_autosuspend(adev->dev); } -- cgit v1.2.3 From 1bc9d39275e08853ff15410b4d530b46b546affb Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 27 Oct 2025 15:14:40 +0200 Subject: drm/radeon: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy(). 
Acked-by: Alex Deucher Signed-off-by: Sakari Ailus Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_acpi.c | 1 - drivers/gpu/drm/radeon/radeon_connectors.c | 20 +++++--------------- drivers/gpu/drm/radeon/radeon_display.c | 2 -- drivers/gpu/drm/radeon/radeon_drv.c | 2 -- drivers/gpu/drm/radeon/radeon_fbdev.c | 2 -- drivers/gpu/drm/radeon/radeon_kms.c | 4 ---- 6 files changed, 5 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c index 22ce61bdfc06..08f8ba4fd148 100644 --- a/drivers/gpu/drm/radeon/radeon_acpi.c +++ b/drivers/gpu/drm/radeon/radeon_acpi.c @@ -408,7 +408,6 @@ static int radeon_atif_handler(struct radeon_device *rdev, pm_runtime_get_sync(rdev_to_drm(rdev)->dev); /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(rdev_to_drm(rdev)); - pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); } } diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 9f6a3df951ba..012d8b2295b8 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -875,10 +875,8 @@ radeon_lvds_detect(struct drm_connector *connector, bool force) radeon_connector_update_scratch_regs(connector, ret); - if (!drm_kms_helper_is_poll_worker()) { - pm_runtime_mark_last_busy(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) pm_runtime_put_autosuspend(connector->dev->dev); - } return ret; } @@ -1066,10 +1064,8 @@ radeon_vga_detect(struct drm_connector *connector, bool force) radeon_connector_update_scratch_regs(connector, ret); out: - if (!drm_kms_helper_is_poll_worker()) { - pm_runtime_mark_last_busy(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) pm_runtime_put_autosuspend(connector->dev->dev); - } return ret; } @@ -1154,10 +1150,8 @@ radeon_tv_detect(struct drm_connector *connector, bool force) ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, false); radeon_connector_update_scratch_regs(connector, ret); - if (!drm_kms_helper_is_poll_worker()) { - pm_runtime_mark_last_busy(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) pm_runtime_put_autosuspend(connector->dev->dev); - } return ret; } @@ -1402,10 +1396,8 @@ out: } exit: - if (!drm_kms_helper_is_poll_worker()) { - pm_runtime_mark_last_busy(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) pm_runtime_put_autosuspend(connector->dev->dev); - } return ret; } @@ -1714,10 +1706,8 @@ radeon_dp_detect(struct drm_connector *connector, bool force) } out: - if (!drm_kms_helper_is_poll_worker()) { - pm_runtime_mark_last_busy(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) pm_runtime_put_autosuspend(connector->dev->dev); - } return ret; } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 351b9dfcdad8..35fb99bcd9a7 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -644,8 +644,6 @@ radeon_crtc_set_config(struct drm_mode_set *set, if (crtc->enabled) active = true; - pm_runtime_mark_last_busy(dev->dev); - rdev = dev->dev_private; /* if we have active crtcs and we don't have a power ref, take the current one */ diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 350f88af888d..26ad9adc5d8c 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -462,7 
+462,6 @@ static int radeon_pmops_runtime_idle(struct device *dev)
 }
 }
 
- pm_runtime_mark_last_busy(dev);
 pm_runtime_autosuspend(dev);
 /* we don't want the main rpm_idle to call suspend - we want to autosuspend */
 return 1;
@@ -484,7 +483,6 @@ long radeon_drm_ioctl(struct file *filp,
 
 ret = drm_ioctl(filp, cmd, arg);
 
- pm_runtime_mark_last_busy(dev->dev);
 pm_runtime_put_autosuspend(dev->dev);
 return ret;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c
index dc81b0c2dbff..4734dbd79c60 100644
--- a/drivers/gpu/drm/radeon/radeon_fbdev.c
+++ b/drivers/gpu/drm/radeon/radeon_fbdev.c
@@ -154,7 +154,6 @@ static int radeon_fbdev_fb_open(struct fb_info *info, int user)
 return 0;
 
 err_pm_runtime_mark_last_busy:
- pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev);
 pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev);
 return ret;
 }
@@ -164,7 +163,6 @@ static int radeon_fbdev_fb_release(struct fb_info *info, int user)
 {
 struct drm_fb_helper *fb_helper = info->par;
 struct radeon_device *rdev = fb_helper->dev->dev_private;
 
- pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev);
 pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev);
 return 0;
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index ba1446acd703..7cbe02ffb193 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -169,7 +169,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
 pm_runtime_set_autosuspend_delay(dev->dev, 5000);
 pm_runtime_set_active(dev->dev);
 pm_runtime_allow(dev->dev);
- pm_runtime_mark_last_busy(dev->dev);
 pm_runtime_put_autosuspend(dev->dev);
 }
 
@@ -676,7 +675,6 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 file_priv->driver_priv = fpriv;
 }
 
- pm_runtime_mark_last_busy(dev->dev);
 pm_runtime_put_autosuspend(dev->dev);
 return 0;
 
@@ -686,7 +684,6 @@ err_fpriv:
 kfree(fpriv);
 
 err_suspend:
- pm_runtime_mark_last_busy(dev->dev);
 pm_runtime_put_autosuspend(dev->dev);
 return r;
 }
@@ -736,7 +733,6 @@ void radeon_driver_postclose_kms(struct drm_device *dev,
 kfree(fpriv);
 file_priv->driver_priv = NULL;
 }
- pm_runtime_mark_last_busy(dev->dev);
 pm_runtime_put_autosuspend(dev->dev);
 }
-- cgit v1.2.3


From 0e64ee70ed8fb25e1a66502cf940017b9b2cc703 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä
Date: Fri, 10 Oct 2025 00:13:05 +0300
Subject: drm/i915: Rewrite icl_plane_min_width()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the ginormous switch statement in icl_plane_min_width()
with simple arithmetic.
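
Spot-checking the arithmetic against the values the switch statement
used to return (cpp being the bytes per pixel of the colour plane,
+4 for YUV semi-planar formats, +2 for everything else):

    C8            cpp=1: 16/1 + 2 = 18
    RGB565        cpp=2: 16/2 + 2 = 10
    XRGB8888      cpp=4: 16/4 + 2 =  6
    XRGB16161616F cpp=8: 16/8 + 2 =  4
    NV12 (Y)      cpp=1: 16/1 + 4 = 20
    P010 (Y)      cpp=2: 16/2 + 4 = 12

The only behavioural difference is for formats that previously hit the
"default: return 1" case, which now get the computed minimum as well.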
Signed-off-by: Ville Syrjälä Link: https://patch.msgid.link/20251009211313.30234-2-ville.syrjala@linux.intel.com Reviewed-by: Juha-Pekka Heikkila --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 43 +++++----------------- 1 file changed, 10 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 765d288cce2b..e16e42baf86c 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -394,40 +394,17 @@ static int icl_plane_min_width(const struct drm_framebuffer *fb, int color_plane, unsigned int rotation) { + int min_width; + + min_width = 16 / fb->format->cpp[color_plane]; + /* Wa_14011264657, Wa_14011050563: gen11+ */ - switch (fb->format->format) { - case DRM_FORMAT_C8: - return 18; - case DRM_FORMAT_RGB565: - return 10; - case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_XBGR8888: - case DRM_FORMAT_ARGB8888: - case DRM_FORMAT_ABGR8888: - case DRM_FORMAT_XRGB2101010: - case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_ARGB2101010: - case DRM_FORMAT_ABGR2101010: - case DRM_FORMAT_XVYU2101010: - case DRM_FORMAT_Y212: - case DRM_FORMAT_Y216: - return 6; - case DRM_FORMAT_NV12: - return 20; - case DRM_FORMAT_P010: - case DRM_FORMAT_P012: - case DRM_FORMAT_P016: - return 12; - case DRM_FORMAT_XRGB16161616F: - case DRM_FORMAT_XBGR16161616F: - case DRM_FORMAT_ARGB16161616F: - case DRM_FORMAT_ABGR16161616F: - case DRM_FORMAT_XVYU12_16161616: - case DRM_FORMAT_XVYU16161616: - return 4; - default: - return 1; - } + if (intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier)) + min_width += 4; + else + min_width += 2; + + return min_width; } static int xe3_plane_max_width(const struct drm_framebuffer *fb, -- cgit v1.2.3 From 369c8f97fbf90a1a90b109e23e4e8c3474651bcb Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 10 Oct 2025 00:13:06 +0300 Subject: drm/i915: Drop the min plane width w/a adl+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ADL+ no longer need the plane min width w/a (Wa_14011264657 or Wa_14011050563). Don't apply it there. DG2 still needs it. 
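
As a worked example of what this relaxes: with the workaround dropped,
a 4 bpp format such as XRGB8888 gets a minimum width of 16/4 = 4 pixels
instead of 16/4 + 2 = 6, and RGB565 drops from 10 to 8.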
Signed-off-by: Ville Syrjälä
Link: https://patch.msgid.link/20251009211313.30234-3-ville.syrjala@linux.intel.com
Reviewed-by: Juha-Pekka Heikkila
---
 drivers/gpu/drm/i915/display/skl_universal_plane.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index e16e42baf86c..a50d9c48b8ca 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -390,6 +390,13 @@ static int glk_plane_max_width(const struct drm_framebuffer *fb,
 }
 }
 
+static int adl_plane_min_width(const struct drm_framebuffer *fb,
+ int color_plane,
+ unsigned int rotation)
+{
+ return 16 / fb->format->cpp[color_plane];
+}
+
 static int icl_plane_min_width(const struct drm_framebuffer *fb,
 int color_plane,
 unsigned int rotation)
@@ -2816,7 +2823,10 @@ skl_universal_plane_create(struct intel_display *display,
 plane->max_height = icl_plane_max_height;
 plane->min_cdclk = icl_plane_min_cdclk;
 } else if (DISPLAY_VER(display) >= 11) {
- plane->min_width = icl_plane_min_width;
+ if (DISPLAY_VER(display) >= 14 || display->platform.alderlake_p)
+ plane->min_width = adl_plane_min_width;
+ else
+ plane->min_width = icl_plane_min_width;
 if (icl_is_hdr_plane(display, plane_id))
 plane->max_width = icl_hdr_plane_max_width;
 else
-- cgit v1.2.3


From 299843cd7b907f55aa8c4f1affe7ec5cf53be691 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä
Date: Fri, 10 Oct 2025 00:13:07 +0300
Subject: drm/i915: Implement .min_plane_width() for PTL+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PTL+ supposedly still has the same plane min width limit as ADL.
Check for it.

Signed-off-by: Ville Syrjälä
Link: https://patch.msgid.link/20251009211313.30234-4-ville.syrjala@linux.intel.com
Reviewed-by: Juha-Pekka Heikkila
---
 drivers/gpu/drm/i915/display/skl_universal_plane.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index a50d9c48b8ca..34b193d56763 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -2819,6 +2819,7 @@ skl_universal_plane_create(struct intel_display *display,
 intel_fbc_add_plane(skl_plane_fbc(display, pipe, plane_id), plane);
 
 if (DISPLAY_VER(display) >= 30) {
+ plane->min_width = adl_plane_min_width;
 plane->max_width = xe3_plane_max_width;
 plane->max_height = icl_plane_max_height;
 plane->min_cdclk = icl_plane_min_cdclk;
-- cgit v1.2.3


From 057ec016636b2a76dc10dd7d3dd186ed6809ccdb Mon Sep 17 00:00:00 2001
From: Ville Syrjälä
Date: Fri, 10 Oct 2025 00:13:08 +0300
Subject: drm/i915: Start checking plane min size for the chroma plane
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we check the plane min size only for the Y plane. Extend the
check to the CbCr plane as well. This will also allow us to remove the
planar format check from icl_plane_min_width() since the +2 on the
CbCr plane is equivalent to +4 on the Y plane.

I suspect this approach actually models the hardware issue more
accurately.
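
A worked example of the claimed equivalence, assuming NV12 with 4:2:0
subsampling: the CbCr plane has cpp=2 and half the luma width, so its
new minimum of 16/2 + 2 = 10 chroma pixels corresponds to 20 luma
pixels, exactly what the old +4 on the Y plane (16/1 + 4 = 20) enforced.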
Signed-off-by: Ville Syrjälä
Link: https://patch.msgid.link/20251009211313.30234-5-ville.syrjala@linux.intel.com
Reviewed-by: Juha-Pekka Heikkila
---
 drivers/gpu/drm/i915/display/skl_universal_plane.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index 34b193d56763..c657565a6b91 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -401,17 +401,8 @@ static int icl_plane_min_width(const struct drm_framebuffer *fb,
 int color_plane,
 unsigned int rotation)
 {
- int min_width;
-
- min_width = 16 / fb->format->cpp[color_plane];
-
 /* Wa_14011264657, Wa_14011050563: gen11+ */
- if (intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier))
- min_width += 4;
- else
- min_width += 2;
-
- return min_width;
+ return 16 / fb->format->cpp[color_plane] + 2;
 }
 
 static int xe3_plane_max_width(const struct drm_framebuffer *fb,
@@ -2062,6 +2062,7 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state)
 int uv_plane = 1;
 int ccs_plane = intel_fb_is_ccs_modifier(fb->modifier) ?
 skl_main_to_aux_plane(fb, uv_plane) : 0;
+ int min_width = intel_plane_min_width(plane, fb, uv_plane, rotation);
 int max_width = intel_plane_max_width(plane, fb, uv_plane, rotation);
 int max_height = intel_plane_max_height(plane, fb, uv_plane, rotation);
 int x = plane_state->uapi.src.x1 >> 17;
@@ -2080,11 +2072,11 @@
 u32 offset;
 
 /* FIXME not quite sure how/if these apply to the chroma plane */
- if (w > max_width || h > max_height) {
+ if (w > max_width || w < min_width || h > max_height || h < 1) {
 drm_dbg_kms(display->drm,
- "[PLANE:%d:%s] CbCr source size %dx%d too big (limit %dx%d)\n",
+ "[PLANE:%d:%s] requested CbCr source size %dx%d outside limits (min: %dx1 max: %dx%d)\n",
 plane->base.base.id, plane->base.name,
- w, h, max_width, max_height);
+ w, h, min_width, max_width, max_height);
 return -EINVAL;
 }
-- cgit v1.2.3


From 13a3118b267c7848db73b3f3bc7693e1a093af8d Mon Sep 17 00:00:00 2001
From: Ville Syrjälä
Date: Fri, 10 Oct 2025 00:13:09 +0300
Subject: drm/i915: Introduce intel_plane_min_height()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the skl+ plane size checks a bit more regular by adding
intel_plane_min_height() instead of using a hardcoded 1 everywhere.
v2: s/1/min_height/ one more time Signed-off-by: Ville Syrjälä Link: https://patch.msgid.link/20251009211313.30234-6-ville.syrjala@linux.intel.com Reviewed-by: Juha-Pekka Heikkila --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index c657565a6b91..1183c33ae8da 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1860,6 +1860,14 @@ static int intel_plane_min_width(struct intel_plane *plane, return 1; } +static int intel_plane_min_height(struct intel_plane *plane, + const struct drm_framebuffer *fb, + int color_plane, + unsigned int rotation) +{ + return 1; +} + static int intel_plane_max_width(struct intel_plane *plane, const struct drm_framebuffer *fb, int color_plane, @@ -1991,6 +1999,7 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) int w = drm_rect_width(&plane_state->uapi.src) >> 16; int h = drm_rect_height(&plane_state->uapi.src) >> 16; int min_width = intel_plane_min_width(plane, fb, 0, rotation); + int min_height = intel_plane_min_height(plane, fb, 0, rotation); int max_width = intel_plane_max_width(plane, fb, 0, rotation); int max_height = intel_plane_max_height(plane, fb, 0, rotation); unsigned int alignment = plane->min_alignment(plane, fb, 0); @@ -1998,11 +2007,11 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) u32 offset; int ret; - if (w > max_width || w < min_width || h > max_height || h < 1) { + if (w > max_width || w < min_width || h > max_height || h < min_height) { drm_dbg_kms(display->drm, - "[PLANE:%d:%s] requested Y/RGB source size %dx%d outside limits (min: %dx1 max: %dx%d)\n", + "[PLANE:%d:%s] requested Y/RGB source size %dx%d outside limits (min: %dx%d max: %dx%d)\n", plane->base.base.id, plane->base.name, - w, h, min_width, max_width, max_height); + w, h, min_width, min_height, max_width, max_height); return -EINVAL; } @@ -2063,6 +2072,7 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) int ccs_plane = intel_fb_is_ccs_modifier(fb->modifier) ? 
skl_main_to_aux_plane(fb, uv_plane) : 0; int min_width = intel_plane_min_width(plane, fb, uv_plane, rotation); + int min_height = intel_plane_min_height(plane, fb, uv_plane, rotation); int max_width = intel_plane_max_width(plane, fb, uv_plane, rotation); int max_height = intel_plane_max_height(plane, fb, uv_plane, rotation); int x = plane_state->uapi.src.x1 >> 17; @@ -2072,11 +2082,11 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) u32 offset; /* FIXME not quite sure how/if these apply to the chroma plane */ - if (w > max_width || w < min_width || h > max_height || h < 1) { + if (w > max_width || w < min_width || h > max_height || h < min_height) { drm_dbg_kms(display->drm, - "[PLANE:%d:%s] requested CbCr source size %dx%d outside limits (min: %dx1 max: %dx%d)\n", + "[PLANE:%d:%s] requested CbCr source size %dx%d outside limits (min: %dx%d max: %dx%d)\n", plane->base.base.id, plane->base.name, - w, h, min_width, max_width, max_height); + w, h, min_width, min_height, max_width, max_height); return -EINVAL; } -- cgit v1.2.3 From 50e6b6ad48066df6d7847ccc17fe77f400b39d79 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 10 Oct 2025 00:13:10 +0300 Subject: drm/i915: Remove pointless crtc hw.enable check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to check for crtc hw.enable because that would also imply that the plane is disabled and we would have bailed out already earlier. Signed-off-by: Ville Syrjälä Link: https://patch.msgid.link/20251009211313.30234-7-ville.syrjala@linux.intel.com Reviewed-by: Juha-Pekka Heikkila --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 1183c33ae8da..3d5ed1581cb5 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1756,8 +1756,7 @@ static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state, } /* Y-tiling is not supported in IF-ID Interlace mode */ - if (crtc_state->hw.enable && - crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE && + if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE && fb->modifier != DRM_FORMAT_MOD_LINEAR && fb->modifier != I915_FORMAT_MOD_X_TILED) { drm_dbg_kms(display->drm, -- cgit v1.2.3 From 744b861b37043f7ddec214e3fb648ea4fac707cb Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 10 Oct 2025 00:13:11 +0300 Subject: drm/i915: Extract glk_plane_has_planar() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract glk_plane_has_planar() out from skl_plane_has_planar() to make the logic a bit less convoluted. 
Signed-off-by: Ville Syrjälä Link: https://patch.msgid.link/20251009211313.30234-8-ville.syrjala@linux.intel.com Reviewed-by: Juha-Pekka Heikkila --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 3d5ed1581cb5..2cc9ed49a191 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -2425,7 +2425,7 @@ static bool skl_plane_has_planar(struct intel_display *display, if (display->platform.skylake || display->platform.broxton) return false; - if (DISPLAY_VER(display) == 9 && pipe == PIPE_C) + if (pipe == PIPE_C) return false; if (plane_id != PLANE_1 && plane_id != PLANE_2) @@ -2447,11 +2447,20 @@ static const u32 *skl_get_plane_formats(struct intel_display *display, } } +static bool glk_plane_has_planar(struct intel_display *display, + enum pipe pipe, enum plane_id plane_id) +{ + if (plane_id != PLANE_1 && plane_id != PLANE_2) + return false; + + return true; +} + static const u32 *glk_get_plane_formats(struct intel_display *display, enum pipe pipe, enum plane_id plane_id, int *num_formats) { - if (skl_plane_has_planar(display, pipe, plane_id)) { + if (glk_plane_has_planar(display, pipe, plane_id)) { *num_formats = ARRAY_SIZE(glk_planar_formats); return glk_planar_formats; } else { -- cgit v1.2.3 From 938c6c9be35e438c9c6b77c08f2d5f20c212cf6e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 10 Oct 2025 00:13:12 +0300 Subject: drm/i915: Unify the logic in {skl,glk}_plane_has_*() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the similar logic in skl_plane_has_planar(), glk_plane_has_planar() and skl_plane_has_rc_ccs() to avoid having to think too much when comparing the three. 
Signed-off-by: Ville Syrjälä
Link: https://patch.msgid.link/20251009211313.30234-9-ville.syrjala@linux.intel.com
Reviewed-by: Juha-Pekka Heikkila
---
 drivers/gpu/drm/i915/display/skl_universal_plane.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index 2cc9ed49a191..7663d6165093 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -2428,10 +2428,7 @@ static bool skl_plane_has_planar(struct intel_display *display,
 if (pipe == PIPE_C)
 return false;
 
- if (plane_id != PLANE_1 && plane_id != PLANE_2)
- return false;
-
- return true;
+ return plane_id == PLANE_1 || plane_id == PLANE_2;
 }
 
 static const u32 *skl_get_plane_formats(struct intel_display *display,
@@ -2450,10 +2447,7 @@ static const u32 *skl_get_plane_formats(struct intel_display *display,
 static bool glk_plane_has_planar(struct intel_display *display,
 enum pipe pipe, enum plane_id plane_id)
 {
- if (plane_id != PLANE_1 && plane_id != PLANE_2)
- return false;
-
- return true;
+ return plane_id == PLANE_1 || plane_id == PLANE_2;
 }
 
 static const u32 *glk_get_plane_formats(struct intel_display *display,
@@ -2700,8 +2694,10 @@ skl_plane_disable_flip_done(struct intel_plane *plane)
 static bool skl_plane_has_rc_ccs(struct intel_display *display,
 enum pipe pipe, enum plane_id plane_id)
 {
- return pipe != PIPE_C &&
- (plane_id == PLANE_1 || plane_id == PLANE_2);
+ if (pipe == PIPE_C)
+ return false;
+
+ return plane_id == PLANE_1 || plane_id == PLANE_2;
 }
 
 static u8 skl_plane_caps(struct intel_display *display,
-- cgit v1.2.3


From bbbfa70dfe00e78115474467903e28d6ac05be1f Mon Sep 17 00:00:00 2001
From: Naladala Ramanaidu
Date: Fri, 17 Oct 2025 20:35:26 +0530
Subject: drm/i915: Add fallback for CDCLK selection when min_cdclk is too high

In cases where the requested minimum CDCLK exceeds all available
values for the current reference clock, the CDCLK selection logic
previously returned 0. This could result in a division or modulo by
zero, as flagged by Coverity. Introduce a fallback mechanism that
returns the platform's max_cdclk_freq instead of 0.

v2: Update safe fallback value to max cdclk.
(Ville)
v3: Update commit message (Mika)

Signed-off-by: Naladala Ramanaidu
Reviewed-by: Mika Kahola
Signed-off-by: Suraj Kandpal
Link: https://patch.msgid.link/20251017150526.781715-1-ramanaidu.naladala@intel.com
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index e92e7fd9fd13..da97c38cca14 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1561,7 +1561,7 @@ static int bxt_calc_cdclk(struct intel_display *display, int min_cdclk)
 drm_WARN(display->drm, 1,
 "Cannot satisfy minimum cdclk %d with refclk %u\n",
 min_cdclk, display->cdclk.hw.ref);
- return 0;
+ return display->cdclk.max_cdclk_freq;
 }
 
 static int bxt_calc_cdclk_pll_vco(struct intel_display *display, int cdclk)
-- cgit v1.2.3


From 06a691f0e7d08fb52a6020bd5b1f1f9cd091aaae Mon Sep 17 00:00:00 2001
From: Imre Deak
Date: Tue, 28 Oct 2025 13:35:36 +0200
Subject: drm/i915/dp_mst: Fix check for FEC support for an uncompressed stream
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The reason for enabling FEC for an uncompressed stream on an MST link
is that the DSC compression is enabled for another stream on the same
link. For such an uncompressed stream FEC doesn't need to be supported
on the whole path until the (DP-SST) sink DPRX. For instance if a
branch device - like a monitor with an MST branch device within it -
is plugged into a DFP connector of an MST docking station and the
monitor's branch device does not support FEC, the docking station's
branch device will still enable the link to the monitor correctly,
disabling the FEC on that link as expected.

Since it's been verified already that FEC is supported for the
compressed stream above, the corresponding check for the uncompressed
stream can be dropped: the check for the compressed stream already
implies that FEC is supported on the link between the source DPTX and
the immediate downstream branch device. If FEC is not supported on the
whole path until the sink DPRX, FEC will be disabled by a downstream
branch device on the path as described above for the MST dock + MST
monitor configuration example.

This fixes a problem in the above MST dock + MST monitor example, where
the dock supports FEC, but the monitor doesn't support it and FEC gets
enabled on the link due to DSC getting enabled for another monitor's
stream on the same link.

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14254
Reviewed-by: Jouni Högander
Signed-off-by: Imre Deak
Link: https://patch.msgid.link/20251028113536.602352-1-imre.deak@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index a845b2612a3f..21a60b8c880e 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -299,7 +299,14 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp,
 * intel_dp_needs_8b10b_fec().
 */
 crtc_state->fec_enable = intel_dp_needs_8b10b_fec(crtc_state, dsc);
- if (crtc_state->fec_enable &&
+ /*
+ * If FEC gets enabled only because of another compressed stream, FEC
+ * may not be supported for this uncompressed stream on the whole link
+ * path until the sink DPRX.
In this case a downstream branch device
+ * will disable FEC for the uncompressed stream as expected and so the
+ * FEC support doesn't need to be checked for this uncompressed stream.
+ */
+ if (crtc_state->fec_enable && dsc &&
 !intel_dp_supports_fec(intel_dp, connector, crtc_state))
 return -EINVAL;
-- cgit v1.2.3


From e69b7a6bd4ea813282956873b63f8215f73ed966 Mon Sep 17 00:00:00 2001
From: Jouni Högander
Date: Thu, 23 Oct 2025 07:31:40 +0300
Subject: drm/i915/display: Take into account AS SDP in intel_dp_sdp_min_guardband
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We started seeing "[drm] *ERROR* Timed out waiting PSR idle state"
after taking the optimized guardband into use. These are seen because
VSC SDPs are sent on the same line as AS SDPs when AS SDP is enabled.

AS SDP is sent on the line configured in the EMP_AS_SDP_TL register.
We are configuring crtc_state->vrr.vsync_start into that register. Fix
this by ensuring the AS SDP is sent on a line which is within the
guardband. From the bspec:

EMP_AS_SDP_TL < SCL + Guardband

v2: check HAS_AS_SDP

Bspec: 71197
Fixes: 52ecd48b8d3f ("drm/i915/dp: Add helper to get min sdp guardband")
Cc: Ankit Nautiyal
Signed-off-by: Jouni Högander
Reviewed-by: Ville Syrjälä
Link: https://lore.kernel.org/r/20251023043140.961104-1-jouni.hogander@intel.com
---
 drivers/gpu/drm/i915/display/intel_dp.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 475518b4048b..a3391b17571c 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -7023,7 +7023,7 @@ int intel_dp_compute_config_late(struct intel_encoder *encoder,
 }
 
 static
-int intel_dp_get_lines_for_sdp(u32 type)
+int intel_dp_get_lines_for_sdp(const struct intel_crtc_state *crtc_state, u32 type)
 {
 switch (type) {
 case DP_SDP_VSC_EXT_VESA:
@@ -7033,6 +7033,8 @@ int intel_dp_get_lines_for_sdp(u32 type)
 return 8;
 case DP_SDP_PPS:
 return 7;
+ case DP_SDP_ADAPTIVE_SYNC:
+ return crtc_state->vrr.vsync_start + 1;
 default:
 break;
 }
@@ -7043,17 +7045,25 @@ int intel_dp_sdp_min_guardband(const struct intel_crtc_state *crtc_state,
 bool assume_all_enabled)
 {
+ struct intel_display *display = to_intel_display(crtc_state);
 int sdp_guardband = 0;
 
 if (assume_all_enabled ||
 crtc_state->infoframes.enable & intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA))
 sdp_guardband = max(sdp_guardband,
- intel_dp_get_lines_for_sdp(HDMI_PACKET_TYPE_GAMUT_METADATA));
+ intel_dp_get_lines_for_sdp(crtc_state,
+ HDMI_PACKET_TYPE_GAMUT_METADATA));
 
 if (assume_all_enabled || crtc_state->dsc.compression_enable)
- sdp_guardband = max(sdp_guardband, intel_dp_get_lines_for_sdp(DP_SDP_PPS));
+ sdp_guardband = max(sdp_guardband,
+ intel_dp_get_lines_for_sdp(crtc_state, DP_SDP_PPS));
+
+ if ((assume_all_enabled && HAS_AS_SDP(display)) ||
+ crtc_state->infoframes.enable & intel_hdmi_infoframe_enable(DP_SDP_ADAPTIVE_SYNC))
+ sdp_guardband = max(sdp_guardband,
+ intel_dp_get_lines_for_sdp(crtc_state, DP_SDP_ADAPTIVE_SYNC));
 
 return sdp_guardband;
 }
-- cgit v1.2.3


From d1ac2573f0085a5aab6f5d4cfa7b2818d703e5fe Mon Sep 17 00:00:00 2001
From: Nemesa Garg
Date: Tue, 28 Oct 2025 17:37:37 +0530
Subject: drm/drm_crtc: Introduce sharpness strength property

Introduce a new crtc property "SHARPNESS_STRENGTH" that allows the
user to set the intensity of the sharpening effect. The value of this
property can be set from 0-255.
v2: Rename crtc property variable [Arun]
    Add modeset detail in uapi doc[Uma]
v3: Fix build issue
v4: Modify the subject line[Ankit]

Signed-off-by: Nemesa Garg
Reviewed-by: Ankit Nautiyal
Tested-by: Adarsh G M
Acked-by: Simona Vetter
Link: https://invent.kde.org/plasma/kwin/-/merge_requests/7689
Link: https://patch.msgid.link/20251028120747.3027332-2-ankit.k.nautiyal@intel.com
Signed-off-by: Jani Nikula
---
 drivers/gpu/drm/drm_atomic_uapi.c |  4 ++++
 drivers/gpu/drm/drm_crtc.c        | 35 +++++++++++++++++++++++++++++++++++
 include/drm/drm_crtc.h            | 18 ++++++++++++++++++
 3 files changed, 57 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index 85dbdaa4a2e2..b2cb5ae5a139 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -419,6 +419,8 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		set_out_fence_for_crtc(state->state, crtc, fence_ptr);
 	} else if (property == crtc->scaling_filter_property) {
 		state->scaling_filter = val;
+	} else if (property == crtc->sharpness_strength_property) {
+		state->sharpness_strength = val;
 	} else if (crtc->funcs->atomic_set_property) {
 		return crtc->funcs->atomic_set_property(crtc, state, property, val);
 	} else {
@@ -456,6 +458,8 @@ drm_atomic_crtc_get_property(struct drm_crtc *crtc,
 		*val = 0;
 	else if (property == crtc->scaling_filter_property)
 		*val = state->scaling_filter;
+	else if (property == crtc->sharpness_strength_property)
+		*val = state->sharpness_strength;
 	else if (crtc->funcs->atomic_get_property)
 		return crtc->funcs->atomic_get_property(crtc, state, property, val);
 	else {
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 46655339003d..a7797d260f1e 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -229,6 +229,25 @@ struct dma_fence *drm_crtc_create_fence(struct drm_crtc *crtc)
  *	Driver's default scaling filter
  *	Nearest Neighbor:
  *	Nearest Neighbor scaling filter
+ * SHARPNESS_STRENGTH:
+ *	Atomic property for setting the sharpness strength/intensity from userspace.
+ *
+ *	The value of this property is an integer ranging
+ *	from 0 - 255 where:
+ *
+ *	0: Sharpness feature is disabled (default value).
+ *
+ *	1: Minimum sharpness.
+ *
+ *	255: Maximum sharpness.
+ *
+ *	Users can gradually increase or decrease the sharpness level and
+ *	set the optimum value depending on content.
+ *	This value is passed to the kernel through the UAPI.
+ *	Setting this property does not require a modeset.
+ *	The sharpness effect takes place post blending on the final composed output.
+ *	If the feature is disabled, the content remains the same without any sharpening
+ *	effect; when the feature is applied, it enhances the clarity of the content.
*/ __printf(6, 0) @@ -940,6 +959,22 @@ int drm_crtc_create_scaling_filter_property(struct drm_crtc *crtc, } EXPORT_SYMBOL(drm_crtc_create_scaling_filter_property); +int drm_crtc_create_sharpness_strength_property(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct drm_property *prop = + drm_property_create_range(dev, 0, "SHARPNESS_STRENGTH", 0, 255); + + if (!prop) + return -ENOMEM; + + crtc->sharpness_strength_property = prop; + drm_object_attach_property(&crtc->base, prop, 0); + + return 0; +} +EXPORT_SYMBOL(drm_crtc_create_sharpness_strength_property); + /** * drm_crtc_in_clone_mode - check if the given CRTC state is in clone mode * diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index caa56e039da2..bcdbde681986 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -317,6 +317,17 @@ struct drm_crtc_state { */ enum drm_scaling_filter scaling_filter; + /** + * @sharpness_strength: + * + * Used by the user to set the sharpness intensity. + * The value ranges from 0-255. + * Default value is 0 which disable the sharpness feature. + * Any value greater than 0 enables sharpening with the + * specified strength. + */ + u8 sharpness_strength; + /** * @event: * @@ -1088,6 +1099,12 @@ struct drm_crtc { */ struct drm_property *scaling_filter_property; + /** + * @sharpness_strength_property: property to apply + * the intensity of the sharpness requested. + */ + struct drm_property *sharpness_strength_property; + /** * @state: * @@ -1324,4 +1341,5 @@ static inline struct drm_crtc *drm_crtc_find(struct drm_device *dev, int drm_crtc_create_scaling_filter_property(struct drm_crtc *crtc, unsigned int supported_filters); bool drm_crtc_in_clone_mode(struct drm_crtc_state *crtc_state); +int drm_crtc_create_sharpness_strength_property(struct drm_crtc *crtc); #endif /* __DRM_CRTC_H__ */ -- cgit v1.2.3 From 72a583c2b4892164c1ccc20357652e703f23f44c Mon Sep 17 00:00:00 2001 From: Nemesa Garg Date: Tue, 28 Oct 2025 17:37:38 +0530 Subject: drm/i915/display: Introduce HAS_CASF for sharpness support Add HAS_CASF macro to check whether platform supports the content adaptive sharpness capability or not. 
v2: Update commit message[Ankit]
v3: Remove \n from middle[Jani]
v4: Remove the logging part

Signed-off-by: Nemesa Garg
Reviewed-by: Ankit Nautiyal
Link: https://patch.msgid.link/20251028120747.3027332-3-ankit.k.nautiyal@intel.com
Signed-off-by: Jani Nikula
---
 drivers/gpu/drm/i915/display/intel_display_device.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h
index 8fdb8a0a4282..ece66c8c4ce6 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.h
+++ b/drivers/gpu/drm/i915/display/intel_display_device.h
@@ -148,6 +148,7 @@ struct intel_display_platforms {
 #define HAS_AS_SDP(__display)		(DISPLAY_VER(__display) >= 13)
 #define HAS_AUX_CCS(__display)		(IS_DISPLAY_VER(__display, 9, 12) || (__display)->platform.alderlake_p || (__display)->platform.meteorlake)
 #define HAS_BIGJOINER(__display)	(DISPLAY_VER(__display) >= 11 && HAS_DSC(__display))
+#define HAS_CASF(__display)		(DISPLAY_VER(__display) >= 20)
 #define HAS_CDCLK_CRAWL(__display)	(DISPLAY_INFO(__display)->has_cdclk_crawl)
 #define HAS_CDCLK_SQUASH(__display)	(DISPLAY_INFO(__display)->has_cdclk_squash)
 #define HAS_CMRR(__display)		(DISPLAY_VER(__display) >= 20)
--
cgit v1.2.3
From 74ad9ec94b70e7ca7f8f713458b8f2704c191e2a Mon Sep 17 00:00:00 2001
From: Nemesa Garg
Date: Tue, 28 Oct 2025 17:37:39 +0530
Subject: drm/i915/display: Add CASF strength and winsize

Add register definitions for sharpness strength and filter window size
used by CASF. Provide functions to read and write these fields. The
sharpness strength value is determined by user input, while the winsize
is based on the resolution.

The casf_enable flag should be set if the platform supports sharpness
adjustments and the uapi strength is not zero. Once sharpness is
enabled, update the strength bits of the register whenever the user
changes the strength value, as the enable bit and winsize bits remain
constant. Introduce helpers to enable, disable and update strength. Add
the relevant strength and winsize handling in both enable and disable.
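(Illustrative example, not part of the commit: the strength programming
described above reduces to a small fixed-point conversion, restated here
as a standalone sketch mirroring the min(strength, 0xEF) + 0x10 helper
added by this patch; the function name is made up.)

/*
 * Hardware wants (1.0 + strength) in unsigned 4.4 fixed point.
 * A uapi strength of 0-255 is clamped to 239 (0xEF, i.e. 14.9375)
 * and then offset by 1.0 (0x10). Example: uapi 85 is 5.3125;
 * adding 1.0 gives 6.3125, which in 4.4 format is 101 (85 + 16).
 */
static u8 casf_uapi_strength_to_hw(u8 uapi_strength)
{
	return min_t(u8, uapi_strength, 0xEF) + 0x10;
}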
v2: Introduce get_config for casf[Ankit] v3: Replace 0 with FILTER_STRENGTH_MASK[Ankit] v4: After updating strength add win_sz register v5: Replace u16 with u32 for total_pixel v6: Add casf logging v7: Add helper for enable and disable casf Signed-off-by: Nemesa Garg Reviewed-by: Ankit Nautiyal Link: https://patch.msgid.link/20251028120747.3027332-4-ankit.k.nautiyal@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/display/intel_casf.c | 128 +++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_casf.h | 19 +++ drivers/gpu/drm/i915/display/intel_casf_regs.h | 22 ++++ .../gpu/drm/i915/display/intel_crtc_state_dump.c | 5 + drivers/gpu/drm/i915/display/intel_display_types.h | 7 ++ drivers/gpu/drm/i915/display/skl_scaler.c | 1 + drivers/gpu/drm/xe/Makefile | 1 + 8 files changed, 184 insertions(+) create mode 100644 drivers/gpu/drm/i915/display/intel_casf.c create mode 100644 drivers/gpu/drm/i915/display/intel_casf.h create mode 100644 drivers/gpu/drm/i915/display/intel_casf_regs.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 47bac9b2c611..be439fb45cca 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -234,6 +234,7 @@ i915-y += \ display/intel_bios.o \ display/intel_bo.o \ display/intel_bw.o \ + display/intel_casf.o \ display/intel_cdclk.o \ display/intel_cmtg.o \ display/intel_color.o \ diff --git a/drivers/gpu/drm/i915/display/intel_casf.c b/drivers/gpu/drm/i915/display/intel_casf.c new file mode 100644 index 000000000000..4949774bec7a --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_casf.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include + +#include "i915_reg.h" +#include "intel_casf.h" +#include "intel_casf_regs.h" +#include "intel_de.h" +#include "intel_display_regs.h" +#include "intel_display_types.h" + +#define MAX_PIXELS_FOR_3_TAP_FILTER (1920 * 1080) +#define MAX_PIXELS_FOR_5_TAP_FILTER (3840 * 2160) + +/** + * DOC: Content Adaptive Sharpness Filter (CASF) + * + * Starting from LNL the display engine supports an + * adaptive sharpening filter, enhancing the image + * quality. The display hardware utilizes the second + * pipe scaler for implementing CASF. + * If sharpness is being enabled then pipe scaling + * cannot be used. + * This filter operates on a region of pixels based + * on the tap size. Coefficients are used to generate + * an alpha value which blends the sharpened image to + * original image. 
+ */ + +void intel_casf_update_strength(struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + int win_size; + + intel_de_rmw(display, SHARPNESS_CTL(crtc->pipe), FILTER_STRENGTH_MASK, + FILTER_STRENGTH(crtc_state->hw.casf_params.strength)); + + win_size = intel_de_read(display, SKL_PS_WIN_SZ(crtc->pipe, 1)); + + intel_de_write_fw(display, SKL_PS_WIN_SZ(crtc->pipe, 1), win_size); +} + +static void intel_casf_compute_win_size(struct intel_crtc_state *crtc_state) +{ + const struct drm_display_mode *mode = &crtc_state->hw.adjusted_mode; + u32 total_pixels = mode->hdisplay * mode->vdisplay; + + if (total_pixels <= MAX_PIXELS_FOR_3_TAP_FILTER) + crtc_state->hw.casf_params.win_size = SHARPNESS_FILTER_SIZE_3X3; + else if (total_pixels <= MAX_PIXELS_FOR_5_TAP_FILTER) + crtc_state->hw.casf_params.win_size = SHARPNESS_FILTER_SIZE_5X5; + else + crtc_state->hw.casf_params.win_size = SHARPNESS_FILTER_SIZE_7X7; +} + +int intel_casf_compute_config(struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + if (!HAS_CASF(display)) + return 0; + + if (crtc_state->uapi.sharpness_strength == 0) { + crtc_state->hw.casf_params.casf_enable = false; + crtc_state->hw.casf_params.strength = 0; + return 0; + } + + crtc_state->hw.casf_params.casf_enable = true; + + /* + * HW takes a value in form (1.0 + strength) in 4.4 fixed format. + * Strength is from 0.0-14.9375 ie from 0-239. + * User can give value from 0-255 but is clamped to 239. + * Ex. User gives 85 which is 5.3125 and adding 1.0 gives 6.3125. + * 6.3125 in 4.4 format is b01100101 which is equal to 101. + * Also 85 + 16 = 101. + */ + crtc_state->hw.casf_params.strength = + min(crtc_state->uapi.sharpness_strength, 0xEF) + 0x10; + + intel_casf_compute_win_size(crtc_state); + + return 0; +} + +void intel_casf_sharpness_get_config(struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + u32 sharp; + + sharp = intel_de_read(display, SHARPNESS_CTL(crtc->pipe)); + if (sharp & FILTER_EN) { + if (drm_WARN_ON(display->drm, + REG_FIELD_GET(FILTER_STRENGTH_MASK, sharp) < 16)) + crtc_state->hw.casf_params.strength = 0; + else + crtc_state->hw.casf_params.strength = + REG_FIELD_GET(FILTER_STRENGTH_MASK, sharp); + crtc_state->hw.casf_params.casf_enable = true; + crtc_state->hw.casf_params.win_size = + REG_FIELD_GET(FILTER_SIZE_MASK, sharp); + } +} + +void intel_casf_enable(struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + u32 sharpness_ctl; + + sharpness_ctl = FILTER_EN | FILTER_STRENGTH(crtc_state->hw.casf_params.strength); + + sharpness_ctl |= crtc_state->hw.casf_params.win_size; + + intel_de_write(display, SHARPNESS_CTL(crtc->pipe), sharpness_ctl); +} + +void intel_casf_disable(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + + intel_de_write(display, SHARPNESS_CTL(crtc->pipe), 0); +} diff --git a/drivers/gpu/drm/i915/display/intel_casf.h b/drivers/gpu/drm/i915/display/intel_casf.h new file mode 100644 index 000000000000..753871880279 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_casf.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ 
+/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef __INTEL_CASF_H__ +#define __INTEL_CASF_H__ + +#include + +struct intel_crtc_state; + +int intel_casf_compute_config(struct intel_crtc_state *crtc_state); +void intel_casf_update_strength(struct intel_crtc_state *new_crtc_state); +void intel_casf_sharpness_get_config(struct intel_crtc_state *crtc_state); +void intel_casf_enable(struct intel_crtc_state *crtc_state); +void intel_casf_disable(const struct intel_crtc_state *crtc_state); + +#endif /* __INTEL_CASF_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_casf_regs.h b/drivers/gpu/drm/i915/display/intel_casf_regs.h new file mode 100644 index 000000000000..bd763efe5c1b --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_casf_regs.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef __INTEL_CASF_REGS_H__ +#define __INTEL_CASF_REGS_H__ + +#include "intel_display_reg_defs.h" + +#define _SHARPNESS_CTL_A 0x682B0 +#define _SHARPNESS_CTL_B 0x68AB0 +#define SHARPNESS_CTL(pipe) _MMIO_PIPE(pipe, _SHARPNESS_CTL_A, _SHARPNESS_CTL_B) +#define FILTER_EN REG_BIT(31) +#define FILTER_STRENGTH_MASK REG_GENMASK(15, 8) +#define FILTER_STRENGTH(x) REG_FIELD_PREP(FILTER_STRENGTH_MASK, (x)) +#define FILTER_SIZE_MASK REG_GENMASK(1, 0) +#define SHARPNESS_FILTER_SIZE_3X3 REG_FIELD_PREP(FILTER_SIZE_MASK, 0) +#define SHARPNESS_FILTER_SIZE_5X5 REG_FIELD_PREP(FILTER_SIZE_MASK, 1) +#define SHARPNESS_FILTER_SIZE_7X7 REG_FIELD_PREP(FILTER_SIZE_MASK, 2) + +#endif /* __INTEL_CASF_REGS__ */ diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index e6f300dbb5ee..c2a6217c2262 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -372,6 +372,11 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config, intel_vdsc_state_dump(&p, 0, pipe_config); + drm_printf(&p, "sharpness strength: %d, sharpness tap size: %d, sharpness enable: %d\n", + pipe_config->hw.casf_params.strength, + pipe_config->hw.casf_params.win_size, + pipe_config->hw.casf_params.casf_enable); + dump_planes: if (!state) return; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 5ae66b7444b6..2afb346249ef 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -961,6 +961,12 @@ enum intel_panel_replay_dsc_support { INTEL_DP_PANEL_REPLAY_DSC_SELECTIVE_UPDATE, }; +struct intel_casf { + u8 strength; + u8 win_size; + bool casf_enable; +}; + struct intel_crtc_state { /* * uapi (drm) state. This is the software state shown to userspace. 
@@ -997,6 +1003,7 @@ struct intel_crtc_state {
 		struct drm_property_blob *degamma_lut, *gamma_lut, *ctm;
 		struct drm_display_mode mode, pipe_mode, adjusted_mode;
 		enum drm_scaling_filter scaling_filter;
+		struct intel_casf casf_params;
 	} hw;

 	/* actual state of LUTs */
diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c
index d29efcbf2319..dbe1c4a63ec7 100644
--- a/drivers/gpu/drm/i915/display/skl_scaler.c
+++ b/drivers/gpu/drm/i915/display/skl_scaler.c
@@ -6,6 +6,7 @@
 #include

 #include "i915_utils.h"
+#include "intel_casf_regs.h"
 #include "intel_de.h"
 #include "intel_display_regs.h"
 #include "intel_display_trace.h"
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 5d6e80d541e0..58e1d68e495b 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -231,6 +231,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
 	i915-display/intel_backlight.o \
 	i915-display/intel_bios.o \
 	i915-display/intel_bw.o \
+	i915-display/intel_casf.o \
 	i915-display/intel_cdclk.o \
 	i915-display/intel_cmtg.o \
 	i915-display/intel_color.o \
--
cgit v1.2.3
From 515d1c895ffcd168b7b8b7bec1d6a2ec6edfe915 Mon Sep 17 00:00:00 2001
From: Nemesa Garg
Date: Tue, 28 Oct 2025 17:37:40 +0530
Subject: drm/i915/display: Add filter lut values

Add the register bits related to the filter LUT values and a helper to
load the CASF filter LUT. These are golden values, and they have to be
loaded once while enabling CASF.

v2: update commit message[Ankit]
v3: Add intel_casf prefix to filter_load fn[Jani]
v4: Define the filter macros here

Signed-off-by: Nemesa Garg
Reviewed-by: Ankit Nautiyal
Link: https://patch.msgid.link/20251028120747.3027332-5-ankit.k.nautiyal@intel.com
Signed-off-by: Jani Nikula
---
 drivers/gpu/drm/i915/display/intel_casf.c      | 49 ++++++++++++++++++++++++++
 drivers/gpu/drm/i915/display/intel_casf_regs.h | 11 ++++++
 2 files changed, 60 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_casf.c b/drivers/gpu/drm/i915/display/intel_casf.c
index 4949774bec7a..bd63fdafb898 100644
--- a/drivers/gpu/drm/i915/display/intel_casf.c
+++ b/drivers/gpu/drm/i915/display/intel_casf.c
@@ -13,6 +13,13 @@
 #define MAX_PIXELS_FOR_3_TAP_FILTER	(1920 * 1080)
 #define MAX_PIXELS_FOR_5_TAP_FILTER	(3840 * 2160)

+#define FILTER_COEFF_0_125 125
+#define FILTER_COEFF_0_25 250
+#define FILTER_COEFF_0_5 500
+#define FILTER_COEFF_1_0 1000
+#define FILTER_COEFF_0_0 0
+#define SET_POSITIVE_SIGN(x) ((x) & (~SIGN))
+
 /**
  * DOC: Content Adaptive Sharpness Filter (CASF)
  *
@@ -28,6 +35,46 @@
  * original image.
  */

+/* Default LUT values to be loaded one time.
*/
+static const u16 sharpness_lut[] = {
+	4095, 2047, 1364, 1022, 816, 678, 579,
+	504, 444, 397, 357, 323, 293, 268, 244, 224,
+	204, 187, 170, 154, 139, 125, 111, 98, 85,
+	73, 60, 48, 36, 24, 12, 0
+};
+
+const u16 filtercoeff_1[] = {
+	FILTER_COEFF_0_0, FILTER_COEFF_0_0, FILTER_COEFF_0_5,
+	FILTER_COEFF_1_0, FILTER_COEFF_0_5, FILTER_COEFF_0_0,
+	FILTER_COEFF_0_0,
+};
+
+const u16 filtercoeff_2[] = {
+	FILTER_COEFF_0_0, FILTER_COEFF_0_25, FILTER_COEFF_0_5,
+	FILTER_COEFF_1_0, FILTER_COEFF_0_5, FILTER_COEFF_0_25,
+	FILTER_COEFF_0_0,
+};
+
+const u16 filtercoeff_3[] = {
+	FILTER_COEFF_0_125, FILTER_COEFF_0_25, FILTER_COEFF_0_5,
+	FILTER_COEFF_1_0, FILTER_COEFF_0_5, FILTER_COEFF_0_25,
+	FILTER_COEFF_0_125,
+};
+
+static void intel_casf_filter_lut_load(struct intel_crtc *crtc,
+				       const struct intel_crtc_state *crtc_state)
+{
+	struct intel_display *display = to_intel_display(crtc_state);
+	int i;
+
+	intel_de_write(display, SHRPLUT_INDEX(crtc->pipe),
+		       INDEX_AUTO_INCR | INDEX_VALUE(0));
+
+	for (i = 0; i < ARRAY_SIZE(sharpness_lut); i++)
+		intel_de_write(display, SHRPLUT_DATA(crtc->pipe),
+			       sharpness_lut[i]);
+}
+
 void intel_casf_update_strength(struct intel_crtc_state *crtc_state)
 {
 	struct intel_display *display = to_intel_display(crtc_state);
@@ -112,6 +159,8 @@ void intel_casf_enable(struct intel_crtc_state *crtc_state)
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	u32 sharpness_ctl;

+	intel_casf_filter_lut_load(crtc, crtc_state);
+
 	sharpness_ctl = FILTER_EN | FILTER_STRENGTH(crtc_state->hw.casf_params.strength);

 	sharpness_ctl |= crtc_state->hw.casf_params.win_size;
diff --git a/drivers/gpu/drm/i915/display/intel_casf_regs.h b/drivers/gpu/drm/i915/display/intel_casf_regs.h
index bd763efe5c1b..87803cca510f 100644
--- a/drivers/gpu/drm/i915/display/intel_casf_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_casf_regs.h
@@ -19,4 +19,15 @@
 #define SHARPNESS_FILTER_SIZE_5X5	REG_FIELD_PREP(FILTER_SIZE_MASK, 1)
 #define SHARPNESS_FILTER_SIZE_7X7	REG_FIELD_PREP(FILTER_SIZE_MASK, 2)

+#define _SHRPLUT_DATA_A		0x682B8
+#define _SHRPLUT_DATA_B		0x68AB8
+#define SHRPLUT_DATA(pipe)	_MMIO_PIPE(pipe, _SHRPLUT_DATA_A, _SHRPLUT_DATA_B)
+
+#define _SHRPLUT_INDEX_A	0x682B4
+#define _SHRPLUT_INDEX_B	0x68AB4
+#define SHRPLUT_INDEX(pipe)	_MMIO_PIPE(pipe, _SHRPLUT_INDEX_A, _SHRPLUT_INDEX_B)
+#define INDEX_AUTO_INCR		REG_BIT(10)
+#define INDEX_VALUE_MASK	REG_GENMASK(4, 0)
+#define INDEX_VALUE(x)		REG_FIELD_PREP(INDEX_VALUE_MASK, (x))
+
 #endif /* __INTEL_CASF_REGS__ */
--
cgit v1.2.3
From 76f51cdc5d45a5497f7f2578ff81fd3efcabb3bd Mon Sep 17 00:00:00 2001
From: Nemesa Garg
Date: Tue, 28 Oct 2025 17:37:41 +0530
Subject: drm/i915/display: Compute the scaler coefficients

The sharpness property requires the use of one of the scalers, so the
sharpness scaler coefficient values need to be set. These values are
based on experiments and vary for different tap values/win sizes. They
are normalized by taking the sum of all values and then dividing each
value by that sum. Add a helper to compute and set the scaler
coefficients.
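(Illustrative example, not part of the commit: a worked instance of the
normalization. For the 3x3 filter set {0, 0, 0.5, 1.0, 0.5, 0, 0}, stored
scaled by 1000 as {0, 0, 500, 1000, 500, 0, 0}, the sum is 2000, so each
tap becomes value * 100 / 2000, i.e. {0, 0, 25, 50, 25, 0, 0} percent,
which then feeds the mantissa/exponent conversion. A standalone sketch;
the function name is made up.)

static void casf_normalize_coeffs(const u16 *in, u16 *out, int num_taps)
{
	u16 sum = 0;
	int i;

	/* Sum all taps. */
	for (i = 0; i < num_taps; i++)
		sum += in[i];

	/* Express each tap as a percentage of the sum. */
	for (i = 0; i < num_taps; i++)
		out[i] = in[i] * 100 / sum;
}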
v2: Fix ifndef header naming issue reported by kernel test robot
v3: Rename file name[Arun]
    Replace array size number with macro[Arun]
v4: Correct the register format[Jani]
    Add brief comment and explain about file[Jani]
    Remove coefficient value from crtc_state[Jani]
v5: Fix build issue
v6: Add new function for writing coefficients[Ankit]
v7: Add comments and add a scaler id check [Ankit]
v8: Remove casf_enable from here[Ankit]
v9: Removed REG and use shift operator[Jani]
v10: Remove filter macros
v11: Add casf_write_coeff function to casf_enable

Signed-off-by: Nemesa Garg
Reviewed-by: Ankit Nautiyal
Link: https://patch.msgid.link/20251028120747.3027332-6-ankit.k.nautiyal@intel.com
Signed-off-by: Jani Nikula
---
 drivers/gpu/drm/i915/display/intel_casf.c          | 99 ++++++++++++++++++++++
 drivers/gpu/drm/i915/display/intel_casf.h          |  1 +
 drivers/gpu/drm/i915/display/intel_display_types.h |  8 ++
 3 files changed, 108 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_casf.c b/drivers/gpu/drm/i915/display/intel_casf.c
index bd63fdafb898..ee24e5957d3d 100644
--- a/drivers/gpu/drm/i915/display/intel_casf.c
+++ b/drivers/gpu/drm/i915/display/intel_casf.c
@@ -130,6 +130,8 @@ int intel_casf_compute_config(struct intel_crtc_state *crtc_state)

 	intel_casf_compute_win_size(crtc_state);

+	intel_casf_scaler_compute_config(crtc_state);
+
 	return 0;
 }

@@ -153,6 +155,101 @@ void intel_casf_sharpness_get_config(struct intel_crtc_state *crtc_state)
 	}
 }

+static int casf_coeff_tap(int i)
+{
+	return i % SCALER_FILTER_NUM_TAPS;
+}
+
+static u32 casf_coeff(struct intel_crtc_state *crtc_state, int t)
+{
+	struct scaler_filter_coeff value;
+	u32 coeff;
+
+	value = crtc_state->hw.casf_params.coeff[t];
+	value.sign = 0;
+
+	coeff = value.sign << 15 | value.exp << 12 | value.mantissa << 3;
+	return coeff;
+}
+
+/*
+ * 17 phases of 7 taps require 119 coefficients in 60 dwords per set.
+ * To enable casf: program the scaler coefficients with the coefficients
+ * that are calculated and stored in hw.casf_params.coeff as per
+ * SCALER_COEFFICIENT_FORMAT
+ */
+static void intel_casf_write_coeff(struct intel_crtc_state *crtc_state)
+{
+	struct intel_display *display = to_intel_display(crtc_state);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	int id = crtc_state->scaler_state.scaler_id;
+	int i;
+
+	if (id != 1) {
+		drm_WARN(display->drm, 0, "Second scaler not enabled\n");
+		return;
+	}
+
+	intel_de_write_fw(display, GLK_PS_COEF_INDEX_SET(crtc->pipe, id, 0),
+			  PS_COEF_INDEX_AUTO_INC);
+
+	for (i = 0; i < 17 * SCALER_FILTER_NUM_TAPS; i += 2) {
+		u32 tmp;
+		int t;
+
+		t = casf_coeff_tap(i);
+		tmp = casf_coeff(crtc_state, t);
+
+		t = casf_coeff_tap(i + 1);
+		tmp |= casf_coeff(crtc_state, t) << 16;
+
+		intel_de_write_fw(display, GLK_PS_COEF_DATA_SET(crtc->pipe, id, 0),
+				  tmp);
+	}
+}
+
+static void convert_sharpness_coef_binary(struct scaler_filter_coeff *coeff,
+					  u16 coefficient)
+{
+	if (coefficient < 25) {
+		coeff->mantissa = (coefficient * 2048) / 100;
+		coeff->exp = 3;
+	} else if (coefficient < 50) {
+		coeff->mantissa = (coefficient * 1024) / 100;
+		coeff->exp = 2;
+	} else if (coefficient < 100) {
+		coeff->mantissa = (coefficient * 512) / 100;
+		coeff->exp = 1;
+	} else {
+		coeff->mantissa = (coefficient * 256) / 100;
+		coeff->exp = 0;
+	}
+}
+
+void intel_casf_scaler_compute_config(struct intel_crtc_state *crtc_state)
+{
+	const u16 *filtercoeff;
+	u16 filter_coeff[SCALER_FILTER_NUM_TAPS];
+	u16 sumcoeff = 0;
+	int i;
+
+	if (crtc_state->hw.casf_params.win_size == 0)
+		filtercoeff = filtercoeff_1;
+	else if (crtc_state->hw.casf_params.win_size == 1)
+		filtercoeff = filtercoeff_2;
+	else
+		filtercoeff = filtercoeff_3;
+
+	for (i = 0; i < SCALER_FILTER_NUM_TAPS; i++)
+		sumcoeff += *(filtercoeff + i);
+
+	for (i = 0; i < SCALER_FILTER_NUM_TAPS; i++) {
+		filter_coeff[i] = (*(filtercoeff + i) * 100 / sumcoeff);
+		convert_sharpness_coef_binary(&crtc_state->hw.casf_params.coeff[i],
+					      filter_coeff[i]);
+	}
+}
+
 void intel_casf_enable(struct intel_crtc_state *crtc_state)
 {
 	struct intel_display *display = to_intel_display(crtc_state);
@@ -161,6 +258,8 @@ void intel_casf_enable(struct intel_crtc_state *crtc_state)

 	intel_casf_filter_lut_load(crtc, crtc_state);

+	intel_casf_write_coeff(crtc_state);
+
 	sharpness_ctl = FILTER_EN | FILTER_STRENGTH(crtc_state->hw.casf_params.strength);

 	sharpness_ctl |= crtc_state->hw.casf_params.win_size;
diff --git a/drivers/gpu/drm/i915/display/intel_casf.h b/drivers/gpu/drm/i915/display/intel_casf.h
index 753871880279..203a5587921c 100644
--- a/drivers/gpu/drm/i915/display/intel_casf.h
+++ b/drivers/gpu/drm/i915/display/intel_casf.h
@@ -15,5 +15,6 @@ void intel_casf_update_strength(struct intel_crtc_state *new_crtc_state);
 void intel_casf_sharpness_get_config(struct intel_crtc_state *crtc_state);
 void intel_casf_enable(struct intel_crtc_state *crtc_state);
 void intel_casf_disable(const struct intel_crtc_state *crtc_state);
+void intel_casf_scaler_compute_config(struct intel_crtc_state *crtc_state);

 #endif /* __INTEL_CASF_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 2afb346249ef..00600134bda0 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -961,7 +961,15 @@ enum intel_panel_replay_dsc_support {
 	INTEL_DP_PANEL_REPLAY_DSC_SELECTIVE_UPDATE,
 };

+struct scaler_filter_coeff {
+	u16 
sign; + u16 exp; + u16 mantissa; +}; + struct intel_casf { + #define SCALER_FILTER_NUM_TAPS 7 + struct scaler_filter_coeff coeff[SCALER_FILTER_NUM_TAPS]; u8 strength; u8 win_size; bool casf_enable; -- cgit v1.2.3 From 82860cba5f4053f95414634e69d3229462b45422 Mon Sep 17 00:00:00 2001 From: Nemesa Garg Date: Tue, 28 Oct 2025 17:37:42 +0530 Subject: drm/i915/display: Add and compute scaler parameter Compute the values for second scaler for sharpness. Fill the register bits corresponding to the scaler. v1: Rename the title of patch [Ankit] v2: Remove setup_casf from here[Ankit] v3: Add skl_scaler_setup_casf in casf_enable Signed-off-by: Nemesa Garg Reviewed-by: Ankit Nautiyal Link: https://patch.msgid.link/20251028120747.3027332-7-ankit.k.nautiyal@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_casf.c | 6 ++++ drivers/gpu/drm/i915/display/skl_scaler.c | 46 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/display/skl_scaler.h | 2 ++ 3 files changed, 54 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_casf.c b/drivers/gpu/drm/i915/display/intel_casf.c index ee24e5957d3d..cdf1adc27afd 100644 --- a/drivers/gpu/drm/i915/display/intel_casf.c +++ b/drivers/gpu/drm/i915/display/intel_casf.c @@ -9,6 +9,7 @@ #include "intel_de.h" #include "intel_display_regs.h" #include "intel_display_types.h" +#include "skl_scaler.h" #define MAX_PIXELS_FOR_3_TAP_FILTER (1920 * 1080) #define MAX_PIXELS_FOR_5_TAP_FILTER (3840 * 2160) @@ -265,6 +266,8 @@ void intel_casf_enable(struct intel_crtc_state *crtc_state) sharpness_ctl |= crtc_state->hw.casf_params.win_size; intel_de_write(display, SHARPNESS_CTL(crtc->pipe), sharpness_ctl); + + skl_scaler_setup_casf(crtc_state); } void intel_casf_disable(const struct intel_crtc_state *crtc_state) @@ -272,5 +275,8 @@ void intel_casf_disable(const struct intel_crtc_state *crtc_state) struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + intel_de_write(display, SKL_PS_CTRL(crtc->pipe, 1), 0); + intel_de_write(display, SKL_PS_WIN_POS(crtc->pipe, 1), 0); intel_de_write(display, SHARPNESS_CTL(crtc->pipe), 0); + intel_de_write(display, SKL_PS_WIN_SZ(crtc->pipe, 1), 0); } diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index dbe1c4a63ec7..dbb810c0474c 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -739,6 +739,52 @@ static void skl_scaler_setup_filter(struct intel_display *display, } } +#define CASF_SCALER_FILTER_SELECT \ + (PS_FILTER_PROGRAMMED | \ + PS_Y_VERT_FILTER_SELECT(0) | \ + PS_Y_HORZ_FILTER_SELECT(0) | \ + PS_UV_VERT_FILTER_SELECT(0) | \ + PS_UV_HORZ_FILTER_SELECT(0)) + +void skl_scaler_setup_casf(struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_display *display = to_intel_display(crtc); + struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + struct intel_crtc_scaler_state *scaler_state = + &crtc_state->scaler_state; + struct drm_rect src, dest; + int id, width, height; + int x = 0, y = 0; + enum pipe pipe = crtc->pipe; + u32 ps_ctrl; + + width = adjusted_mode->crtc_hdisplay; + height = adjusted_mode->crtc_vdisplay; + + drm_rect_init(&dest, x, y, width, height); + + width = drm_rect_width(&dest); + height = drm_rect_height(&dest); + id = scaler_state->scaler_id; + + drm_rect_init(&src, 0, 0, + drm_rect_width(&crtc_state->pipe_src) << 16, + 
drm_rect_height(&crtc_state->pipe_src) << 16);
+
+	trace_intel_pipe_scaler_update_arm(crtc, id, x, y, width, height);
+
+	ps_ctrl = PS_SCALER_EN | PS_BINDING_PIPE | scaler_state->scalers[id].mode |
+		  CASF_SCALER_FILTER_SELECT;
+
+	intel_de_write_fw(display, SKL_PS_CTRL(pipe, id), ps_ctrl);
+	intel_de_write_fw(display, SKL_PS_WIN_POS(pipe, id),
+			  PS_WIN_XPOS(x) | PS_WIN_YPOS(y));
+	intel_de_write_fw(display, SKL_PS_WIN_SZ(pipe, id),
+			  PS_WIN_XSIZE(width) | PS_WIN_YSIZE(height));
+}
+
 void skl_pfit_enable(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_display *display = to_intel_display(crtc_state);
diff --git a/drivers/gpu/drm/i915/display/skl_scaler.h b/drivers/gpu/drm/i915/display/skl_scaler.h
index 5deabca909e6..7e8d819c019d 100644
--- a/drivers/gpu/drm/i915/display/skl_scaler.h
+++ b/drivers/gpu/drm/i915/display/skl_scaler.h
@@ -36,6 +36,8 @@ void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state);

 void skl_scaler_get_config(struct intel_crtc_state *crtc_state);

+void skl_scaler_setup_casf(struct intel_crtc_state *crtc_state);
+
 enum drm_mode_status
 skl_scaler_mode_valid(struct intel_display *display,
 		      const struct drm_display_mode *mode,
--
cgit v1.2.3
From 0672cf9828c461190c1be05803aad5b960702f76 Mon Sep 17 00:00:00 2001
From: Nemesa Garg
Date: Tue, 28 Oct 2025 17:37:43 +0530
Subject: drm/i915/display: Configure the second scaler

Both sharpness and the panel fitter use a pipe scaler, but only one can
be enabled at a time. Furthermore, sharpness uses the second scaler. So
for CASF, check that the second scaler is available and make sure that
only one of panel fitter or sharpness is enabled at a time.

v2: Add the panel fitting check before enabling sharpness
v3: Reframe commit message[Arun]
v4: Replace string based comparison with plane_state[Jani]
v5: Rebase
v6: Fix build issue
v7: Remove scaler id from verify_crtc_state[Ankit]
v8: Change the patch title. Add code comment. Move the config part in patch#6.
[Ankit] v9: Refactor the patch[Ankit] v10: Modify the header of patch[Ankit] Signed-off-by: Nemesa Garg Reviewed-by: Ankit Nautiyal Link: https://patch.msgid.link/20251028120747.3027332-8-ankit.k.nautiyal@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_casf.c | 8 ++++++++ drivers/gpu/drm/i915/display/intel_casf.h | 1 + drivers/gpu/drm/i915/display/intel_display.c | 4 +++- drivers/gpu/drm/i915/display/skl_scaler.c | 25 +++++++++++++++++++------ 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_casf.c b/drivers/gpu/drm/i915/display/intel_casf.c index cdf1adc27afd..95339b496f24 100644 --- a/drivers/gpu/drm/i915/display/intel_casf.c +++ b/drivers/gpu/drm/i915/display/intel_casf.c @@ -156,6 +156,14 @@ void intel_casf_sharpness_get_config(struct intel_crtc_state *crtc_state) } } +bool intel_casf_needs_scaler(const struct intel_crtc_state *crtc_state) +{ + if (crtc_state->hw.casf_params.casf_enable) + return true; + + return false; +} + static int casf_coeff_tap(int i) { return i % SCALER_FILTER_NUM_TAPS; diff --git a/drivers/gpu/drm/i915/display/intel_casf.h b/drivers/gpu/drm/i915/display/intel_casf.h index 203a5587921c..b3fb0bcb3f5b 100644 --- a/drivers/gpu/drm/i915/display/intel_casf.h +++ b/drivers/gpu/drm/i915/display/intel_casf.h @@ -16,5 +16,6 @@ void intel_casf_sharpness_get_config(struct intel_crtc_state *crtc_state); void intel_casf_enable(struct intel_crtc_state *crtc_state); void intel_casf_disable(const struct intel_crtc_state *crtc_state); void intel_casf_scaler_compute_config(struct intel_crtc_state *crtc_state); +bool intel_casf_needs_scaler(const struct intel_crtc_state *crtc_state); #endif /* __INTEL_CASF_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 2744f83bda2e..0435b5da674a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -60,6 +60,7 @@ #include "intel_audio.h" #include "intel_bo.h" #include "intel_bw.h" +#include "intel_casf.h" #include "intel_cdclk.h" #include "intel_clock_gating.h" #include "intel_color.h" @@ -4229,7 +4230,8 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state, if (DISPLAY_VER(display) >= 9) { if (intel_crtc_needs_modeset(crtc_state) || - intel_crtc_needs_fastset(crtc_state)) { + intel_crtc_needs_fastset(crtc_state) || + intel_casf_needs_scaler(crtc_state)) { ret = skl_update_scaler_crtc(crtc_state); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index dbb810c0474c..9439b1a65fa6 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -6,6 +6,7 @@ #include #include "i915_utils.h" +#include "intel_casf.h" #include "intel_casf_regs.h" #include "intel_de.h" #include "intel_display_regs.h" @@ -283,7 +284,8 @@ int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state) drm_rect_width(&crtc_state->pipe_src), drm_rect_height(&crtc_state->pipe_src), width, height, NULL, 0, - crtc_state->pch_pfit.enabled); + crtc_state->pch_pfit.enabled || + intel_casf_needs_scaler(crtc_state)); } /** @@ -322,7 +324,9 @@ int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, } static int intel_allocate_scaler(struct intel_crtc_scaler_state *scaler_state, - struct intel_crtc *crtc) + struct intel_crtc *crtc, + struct intel_plane_state *plane_state, + bool casf_scaler) { int i; @@ -330,6 +334,10 @@ static int 
intel_allocate_scaler(struct intel_crtc_scaler_state *scaler_state, if (scaler_state->scalers[i].in_use) continue; + /* CASF needs second scaler */ + if (!plane_state && casf_scaler && i != 1) + continue; + scaler_state->scalers[i].in_use = true; return i; @@ -380,7 +388,7 @@ static int intel_atomic_setup_scaler(struct intel_crtc_state *crtc_state, int num_scalers_need, struct intel_crtc *crtc, const char *name, int idx, struct intel_plane_state *plane_state, - int *scaler_id) + int *scaler_id, bool casf_scaler) { struct intel_display *display = to_intel_display(crtc); struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; @@ -389,7 +397,7 @@ static int intel_atomic_setup_scaler(struct intel_crtc_state *crtc_state, int vscale = 0; if (*scaler_id < 0) - *scaler_id = intel_allocate_scaler(scaler_state, crtc); + *scaler_id = intel_allocate_scaler(scaler_state, crtc, plane_state, casf_scaler); if (drm_WARN(display->drm, *scaler_id < 0, "Cannot find scaler for %s:%d\n", name, idx)) @@ -521,10 +529,14 @@ static int setup_crtc_scaler(struct intel_atomic_state *state, struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; + if (intel_casf_needs_scaler(crtc_state) && crtc_state->pch_pfit.enabled) + return -EINVAL; + return intel_atomic_setup_scaler(crtc_state, hweight32(scaler_state->scaler_users), crtc, "CRTC", crtc->base.base.id, - NULL, &scaler_state->scaler_id); + NULL, &scaler_state->scaler_id, + intel_casf_needs_scaler(crtc_state)); } static int setup_plane_scaler(struct intel_atomic_state *state, @@ -559,7 +571,8 @@ static int setup_plane_scaler(struct intel_atomic_state *state, return intel_atomic_setup_scaler(crtc_state, hweight32(scaler_state->scaler_users), crtc, "PLANE", plane->base.base.id, - plane_state, &plane_state->scaler_id); + plane_state, &plane_state->scaler_id, + false); } /** -- cgit v1.2.3 From 5f331b245293babfe1900edb0bf8f4a5fb2eeb2f Mon Sep 17 00:00:00 2001 From: Nemesa Garg Date: Tue, 28 Oct 2025 17:37:44 +0530 Subject: drm/i915/display: Set and get the casf config Set the configuration for CASF and capture it in crtc_state and get the configuration by reading back. Add the support to compare the software and hardware state of CASF. 
v2: Update subject[Ankit] v3: Add the state compare[Ankit] Signed-off-by: Nemesa Garg Reviewed-by: Ankit Nautiyal Link: https://patch.msgid.link/20251028120747.3027332-9-ankit.k.nautiyal@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 7 +++++++ drivers/gpu/drm/i915/display/skl_scaler.c | 19 +++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 0435b5da674a..c82439150eb9 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4228,6 +4228,10 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state, return ret; } + ret = intel_casf_compute_config(crtc_state); + if (ret) + return ret; + if (DISPLAY_VER(display) >= 9) { if (intel_crtc_needs_modeset(crtc_state) || intel_crtc_needs_fastset(crtc_state) || @@ -5294,6 +5298,9 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_I(scaler_state.scaler_id); PIPE_CONF_CHECK_I(pixel_rate); + PIPE_CONF_CHECK_BOOL(hw.casf_params.casf_enable); + PIPE_CONF_CHECK_I(hw.casf_params.win_size); + PIPE_CONF_CHECK_I(hw.casf_params.strength); PIPE_CONF_CHECK_X(gamma_mode); if (display->platform.cherryview) diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 9439b1a65fa6..92b920899dce 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -981,16 +981,23 @@ void skl_scaler_get_config(struct intel_crtc_state *crtc_state) continue; id = i; - crtc_state->pch_pfit.enabled = true; + + /* Read CASF regs for second scaler */ + if (HAS_CASF(display) && id == 1) + intel_casf_sharpness_get_config(crtc_state); + + if (!crtc_state->hw.casf_params.casf_enable) + crtc_state->pch_pfit.enabled = true; pos = intel_de_read(display, SKL_PS_WIN_POS(crtc->pipe, i)); size = intel_de_read(display, SKL_PS_WIN_SZ(crtc->pipe, i)); - drm_rect_init(&crtc_state->pch_pfit.dst, - REG_FIELD_GET(PS_WIN_XPOS_MASK, pos), - REG_FIELD_GET(PS_WIN_YPOS_MASK, pos), - REG_FIELD_GET(PS_WIN_XSIZE_MASK, size), - REG_FIELD_GET(PS_WIN_YSIZE_MASK, size)); + if (!crtc_state->hw.casf_params.casf_enable) + drm_rect_init(&crtc_state->pch_pfit.dst, + REG_FIELD_GET(PS_WIN_XPOS_MASK, pos), + REG_FIELD_GET(PS_WIN_YPOS_MASK, pos), + REG_FIELD_GET(PS_WIN_XSIZE_MASK, size), + REG_FIELD_GET(PS_WIN_YSIZE_MASK, size)); scaler_state->scalers[i].in_use = true; break; -- cgit v1.2.3 From 39f39d2e9d0edfbd4a2f26ab438b0982ef6611d1 Mon Sep 17 00:00:00 2001 From: Nemesa Garg Date: Tue, 28 Oct 2025 17:37:45 +0530 Subject: drm/i915/display: Enable/disable casf Call intel_casf_enable and intel_casf_disable in atomic commit path to enable and disable casf. Call intel_casf_update_strength to only update the desired strength value. v2: Introduce casf_enable here.[Ankit] v3: Use is_disabling in casf_disabling.[Ankit] v4: Swap old_state and new_state param.[Ankit] v5: In disable fn move win_sz after sharpness_ctl. v6: Rebase and update commit message. 
Signed-off-by: Nemesa Garg Reviewed-by: Ankit Nautiyal Link: https://patch.msgid.link/20251028120747.3027332-10-ankit.k.nautiyal@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c82439150eb9..94fff7c5f8dc 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -980,6 +980,24 @@ static bool audio_disabling(const struct intel_crtc_state *old_crtc_state, memcmp(old_crtc_state->eld, new_crtc_state->eld, MAX_ELD_BYTES) != 0); } +static bool intel_casf_enabling(const struct intel_crtc_state *new_crtc_state, + const struct intel_crtc_state *old_crtc_state) +{ + if (!new_crtc_state->hw.active) + return false; + + return is_enabling(hw.casf_params.casf_enable, old_crtc_state, new_crtc_state); +} + +static bool intel_casf_disabling(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + if (!new_crtc_state->hw.active) + return false; + + return is_disabling(hw.casf_params.casf_enable, old_crtc_state, new_crtc_state); +} + #undef is_disabling #undef is_enabling @@ -1135,6 +1153,9 @@ static void intel_pre_plane_update(struct intel_atomic_state *state, if (audio_disabling(old_crtc_state, new_crtc_state)) intel_encoders_audio_disable(state, crtc); + if (intel_casf_disabling(old_crtc_state, new_crtc_state)) + intel_casf_disable(new_crtc_state); + intel_drrs_deactivate(old_crtc_state); if (hsw_ips_pre_update(state, crtc)) @@ -6735,6 +6756,11 @@ static void intel_pre_update_crtc(struct intel_atomic_state *state, intel_vrr_set_transcoder_timings(new_crtc_state); } + if (intel_casf_enabling(new_crtc_state, old_crtc_state)) + intel_casf_enable(new_crtc_state); + else if (new_crtc_state->hw.casf_params.strength != old_crtc_state->hw.casf_params.strength) + intel_casf_update_strength(new_crtc_state); + intel_fbc_update(state, crtc); drm_WARN_ON(display->drm, !intel_display_power_is_enabled(display, POWER_DOMAIN_DC_OFF)); -- cgit v1.2.3 From 7460b69f1d61e8999e47b692ceb73c758fbebfcd Mon Sep 17 00:00:00 2001 From: Nemesa Garg Date: Tue, 28 Oct 2025 17:37:46 +0530 Subject: drm/i915/display: Expose sharpness strength property Expose the drm crtc sharpness strength property which will enable or disable the sharpness/casf based on user input. With this user can set/update the strength of the sharpness or casf filter. 
v2: Update subject[Ankit] Signed-off-by: Nemesa Garg Reviewed-by: Ankit Nautiyal Link: https://patch.msgid.link/20251028120747.3027332-11-ankit.k.nautiyal@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_crtc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index d300ba1dcd2c..802ae5aaece1 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -395,6 +395,9 @@ int intel_crtc_init(struct intel_display *display, enum pipe pipe) drm_WARN_ON(display->drm, drm_crtc_index(&crtc->base) != crtc->pipe); + if (HAS_CASF(display)) + drm_crtc_create_sharpness_strength_property(&crtc->base); + return 0; fail: -- cgit v1.2.3 From 23db1577ce2d6e6b1d1c8447dcc136dd2d443bd9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 29 Oct 2025 10:46:02 +0200 Subject: drm/i915/dsi: log send packet sequence errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We might be getting send packet sequence errors and never know. Log them as errors. Also upgrade the not supported read commands to errors. v2: Also error log -EOPNOTSUPP (Ville) Reviewed-by: Ville Syrjälä Link: https://patch.msgid.link/20251029084603.2254982-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 23402408e172..63837406d99b 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -106,6 +106,7 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, u8 type, flags, seq_port; u16 len; enum port port; + ssize_t ret; drm_dbg_kms(display->drm, "\n"); @@ -138,36 +139,39 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, switch (type) { case MIPI_DSI_GENERIC_SHORT_WRITE_0_PARAM: - mipi_dsi_generic_write(dsi_device, NULL, 0); + ret = mipi_dsi_generic_write(dsi_device, NULL, 0); break; case MIPI_DSI_GENERIC_SHORT_WRITE_1_PARAM: - mipi_dsi_generic_write(dsi_device, data, 1); + ret = mipi_dsi_generic_write(dsi_device, data, 1); break; case MIPI_DSI_GENERIC_SHORT_WRITE_2_PARAM: - mipi_dsi_generic_write(dsi_device, data, 2); + ret = mipi_dsi_generic_write(dsi_device, data, 2); break; case MIPI_DSI_GENERIC_READ_REQUEST_0_PARAM: case MIPI_DSI_GENERIC_READ_REQUEST_1_PARAM: case MIPI_DSI_GENERIC_READ_REQUEST_2_PARAM: - drm_dbg_kms(display->drm, "Generic Read not yet implemented or used\n"); + ret = -EOPNOTSUPP; break; case MIPI_DSI_GENERIC_LONG_WRITE: - mipi_dsi_generic_write(dsi_device, data, len); + ret = mipi_dsi_generic_write(dsi_device, data, len); break; case MIPI_DSI_DCS_SHORT_WRITE: - mipi_dsi_dcs_write_buffer(dsi_device, data, 1); + ret = mipi_dsi_dcs_write_buffer(dsi_device, data, 1); break; case MIPI_DSI_DCS_SHORT_WRITE_PARAM: - mipi_dsi_dcs_write_buffer(dsi_device, data, 2); + ret = mipi_dsi_dcs_write_buffer(dsi_device, data, 2); break; case MIPI_DSI_DCS_READ: - drm_dbg_kms(display->drm, "DCS Read not yet implemented or used\n"); + ret = -EOPNOTSUPP; break; case MIPI_DSI_DCS_LONG_WRITE: - mipi_dsi_dcs_write_buffer(dsi_device, data, len); + ret = mipi_dsi_dcs_write_buffer(dsi_device, data, len); break; } + if (ret < 0) + drm_err(display->drm, "DSI send packet failed with %pe\n", ERR_PTR(ret)); + if (DISPLAY_VER(display) < 
11) vlv_dsi_wait_for_fifo_empty(intel_dsi, port); -- cgit v1.2.3 From 427c69c7d4bc999a3661e028e9688f77fa3e17ed Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 29 Oct 2025 10:46:03 +0200 Subject: drm/i915/dsi: debug log send packet sequence contents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This might help debug issues better than just debug logging the function name. v2: Debug log type as hex (Ville) Reviewed-by: Ville Syrjälä Link: https://patch.msgid.link/20251029084603.2254982-2-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 63837406d99b..fac8729e442c 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -107,8 +107,7 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, u16 len; enum port port; ssize_t ret; - - drm_dbg_kms(display->drm, "\n"); + bool hs_mode; flags = *data++; type = *data++; @@ -130,13 +129,18 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, goto out; } - if ((flags >> MIPI_TRANSFER_MODE_SHIFT) & 1) + hs_mode = (flags >> MIPI_TRANSFER_MODE_SHIFT) & 1; + if (hs_mode) dsi_device->mode_flags &= ~MIPI_DSI_MODE_LPM; else dsi_device->mode_flags |= MIPI_DSI_MODE_LPM; dsi_device->channel = (flags >> MIPI_VIRTUAL_CHANNEL_SHIFT) & 3; + drm_dbg_kms(display->drm, "DSI packet: Port %c (seq %u), Flags 0x%02x, VC %u, %s, Type 0x%02x, Length %u, Data %*ph\n", + port_name(port), seq_port, flags, dsi_device->channel, + hs_mode ? "HS" : "LP", type, len, (int)len, data); + switch (type) { case MIPI_DSI_GENERIC_SHORT_WRITE_0_PARAM: ret = mipi_dsi_generic_write(dsi_device, NULL, 0); -- cgit v1.2.3 From 00423c4a7dd7790efb7454c0fdbe7cc33f893967 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 22 Oct 2025 18:17:09 +0300 Subject: drm/i915: split out separate files for jiffies timeout and wait helpers Add i915_jiffies.h and intel_display_jiffies.h for jiffies timeout and wait helpers, and use them separately from i915 and display. This helps reduce the display dependency on i915_utils.h. Long term, both msecs_to_jiffies_timeout() and wait_remaining_ms_from_jiffies() really belong in core kernel headers, but for now unblock display refactoring. 
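(Illustrative example, not part of the commit: the usage pattern of the
two helpers being moved, with do_other_work() as a made-up stand-in for
unrelated work between the two events.)

/* Sketch: guarantee that at least 10 ms elapse between event A and B. */
static void example_wait_between_events(void)
{
	unsigned long timestamp = jiffies;	/* event A happened */

	do_other_work();

	/* Just before event B: sleep only for whatever remains of 10 ms. */
	wait_remaining_ms_from_jiffies(timestamp, 10);
}

/*
 * msecs_to_jiffies_timeout() is msecs_to_jiffies() plus one jiffy of
 * slack, clamped to MAX_JIFFY_OFFSET, so the wait is at least the
 * requested duration when the result is used as a timeout.
 */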
Reviewed-by: Luca Coelho Link: https://patch.msgid.link/d8bc62b3a81afa05c849dde9b0f633572eaf5611.1761146196.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- .../gpu/drm/i915/display/intel_display_jiffies.h | 43 ++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dp.c | 1 + .../gpu/drm/i915/display/intel_dp_link_training.c | 1 + drivers/gpu/drm/i915/display/intel_hdcp.c | 1 + drivers/gpu/drm/i915/display/intel_pmdemand.c | 1 + drivers/gpu/drm/i915/display/intel_pps.c | 1 + drivers/gpu/drm/i915/display/intel_vblank.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_mman.c | 5 ++- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 + drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 1 + drivers/gpu/drm/i915/gt/selftest_execlists.c | 1 + drivers/gpu/drm/i915/i915_config.c | 2 +- drivers/gpu/drm/i915/i915_jiffies.h | 16 ++++++++ drivers/gpu/drm/i915/i915_utils.h | 34 ----------------- drivers/gpu/drm/i915/selftests/i915_selftest.c | 1 + 15 files changed, 73 insertions(+), 37 deletions(-) create mode 100644 drivers/gpu/drm/i915/display/intel_display_jiffies.h create mode 100644 drivers/gpu/drm/i915/i915_jiffies.h diff --git a/drivers/gpu/drm/i915/display/intel_display_jiffies.h b/drivers/gpu/drm/i915/display/intel_display_jiffies.h new file mode 100644 index 000000000000..c060c567e262 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_display_jiffies.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __INTEL_DISPLAY_JIFFIES_H__ +#define __INTEL_DISPLAY_JIFFIES_H__ + +#include + +static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m) +{ + unsigned long j = msecs_to_jiffies(m); + + return min_t(unsigned long, MAX_JIFFY_OFFSET, j + 1); +} + +/* + * If you need to wait X milliseconds between events A and B, but event B + * doesn't happen exactly after event A, you record the timestamp (jiffies) of + * when event A happened, then just before event B you call this function and + * pass the timestamp as the first argument, and X as the second argument. + */ +static inline void +wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) +{ + unsigned long target_jiffies, tmp_jiffies, remaining_jiffies; + + /* + * Don't re-read the value of "jiffies" every time since it may change + * behind our back and break the math. 
+ */ + tmp_jiffies = jiffies; + target_jiffies = timestamp_jiffies + + msecs_to_jiffies_timeout(to_wait_ms); + + if (time_after(target_jiffies, tmp_jiffies)) { + remaining_jiffies = target_jiffies - tmp_jiffies; + while (remaining_jiffies) + remaining_jiffies = + schedule_timeout_uninterruptible(remaining_jiffies); + } +} + +#endif /* __INTEL_DISPLAY_JIFFIES_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index a3391b17571c..0277ac5fc64f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -64,6 +64,7 @@ #include "intel_ddi.h" #include "intel_de.h" #include "intel_display_driver.h" +#include "intel_display_jiffies.h" #include "intel_display_regs.h" #include "intel_display_rpm.h" #include "intel_display_types.h" diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 27f3716bdc1f..316b42b5a946 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -29,6 +29,7 @@ #include "i915_utils.h" #include "intel_display_core.h" +#include "intel_display_jiffies.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_link_training.h" diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 531ee122bf82..0f624004104f 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -22,6 +22,7 @@ #include "i915_utils.h" #include "intel_connector.h" #include "intel_de.h" +#include "intel_display_jiffies.h" #include "intel_display_power.h" #include "intel_display_power_well.h" #include "intel_display_regs.h" diff --git a/drivers/gpu/drm/i915/display/intel_pmdemand.c b/drivers/gpu/drm/i915/display/intel_pmdemand.c index d806c15db7ce..7ade35e05eb5 100644 --- a/drivers/gpu/drm/i915/display/intel_pmdemand.c +++ b/drivers/gpu/drm/i915/display/intel_pmdemand.c @@ -12,6 +12,7 @@ #include "intel_bw.h" #include "intel_cdclk.h" #include "intel_de.h" +#include "intel_display_jiffies.h" #include "intel_display_regs.h" #include "intel_display_trace.h" #include "intel_pmdemand.h" diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 327e0de86f1e..0c2f48626d0c 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -12,6 +12,7 @@ #include "i915_reg.h" #include "i915_utils.h" #include "intel_de.h" +#include "intel_display_jiffies.h" #include "intel_display_power_well.h" #include "intel_display_regs.h" #include "intel_display_types.h" diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 2fc0c1c0bb87..0e534be375ca 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -12,6 +12,7 @@ #include "intel_color.h" #include "intel_crtc.h" #include "intel_de.h" +#include "intel_display_jiffies.h" #include "intel_display_regs.h" #include "intel_display_types.h" #include "intel_vblank.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 75f5b0e871ef..4542135b20d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -16,12 +16,13 @@ #include "i915_gem_evict.h" #include "i915_gem_gtt.h" #include "i915_gem_ioctls.h" -#include "i915_gem_object.h" #include "i915_gem_mman.h" 
+#include "i915_gem_object.h" +#include "i915_gem_ttm.h" +#include "i915_jiffies.h" #include "i915_mm.h" #include "i915_trace.h" #include "i915_user_extensions.h" -#include "i915_gem_ttm.h" #include "i915_vma.h" static inline bool diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 1f4814968868..045fbdbbe183 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -10,6 +10,7 @@ #include #include "i915_drv.h" +#include "i915_jiffies.h" #include "i915_ttm_buddy_manager.h" #include "intel_memory_region.h" #include "intel_region_ttm.h" diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 8d4bb95f8424..dcd3a3047aa4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -4,6 +4,7 @@ */ #include "i915_drv.h" +#include "i915_jiffies.h" #include "i915_request.h" #include "intel_context.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 0454eb1814bb..376f201a7650 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -11,6 +11,7 @@ #include "gt/intel_reset.h" #include "gt/selftest_engine_heartbeat.h" +#include "i915_jiffies.h" #include "i915_selftest.h" #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" diff --git a/drivers/gpu/drm/i915/i915_config.c b/drivers/gpu/drm/i915/i915_config.c index 24e5bb8a670e..3cb615ffa96d 100644 --- a/drivers/gpu/drm/i915/i915_config.c +++ b/drivers/gpu/drm/i915/i915_config.c @@ -6,7 +6,7 @@ #include #include "i915_config.h" -#include "i915_utils.h" +#include "i915_jiffies.h" unsigned long i915_fence_context_timeout(const struct drm_i915_private *i915, u64 context) diff --git a/drivers/gpu/drm/i915/i915_jiffies.h b/drivers/gpu/drm/i915/i915_jiffies.h new file mode 100644 index 000000000000..18a4eaea897a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_jiffies.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_JIFFIES_H__ +#define __I915_JIFFIES_H__ + +#include + +static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m) +{ + unsigned long j = msecs_to_jiffies(m); + + return min_t(unsigned long, MAX_JIFFY_OFFSET, j + 1); +} + +#endif /* __I915_JIFFIES_H__ */ diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index a0c892e4c40d..44de4a4aa84a 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -100,40 +100,6 @@ static inline bool is_power_of_2_u64(u64 n) return (n != 0 && ((n & (n - 1)) == 0)); } -static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m) -{ - unsigned long j = msecs_to_jiffies(m); - - return min_t(unsigned long, MAX_JIFFY_OFFSET, j + 1); -} - -/* - * If you need to wait X milliseconds between events A and B, but event B - * doesn't happen exactly after event A, you record the timestamp (jiffies) of - * when event A happened, then just before event B you call this function and - * pass the timestamp as the first argument, and X as the second argument. 
- */ -static inline void -wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) -{ - unsigned long target_jiffies, tmp_jiffies, remaining_jiffies; - - /* - * Don't re-read the value of "jiffies" every time since it may change - * behind our back and break the math. - */ - tmp_jiffies = jiffies; - target_jiffies = timestamp_jiffies + - msecs_to_jiffies_timeout(to_wait_ms); - - if (time_after(target_jiffies, tmp_jiffies)) { - remaining_jiffies = target_jiffies - tmp_jiffies; - while (remaining_jiffies) - remaining_jiffies = - schedule_timeout_uninterruptible(remaining_jiffies); - } -} - #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 9c276c9d0a75..8460f0a70d04 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -30,6 +30,7 @@ #include "i915_driver.h" #include "i915_drv.h" +#include "i915_jiffies.h" #include "i915_selftest.h" #include "i915_wait_util.h" #include "igt_flush_test.h" -- cgit v1.2.3 From 7c15791d1986533818e1a53b78c4c116a944e660 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 22 Oct 2025 18:17:10 +0300 Subject: drm/i915/display: create intel_display_utils.h Start a file for display specific generic utilities. Move KHz() and MHz() helpers there first. Reviewed-by: Luca Coelho Link: https://patch.msgid.link/dbdd1915466850293b9737b751170dd225197873.1761146196.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_backlight.c | 2 +- drivers/gpu/drm/i915/display/intel_ddi.c | 1 + drivers/gpu/drm/i915/display/intel_display_utils.h | 10 ++++++++++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 1 + drivers/gpu/drm/i915/i915_utils.h | 3 --- 6 files changed, 14 insertions(+), 5 deletions(-) create mode 100644 drivers/gpu/drm/i915/display/intel_display_utils.h diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 3b14f929825a..a68fdbd2acb9 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -13,7 +13,6 @@ #include #include "i915_reg.h" -#include "i915_utils.h" #include "intel_backlight.h" #include "intel_backlight_regs.h" #include "intel_connector.h" @@ -21,6 +20,7 @@ #include "intel_display_regs.h" #include "intel_display_rpm.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dp_aux_backlight.h" #include "intel_dsi_dcs_backlight.h" #include "intel_panel.h" diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 870140340342..cd9fc3d1f36a 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -53,6 +53,7 @@ #include "intel_display_power.h" #include "intel_display_regs.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dkl_phy.h" #include "intel_dkl_phy_regs.h" #include "intel_dp.h" diff --git a/drivers/gpu/drm/i915/display/intel_display_utils.h b/drivers/gpu/drm/i915/display/intel_display_utils.h new file mode 100644 index 000000000000..0a2b603ea856 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_display_utils.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __INTEL_DISPLAY_UTILS__ +#define __INTEL_DISPLAY_UTILS__ + +#define KHz(x) 
(1000 * (x)) +#define MHz(x) KHz(1000 * (x)) + +#endif /* __INTEL_DISPLAY_UTILS__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 21a60b8c880e..4c0b943fe86f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -33,7 +33,6 @@ #include #include -#include "i915_utils.h" #include "intel_atomic.h" #include "intel_audio.h" #include "intel_connector.h" @@ -43,6 +42,7 @@ #include "intel_display_driver.h" #include "intel_display_regs.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dp.h" #include "intel_dp_hdcp.h" #include "intel_dp_link_training.h" diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 8ea96cc524a1..900a945ff8ab 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -32,6 +32,7 @@ #include "intel_de.h" #include "intel_display_regs.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dkl_phy.h" #include "intel_dkl_phy_regs.h" #include "intel_dpio_phy.h" diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 44de4a4aa84a..c1f978a7c141 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -100,9 +100,6 @@ static inline bool is_power_of_2_u64(u64 n) return (n != 0 && ((n & (n - 1)) == 0)); } -#define KHz(x) (1000 * (x)) -#define MHz(x) KHz(1000 * (x)) - void add_taint_for_CI(struct drm_i915_private *i915, unsigned int taint); static inline void __add_taint_for_CI(unsigned int taint) { -- cgit v1.2.3 From cd81a70d764386f5dd4423850d6250fdb4cb7660 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 22 Oct 2025 18:17:11 +0300 Subject: drm/i915/display: add intel_display_run_as_guest() Add intel_display_utils.c for display utilities that need more than a header. Start off with intel_display_run_as_guest(). The implementation is an intentional duplicate of i915_run_as_guest() from i915_utils.h; it is small enough that the duplication does not matter.
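For orientation, a sketch of the sole call site this patch converts, lifted from the intel_pch.c hunk below; everything named here comes from that hunk, nothing new is assumed:

	/* In intel_pch_detect(): display code can now ask whether it runs
	 * as a guest without reaching into i915 core for i915_utils.h.
	 */
	if (intel_display_run_as_guest(display) && HAS_DISPLAY(display)) {
		intel_virt_detect_pch(display, &id, &pch_type);
		display->pch_type = pch_type;
	}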
Reviewed-by: Luca Coelho Link: https://patch.msgid.link/469f9c41e0c3e3099314a3cf1a7671bf36ec8ffd.1761146196.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/display/intel_display_utils.c | 18 ++++++++++++++++++ drivers/gpu/drm/i915/display/intel_display_utils.h | 6 ++++++ drivers/gpu/drm/i915/display/intel_pch.c | 4 ++-- drivers/gpu/drm/xe/Makefile | 1 + 5 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/display/intel_display_utils.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index be439fb45cca..4d107d114690 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -256,6 +256,7 @@ i915-y += \ display/intel_display_rpm.o \ display/intel_display_rps.o \ display/intel_display_snapshot.o \ + display/intel_display_utils.o \ display/intel_display_wa.o \ display/intel_dmc.o \ display/intel_dmc_wl.o \ diff --git a/drivers/gpu/drm/i915/display/intel_display_utils.c b/drivers/gpu/drm/i915/display/intel_display_utils.c new file mode 100644 index 000000000000..13d3999dd580 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_display_utils.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#ifdef CONFIG_X86 +#include <asm/hypervisor.h> +#endif + +#include "intel_display_utils.h" + +bool intel_display_run_as_guest(struct intel_display *display) +{ +#if IS_ENABLED(CONFIG_X86) + return !hypervisor_is_type(X86_HYPER_NATIVE); +#else + /* Not supported yet */ + return false; +#endif +} diff --git a/drivers/gpu/drm/i915/display/intel_display_utils.h b/drivers/gpu/drm/i915/display/intel_display_utils.h index 0a2b603ea856..e54e69afd959 100644 --- a/drivers/gpu/drm/i915/display/intel_display_utils.h +++ b/drivers/gpu/drm/i915/display/intel_display_utils.h @@ -4,7 +4,13 @@ #ifndef __INTEL_DISPLAY_UTILS__ #define __INTEL_DISPLAY_UTILS__ +#include <linux/types.h> + +struct intel_display; + #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) +bool intel_display_run_as_guest(struct intel_display *display); + #endif /* __INTEL_DISPLAY_UTILS__ */ diff --git a/drivers/gpu/drm/i915/display/intel_pch.c b/drivers/gpu/drm/i915/display/intel_pch.c index 469e8a3cfb49..65359a36df48 100644 --- a/drivers/gpu/drm/i915/display/intel_pch.c +++ b/drivers/gpu/drm/i915/display/intel_pch.c @@ -5,8 +5,8 @@ #include -#include "i915_utils.h" #include "intel_display_core.h" +#include "intel_display_utils.h" #include "intel_pch.h" #define INTEL_PCH_DEVICE_ID_MASK 0xff80 @@ -328,7 +328,7 @@ void intel_pch_detect(struct intel_display *display) "Display disabled, reverting to NOP PCH\n"); display->pch_type = PCH_NOP; } else if (!pch) { - if (i915_run_as_guest() && HAS_DISPLAY(display)) { + if (intel_display_run_as_guest(display) && HAS_DISPLAY(display)) { intel_virt_detect_pch(display, &id, &pch_type); display->pch_type = pch_type; } else { diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 58e1d68e495b..016b566ff04f 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -254,6 +254,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_display_power_map.o \ i915-display/intel_display_power_well.o \ i915-display/intel_display_trace.o \ + i915-display/intel_display_utils.o \ i915-display/intel_display_wa.o \ i915-display/intel_dkl_phy.o \ i915-display/intel_dmc.o \ -- cgit v1.2.3 From aaccf0ba7ebad8b140463a213878447a047ea08c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 22 Oct 2025 18:17:12 +0300 Subject: 
drm/i915/display: add intel_display_vtd_active() Add intel_display_vtd_active() to utils. It's an intentional duplicate of i915_vtd_active() from i915_utils.h, but it reduces duplication with xe. Win some, lose some. Reviewed-by: Luca Coelho Link: https://patch.msgid.link/e022166af7c67f43904e2d0fc87bc5c13e0f1204.1761146196.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bw.c | 4 ++-- drivers/gpu/drm/i915/display/intel_display.c | 8 +++----- drivers/gpu/drm/i915/display/intel_display_utils.c | 14 ++++++++++++++ drivers/gpu/drm/i915/display/intel_display_utils.h | 1 + drivers/gpu/drm/i915/display/intel_fbc.c | 4 +++- drivers/gpu/drm/xe/display/ext/i915_utils.c | 10 ---------- 6 files changed, 23 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 92a060e02cf3..640a40805f18 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -13,6 +13,7 @@ #include "intel_display_core.h" #include "intel_display_regs.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_mchbar_regs.h" #include "intel_pcode.h" #include "intel_uncore.h" @@ -842,14 +843,13 @@ static unsigned int intel_bw_num_active_planes(struct intel_display *display, static unsigned int intel_bw_data_rate(struct intel_display *display, const struct intel_bw_state *bw_state) { - struct drm_i915_private *i915 = to_i915(display->drm); unsigned int data_rate = 0; enum pipe pipe; for_each_pipe(display, pipe) data_rate += bw_state->data_rate[pipe]; - if (DISPLAY_VER(display) >= 13 && i915_vtd_active(i915)) + if (DISPLAY_VER(display) >= 13 && intel_display_vtd_active(display)) data_rate = DIV_ROUND_UP(data_rate * 105, 100); return data_rate; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 94fff7c5f8dc..d269b0f2b7e9 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -77,6 +77,7 @@ #include "intel_display_regs.h" #include "intel_display_rpm.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_display_wa.h" #include "intel_dmc.h" #include "intel_dp.h" @@ -831,9 +832,8 @@ static void intel_async_flip_vtd_wa(struct intel_display *display, static bool needs_async_flip_vtd_wa(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - return crtc_state->uapi.async_flip && i915_vtd_active(i915) && + return crtc_state->uapi.async_flip && intel_display_vtd_active(display) && (DISPLAY_VER(display) == 9 || display->platform.broadwell || display->platform.haswell); } @@ -8356,7 +8356,5 @@ void i830_disable_pipe(struct intel_display *display, enum pipe pipe) bool intel_scanout_needs_vtd_wa(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - return IS_DISPLAY_VER(display, 6, 11) && i915_vtd_active(i915); + return IS_DISPLAY_VER(display, 6, 11) && intel_display_vtd_active(display); } diff --git a/drivers/gpu/drm/i915/display/intel_display_utils.c b/drivers/gpu/drm/i915/display/intel_display_utils.c index 13d3999dd580..04d010f7c23e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_utils.c +++ b/drivers/gpu/drm/i915/display/intel_display_utils.c @@ -1,10 +1,15 @@ // SPDX-License-Identifier: MIT /* Copyright © 2025 Intel Corporation */ +#include <linux/device.h> +
+#include <drm/drm_device.h> + #ifdef CONFIG_X86 #include <asm/hypervisor.h> #endif +#include "intel_display_core.h" #include "intel_display_utils.h" bool intel_display_run_as_guest(struct intel_display *display) @@ -16,3 +21,12 @@ bool intel_display_run_as_guest(struct intel_display *display) return false; #endif } + +bool intel_display_vtd_active(struct intel_display *display) +{ + if (device_iommu_mapped(display->drm->dev)) + return true; + + /* Running as a guest, we assume the host is enforcing VT'd */ + return intel_display_run_as_guest(display); +} diff --git a/drivers/gpu/drm/i915/display/intel_display_utils.h b/drivers/gpu/drm/i915/display/intel_display_utils.h index e54e69afd959..af1e34bac720 100644 --- a/drivers/gpu/drm/i915/display/intel_display_utils.h +++ b/drivers/gpu/drm/i915/display/intel_display_utils.h @@ -12,5 +12,6 @@ struct intel_display; #define MHz(x) KHz(1000 * (x)) bool intel_display_run_as_guest(struct intel_display *display); +bool intel_display_vtd_active(struct intel_display *display); #endif /* __INTEL_DISPLAY_UTILS__ */ diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 10ef3136dadc..6f2cc01ea4a0 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -59,6 +59,7 @@ #include "intel_display_rpm.h" #include "intel_display_trace.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_display_wa.h" #include "intel_fbc.h" #include "intel_fbc_regs.h" @@ -1472,7 +1473,8 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, } /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */ - if (i915_vtd_active(i915) && (display->platform.skylake || display->platform.broxton)) { + if (intel_display_vtd_active(display) && + (display->platform.skylake || display->platform.broxton)) { plane_state->no_fbc_reason = "VT-d enabled"; return 0; } diff --git a/drivers/gpu/drm/xe/display/ext/i915_utils.c b/drivers/gpu/drm/xe/display/ext/i915_utils.c index 1421c2a7b64d..af9ec2abbaa1 100644 --- a/drivers/gpu/drm/xe/display/ext/i915_utils.c +++ b/drivers/gpu/drm/xe/display/ext/i915_utils.c @@ -3,18 +3,8 @@ * Copyright © 2023 Intel Corporation */ -#include "i915_drv.h" #include "i915_utils.h" -bool i915_vtd_active(struct drm_i915_private *i915) -{ - if (device_iommu_mapped(i915->drm.dev)) - return true; - - /* Running as a guest, we assume the host is enforcing VT'd */ - return i915_run_as_guest(); -} - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) /* i915 specific, just put here for shutting it up */ -- cgit v1.2.3 From b062cf5456bfdf8976ac279953af29456cac13e5 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 22 Oct 2025 18:17:13 +0300 Subject: drm/i915/display: switch to intel_display_utils.h The remaining utilities that display needs from i915_utils.h are primarily MISSING_CASE() and fetch_and_zero(), plus a couple of i915_inject_probe_failure() uses. To avoid excessive churn, add duplicates of MISSING_CASE() and fetch_and_zero() to intel_display_utils.h, and switch display to use the display utils. As long as there are display files that include i915_drv.h, which includes i915_utils.h, we'll need #ifndef guards for MISSING_CASE() and fetch_and_zero() in both utils headers. We can remove them once display no longer depends on i915_drv.h. A couple of files in display still need i915_utils.h for i915_inject_probe_failure(). Annotate this. They will be handled separately.
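As a quick reference for the two helpers being duplicated, a minimal usage sketch; pch_type and PCH_NOP are real names from this series, while the cached-pointer teardown below them is a hypothetical illustration:

	/* MISSING_CASE() WARNs, printing the expression and its value, when
	 * a switch hits a case the author believed unreachable.
	 */
	switch (display->pch_type) {
	case PCH_NOP:
		break;
	default:
		MISSING_CASE(display->pch_type);
		break;
	}

	/* fetch_and_zero() evaluates to the old *ptr and zeroes it in place,
	 * a common take-ownership idiom in teardown paths ("state" and
	 * "cached_buf" are hypothetical).
	 */
	void *buf = fetch_and_zero(&state->cached_buf);
	kfree(buf);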
Reviewed-by: Luca Coelho Link: https://patch.msgid.link/79f9e31ca64c8c045834d48e20ceb0c515d1e9e1.1761146196.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/g4x_dp.c | 2 +- drivers/gpu/drm/i915/display/i9xx_plane.c | 2 +- drivers/gpu/drm/i915/display/icl_dsi.c | 2 +- drivers/gpu/drm/i915/display/intel_acpi.c | 2 +- drivers/gpu/drm/i915/display/intel_bios.c | 2 +- drivers/gpu/drm/i915/display/intel_bw.c | 1 - drivers/gpu/drm/i915/display/intel_cdclk.c | 2 +- drivers/gpu/drm/i915/display/intel_color.c | 2 +- drivers/gpu/drm/i915/display/intel_combo_phy.c | 2 +- drivers/gpu/drm/i915/display/intel_connector.c | 2 +- drivers/gpu/drm/i915/display/intel_cursor.c | 2 +- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 2 +- drivers/gpu/drm/i915/display/intel_ddi.c | 1 - drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c | 2 +- drivers/gpu/drm/i915/display/intel_display.c | 1 - drivers/gpu/drm/i915/display/intel_display_driver.c | 3 ++- drivers/gpu/drm/i915/display/intel_display_power.c | 2 +- drivers/gpu/drm/i915/display/intel_display_utils.h | 14 ++++++++++++++ drivers/gpu/drm/i915/display/intel_dmc.c | 2 +- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- drivers/gpu/drm/i915/display/intel_dp_aux.c | 2 +- drivers/gpu/drm/i915/display/intel_dp_link_training.c | 2 +- drivers/gpu/drm/i915/display/intel_dpio_phy.c | 2 +- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 1 - drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 2 +- drivers/gpu/drm/i915/display/intel_dvo.c | 2 +- drivers/gpu/drm/i915/display/intel_fb.c | 2 +- drivers/gpu/drm/i915/display/intel_fbc.c | 1 - drivers/gpu/drm/i915/display/intel_fdi.c | 2 +- drivers/gpu/drm/i915/display/intel_flipq.c | 6 +++--- drivers/gpu/drm/i915/display/intel_hdcp.c | 1 - drivers/gpu/drm/i915/display/intel_hdcp_gsc.c | 1 - drivers/gpu/drm/i915/display/intel_hdmi.c | 2 +- drivers/gpu/drm/i915/display/intel_hotplug.c | 4 ++-- drivers/gpu/drm/i915/display/intel_hotplug_irq.c | 2 +- drivers/gpu/drm/i915/display/intel_lspcon.c | 2 +- drivers/gpu/drm/i915/display/intel_pch_refclk.c | 2 +- drivers/gpu/drm/i915/display/intel_pfit.c | 2 +- drivers/gpu/drm/i915/display/intel_pmdemand.c | 2 +- drivers/gpu/drm/i915/display/intel_pps.c | 2 +- drivers/gpu/drm/i915/display/intel_psr.c | 2 +- drivers/gpu/drm/i915/display/intel_qp_tables.c | 2 +- drivers/gpu/drm/i915/display/intel_snps_phy.c | 2 +- drivers/gpu/drm/i915/display/intel_sprite.c | 2 +- drivers/gpu/drm/i915/display/intel_tc.c | 2 +- drivers/gpu/drm/i915/display/intel_vblank.c | 2 +- drivers/gpu/drm/i915/display/intel_vdsc.c | 2 +- drivers/gpu/drm/i915/display/skl_scaler.c | 2 +- drivers/gpu/drm/i915/display/skl_universal_plane.c | 2 +- drivers/gpu/drm/i915/display/skl_watermark.c | 2 +- drivers/gpu/drm/i915/display/vlv_dsi.c | 2 +- drivers/gpu/drm/i915/i915_utils.h | 4 ++++ 52 files changed, 65 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index aa159f9ce12f..a3ff21b2f69f 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -11,7 +11,6 @@ #include "g4x_dp.h" #include "i915_reg.h" -#include "i915_utils.h" #include "intel_audio.h" #include "intel_backlight.h" #include "intel_connector.h" @@ -20,6 +19,7 @@ #include "intel_display_power.h" #include "intel_display_regs.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dp.h" #include "intel_dp_aux.h" #include "intel_dp_link_training.h" diff --git 
a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 407deb5dfb57..6e39d7f2e0c2 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -11,7 +11,6 @@ #include #include "i915_reg.h" -#include "i915_utils.h" #include "i9xx_plane.h" #include "i9xx_plane_regs.h" #include "intel_atomic.h" @@ -19,6 +18,7 @@ #include "intel_display_irq.h" #include "intel_display_regs.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_fb.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 297368ff42a5..70d4c1bc70fc 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -35,7 +35,6 @@ #include #include "i915_reg.h" -#include "i915_utils.h" #include "icl_dsi.h" #include "icl_dsi_regs.h" #include "intel_atomic.h" @@ -48,6 +47,7 @@ #include "intel_ddi.h" #include "intel_de.h" #include "intel_display_regs.h" +#include "intel_display_utils.h" #include "intel_dsi.h" #include "intel_dsi_vbt.h" #include "intel_panel.h" diff --git a/drivers/gpu/drm/i915/display/intel_acpi.c b/drivers/gpu/drm/i915/display/intel_acpi.c index 1addd6288241..68c01932f7b4 100644 --- a/drivers/gpu/drm/i915/display/intel_acpi.c +++ b/drivers/gpu/drm/i915/display/intel_acpi.c @@ -11,10 +11,10 @@ #include -#include "i915_utils.h" #include "intel_acpi.h" #include "intel_display_core.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #define INTEL_DSM_REVISION_ID 1 /* For Calpella anyway... */ #define INTEL_DSM_FN_PLATFORM_MUX_INFO 1 /* No args */ diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 3596dce84c28..e2b51fa21d93 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -36,11 +36,11 @@ #include "soc/intel_rom.h" #include "i915_drv.h" -#include "i915_utils.h" #include "intel_display.h" #include "intel_display_core.h" #include "intel_display_rpm.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_gmbus.h" #define _INTEL_BIOS_PRIVATE diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 640a40805f18..3033c53e61d1 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -7,7 +7,6 @@ #include "i915_drv.h" #include "i915_reg.h" -#include "i915_utils.h" #include "intel_bw.h" #include "intel_crtc.h" #include "intel_display_core.h" diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index da97c38cca14..50dca70a0665 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -32,7 +32,6 @@ #include "hsw_ips.h" #include "i915_drv.h" #include "i915_reg.h" -#include "i915_utils.h" #include "intel_atomic.h" #include "intel_audio.h" #include "intel_cdclk.h" @@ -41,6 +40,7 @@ #include "intel_de.h" #include "intel_display_regs.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_mchbar_regs.h" #include "intel_pci_config.h" #include "intel_pcode.h" diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 51db70d07fae..1e97020e7304 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -24,12 +24,12 @@ #include 
-#include "i915_utils.h" #include "i9xx_plane_regs.h" #include "intel_color.h" #include "intel_color_regs.h" #include "intel_de.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dsb.h" #include "intel_vrr.h" diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c index 112749f97c26..f401558ac14e 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c @@ -5,12 +5,12 @@ #include -#include "i915_utils.h" #include "intel_combo_phy.h" #include "intel_combo_phy_regs.h" #include "intel_de.h" #include "intel_display_regs.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #define for_each_combo_phy(__display, __phy) \ for ((__phy) = PHY_A; (__phy) < I915_MAX_PHYS; (__phy)++) \ diff --git a/drivers/gpu/drm/i915/display/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c index 6a55854db5b6..647e6e1f8c68 100644 --- a/drivers/gpu/drm/i915/display/intel_connector.c +++ b/drivers/gpu/drm/i915/display/intel_connector.c @@ -31,7 +31,7 @@ #include #include "i915_drv.h" -#include "i915_utils.h" +#include "i915_utils.h" /* for i915_inject_probe_failure() */ #include "intel_connector.h" #include "intel_display_core.h" #include "intel_display_debugfs.h" diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index c47c84935871..7aa14348aa6d 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -12,13 +12,13 @@ #include #include -#include "i915_utils.h" #include "intel_atomic.h" #include "intel_cursor.h" #include "intel_cursor_regs.h" #include "intel_de.h" #include "intel_display.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_fb.h" #include "intel_fb_pin.h" #include "intel_frontbuffer.h" diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index a74c1be225ac..f57f6d90a9da 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -8,7 +8,6 @@ #include -#include "i915_utils.h" #include "intel_alpm.h" #include "intel_cx0_phy.h" #include "intel_cx0_phy_regs.h" @@ -16,6 +15,7 @@ #include "intel_ddi_buf_trans.h" #include "intel_de.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dp.h" #include "intel_hdmi.h" #include "intel_panel.h" diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index cd9fc3d1f36a..1fac0ac6273b 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -35,7 +35,6 @@ #include #include "i915_reg.h" -#include "i915_utils.h" #include "icl_dsi.h" #include "intel_alpm.h" #include "intel_audio.h" diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c index a238be5bc455..629502947222 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c @@ -3,12 +3,12 @@ * Copyright © 2020 Intel Corporation */ -#include "i915_utils.h" #include "intel_cx0_phy.h" #include "intel_ddi.h" #include "intel_ddi_buf_trans.h" #include "intel_de.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dp.h" /* HDMI/DVI modes ignore everything but the last 2 items. 
So we share diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index d269b0f2b7e9..0f11769cf864 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -51,7 +51,6 @@ #include "i915_config.h" #include "i915_drv.h" #include "i915_reg.h" -#include "i915_utils.h" #include "i9xx_plane.h" #include "i9xx_plane_regs.h" #include "i9xx_wm.h" diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 38672d2896e3..eb0727b9a0f6 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -18,7 +18,7 @@ #include #include "i915_drv.h" -#include "i915_utils.h" +#include "i915_utils.h" /* for i915_inject_probe_failure() */ #include "i9xx_wm.h" #include "intel_acpi.h" #include "intel_atomic.h" @@ -35,6 +35,7 @@ #include "intel_display_irq.h" #include "intel_display_power.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_display_wa.h" #include "intel_dkl_phy.h" #include "intel_dmc.h" diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index da4babfd6bcb..ebe6225470d0 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -11,7 +11,6 @@ #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" -#include "i915_utils.h" #include "intel_backlight_regs.h" #include "intel_cdclk.h" #include "intel_clock_gating.h" @@ -23,6 +22,7 @@ #include "intel_display_regs.h" #include "intel_display_rpm.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dmc.h" #include "intel_mchbar_regs.h" #include "intel_pch_refclk.h" diff --git a/drivers/gpu/drm/i915/display/intel_display_utils.h b/drivers/gpu/drm/i915/display/intel_display_utils.h index af1e34bac720..2a18f160320c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_utils.h +++ b/drivers/gpu/drm/i915/display/intel_display_utils.h @@ -4,10 +4,24 @@ #ifndef __INTEL_DISPLAY_UTILS__ #define __INTEL_DISPLAY_UTILS__ +#include #include struct intel_display; +#ifndef MISSING_CASE +#define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ + __stringify(x), (long)(x)) +#endif + +#ifndef fetch_and_zero +#define fetch_and_zero(ptr) ({ \ + typeof(*ptr) __T = *(ptr); \ + *(ptr) = (typeof(*ptr))0; \ + __T; \ +}) +#endif + #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 5ba17e66d90b..0bddb20a7c86 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -30,13 +30,13 @@ #include #include "i915_reg.h" -#include "i915_utils.h" #include "intel_crtc.h" #include "intel_de.h" #include "intel_display_power_well.h" #include "intel_display_regs.h" #include "intel_display_rpm.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dmc.h" #include "intel_dmc_regs.h" #include "intel_flipq.h" diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 0277ac5fc64f..0ec82fcbcf48 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -51,7 +51,6 @@ #include #include "g4x_dp.h" -#include "i915_utils.h" #include "intel_alpm.h" #include "intel_atomic.h" #include "intel_audio.h" @@ 
-65,6 +64,7 @@ #include "intel_de.h" #include "intel_display_driver.h" #include "intel_display_jiffies.h" +#include "intel_display_utils.h" #include "intel_display_regs.h" #include "intel_display_rpm.h" #include "intel_display_types.h" diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index 829a7c0fbe4f..2e7dbaf511b9 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -5,9 +5,9 @@ #include -#include "i915_utils.h" #include "intel_de.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dp.h" #include "intel_dp_aux.h" #include "intel_dp_aux_regs.h" diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 316b42b5a946..aad5fe14962f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -27,10 +27,10 @@ #include #include -#include "i915_utils.h" #include "intel_display_core.h" #include "intel_display_jiffies.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dp.h" #include "intel_dp_link_training.h" #include "intel_encoder.h" diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c index 3f77ad92c156..5df6347a420d 100644 --- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c @@ -24,13 +24,13 @@ #include #include "bxt_dpio_phy_regs.h" -#include "i915_utils.h" #include "intel_ddi.h" #include "intel_ddi_buf_trans.h" #include "intel_de.h" #include "intel_display_power_well.h" #include "intel_display_regs.h" #include "intel_display_types.h" +#include "intel_display_utils.h" #include "intel_dp.h" #include "intel_dpio_phy.h" #include "vlv_dpio_phy_regs.h" diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 900a945ff8ab..92c433f7b7e2 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -27,7 +27,6 @@ #include #include "bxt_dpio_phy_regs.h" -#include "i915_utils.h" #include "intel_cx0_phy.h" #include "intel_de.h" #include "intel_display_regs.h" diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index fac8729e442c..31edf57a296f 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -38,10 +38,10 @@ #include #include