summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/display/dc
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc')
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c200
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_link.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/Makefile25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c341
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c376
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c47
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c812
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h106
57 files changed, 1271 insertions, 1008 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
index 6767fab55c26..352e9afb85c6 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
@@ -100,3 +100,24 @@ void convert_float_matrix(
matrix[i] = (uint16_t)reg_value;
}
}
+
+static uint32_t find_gcd(uint32_t a, uint32_t b)
+{
+ uint32_t remainder = 0;
+ while (b != 0) {
+ remainder = a % b;
+ a = b;
+ b = remainder;
+ }
+ return a;
+}
+
+void reduce_fraction(uint32_t num, uint32_t den,
+ uint32_t *out_num, uint32_t *out_den)
+{
+ uint32_t gcd = 0;
+
+ gcd = find_gcd(num, den);
+ *out_num = num / gcd;
+ *out_den = den / gcd;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.h b/drivers/gpu/drm/amd/display/dc/basics/conversion.h
index ade785c4fdc7..81da4e6f7a1a 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.h
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.h
@@ -38,6 +38,9 @@ void convert_float_matrix(
struct fixed31_32 *flt,
uint32_t buffer_size);
+void reduce_fraction(uint32_t num, uint32_t den,
+ uint32_t *out_num, uint32_t *out_den);
+
static inline unsigned int log_2(unsigned int num)
{
return ilog2(num);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index 4c76091fd1f2..f276abb63bcd 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -337,7 +337,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
break;
}
- case AMDGPU_FAMILY_GC_11_0_2: {
+ case AMDGPU_FAMILY_GC_11_0_1: {
struct clk_mgr_dcn314 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
if (clk_mgr == NULL) {
@@ -397,7 +397,7 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base)
dcn32_clk_mgr_destroy(clk_mgr);
break;
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
dcn314_clk_mgr_destroy(clk_mgr);
break;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index 0202dc682682..ca6dfd2d7561 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -24,10 +24,9 @@
*/
#include "dccg.h"
-#include "clk_mgr_internal.h"
+#include "rn_clk_mgr.h"
#include "dcn20/dcn20_clk_mgr.h"
-#include "rn_clk_mgr.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "dce100/dce_clk_mgr.h"
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
index 2e088c5171b2..f1319957e400 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
@@ -28,6 +28,7 @@
#include "clk_mgr.h"
#include "dm_pp_smu.h"
+#include "clk_mgr_internal.h"
extern struct wm_table ddr4_wm_table_gs;
extern struct wm_table lpddr4_wm_table_gs;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index ee99974b3b62..beb025cd3dc2 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -307,16 +307,6 @@ static void dcn314_enable_pme_wa(struct clk_mgr *clk_mgr_base)
dcn314_smu_enable_pme_wa(clk_mgr);
}
-void dcn314_init_clocks(struct clk_mgr *clk_mgr)
-{
- memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
- // Assumption is that boot state always supports pstate
- clk_mgr->clks.p_state_change_support = true;
- clk_mgr->clks.prev_p_state_change_support = true;
- clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
- clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
-}
-
bool dcn314_are_clock_states_equal(struct dc_clocks *a,
struct dc_clocks *b)
{
@@ -425,7 +415,7 @@ static struct wm_table lpddr5_wm_table = {
}
};
-static DpmClocks_t dummy_clocks;
+static DpmClocks314_t dummy_clocks;
static struct dcn314_watermarks dummy_wms = { 0 };
@@ -510,7 +500,7 @@ static void dcn314_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
struct dcn314_smu_dpm_clks *smu_dpm_clks)
{
- DpmClocks_t *table = smu_dpm_clks->dpm_clks;
+ DpmClocks314_t *table = smu_dpm_clks->dpm_clks;
if (!clk_mgr->smu_ver)
return;
@@ -527,6 +517,26 @@ static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
dcn314_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
}
+static inline bool is_valid_clock_value(uint32_t clock_value)
+{
+ return clock_value > 1 && clock_value < 100000;
+}
+
+static unsigned int convert_wck_ratio(uint8_t wck_ratio)
+{
+ switch (wck_ratio) {
+ case WCK_RATIO_1_2:
+ return 2;
+
+ case WCK_RATIO_1_4:
+ return 4;
+
+ default:
+ break;
+ }
+ return 1;
+}
+
static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
{
uint32_t max = 0;
@@ -540,89 +550,127 @@ static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
return max;
}
-static unsigned int find_clk_for_voltage(
- const DpmClocks_t *clock_table,
- const uint32_t clocks[],
- unsigned int voltage)
-{
- int i;
- int max_voltage = 0;
- int clock = 0;
-
- for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) {
- if (clock_table->SocVoltage[i] == voltage) {
- return clocks[i];
- } else if (clock_table->SocVoltage[i] >= max_voltage &&
- clock_table->SocVoltage[i] < voltage) {
- max_voltage = clock_table->SocVoltage[i];
- clock = clocks[i];
- }
- }
-
- ASSERT(clock);
- return clock;
-}
-
static void dcn314_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr,
struct integrated_info *bios_info,
- const DpmClocks_t *clock_table)
+ const DpmClocks314_t *clock_table)
{
- int i, j;
struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
- uint32_t max_dispclk = 0, max_dppclk = 0;
-
- j = -1;
-
- ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL);
-
- /* Find lowest DPM, FCLK is filled in reverse order*/
+ struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
+ uint32_t max_pstate = 0, max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+ int i;
- for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) {
- if (clock_table->DfPstateTable[i].FClk != 0) {
- j = i;
- break;
+ /* Find highest valid fclk pstate */
+ for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) {
+ if (is_valid_clock_value(clock_table->DfPstateTable[i].FClk) &&
+ clock_table->DfPstateTable[i].FClk > max_fclk) {
+ max_fclk = clock_table->DfPstateTable[i].FClk;
+ max_pstate = i;
}
}
- if (j == -1) {
- /* clock table is all 0s, just use our own hardcode */
- ASSERT(0);
- return;
- }
-
- bw_params->clk_table.num_entries = j + 1;
+ /* We expect the table to contain at least one valid fclk entry. */
+ ASSERT(is_valid_clock_value(max_fclk));
- /* dispclk and dppclk can be max at any voltage, same number of levels for both */
+ /* Dispclk and dppclk can be max at any voltage, same number of levels for both */
if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) {
max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled);
max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled);
} else {
+ /* Invalid number of entries in the table from PMFW. */
ASSERT(0);
}
- for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
- bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk;
- bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk;
- bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage;
- switch (clock_table->DfPstateTable[j].WckRatio) {
- case WCK_RATIO_1_2:
- bw_params->clk_table.entries[i].wck_ratio = 2;
- break;
- case WCK_RATIO_1_4:
- bw_params->clk_table.entries[i].wck_ratio = 4;
- break;
- default:
- bw_params->clk_table.entries[i].wck_ratio = 1;
+ /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */
+ for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) {
+ uint32_t min_fclk = clock_table->DfPstateTable[0].FClk;
+ int j;
+
+ for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) {
+ if (is_valid_clock_value(clock_table->DfPstateTable[j].FClk) &&
+ clock_table->DfPstateTable[j].FClk < min_fclk &&
+ clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i]) {
+ min_fclk = clock_table->DfPstateTable[j].FClk;
+ min_pstate = j;
+ }
}
- bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage);
- bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage);
+
+ /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
+ for (j = bw_params->clk_table.num_entries - 1; j > 0; j--)
+ if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i])
+ break;
+
+ bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz;
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz;
+ bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz;
+
+ /* Now update clocks we do read */
+ bw_params->clk_table.entries[i].fclk_mhz = min_fclk;
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
+ bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
+ clock_table->DfPstateTable[min_pstate].WckRatio);
+ };
+
+ /* Make sure to include at least one entry at highest pstate */
+ if (max_pstate != min_pstate || i == 0) {
+ if (i > MAX_NUM_DPM_LVL - 1)
+ i = MAX_NUM_DPM_LVL - 1;
+
+ bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+ bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
+ clock_table->DfPstateTable[max_pstate].WckRatio);
+ i++;
}
+ bw_params->clk_table.num_entries = i--;
+
+ /* Make sure all highest clocks are included*/
+ bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS);
+ ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS));
+ bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+ bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+ /*
+ * Set any 0 clocks to max default setting. Not an issue for
+ * power since we aren't doing switching in such case anyway
+ */
+ for (i = 0; i < bw_params->clk_table.num_entries; i++) {
+ if (!bw_params->clk_table.entries[i].fclk_mhz) {
+ bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
+ bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz;
+ bw_params->clk_table.entries[i].voltage = def_max.voltage;
+ }
+ if (!bw_params->clk_table.entries[i].dcfclk_mhz)
+ bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz;
+ if (!bw_params->clk_table.entries[i].socclk_mhz)
+ bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz;
+ if (!bw_params->clk_table.entries[i].dispclk_mhz)
+ bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz;
+ if (!bw_params->clk_table.entries[i].dppclk_mhz)
+ bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz;
+ if (!bw_params->clk_table.entries[i].phyclk_mhz)
+ bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+ if (!bw_params->clk_table.entries[i].phyclk_d18_mhz)
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz)
+ bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+ }
+ ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz);
bw_params->vram_type = bios_info->memory_type;
- bw_params->num_channels = bios_info->ma_channel_number;
+ bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4;
for (i = 0; i < WM_SET_COUNT; i++) {
bw_params->wm_table.entries[i].wm_inst = i;
@@ -641,7 +689,7 @@ static struct clk_mgr_funcs dcn314_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
.update_clocks = dcn314_update_clocks,
- .init_clocks = dcn314_init_clocks,
+ .init_clocks = dcn31_init_clocks,
.enable_pme_wa = dcn314_enable_pme_wa,
.are_clock_states_equal = dcn314_are_clock_states_equal,
.notify_wm_ranges = dcn314_notify_wm_ranges
@@ -681,10 +729,10 @@ void dcn314_clk_mgr_construct(
}
ASSERT(clk_mgr->smu_wm_set.wm_set);
- smu_dpm_clks.dpm_clks = (DpmClocks_t *)dm_helpers_allocate_gpu_mem(
+ smu_dpm_clks.dpm_clks = (DpmClocks314_t *)dm_helpers_allocate_gpu_mem(
clk_mgr->base.base.ctx,
DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
- sizeof(DpmClocks_t),
+ sizeof(DpmClocks314_t),
&smu_dpm_clks.mc_address.quad_part);
if (smu_dpm_clks.dpm_clks == NULL) {
@@ -729,7 +777,7 @@ void dcn314_clk_mgr_construct(
if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
dcn314_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
- if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
+ if (ctx->dc_bios && ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) {
dcn314_clk_mgr_helper_populate_bw_params(
&clk_mgr->base,
ctx->dc_bios->integrated_info,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
index c695a4498c50..171f84340eb2 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
@@ -42,7 +42,7 @@ struct clk_mgr_dcn314 {
bool dcn314_are_clock_states_equal(struct dc_clocks *a,
struct dc_clocks *b);
-void dcn314_init_clocks(struct clk_mgr *clk_mgr);
+
void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_state *context,
bool safe_to_lower);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
index a7958dc96581..047d19ea919c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
@@ -36,6 +36,37 @@ typedef enum {
WCK_RATIO_MAX
} WCK_RATIO_e;
+typedef struct {
+ uint32_t FClk;
+ uint32_t MemClk;
+ uint32_t Voltage;
+ uint8_t WckRatio;
+ uint8_t Spare[3];
+} DfPstateTable314_t;
+
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ DfPstateTable314_t DfPstateTable[NUM_DF_PSTATE_LEVELS];
+
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk
+ uint8_t NumDfPstatesEnabled;
+ uint8_t spare[3];
+
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks314_t;
+
struct dcn314_watermarks {
// Watermarks
WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
@@ -43,7 +74,7 @@ struct dcn314_watermarks {
};
struct dcn314_smu_dpm_clks {
- DpmClocks_t *dpm_clks;
+ DpmClocks314_t *dpm_clks;
union large_integer mc_address;
};
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index e42f44fc1c08..aeecca68dea7 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1074,8 +1074,15 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
struct dc_stream_state *old_stream =
dc->current_state->res_ctx.pipe_ctx[i].stream;
bool should_disable = true;
- bool pipe_split_change =
- context->res_ctx.pipe_ctx[i].top_pipe != dc->current_state->res_ctx.pipe_ctx[i].top_pipe;
+ bool pipe_split_change = false;
+
+ if ((context->res_ctx.pipe_ctx[i].top_pipe) &&
+ (dc->current_state->res_ctx.pipe_ctx[i].top_pipe))
+ pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe->pipe_idx !=
+ dc->current_state->res_ctx.pipe_ctx[i].top_pipe->pipe_idx;
+ else
+ pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe !=
+ dc->current_state->res_ctx.pipe_ctx[i].top_pipe;
for (j = 0; j < context->stream_count; j++) {
if (old_stream == context->streams[j]) {
@@ -3229,7 +3236,7 @@ static void commit_planes_for_stream(struct dc *dc,
odm_pipe->ttu_regs.min_ttu_vblank = MAX_TTU;
}
- if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) {
+ if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
if (top_pipe_to_program &&
top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
if (should_use_dmub_lock(stream->link)) {
@@ -3247,7 +3254,6 @@ static void commit_planes_for_stream(struct dc *dc,
top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable(
top_pipe_to_program->stream_res.tg);
}
- }
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
if (dc->hwss.subvp_pipe_control_lock)
@@ -3466,7 +3472,7 @@ static void commit_planes_for_stream(struct dc *dc,
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
}
- if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) {
+ if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
top_pipe_to_program->stream_res.tg->funcs->wait_for_state(
top_pipe_to_program->stream_res.tg,
@@ -3493,21 +3499,19 @@ static void commit_planes_for_stream(struct dc *dc,
top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_disable(
top_pipe_to_program->stream_res.tg);
}
- }
- if (update_type != UPDATE_TYPE_FAST) {
+ if (update_type != UPDATE_TYPE_FAST)
dc->hwss.post_unlock_program_front_end(dc, context);
- /* Since phantom pipe programming is moved to post_unlock_program_front_end,
- * move the SubVP lock to after the phantom pipes have been setup
- */
- if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
- if (dc->hwss.subvp_pipe_control_lock)
- dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use);
- } else {
- if (dc->hwss.subvp_pipe_control_lock)
- dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
- }
+ /* Since phantom pipe programming is moved to post_unlock_program_front_end,
+ * move the SubVP lock to after the phantom pipes have been setup
+ */
+ if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use);
+ } else {
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
}
// Fire manual trigger only when bottom plane is flipped
@@ -4292,7 +4296,7 @@ bool dc_is_dmub_outbox_supported(struct dc *dc)
!dc->debug.dpia_debug.bits.disable_dpia)
return true;
- if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2 &&
+ if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 &&
!dc->debug.dpia_debug.bits.disable_dpia)
return true;
@@ -4340,6 +4344,7 @@ void dc_enable_dmub_outbox(struct dc *dc)
struct dc_context *dc_ctx = dc->ctx;
dmub_enable_outbox_notification(dc_ctx->dmub_srv);
+ DC_LOG_DC("%s: dmub outbox notifications enabled\n", __func__);
}
/**
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 9e51338441d0..66d2ae7aacf5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -3372,7 +3372,7 @@ bool dc_link_setup_psr(struct dc_link *link,
switch(link->ctx->asic_id.chip_family) {
case FAMILY_YELLOW_CARP:
case AMDGPU_FAMILY_GC_10_3_6:
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
if(!dc->debug.disable_z10)
psr_context->psr_level.bits.SKIP_CRTC_DISABLE = false;
break;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index ffc0f1c0ea93..7dbab15bfa68 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -169,7 +169,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
if (ASICREV_IS_GC_11_0_2(asic_id.hw_internal_rev))
dc_version = DCN_VERSION_3_21;
break;
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
dc_version = DCN_VERSION_3_14;
break;
default:
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 8e1e40083ec8..5908b60db313 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
-#define DC_VER "3.2.196"
+#define DC_VER "3.2.198"
#define MAX_SURFACES 3
#define MAX_PLANES 6
@@ -213,6 +213,7 @@ struct dc_caps {
uint32_t cache_num_ways;
uint16_t subvp_fw_processing_delay_us;
uint16_t subvp_prefetch_end_to_mall_start_us;
+ uint8_t subvp_swath_height_margin_lines; // subvp start line must be aligned to 2 x swath height
uint16_t subvp_pstate_allow_width_us;
uint16_t subvp_vertical_int_margin_us;
bool seamless_odm;
@@ -352,6 +353,7 @@ struct dc_config {
bool use_pipe_ctx_sync_logic;
bool ignore_dpref_ss;
bool enable_mipi_converter_optimization;
+ bool use_default_clock_table;
};
enum visual_confirm {
@@ -609,6 +611,7 @@ struct dc_bounding_box_overrides {
int percent_of_ideal_drambw;
int dram_clock_change_latency_ns;
int dummy_clock_change_latency_ns;
+ int fclk_clock_change_latency_ns;
/* This forces a hard min on the DCFCLK we use
* for DML. Unlike the debug option for forcing
* DCFCLK, this override affects watermark calculations
@@ -751,6 +754,7 @@ struct dc_debug_options {
uint32_t mst_start_top_delay;
uint8_t psr_power_use_phy_fsm;
enum dml_hostvm_override_opts dml_hostvm_override;
+ bool dml_disallow_alternate_prefetch_modes;
bool use_legacy_soc_bb_mechanism;
bool exit_idle_opt_for_cursor_updates;
bool enable_single_display_2to1_odm_policy;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 2d61c2a91cee..09b304507bad 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -29,6 +29,7 @@
#include "dm_helpers.h"
#include "dc_hw_types.h"
#include "core_types.h"
+#include "../basics/conversion.h"
#define CTX dc_dmub_srv->ctx
#define DC_LOGGER CTX->logger
@@ -275,8 +276,7 @@ void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
union dmub_rb_cmd cmd = { 0 };
cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
- // TODO: Uncomment once FW headers are promoted
- //cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER;
+ cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER;
cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst;
cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
@@ -601,6 +601,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
&cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index];
struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+ uint32_t out_num, out_den;
pipe_data->mode = SUBVP;
pipe_data->pipe_config.subvp_data.pix_clk_100hz = subvp_pipe->stream->timing.pix_clk_100hz;
@@ -612,6 +613,16 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
main_timing->v_total - main_timing->v_front_porch - main_timing->v_addressable;
pipe_data->pipe_config.subvp_data.mall_region_lines = phantom_timing->v_addressable;
pipe_data->pipe_config.subvp_data.main_pipe_index = subvp_pipe->pipe_idx;
+ pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param;
+
+ /* Calculate the scaling factor from the src and dst height.
+ * e.g. If 3840x2160 being downscaled to 1920x1080, the scaling factor is 1/2.
+ * Reduce the fraction 1080/2160 = 1/2 for the "scaling factor"
+ */
+ reduce_fraction(subvp_pipe->stream->src.height, subvp_pipe->stream->dst.height, &out_num, &out_den);
+ // TODO: Uncomment below lines once DMCUB include headers are promoted
+ //pipe_data->pipe_config.subvp_data.scale_factor_numerator = out_num;
+ //pipe_data->pipe_config.subvp_data.scale_factor_denominator = out_den;
// Prefetch lines is equal to VACTIVE + BP + VSYNC
pipe_data->pipe_config.subvp_data.prefetch_lines =
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index a0af0f6afeef..9544abf75e84 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -344,6 +344,7 @@ enum dc_detect_reason {
DETECT_REASON_HPDRX,
DETECT_REASON_FALLBACK,
DETECT_REASON_RETRAIN,
+ DETECT_REASON_TDR,
};
bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 213de8cabfad..165392380842 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -543,9 +543,11 @@ static void dce112_get_pix_clk_dividers_helper (
switch (pix_clk_params->color_depth) {
case COLOR_DEPTH_101010:
actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2;
+ actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10;
break;
case COLOR_DEPTH_121212:
actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2;
+ actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10;
break;
case COLOR_DEPTH_161616:
actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index d4a6504dfe00..db7ca4b0cdb9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -361,8 +361,6 @@ void dpp1_cnv_setup (
select = INPUT_CSC_SELECT_ICSC;
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- pixel_format = 22;
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
pixel_format = 26; /* ARGB16161616_UNORM */
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index b54c12400323..564e061ccb58 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -278,9 +278,6 @@ void hubp1_program_pixel_format(
SURFACE_PIXEL_FORMAT, 10);
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- REG_UPDATE(DCSURF_SURFACE_CONFIG,
- SURFACE_PIXEL_FORMAT, 22);
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/
REG_UPDATE(DCSURF_SURFACE_CONFIG,
SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index bed783747f16..5b5d952b2b8c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -110,6 +110,7 @@ void dcn10_lock_all_pipes(struct dc *dc,
*/
if (pipe_ctx->top_pipe ||
!pipe_ctx->stream ||
+ !pipe_ctx->plane_state ||
!tg->funcs->is_tg_enabled(tg))
continue;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
index 769974375b4b..8e9384094f6d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
@@ -131,6 +131,12 @@ struct mpcc *mpc1_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
while (tmp_mpcc != NULL) {
if (tmp_mpcc->dpp_id == dpp_id)
return tmp_mpcc;
+
+ /* avoid circular linked list */
+ ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot);
+ if (tmp_mpcc == tmp_mpcc->mpcc_bot)
+ break;
+
tmp_mpcc = tmp_mpcc->mpcc_bot;
}
return NULL;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index e1a9a45b03b6..3fc300cd1ce9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -465,6 +465,11 @@ void optc1_enable_optc_clock(struct timing_generator *optc, bool enable)
OTG_CLOCK_ON, 1,
1, 1000);
} else {
+
+ //last chance to clear underflow, otherwise, it will always there due to clock is off.
+ if (optc->funcs->is_optc_underflow_occurred(optc) == true)
+ optc->funcs->clear_optc_underflow(optc);
+
REG_UPDATE_2(OTG_CLOCK_CONTROL,
OTG_CLOCK_GATE_DIS, 0,
OTG_CLOCK_EN, 0);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
index ea1f14af0db7..eaa7032f0f1a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
@@ -166,8 +166,6 @@ static void dpp2_cnv_setup (
select = DCN2_ICSC_SELECT_ICSC_A;
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- pixel_format = 22;
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
pixel_format = 26; /* ARGB16161616_UNORM */
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
index 936af65381ef..9570c2118ccc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
@@ -463,9 +463,6 @@ void hubp2_program_pixel_format(
SURFACE_PIXEL_FORMAT, 10);
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- REG_UPDATE(DCSURF_SURFACE_CONFIG,
- SURFACE_PIXEL_FORMAT, 22);
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/
REG_UPDATE(DCSURF_SURFACE_CONFIG,
SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
index 3d307dd58e9a..116f67a0b989 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
@@ -531,6 +531,12 @@ static struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
while (tmp_mpcc != NULL) {
if (tmp_mpcc->dpp_id == 0xf || tmp_mpcc->dpp_id == dpp_id)
return tmp_mpcc;
+
+ /* avoid circular linked list */
+ ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot);
+ if (tmp_mpcc == tmp_mpcc->mpcc_bot)
+ break;
+
tmp_mpcc = tmp_mpcc->mpcc_bot;
}
return NULL;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
index c5e200d09038..5752271f22df 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
@@ -67,9 +67,15 @@ static uint32_t convert_and_clamp(
void dcn21_dchvm_init(struct hubbub *hubbub)
{
struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
- uint32_t riommu_active;
+ uint32_t riommu_active, prefetch_done;
int i;
+ REG_GET(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, &prefetch_done);
+
+ if (prefetch_done) {
+ hubbub->riommu_active = true;
+ return;
+ }
//Init DCHVM block
REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
index 77b00f86c216..4a668d6563df 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
@@ -244,8 +244,6 @@ void dpp3_cnv_setup (
select = INPUT_CSC_SELECT_ICSC;
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- pixel_format = 22;
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
pixel_format = 26; /* ARGB16161616_UNORM */
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
index 6a4dcafb9bba..dc3e8df706b3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
@@ -86,7 +86,7 @@ bool hubp3_program_surface_flip_and_addr(
VMID, address->vmid);
if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) {
- REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1);
+ REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0);
REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1);
} else {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index 0a67f8a5656d..d97076648acb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -372,7 +372,7 @@ static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id
int afmt_inst;
/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGE) {
+ if (eng_id <= ENGINE_ID_DIGB) {
vpg_inst = eng_id;
afmt_inst = eng_id;
} else
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
index 7c77c71591a0..82c3b3ac1f0d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
@@ -162,7 +162,8 @@
SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_SDP_AUDIO_CONTROL0, AIP_ENABLE, mask_sh),\
SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_SDP_AUDIO_CONTROL0, ACM_ENABLE, mask_sh),\
SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_ENABLE, mask_sh),\
- SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_CONT_MODE_ENABLE, mask_sh)
+ SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_CONT_MODE_ENABLE, mask_sh),\
+ SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_HBLANK_CONTROL, HBLANK_MINIMUM_SYMBOL_WIDTH, mask_sh)
#define DCN3_1_HPO_DP_STREAM_ENC_REG_FIELD_LIST(type) \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 468a893ff785..aedff18aff56 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -2153,7 +2153,7 @@ static bool dcn31_resource_construct(
pool->base.usb4_dpia_count = 4;
}
- if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2)
+ if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1)
pool->base.usb4_dpia_count = 4;
/* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
index 41f8ec99da6b..901436591ed4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
@@ -32,7 +32,6 @@
container_of(pool, struct dcn31_resource_pool, base)
extern struct _vcs_dpi_ip_params_st dcn3_1_ip;
-extern struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc;
struct dcn31_resource_pool {
struct resource_pool base;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
index e3b5a95e03b1..702c28c2560e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
@@ -13,31 +13,6 @@
DCN314 = dcn314_resource.o dcn314_hwseq.o dcn314_init.o \
dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -msse2
-endif
-endif
-
AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314))
AMD_DISPLAY_FILES += $(AMD_DAL_DCN314)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
index 755c715ad8dc..39931d48f385 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
@@ -343,7 +343,10 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
{
struct dc_stream_state *stream = pipe_ctx->stream;
unsigned int odm_combine_factor = 0;
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
+ bool two_pix_per_container = false;
+ two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing);
odm_combine_factor = get_odm_config(pipe_ctx, NULL);
if (is_dp_128b_132b_signal(pipe_ctx)) {
@@ -355,16 +358,13 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
else
*k2_div = PIXEL_RATE_DIV_BY_4;
} else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
- if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) {
+ if (two_pix_per_container) {
*k1_div = PIXEL_RATE_DIV_BY_1;
*k2_div = PIXEL_RATE_DIV_BY_2;
- } else if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) {
- *k1_div = PIXEL_RATE_DIV_BY_2;
- *k2_div = PIXEL_RATE_DIV_BY_2;
} else {
- if (odm_combine_factor == 1)
- *k2_div = PIXEL_RATE_DIV_BY_4;
- else if (odm_combine_factor == 2)
+ *k1_div = PIXEL_RATE_DIV_BY_1;
+ *k2_div = PIXEL_RATE_DIV_BY_4;
+ if ((odm_combine_factor == 2) || dc->debug.enable_dp_dig_pixel_rate_div_policy)
*k2_div = PIXEL_RATE_DIV_BY_2;
}
}
@@ -374,3 +374,31 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
return odm_combine_factor;
}
+
+void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx)
+{
+ uint32_t pix_per_cycle = 1;
+ uint32_t odm_combine_factor = 1;
+
+ if (!pipe_ctx || !pipe_ctx->stream || !pipe_ctx->stream_res.stream_enc)
+ return;
+
+ odm_combine_factor = get_odm_config(pipe_ctx, NULL);
+ if (optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing) || odm_combine_factor > 1
+ || dcn314_is_dp_dig_pixel_rate_div_policy(pipe_ctx))
+ pix_per_cycle = 2;
+
+ if (pipe_ctx->stream_res.stream_enc->funcs->set_input_mode)
+ pipe_ctx->stream_res.stream_enc->funcs->set_input_mode(pipe_ctx->stream_res.stream_enc,
+ pix_per_cycle);
+}
+
+bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx)
+{
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
+
+ if (dc_is_dp_signal(pipe_ctx->stream->signal) && !is_dp_128b_132b_signal(pipe_ctx) &&
+ dc->debug.enable_dp_dig_pixel_rate_div_policy)
+ return true;
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
index be0f5e4d48e1..d014580592ac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
@@ -39,4 +39,8 @@ void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable);
unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div);
+void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx);
+
+bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx);
+
#endif /* __DC_HWSS_DCN314_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
index b9debeb081fd..fcf67eb3478f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
@@ -145,6 +145,8 @@ static const struct hwseq_private_funcs dcn314_private_funcs = {
.set_shaper_3dlut = dcn20_set_shaper_3dlut,
.setup_hpo_hw_control = dcn31_setup_hpo_hw_control,
.calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values,
+ .set_pixels_per_cycle = dcn314_set_pixels_per_cycle,
+ .is_dp_dig_pixel_rate_div_policy = dcn314_is_dp_dig_pixel_rate_div_policy,
};
void dcn314_hw_sequencer_construct(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
index 63861cdfb09f..85f32206a766 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
@@ -70,6 +70,7 @@
#include "dce110/dce110_resource.h"
#include "dml/display_mode_vba.h"
#include "dml/dcn31/dcn31_fpu.h"
+#include "dml/dcn314/dcn314_fpu.h"
#include "dcn314/dcn314_dccg.h"
#include "dcn10/dcn10_resource.h"
#include "dcn31/dcn31_panel_cntl.h"
@@ -132,155 +133,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C
#define DC_LOGGER_INIT(logger)
-#define DCN3_14_DEFAULT_DET_SIZE 384
-#define DCN3_14_MAX_DET_SIZE 384
-#define DCN3_14_MIN_COMPBUF_SIZE_KB 128
-#define DCN3_14_CRB_SEGMENT_SIZE_KB 64
-struct _vcs_dpi_ip_params_st dcn3_14_ip = {
- .VBlankNomDefaultUS = 668,
- .gpuvm_enable = 1,
- .gpuvm_max_page_table_levels = 1,
- .hostvm_enable = 1,
- .hostvm_max_page_table_levels = 2,
- .rob_buffer_size_kbytes = 64,
- .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE,
- .config_return_buffer_size_in_kbytes = 1792,
- .compressed_buffer_segment_size_in_kbytes = 64,
- .meta_fifo_size_in_kentries = 32,
- .zero_size_buffer_entries = 512,
- .compbuf_reserved_space_64b = 256,
- .compbuf_reserved_space_zs = 64,
- .dpp_output_buffer_pixels = 2560,
- .opp_output_buffer_lines = 1,
- .pixel_chunk_size_kbytes = 8,
- .meta_chunk_size_kbytes = 2,
- .min_meta_chunk_size_bytes = 256,
- .writeback_chunk_size_kbytes = 8,
- .ptoi_supported = false,
- .num_dsc = 4,
- .maximum_dsc_bits_per_component = 10,
- .dsc422_native_support = false,
- .is_line_buffer_bpp_fixed = true,
- .line_buffer_fixed_bpp = 48,
- .line_buffer_size_bits = 789504,
- .max_line_buffer_lines = 12,
- .writeback_interface_buffer_size_kbytes = 90,
- .max_num_dpp = 4,
- .max_num_otg = 4,
- .max_num_hdmi_frl_outputs = 1,
- .max_num_wb = 1,
- .max_dchub_pscl_bw_pix_per_clk = 4,
- .max_pscl_lb_bw_pix_per_clk = 2,
- .max_lb_vscl_bw_pix_per_clk = 4,
- .max_vscl_hscl_bw_pix_per_clk = 4,
- .max_hscl_ratio = 6,
- .max_vscl_ratio = 6,
- .max_hscl_taps = 8,
- .max_vscl_taps = 8,
- .dpte_buffer_size_in_pte_reqs_luma = 64,
- .dpte_buffer_size_in_pte_reqs_chroma = 34,
- .dispclk_ramp_margin_percent = 1,
- .max_inter_dcn_tile_repeaters = 8,
- .cursor_buffer_size = 16,
- .cursor_chunk_size = 2,
- .writeback_line_buffer_buffer_size = 0,
- .writeback_min_hscl_ratio = 1,
- .writeback_min_vscl_ratio = 1,
- .writeback_max_hscl_ratio = 1,
- .writeback_max_vscl_ratio = 1,
- .writeback_max_hscl_taps = 1,
- .writeback_max_vscl_taps = 1,
- .dppclk_delay_subtotal = 46,
- .dppclk_delay_scl = 50,
- .dppclk_delay_scl_lb_only = 16,
- .dppclk_delay_cnvc_formatter = 27,
- .dppclk_delay_cnvc_cursor = 6,
- .dispclk_delay_subtotal = 119,
- .dynamic_metadata_vm_enabled = false,
- .odm_combine_4to1_supported = false,
- .dcc_supported = true,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = {
- /*TODO: correct dispclk/dppclk voltage level determination*/
- .clock_limits = {
- {
- .state = 0,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 600.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 186.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 1,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 2,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 3,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 371.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 4,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
- .dtbclk_mhz = 625.0,
- },
- },
- .num_states = 5,
- .sr_exit_time_us = 9.0,
- .sr_enter_plus_exit_time_us = 11.0,
- .sr_exit_z8_time_us = 442.0,
- .sr_enter_plus_exit_z8_time_us = 560.0,
- .writeback_latency_us = 12.0,
- .dram_channel_width_bytes = 4,
- .round_trip_ping_latency_dcfclk_cycles = 106,
- .urgent_latency_pixel_data_only_us = 4.0,
- .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
- .urgent_latency_vm_data_only_us = 4.0,
- .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
- .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
- .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_sdp_bw_after_urgent = 80.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
- .max_avg_sdp_bw_use_normal_percent = 60.0,
- .max_avg_dram_bw_use_normal_percent = 60.0,
- .fabric_datapath_to_dcn_data_return_bytes = 32,
- .return_bus_width_bytes = 64,
- .downspread_percent = 0.38,
- .dcn_downspread_percent = 0.5,
- .gpuvm_min_page_size_bytes = 4096,
- .hostvm_min_page_size_bytes = 4096,
- .do_urgent_latency_adjustment = false,
- .urgent_latency_adjustment_fabric_clock_component_us = 0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
-};
-
enum dcn31_clk_src_array_id {
DCN31_CLK_SRC_PLL0,
DCN31_CLK_SRC_PLL1,
@@ -1402,7 +1254,7 @@ static struct stream_encoder *dcn314_stream_encoder_create(
int afmt_inst;
/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGF) {
+ if (eng_id < ENGINE_ID_DIGF) {
vpg_inst = eng_id;
afmt_inst = eng_id;
} else
@@ -1447,7 +1299,8 @@ static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create(
* VPG[8] -> HPO_DP[2]
* VPG[9] -> HPO_DP[3]
*/
- vpg_inst = hpo_dp_inst + 6;
+ //Uses offset index 5-8, but actually maps to vpg_inst 6-9
+ vpg_inst = hpo_dp_inst + 5;
/* Mapping of APG register blocks to HPO DP block instance:
* APG[0] -> HPO_DP[0]
@@ -1793,109 +1646,16 @@ static struct clock_source *dcn31_clock_source_create(
return NULL;
}
-static bool is_dual_plane(enum surface_pixel_format format)
-{
- return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
-}
-
static int dcn314_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
bool fast_validate)
{
- int i, pipe_cnt;
- struct resource_context *res_ctx = &context->res_ctx;
- struct pipe_ctx *pipe;
- bool upscaled = false;
-
- dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
-
- for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
- struct dc_crtc_timing *timing;
-
- if (!res_ctx->pipe_ctx[i].stream)
- continue;
- pipe = &res_ctx->pipe_ctx[i];
- timing = &pipe->stream->timing;
-
- if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min
- && pipe->stream->adjust.v_total_min > timing->v_total)
- pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
-
- if (pipe->plane_state &&
- (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height ||
- pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width))
- upscaled = true;
-
- /*
- * Immediate flip can be set dynamically after enabling the plane.
- * We need to require support for immediate flip or underflow can be
- * intermittently experienced depending on peak b/w requirements.
- */
- pipes[pipe_cnt].pipe.src.immediate_flip = true;
-
- pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
- pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
- pipes[pipe_cnt].pipe.src.gpuvm = true;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
- pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
- pipes[pipe_cnt].pipe.src.dcc_rate = 3;
- pipes[pipe_cnt].dout.dsc_input_bpc = 0;
-
- if (pipes[pipe_cnt].dout.dsc_enable) {
- switch (timing->display_color_depth) {
- case COLOR_DEPTH_888:
- pipes[pipe_cnt].dout.dsc_input_bpc = 8;
- break;
- case COLOR_DEPTH_101010:
- pipes[pipe_cnt].dout.dsc_input_bpc = 10;
- break;
- case COLOR_DEPTH_121212:
- pipes[pipe_cnt].dout.dsc_input_bpc = 12;
- break;
- default:
- ASSERT(0);
- break;
- }
- }
-
- pipe_cnt++;
- }
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE;
-
- dc->config.enable_4to1MPC = false;
- if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
- if (is_dual_plane(pipe->plane_state->format)
- && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
- dc->config.enable_4to1MPC = true;
- } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) {
- /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
- pipes[0].pipe.src.unbounded_req_mode = true;
- }
- } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count
- && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64;
- } else if (context->stream_count >= 3 && upscaled) {
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (!pipe->stream)
- continue;
+ int pipe_cnt;
- if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine &&
- pipe->stream->apply_seamless_boot_optimization) {
-
- if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) {
- context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1;
- break;
- }
- }
- }
+ DC_FP_START();
+ pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, fast_validate);
+ DC_FP_END();
return pipe_cnt;
}
@@ -1906,88 +1666,9 @@ static struct dc_cap_funcs cap_funcs = {
static void dcn314_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
- struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st *clock_tmp = dcn3_14_soc._clock_tmp;
- unsigned int i, closest_clk_lvl;
- int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
- int j;
-
- // Default clock levels are used for diags, which may lead to overclocking.
- if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
-
- dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
- dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count;
-
- if (bw_params->num_channels > 0)
- dcn3_14_soc.num_chans = bw_params->num_channels;
-
- ASSERT(dcn3_14_soc.num_chans);
- ASSERT(clk_table->num_entries);
-
- /* Prepass to find max clocks independent of voltage level. */
- for (i = 0; i < clk_table->num_entries; ++i) {
- if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
- max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
- if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
- max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
- }
-
- for (i = 0; i < clk_table->num_entries; i++) {
- /* loop backwards*/
- for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) {
- if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
- closest_clk_lvl = j;
- break;
- }
- }
- if (clk_table->num_entries == 1) {
- /*smu gives one DPM level, let's take the highest one*/
- closest_clk_lvl = dcn3_14_soc.num_states - 1;
- }
-
- clock_tmp[i].state = i;
-
- /* Clocks dependent on voltage level. */
- clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- if (clk_table->num_entries == 1 &&
- clock_tmp[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
- /*SMU fix not released yet*/
- clock_tmp[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
- }
- clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
-
- if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio)
- clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
-
- /* Clocks independent of voltage level. */
- clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
- dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
-
- clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
- dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-
- clock_tmp[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_tmp[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_tmp[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_tmp[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_tmp[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
- }
- for (i = 0; i < clk_table->num_entries; i++)
- dcn3_14_soc.clock_limits[i] = clock_tmp[i];
- if (clk_table->num_entries)
- dcn3_14_soc.num_states = clk_table->num_entries;
- }
-
- if (max_dispclk_mhz) {
- dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
- dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
- }
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
- dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31);
- else
- dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA);
+ DC_FP_START();
+ dcn314_update_bw_bounding_box_fpu(dc, bw_params);
+ DC_FP_END();
}
static struct resource_funcs dcn314_res_pool_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
index c41108847ce0..0dd3153aa5c1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
@@ -29,6 +29,9 @@
#include "core_types.h"
+extern struct _vcs_dpi_ip_params_st dcn3_14_ip;
+extern struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc;
+
#define TO_DCN314_RES_POOL(pool)\
container_of(pool, struct dcn314_resource_pool, base)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
index 39929fa67a51..22849eaa6f24 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
@@ -32,7 +32,6 @@
container_of(pool, struct dcn315_resource_pool, base)
extern struct _vcs_dpi_ip_params_st dcn3_15_ip;
-extern struct _vcs_dpi_ip_params_st dcn3_15_soc;
struct dcn315_resource_pool {
struct resource_pool base;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
index 0dc5a6c13ae7..aba6d634131b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
@@ -32,7 +32,6 @@
container_of(pool, struct dcn316_resource_pool, base)
extern struct _vcs_dpi_ip_params_st dcn3_16_ip;
-extern struct _vcs_dpi_ip_params_st dcn3_16_soc;
struct dcn316_resource_pool {
struct resource_pool base;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index d38341f68b17..ebd3945c71f1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -250,6 +250,7 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
uint32_t total_lines = 0;
uint32_t lines_per_way = 0;
uint32_t num_ways = 0;
+ uint32_t prev_addr_low = 0;
for (i = 0; i < ctx->stream_count; i++) {
stream = ctx->streams[i];
@@ -267,10 +268,20 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
plane = ctx->stream_status[i].plane_states[j];
// Calculate total surface size
- surface_size = plane->plane_size.surface_pitch *
+ if (prev_addr_low != plane->address.grph.addr.u.low_part) {
+ /* if plane address are different from prev FB, then userspace allocated separate FBs*/
+ surface_size += plane->plane_size.surface_pitch *
plane->plane_size.surface_size.height *
(plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4);
+ prev_addr_low = plane->address.grph.addr.u.low_part;
+ } else {
+ /* We have the same fb for all the planes.
+ * Xorg always creates one giant fb that holds all surfaces,
+ * so allocating it once is sufficient.
+ * */
+ continue;
+ }
// Convert surface size + starting address to number of cache lines required
// (alignment accounted for)
cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size,
@@ -320,7 +331,10 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
{
union dmub_rb_cmd cmd;
- uint8_t ways;
+ uint8_t ways, i;
+ int j;
+ bool stereo_in_use = false;
+ struct dc_plane_state *plane = NULL;
if (!dc->ctx->dmub_srv)
return false;
@@ -349,7 +363,23 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
* and configure HUBP's to fetch from MALL
*/
ways = dcn32_calculate_cab_allocation(dc, dc->current_state);
- if (ways <= dc->caps.cache_num_ways) {
+
+ /* MALL not supported with Stereo3D. If any plane is using stereo,
+ * don't try to enter MALL.
+ */
+ for (i = 0; i < dc->current_state->stream_count; i++) {
+ for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) {
+ plane = dc->current_state->stream_status[i].plane_states[j];
+
+ if (plane->address.type == PLN_ADDR_TYPE_GRPH_STEREO) {
+ stereo_in_use = true;
+ break;
+ }
+ }
+ if (stereo_in_use)
+ break;
+ }
+ if (ways <= dc->caps.cache_num_ways && !stereo_in_use) {
memset(&cmd, 0, sizeof(cmd));
cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS;
cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB;
@@ -683,9 +713,11 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context)
if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
hubp->funcs->hubp_update_mall_sel(hubp, 1, false);
} else {
+ // MALL not supported with Stereo3D
hubp->funcs->hubp_update_mall_sel(hubp,
num_ways <= dc->caps.cache_num_ways &&
- pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 2 : 0,
+ pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED &&
+ pipe->plane_state->address.type != PLN_ADDR_TYPE_GRPH_STEREO ? 2 : 0,
cache_cursor);
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
index eff1f4e17689..1fad7b48bd5b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
@@ -281,7 +281,7 @@ static struct timing_generator_funcs dcn32_tg_funcs = {
.lock_doublebuffer_enable = optc3_lock_doublebuffer_enable,
.lock_doublebuffer_disable = optc3_lock_doublebuffer_disable,
.enable_optc_clock = optc1_enable_optc_clock,
- .set_drr = optc31_set_drr, // TODO: Update to optc32_set_drr once FW headers are promoted
+ .set_drr = optc32_set_drr,
.get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
.set_vtotal_min_max = optc3_set_vtotal_min_max,
.set_static_screen_control = optc1_set_static_screen_control,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 9a26d24b579f..8b887b552f2c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -867,7 +867,7 @@ static const struct dc_debug_options debug_defaults_drv = {
}
},
.use_max_lb = true,
- .force_disable_subvp = true,
+ .force_disable_subvp = false,
.exit_idle_opt_for_cursor_updates = true,
.enable_single_display_2to1_odm_policy = true,
.enable_dp_dig_pixel_rate_div_policy = 1,
@@ -2051,6 +2051,7 @@ static bool dcn32_resource_construct(
dc->caps.max_cab_allocation_bytes = 67108864; // 64MB = 1024 * 1024 * 64
dc->caps.subvp_fw_processing_delay_us = 15;
dc->caps.subvp_prefetch_end_to_mall_start_us = 15;
+ dc->caps.subvp_swath_height_margin_lines = 16;
dc->caps.subvp_pstate_allow_width_us = 20;
dc->caps.subvp_vertical_int_margin_us = 30;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index b3f8503cea9c..955f52e6064d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -63,7 +63,7 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
- mall_region_pixels = pipe->stream->timing.h_addressable * pipe->stream->timing.v_addressable;
+ mall_region_pixels = pipe->plane_state->plane_size.surface_pitch * pipe->stream->timing.v_addressable;
// For bytes required in MALL, calculate based on number of MBlks required
num_mblks = (mall_region_pixels * bytes_per_pixel +
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index 8157e40d2c7e..c8b7d6ff38f4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -868,7 +868,7 @@ static const struct dc_debug_options debug_defaults_drv = {
}
},
.use_max_lb = true,
- .force_disable_subvp = true,
+ .force_disable_subvp = false,
.exit_idle_opt_for_cursor_updates = true,
.enable_single_display_2to1_odm_policy = true,
.enable_dp_dig_pixel_rate_div_policy = 1,
@@ -1662,8 +1662,9 @@ static bool dcn321_resource_construct(
dc->caps.max_cab_allocation_bytes = 33554432; // 32MB = 1024 * 1024 * 32
dc->caps.subvp_fw_processing_delay_us = 15;
dc->caps.subvp_prefetch_end_to_mall_start_us = 15;
+ dc->caps.subvp_swath_height_margin_lines = 16;
dc->caps.subvp_pstate_allow_width_us = 20;
-
+ dc->caps.subvp_vertical_int_margin_us = 30;
dc->caps.max_slave_planes = 1;
dc->caps.max_slave_yuv_planes = 1;
dc->caps.max_slave_rgb_planes = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 359f6e9a1da0..86a3b5bfd699 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -61,7 +61,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
@@ -71,6 +70,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag)
@@ -82,7 +82,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare
@@ -131,6 +130,7 @@ DML += dcn321/dcn321_fpu.o
DML += dcn301/dcn301_fpu.o
DML += dcn302/dcn302_fpu.o
DML += dcn303/dcn303_fpu.o
+DML += dcn314/dcn314_fpu.o
DML += dsc/rc_calc_fpu.o
DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index ca44df4fca74..d34e0f1314d9 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -30,6 +30,7 @@
#include "dchubbub.h"
#include "dcn20/dcn20_resource.h"
#include "dcn21/dcn21_resource.h"
+#include "clk_mgr/dcn21/rn_clk_mgr.h"
#include "dcn20_fpu.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
index 7ef66e511ec8..d211cf6d234c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
@@ -26,6 +26,7 @@
#include "clk_mgr.h"
#include "dcn20/dcn20_resource.h"
#include "dcn301/dcn301_resource.h"
+#include "clk_mgr/dcn301/vg_clk_mgr.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "dcn301_fpu.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index e36cfa5985ea..149a1b17cdf3 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -25,6 +25,9 @@
#include "resource.h"
#include "clk_mgr.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn315/dcn315_resource.h"
+#include "dcn316/dcn316_resource.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "dcn31_fpu.h"
@@ -114,7 +117,7 @@ struct _vcs_dpi_ip_params_st dcn3_1_ip = {
.dcc_supported = true,
};
-struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
+static struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
/*TODO: correct dispclk/dppclk voltage level determination*/
.clock_limits = {
{
@@ -259,7 +262,7 @@ struct _vcs_dpi_ip_params_st dcn3_15_ip = {
.dcc_supported = true,
};
-struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
+static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
.sr_exit_time_us = 9.0,
.sr_enter_plus_exit_time_us = 11.0,
.sr_exit_z8_time_us = 50.0,
@@ -355,7 +358,7 @@ struct _vcs_dpi_ip_params_st dcn3_16_ip = {
.dcc_supported = true,
};
-struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
+static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
/*TODO: correct dispclk/dppclk voltage level determination*/
.clock_limits = {
{
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 3fab19134480..d63b4209b14c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -26,7 +26,7 @@
#include "dc.h"
#include "dc_link.h"
#include "../display_mode_lib.h"
-#include "dml/dcn30/display_mode_vba_30.h"
+#include "../dcn30/display_mode_vba_30.h"
#include "display_mode_vba_31.h"
#include "../dml_inline_defs.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
index 66b82e4f05c6..35d10b4d018b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
@@ -27,7 +27,7 @@
#include "../display_mode_vba.h"
#include "../dml_inline_defs.h"
#include "display_rq_dlg_calc_31.h"
-#include "dml/dcn30/display_mode_vba_30.h"
+#include "../dcn30/display_mode_vba_30.h"
static bool is_dual_plane(enum source_format_class source_format)
{
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
new file mode 100644
index 000000000000..34a5d0f87b5f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "clk_mgr.h"
+#include "resource.h"
+#include "dcn31/dcn31_hubbub.h"
+#include "dcn314_fpu.h"
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dml/display_mode_vba.h"
+
+struct _vcs_dpi_ip_params_st dcn3_14_ip = {
+ .VBlankNomDefaultUS = 668,
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1792,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 48,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 186.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 371.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 600.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .sr_exit_z8_time_us = 442.0,
+ .sr_enter_plus_exit_z8_time_us = 560.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};
+
+
+void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ struct _vcs_dpi_voltage_scaling_st *clock_limits =
+ dcn3_14_soc.clock_limits;
+ unsigned int i, closest_clk_lvl;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ // Default clock levels are used for diags, which may lead to overclocking.
+ if (!IS_DIAG_DC(dc->ctx->dce_environment) && dc->config.use_default_clock_table == false) {
+
+ dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count;
+
+ if (bw_params->num_channels > 0)
+ dcn3_14_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(dcn3_14_soc.num_chans);
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+ if (clk_table->num_entries == 1) {
+ /*smu gives one DPM level, let's take the highest one*/
+ closest_clk_lvl = dcn3_14_soc.num_states - 1;
+ }
+
+ clock_limits[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ if (clk_table->num_entries == 1 &&
+ clock_limits[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+ /*SMU fix not released yet*/
+ clock_limits[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+ }
+ clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+
+ if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio)
+ clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ clock_limits[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ clock_limits[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ clock_limits[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ clock_limits[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ clock_limits[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+ for (i = 0; i < clk_table->num_entries; i++)
+ dcn3_14_soc.clock_limits[i] = clock_limits[i];
+ if (clk_table->num_entries) {
+ dcn3_14_soc.num_states = clk_table->num_entries;
+ }
+ }
+
+ if (max_dispclk_mhz) {
+ dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ }
+
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
+ dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31);
+ else
+ dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA);
+}
+
+static bool is_dual_plane(enum surface_pixel_format format)
+{
+ return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
+}
+
+int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate)
+{
+ int i, pipe_cnt;
+ struct resource_context *res_ctx = &context->res_ctx;
+ struct pipe_ctx *pipe;
+ bool upscaled = false;
+
+ dc_assert_fp_enabled();
+
+ dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_crtc_timing *timing;
+
+ if (!res_ctx->pipe_ctx[i].stream)
+ continue;
+ pipe = &res_ctx->pipe_ctx[i];
+ timing = &pipe->stream->timing;
+
+ if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min
+ && pipe->stream->adjust.v_total_min > timing->v_total)
+ pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
+
+ if (pipe->plane_state &&
+ (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height ||
+ pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width))
+ upscaled = true;
+
+ /*
+ * Immediate flip can be set dynamically after enabling the plane.
+ * We need to require support for immediate flip or underflow can be
+ * intermittently experienced depending on peak b/w requirements.
+ */
+ pipes[pipe_cnt].pipe.src.immediate_flip = true;
+
+ pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+ pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
+ pipes[pipe_cnt].pipe.src.gpuvm = true;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+ pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
+ pipes[pipe_cnt].pipe.src.dcc_rate = 3;
+ pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+
+ if (pipes[pipe_cnt].dout.dsc_enable) {
+ switch (timing->display_color_depth) {
+ case COLOR_DEPTH_888:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 8;
+ break;
+ case COLOR_DEPTH_101010:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 10;
+ break;
+ case COLOR_DEPTH_121212:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+ }
+
+ pipe_cnt++;
+ }
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE;
+
+ dc->config.enable_4to1MPC = false;
+ if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
+ if (is_dual_plane(pipe->plane_state->format)
+ && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
+ dc->config.enable_4to1MPC = true;
+ } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) {
+ /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+ pipes[0].pipe.src.unbounded_req_mode = true;
+ }
+ } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count
+ && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64;
+ } else if (context->stream_count >= 3 && upscaled) {
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine &&
+ pipe->stream->apply_seamless_boot_optimization) {
+
+ if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) {
+ context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1;
+ break;
+ }
+ }
+ }
+
+ return pipe_cnt;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
new file mode 100644
index 000000000000..d32c5bb99f4c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN314_FPU_H__
+#define __DCN314_FPU_H__
+
+#define DCN3_14_DEFAULT_DET_SIZE 384
+#define DCN3_14_MAX_DET_SIZE 384
+#define DCN3_14_MIN_COMPBUF_SIZE_KB 128
+#define DCN3_14_CRB_SEGMENT_SIZE_KB 64
+
+void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 66453546e24f..8118cfc5b405 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -473,8 +473,11 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
// DML calculation for MALL region doesn't take into account FW delay
// and required pstate allow width for multi-display cases
+ /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned
+ * to 2 swaths (i.e. 16 lines)
+ */
phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) +
- pstate_width_fw_delay_lines;
+ pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines;
// For backporch of phantom pipe, use vstartup of the main pipe
phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
@@ -490,6 +493,7 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
phantom_stream->timing.v_front_porch +
phantom_stream->timing.v_sync_width +
phantom_bp;
+ phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing
}
/**
@@ -983,9 +987,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
* DML favors voltage over p-state, but we're more interested in
* supporting p-state over voltage. We can't support p-state in
* prefetch mode > 0 so try capping the prefetch mode to start.
+ * Override present for testing.
*/
- context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ if (dc->debug.dml_disallow_alternate_prefetch_modes)
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
dm_prefetch_support_uclk_fclk_and_stutter;
+ else
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+
*vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
/* This may adjust vlevel and maxMpcComb */
if (*vlevel < context->bw_ctx.dml.soc.num_states)
@@ -1014,7 +1024,9 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
* will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched
* enough to support MCLK switching.
*/
- if (*vlevel == context->bw_ctx.dml.soc.num_states) {
+ if (*vlevel == context->bw_ctx.dml.soc.num_states &&
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final ==
+ dm_prefetch_support_uclk_fclk_and_stutter) {
context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
dm_prefetch_support_stutter;
/* There are params (such as FabricClock) that need to be recalculated
@@ -1344,7 +1356,8 @@ bool dcn32_internal_validate_bw(struct dc *dc,
int split[MAX_PIPES] = { 0 };
bool merge[MAX_PIPES] = { false };
bool newly_split[MAX_PIPES] = { false };
- int pipe_cnt, i, pipe_idx, vlevel;
+ int pipe_cnt, i, pipe_idx;
+ int vlevel = context->bw_ctx.dml.soc.num_states;
struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
dc_assert_fp_enabled();
@@ -1373,17 +1386,22 @@ bool dcn32_internal_validate_bw(struct dc *dc,
DC_FP_END();
}
- if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states ||
- vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) {
+ if (fast_validate ||
+ (dc->debug.dml_disallow_alternate_prefetch_modes &&
+ (vlevel == context->bw_ctx.dml.soc.num_states ||
+ vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) {
/*
- * If mode is unsupported or there's still no p-state support then
- * fall back to favoring voltage.
+ * If dml_disallow_alternate_prefetch_modes is false, then we have already
+ * tried alternate prefetch modes during full validation.
+ *
+ * If mode is unsupported or there is no p-state support, then
+ * fall back to favouring voltage.
*
- * If Prefetch mode 0 failed for this config, or passed with Max UCLK, try if
- * supported with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2)
+ * If Prefetch mode 0 failed for this config, or passed with Max UCLK, then try
+ * to support with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2)
*/
context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
- dm_prefetch_support_fclk_and_stutter;
+ dm_prefetch_support_fclk_and_stutter;
vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
@@ -2098,6 +2116,13 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
}
+ if ((int)(dcn3_2_soc.fclk_change_latency_us * 1000)
+ != dc->bb_overrides.fclk_clock_change_latency_ns
+ && dc->bb_overrides.fclk_clock_change_latency_ns) {
+ dcn3_2_soc.fclk_change_latency_us =
+ dc->bb_overrides.fclk_clock_change_latency_ns / 1000;
+ }
+
if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000)
!= dc->bb_overrides.dummy_clock_change_latency_ns
&& dc->bb_overrides.dummy_clock_change_latency_ns) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 890612db08dc..cb2025771646 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -221,7 +221,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
// VBA_DELTA
// Calculate DET size, swath height
dml32_CalculateSwathAndDETConfiguration(
- &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -461,7 +460,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
{
dml32_CalculateVMRowAndSwath(
- &v->dummy_vars.dml32_CalculateVMRowAndSwath,
mode_lib->vba.NumberOfActiveSurfaces,
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters,
v->SurfaceSizeInMALL,
@@ -757,9 +755,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k];
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k];
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
- v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(
- &v->dummy_vars.dml32_CalculatePrefetchSchedule,
- v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+ v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
&v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k],
mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater,
mode_lib->vba.DPPCLKDelaySCL,
@@ -1167,7 +1163,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency;
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
- &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport,
mode_lib->vba.USRRetrainingRequiredFinal,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
@@ -1952,7 +1947,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
dml32_CalculateSwathAndDETConfiguration(
- &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -2549,7 +2543,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
dml32_CalculateSwathAndDETConfiguration(
- &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -2749,7 +2742,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
{
dml32_CalculateVMRowAndSwath(
- &v->dummy_vars.dml32_CalculateVMRowAndSwath,
mode_lib->vba.NumberOfActiveSurfaces,
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters,
mode_lib->vba.SurfaceSizeInMALL,
@@ -3266,7 +3258,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
mode_lib->vba.NoTimeForPrefetch[i][j][k] =
dml32_CalculatePrefetchSchedule(
- &v->dummy_vars.dml32_CalculatePrefetchSchedule,
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor,
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe,
mode_lib->vba.DSCDelayPerState[i][k],
@@ -3566,7 +3557,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
{
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
- &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport,
mode_lib->vba.USRRetrainingRequiredFinal,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.PrefetchModePerState[i][j],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index 07f8f3b8626b..05fc14a47fba 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -391,7 +391,6 @@ void dml32_CalculateBytePerPixelAndBlockSizes(
} // CalculateBytePerPixelAndBlockSizes
void dml32_CalculateSwathAndDETConfiguration(
- struct dml32_CalculateSwathAndDETConfiguration *st_vars,
unsigned int DETSizeOverride[],
enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
unsigned int ConfigReturnBufferSizeInKByte,
@@ -456,10 +455,18 @@ void dml32_CalculateSwathAndDETConfiguration(
bool ViewportSizeSupportPerSurface[],
bool *ViewportSizeSupport)
{
+ unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
+ unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpSwathSizeBytesY;
+ unsigned int RoundedUpSwathSizeBytesC;
+ double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
+ double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
unsigned int k;
-
- st_vars->TotalActiveDPP = 0;
- st_vars->NoChromaSurfaces = true;
+ unsigned int TotalActiveDPP = 0;
+ bool NoChromaSurfaces = true;
+ unsigned int DETBufferSizeInKByteForSwathCalculation;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
@@ -494,43 +501,43 @@ void dml32_CalculateSwathAndDETConfiguration(
DPPPerSurface,
/* Output */
- st_vars->SwathWidthdoubleDPP,
- st_vars->SwathWidthdoubleDPPChroma,
+ SwathWidthdoubleDPP,
+ SwathWidthdoubleDPPChroma,
SwathWidth,
SwathWidthChroma,
- st_vars->MaximumSwathHeightY,
- st_vars->MaximumSwathHeightC,
+ MaximumSwathHeightY,
+ MaximumSwathHeightC,
swath_width_luma_ub,
swath_width_chroma_ub);
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k];
- st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k];
+ RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
+ RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
- dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
- st_vars->RoundedUpMaxSwathSizeBytesY[k]);
+ RoundedUpMaxSwathSizeBytesY[k]);
dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
- dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]);
+ dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
- st_vars->RoundedUpMaxSwathSizeBytesC[k]);
+ RoundedUpMaxSwathSizeBytesC[k]);
#endif
if (SourcePixelFormat[k] == dm_420_10) {
- st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256);
- st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256);
+ RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
+ RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
}
}
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
+ TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
- st_vars->NoChromaSurfaces = false;
+ NoChromaSurfaces = false;
}
}
@@ -540,10 +547,10 @@ void dml32_CalculateSwathAndDETConfiguration(
// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
- *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512);
+ *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
if (*CompBufReservedSpaceNeedAdjustment == 1) {
- *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512;
+ *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
}
#ifdef __DML_VBA_DEBUG__
@@ -551,7 +558,7 @@ void dml32_CalculateSwathAndDETConfiguration(
dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
#endif
- *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+ *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
dml32_CalculateDETBufferSize(DETSizeOverride,
UseMALLForPStateChange,
@@ -566,8 +573,8 @@ void dml32_CalculateSwathAndDETConfiguration(
SourcePixelFormat,
ReadBandwidthLuma,
ReadBandwidthChroma,
- st_vars->RoundedUpMaxSwathSizeBytesY,
- st_vars->RoundedUpMaxSwathSizeBytesC,
+ RoundedUpMaxSwathSizeBytesY,
+ RoundedUpMaxSwathSizeBytesC,
DPPPerSurface,
/* Output */
@@ -575,7 +582,7 @@ void dml32_CalculateSwathAndDETConfiguration(
CompressedBufferSizeInkByte);
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP);
+ dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
@@ -586,42 +593,42 @@ void dml32_CalculateSwathAndDETConfiguration(
*ViewportSizeSupport = true;
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
+ DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
- st_vars->DETBufferSizeInKByteForSwathCalculation);
+ DETBufferSizeInKByteForSwathCalculation);
#endif
- if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
- st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
- SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
- SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
- st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
- st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
- } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
- st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
- st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
- SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
- SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
- st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
- st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
- } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
- st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <=
- st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
- SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
- SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
- st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
- st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
+ if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+ RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+ RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
} else {
- SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
- SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
- st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
- st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
+ SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
}
- if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 >
- st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
+ if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
*ViewportSizeSupport = false;
@@ -636,7 +643,7 @@ void dml32_CalculateSwathAndDETConfiguration(
#endif
DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
DETBufferSizeC[k] = 0;
- } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) {
+ } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
#endif
@@ -654,11 +661,11 @@ void dml32_CalculateSwathAndDETConfiguration(
dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
- k, st_vars->RoundedUpMaxSwathSizeBytesY[k]);
+ k, RoundedUpMaxSwathSizeBytesY[k]);
dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
- k, st_vars->RoundedUpMaxSwathSizeBytesC[k]);
- dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY);
- dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC);
+ k, RoundedUpMaxSwathSizeBytesC[k]);
+ dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
+ dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
@@ -1867,7 +1874,6 @@ void dml32_CalculateSurfaceSizeInMall(
} // CalculateSurfaceSizeInMall
void dml32_CalculateVMRowAndSwath(
- struct dml32_CalculateVMRowAndSwath *st_vars,
unsigned int NumberOfActiveSurfaces,
DmlPipe myPipe[],
unsigned int SurfaceSizeInMALL[],
@@ -1933,6 +1939,21 @@ void dml32_CalculateVMRowAndSwath(
unsigned int BIGK_FRAGMENT_SIZE[])
{
unsigned int k;
+ unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
+ unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
+ unsigned int PDEAndMetaPTEBytesFrameY;
+ unsigned int PDEAndMetaPTEBytesFrameC;
+ unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
+ unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
+ bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if (HostVMEnable == true) {
@@ -1954,15 +1975,15 @@ void dml32_CalculateVMRowAndSwath(
myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
!IsVertical(myPipe[k].SourceRotation)) {
- st_vars->PTEBufferSizeInRequestsForLuma[k] =
+ PTEBufferSizeInRequestsForLuma[k] =
(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
- st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k];
+ PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
} else {
- st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
- st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
+ PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
+ PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
}
- st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
+ PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
myPipe[k].ViewportStationary,
myPipe[k].DCCEnable,
myPipe[k].DPPPerSurface,
@@ -1982,21 +2003,21 @@ void dml32_CalculateVMRowAndSwath(
GPUVMMaxPageTableLevels,
GPUVMMinPageSizeKBytes[k],
HostVMMinPageSize,
- st_vars->PTEBufferSizeInRequestsForChroma[k],
+ PTEBufferSizeInRequestsForChroma[k],
myPipe[k].PitchC,
myPipe[k].DCCMetaPitchC,
myPipe[k].BlockWidthC,
myPipe[k].BlockHeightC,
/* Output */
- &st_vars->MetaRowByteC[k],
- &st_vars->PixelPTEBytesPerRowC[k],
+ &MetaRowByteC[k],
+ &PixelPTEBytesPerRowC[k],
&dpte_row_width_chroma_ub[k],
&dpte_row_height_chroma[k],
&dpte_row_height_linear_chroma[k],
- &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k],
- &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k],
- &st_vars->dpte_row_height_chroma_one_row_per_frame[k],
+ &PixelPTEBytesPerRowC_one_row_per_frame[k],
+ &dpte_row_width_chroma_ub_one_row_per_frame[k],
+ &dpte_row_height_chroma_one_row_per_frame[k],
&meta_req_width_chroma[k],
&meta_req_height_chroma[k],
&meta_row_width_chroma[k],
@@ -2024,19 +2045,19 @@ void dml32_CalculateVMRowAndSwath(
&VInitPreFillC[k],
&MaxNumSwathC[k]);
} else {
- st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
- st_vars->PTEBufferSizeInRequestsForChroma[k] = 0;
- st_vars->PixelPTEBytesPerRowC[k] = 0;
- st_vars->PDEAndMetaPTEBytesFrameC = 0;
- st_vars->MetaRowByteC[k] = 0;
+ PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
+ PTEBufferSizeInRequestsForChroma[k] = 0;
+ PixelPTEBytesPerRowC[k] = 0;
+ PDEAndMetaPTEBytesFrameC = 0;
+ MetaRowByteC[k] = 0;
MaxNumSwathC[k] = 0;
PrefetchSourceLinesC[k] = 0;
- st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0;
- st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
- st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
+ dpte_row_height_chroma_one_row_per_frame[k] = 0;
+ dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
+ PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
}
- st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
+ PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
myPipe[k].ViewportStationary,
myPipe[k].DCCEnable,
myPipe[k].DPPPerSurface,
@@ -2056,21 +2077,21 @@ void dml32_CalculateVMRowAndSwath(
GPUVMMaxPageTableLevels,
GPUVMMinPageSizeKBytes[k],
HostVMMinPageSize,
- st_vars->PTEBufferSizeInRequestsForLuma[k],
+ PTEBufferSizeInRequestsForLuma[k],
myPipe[k].PitchY,
myPipe[k].DCCMetaPitchY,
myPipe[k].BlockWidthY,
myPipe[k].BlockHeightY,
/* Output */
- &st_vars->MetaRowByteY[k],
- &st_vars->PixelPTEBytesPerRowY[k],
+ &MetaRowByteY[k],
+ &PixelPTEBytesPerRowY[k],
&dpte_row_width_luma_ub[k],
&dpte_row_height_luma[k],
&dpte_row_height_linear_luma[k],
- &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k],
- &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k],
- &st_vars->dpte_row_height_luma_one_row_per_frame[k],
+ &PixelPTEBytesPerRowY_one_row_per_frame[k],
+ &dpte_row_width_luma_ub_one_row_per_frame[k],
+ &dpte_row_height_luma_one_row_per_frame[k],
&meta_req_width[k],
&meta_req_height[k],
&meta_row_width[k],
@@ -2098,19 +2119,19 @@ void dml32_CalculateVMRowAndSwath(
&VInitPreFillY[k],
&MaxNumSwathY[k]);
- PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC;
- MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k];
+ PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
+ MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
- if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] &&
- st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) {
+ if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
+ PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
PTEBufferSizeNotExceeded[k] = true;
} else {
PTEBufferSizeNotExceeded[k] = false;
}
- st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
- st_vars->PTEBufferSizeInRequestsForLuma[k] &&
- st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]);
+ one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
+ PTEBufferSizeInRequestsForLuma[k] &&
+ PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
}
dml32_CalculateMALLUseForStaticScreen(
@@ -2118,7 +2139,7 @@ void dml32_CalculateVMRowAndSwath(
MALLAllocatedForDCN,
UseMALLForStaticScreen, // mode
SurfaceSizeInMALL,
- st_vars->one_row_per_frame_fits_in_buffer,
+ one_row_per_frame_fits_in_buffer,
/* Output */
UsesMALLForStaticScreen); // boolen
@@ -2144,13 +2165,13 @@ void dml32_CalculateVMRowAndSwath(
!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
if (use_one_row_for_frame[k]) {
- dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k];
- dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k];
- st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k];
- dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k];
- dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k];
- st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k];
- PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k];
+ dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
+ dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
+ PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
+ dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
+ dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
+ PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
+ PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
}
if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
@@ -2158,7 +2179,7 @@ void dml32_CalculateVMRowAndSwath(
else
DCCMetaBufferSizeNotExceeded[k] = false;
- PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k];
+ PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
if (use_one_row_for_frame[k])
PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
@@ -2169,11 +2190,11 @@ void dml32_CalculateVMRowAndSwath(
myPipe[k].VRatioChroma,
myPipe[k].DCCEnable,
myPipe[k].HTotal / myPipe[k].PixelClock,
- st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k],
+ MetaRowByteY[k], MetaRowByteC[k],
meta_row_height[k],
meta_row_height_chroma[k],
- st_vars->PixelPTEBytesPerRowY[k],
- st_vars->PixelPTEBytesPerRowC[k],
+ PixelPTEBytesPerRowY[k],
+ PixelPTEBytesPerRowC[k],
dpte_row_height_luma[k],
dpte_row_height_chroma[k],
@@ -2189,12 +2210,12 @@ void dml32_CalculateVMRowAndSwath(
dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
__func__, k, dpte_row_width_luma_ub[k]);
- dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
__func__, k, dpte_row_height_chroma[k]);
dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
__func__, k, dpte_row_width_chroma_ub[k]);
- dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
__func__, k, PTEBufferSizeNotExceeded[k]);
@@ -3342,7 +3363,6 @@ double dml32_CalculateExtraLatency(
} // CalculateExtraLatency
bool dml32_CalculatePrefetchSchedule(
- struct dml32_CalculatePrefetchSchedule *st_vars,
double HostVMInefficiencyFactor,
DmlPipe *myPipe,
unsigned int DSCDelay,
@@ -3406,18 +3426,45 @@ bool dml32_CalculatePrefetchSchedule(
double *VReadyOffsetPix)
{
bool MyError = false;
-
- st_vars->TimeForFetchingMetaPTE = 0;
- st_vars->TimeForFetchingRowInVBlank = 0;
- st_vars->LinesToRequestPrefetchPixelData = 0;
- st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__;
- st_vars->Tsw_est1 = 0;
- st_vars->Tsw_est3 = 0;
+ unsigned int DPPCycles, DISPCLKCycles;
+ double DSTTotalPixelsAfterScaler;
+ double LineTime;
+ double dst_y_prefetch_equ;
+ double prefetch_bw_oto;
+ double Tvm_oto;
+ double Tr0_oto;
+ double Tvm_oto_lines;
+ double Tr0_oto_lines;
+ double dst_y_prefetch_oto;
+ double TimeForFetchingMetaPTE = 0;
+ double TimeForFetchingRowInVBlank = 0;
+ double LinesToRequestPrefetchPixelData = 0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double trip_to_mem;
+ double Tvm_trips;
+ double Tr0_trips;
+ double Tvm_trips_rounded;
+ double Tr0_trips_rounded;
+ double Lsw_oto;
+ double Tpre_rounded;
+ double prefetch_bw_equ;
+ double Tvm_equ;
+ double Tr0_equ;
+ double Tdmbf;
+ double Tdmec;
+ double Tdmsks;
+ double prefetch_sw_bytes;
+ double bytes_pp;
+ double dep_bytes;
+ unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
+ double min_Lsw;
+ double Tsw_est1 = 0;
+ double Tsw_est3 = 0;
if (GPUVMEnable == true && HostVMEnable == true)
- st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
else
- st_vars->HostVMDynamicLevelsTrips = 0;
+ HostVMDynamicLevelsTrips = 0;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
@@ -3440,19 +3487,19 @@ bool dml32_CalculatePrefetchSchedule(
TSetup,
/* output */
- &st_vars->Tdmbf,
- &st_vars->Tdmec,
- &st_vars->Tdmsks,
+ &Tdmbf,
+ &Tdmec,
+ &Tdmsks,
VUpdateOffsetPix,
VUpdateWidthPix,
VReadyOffsetPix);
- st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock;
- st_vars->trip_to_mem = UrgentLatency;
- st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
+ LineTime = myPipe->HTotal / myPipe->PixelClock;
+ trip_to_mem = UrgentLatency;
+ Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
if (DynamicMetadataVMEnabled == true)
- *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem;
+ *Tdmdl = TWait + Tvm_trips + trip_to_mem;
else
*Tdmdl = TWait + UrgentExtraLatency;
@@ -3462,15 +3509,15 @@ bool dml32_CalculatePrefetchSchedule(
#endif
if (DynamicMetadataEnable == true) {
- if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) {
+ if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
*NotEnoughTimeForDynamicMetadata = true;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
- __func__, st_vars->Tdmbf);
- dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
+ __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
- __func__, st_vars->Tdmsks);
+ __func__, Tdmsks);
dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
__func__, *Tdmdl);
#endif
@@ -3482,21 +3529,21 @@ bool dml32_CalculatePrefetchSchedule(
}
*Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
- GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0);
+ GPUVMEnable == true ? TWait + Tvm_trips : 0);
if (myPipe->ScalerEnabled)
- st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
else
- st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
- st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
+ DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
- st_vars->DISPCLKCycles = DISPCLKDelaySubtotal;
+ DISPCLKCycles = DISPCLKDelaySubtotal;
if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
return true;
- *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles *
+ *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
@@ -3506,10 +3553,10 @@ bool dml32_CalculatePrefetchSchedule(
+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles);
+ dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
- dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles);
+ dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
@@ -3522,9 +3569,9 @@ bool dml32_CalculatePrefetchSchedule(
else
*DSTYAfterScaler = 0;
- st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
- *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
- *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
+ DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
+ *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
+ *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
@@ -3532,132 +3579,132 @@ bool dml32_CalculatePrefetchSchedule(
MyError = false;
- st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1);
+ Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
if (GPUVMEnable == true) {
- st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
- st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
+ Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
if (GPUVMPageTableLevels >= 3) {
- *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem *
- (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
+ *Tno_bw = UrgentExtraLatency + trip_to_mem *
+ (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
} else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
- st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) /
- 4.0 * st_vars->LineTime; // VBA_ERROR
+ Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
+ 4.0 * LineTime; // VBA_ERROR
*Tno_bw = UrgentExtraLatency;
} else {
*Tno_bw = 0;
}
} else if (myPipe->DCCEnable == true) {
- st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
- st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
+ Tvm_trips_rounded = LineTime / 4.0;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
*Tno_bw = 0;
} else {
- st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
- st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0;
+ Tvm_trips_rounded = LineTime / 4.0;
+ Tr0_trips_rounded = LineTime / 2.0;
*Tno_bw = 0;
}
- st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0);
- st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0);
+ Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
+ Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
|| myPipe->SourcePixelFormat == dm_420_12) {
- st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
} else {
- st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
}
- st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
+ prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
- st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
- st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime));
+ prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
+ prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
- st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre;
- st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0);
- st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0;
+ min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
+ min_Lsw = dml_max(min_Lsw, 1.0);
+ Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
if (GPUVMEnable == true) {
- st_vars->Tvm_oto = dml_max3(
- st_vars->Tvm_trips,
- *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto,
- st_vars->LineTime / 4.0);
+ Tvm_oto = dml_max3(
+ Tvm_trips,
+ *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
+ LineTime / 4.0);
} else
- st_vars->Tvm_oto = st_vars->LineTime / 4.0;
+ Tvm_oto = LineTime / 4.0;
if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
- st_vars->Tr0_oto = dml_max4(
- st_vars->Tr0_trips,
- (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto,
- (st_vars->LineTime - st_vars->Tvm_oto)/2.0,
- st_vars->LineTime / 4.0);
+ Tr0_oto = dml_max4(
+ Tr0_trips,
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
+ (LineTime - Tvm_oto)/2.0,
+ LineTime / 4.0);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
- (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto);
- dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips);
- dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto);
- dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4);
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
+ dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
#endif
} else
- st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0;
+ Tr0_oto = (LineTime - Tvm_oto) / 2.0;
- st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0;
- st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0;
- st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto;
+ Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
+ Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
+ dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
- st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime -
+ dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
- dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw);
+ dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
- dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem);
+ dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
- dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes);
- dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp);
+ dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
+ dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
- dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips);
- dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips);
- dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto);
- dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto);
- dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto);
- dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines);
- dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines);
- dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto);
- dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto);
- dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ);
+ dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
+ dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
+ dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
+ dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
+ dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
+ dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
+ dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
+ dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
#endif
- st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0;
- st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime;
+ dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ Tpre_rounded = dst_y_prefetch_equ * LineTime;
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ);
- dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
+ dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
- __func__, VStartup * st_vars->LineTime);
+ __func__, VStartup * LineTime);
dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
- dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf);
- dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
__func__, *DSTYAfterScaler);
#endif
- st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
+ dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
- if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes)
- st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes;
+ if (prefetch_sw_bytes < dep_bytes)
+ prefetch_sw_bytes = 2 * dep_bytes;
*PrefetchBandwidth = 0;
*DestinationLinesToRequestVMInVBlank = 0;
@@ -3665,61 +3712,61 @@ bool dml32_CalculatePrefetchSchedule(
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
- if (st_vars->dst_y_prefetch_equ > 1) {
+ if (dst_y_prefetch_equ > 1) {
double PrefetchBandwidth1;
double PrefetchBandwidth2;
double PrefetchBandwidth3;
double PrefetchBandwidth4;
- if (st_vars->Tpre_rounded - *Tno_bw > 0) {
+ if (Tpre_rounded - *Tno_bw > 0) {
PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
- + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw);
- st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1;
+ + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
+ Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
} else
PrefetchBandwidth1 = 0;
- if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw)
- && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) {
+ if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
+ && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
- / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw);
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
}
- if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0)
- PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) /
- (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded);
+ if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
+ PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
+ (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
else
PrefetchBandwidth2 = 0;
- if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) {
+ if (Tpre_rounded - Tvm_trips_rounded > 0) {
PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
- + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded);
- st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3;
+ + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
+ Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
} else
PrefetchBandwidth3 = 0;
if (VStartup == MaxVStartup &&
- (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 *
- st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) {
+ (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
+ LineTime - Tvm_trips_rounded > 0) {
PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
- / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded);
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
}
- if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) {
- PrefetchBandwidth4 = st_vars->prefetch_sw_bytes /
- (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded);
+ if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
+ PrefetchBandwidth4 = prefetch_sw_bytes /
+ (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
} else {
PrefetchBandwidth4 = 0;
}
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded);
+ dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
- dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded);
- dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1);
- dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3);
+ dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
+ dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
+ dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
@@ -3732,9 +3779,9 @@ bool dml32_CalculatePrefetchSchedule(
if (PrefetchBandwidth1 > 0) {
if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
- >= st_vars->Tvm_trips_rounded
+ >= Tvm_trips_rounded
&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
- / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) {
+ / PrefetchBandwidth1 >= Tr0_trips_rounded) {
Case1OK = true;
} else {
Case1OK = false;
@@ -3745,9 +3792,9 @@ bool dml32_CalculatePrefetchSchedule(
if (PrefetchBandwidth2 > 0) {
if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
- >= st_vars->Tvm_trips_rounded
+ >= Tvm_trips_rounded
&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
- / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) {
+ / PrefetchBandwidth2 < Tr0_trips_rounded) {
Case2OK = true;
} else {
Case2OK = false;
@@ -3758,9 +3805,9 @@ bool dml32_CalculatePrefetchSchedule(
if (PrefetchBandwidth3 > 0) {
if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
- st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
+ Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
- st_vars->Tr0_trips_rounded) {
+ Tr0_trips_rounded) {
Case3OK = true;
} else {
Case3OK = false;
@@ -3770,80 +3817,80 @@ bool dml32_CalculatePrefetchSchedule(
}
if (Case1OK)
- st_vars->prefetch_bw_equ = PrefetchBandwidth1;
+ prefetch_bw_equ = PrefetchBandwidth1;
else if (Case2OK)
- st_vars->prefetch_bw_equ = PrefetchBandwidth2;
+ prefetch_bw_equ = PrefetchBandwidth2;
else if (Case3OK)
- st_vars->prefetch_bw_equ = PrefetchBandwidth3;
+ prefetch_bw_equ = PrefetchBandwidth3;
else
- st_vars->prefetch_bw_equ = PrefetchBandwidth4;
+ prefetch_bw_equ = PrefetchBandwidth4;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
- dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ);
+ dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
#endif
- if (st_vars->prefetch_bw_equ > 0) {
+ if (prefetch_bw_equ > 0) {
if (GPUVMEnable == true) {
- st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
- HostVMInefficiencyFactor / st_vars->prefetch_bw_equ,
- st_vars->Tvm_trips, st_vars->LineTime / 4);
+ Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
+ HostVMInefficiencyFactor / prefetch_bw_equ,
+ Tvm_trips, LineTime / 4);
} else {
- st_vars->Tvm_equ = st_vars->LineTime / 4;
+ Tvm_equ = LineTime / 4;
}
if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
- st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
- HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips,
- (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4);
+ Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
+ HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
+ (LineTime - Tvm_equ) / 2, LineTime / 4);
} else {
- st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2;
+ Tr0_equ = (LineTime - Tvm_equ) / 2;
}
} else {
- st_vars->Tvm_equ = 0;
- st_vars->Tr0_equ = 0;
+ Tvm_equ = 0;
+ Tr0_equ = 0;
#ifdef __DML_VBA_DEBUG__
dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
#endif
}
}
- if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) {
- *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto;
- st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto;
- st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto;
- *PrefetchBandwidth = st_vars->prefetch_bw_oto;
+ if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
+ *DestinationLinesForPrefetch = dst_y_prefetch_oto;
+ TimeForFetchingMetaPTE = Tvm_oto;
+ TimeForFetchingRowInVBlank = Tr0_oto;
+ *PrefetchBandwidth = prefetch_bw_oto;
} else {
- *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ;
- st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ;
- st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ;
- *PrefetchBandwidth = st_vars->prefetch_bw_equ;
+ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
+ TimeForFetchingMetaPTE = Tvm_equ;
+ TimeForFetchingRowInVBlank = Tr0_equ;
+ *PrefetchBandwidth = prefetch_bw_equ;
}
- *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0;
+ *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
*DestinationLinesToRequestRowInVBlank =
- dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0;
+ dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
- st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
+ LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
__func__, *DestinationLinesToRequestVMInVBlank);
- dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank);
- dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
__func__, *DestinationLinesToRequestRowInVBlank);
dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
- dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData);
+ dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
#endif
- if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) {
- *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData;
+ if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
+ *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
@@ -3851,12 +3898,12 @@ bool dml32_CalculatePrefetchSchedule(
dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
#endif
if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
- if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
*VRatioPrefetchY =
dml_max((double) PrefetchSourceLinesY /
- st_vars->LinesToRequestPrefetchPixelData,
+ LinesToRequestPrefetchPixelData,
(double) MaxNumSwathY * SwathHeightY /
- (st_vars->LinesToRequestPrefetchPixelData -
+ (LinesToRequestPrefetchPixelData -
(VInitPreFillY - 3.0) / 2.0));
*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
} else {
@@ -3870,7 +3917,7 @@ bool dml32_CalculatePrefetchSchedule(
#endif
}
- *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
#ifdef __DML_VBA_DEBUG__
@@ -3879,11 +3926,11 @@ bool dml32_CalculatePrefetchSchedule(
dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
#endif
if ((SwathHeightC > 4)) {
- if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
*VRatioPrefetchC =
dml_max(*VRatioPrefetchC,
(double) MaxNumSwathC * SwathHeightC /
- (st_vars->LinesToRequestPrefetchPixelData -
+ (LinesToRequestPrefetchPixelData -
(VInitPreFillC - 3.0) / 2.0));
*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
} else {
@@ -3898,25 +3945,25 @@ bool dml32_CalculatePrefetchSchedule(
}
*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
- / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
- / st_vars->LineTime;
+ / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
+ / LineTime;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
- dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
__func__, *RequiredPrefetchPixDataBWLuma);
#endif
*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
- st_vars->LinesToRequestPrefetchPixelData
+ LinesToRequestPrefetchPixelData
* myPipe->BytePerPixelC
- * swath_width_chroma_ub / st_vars->LineTime;
+ * swath_width_chroma_ub / LineTime;
} else {
MyError = true;
#ifdef __DML_VBA_DEBUG__
dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
- __func__, st_vars->LinesToRequestPrefetchPixelData);
+ __func__, LinesToRequestPrefetchPixelData);
#endif
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
@@ -3925,15 +3972,15 @@ bool dml32_CalculatePrefetchSchedule(
}
#ifdef __DML_VBA_DEBUG__
dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
- (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime +
- 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE);
- dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE);
+ (double)LinesToRequestPrefetchPixelData * LineTime +
+ 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
+ dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
- (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime);
+ (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
- dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime -
- st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
- ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup);
+ dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
+ TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
+ ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
PixelPTEBytesPerRow);
#endif
@@ -3941,7 +3988,7 @@ bool dml32_CalculatePrefetchSchedule(
MyError = true;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
- __func__, st_vars->dst_y_prefetch_equ);
+ __func__, dst_y_prefetch_equ);
#endif
}
@@ -3957,10 +4004,10 @@ bool dml32_CalculatePrefetchSchedule(
dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
__func__, *DestinationLinesToRequestVMInVBlank);
- dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
- (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime);
+ (*DestinationLinesToRequestVMInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
@@ -3977,7 +4024,7 @@ bool dml32_CalculatePrefetchSchedule(
prefetch_row_bw = 0;
} else if (*DestinationLinesToRequestRowInVBlank > 0) {
prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
- (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime);
+ (*DestinationLinesToRequestRowInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
@@ -4000,12 +4047,12 @@ bool dml32_CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- st_vars->TimeForFetchingMetaPTE = 0;
- st_vars->TimeForFetchingRowInVBlank = 0;
+ TimeForFetchingMetaPTE = 0;
+ TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- st_vars->LinesToRequestPrefetchPixelData = 0;
+ LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
@@ -4159,7 +4206,6 @@ void dml32_CalculateFlipSchedule(
} // CalculateFlipSchedule
void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
- struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
bool USRRetrainingRequiredFinal,
enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
unsigned int PrefetchMode,
@@ -4221,15 +4267,37 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
double ActiveDRAMClockChangeLatencyMargin[])
{
unsigned int i, j, k;
-
- st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0;
- st_vars->DRAMClockChangeSupportNumber = 0;
- st_vars->DRAMClockChangeMethod = 0;
- st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
- st_vars->MinActiveFCLKChangeMargin = 0.;
- st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
- st_vars->TotalPixelBW = 0.0;
- st_vars->TotalActiveWriteback = 0;
+ unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
+ unsigned int DRAMClockChangeSupportNumber = 0;
+ unsigned int LastSurfaceWithoutMargin;
+ unsigned int DRAMClockChangeMethod = 0;
+ bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
+ double MinActiveFCLKChangeMargin = 0.;
+ double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
+ double ActiveClockChangeLatencyHidingY;
+ double ActiveClockChangeLatencyHidingC;
+ double ActiveClockChangeLatencyHiding;
+ double EffectiveDETBufferSizeY;
+ double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
+ double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
+ double TotalPixelBW = 0.0;
+ bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
+ double EffectiveLBLatencyHidingY;
+ double EffectiveLBLatencyHidingC;
+ double LinesInDETY[DC__NUM_DPP__MAX];
+ double LinesInDETC[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
+ double FullDETBufferingTimeY;
+ double FullDETBufferingTimeC;
+ double WritebackDRAMClockChangeLatencyMargin;
+ double WritebackFCLKChangeLatencyMargin;
+ double WritebackLatencyHiding;
+ bool SameTimingForFCLKChange;
+
+ unsigned int TotalActiveWriteback = 0;
+ unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
+ unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
@@ -4261,13 +4329,13 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
#endif
- st_vars->TotalActiveWriteback = 0;
+ TotalActiveWriteback = 0;
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if (WritebackEnable[k] == true)
- st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1;
+ TotalActiveWriteback = TotalActiveWriteback + 1;
}
- if (st_vars->TotalActiveWriteback <= 1) {
+ if (TotalActiveWriteback <= 1) {
Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
} else {
Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
@@ -4277,7 +4345,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
+ mmSOCParameters.USRRetrainingLatency;
- if (st_vars->TotalActiveWriteback <= 1) {
+ if (TotalActiveWriteback <= 1) {
Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ mmSOCParameters.WritebackLatency;
Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
@@ -4307,14 +4375,14 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
#endif
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
+ TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
}
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
- st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
+ LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
+ LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
#ifdef __DML_VBA_DEBUG__
@@ -4325,72 +4393,72 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]);
#endif
- st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
- st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
- st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k];
+ EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
+ EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
+ EffectiveDETBufferSizeY = DETBufferSizeY[k];
if (UnboundedRequestEnabled) {
- st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY
+ EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
+ CompressedBufferSizeInkByte * 1024
* (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
- / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW;
+ / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
}
- st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
- st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]);
- st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
+ LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
+ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
- st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY
+ ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
- (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
if (NumberOfActiveSurfaces > 1) {
- st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY
+ ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
- (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
/ PixelClock[k] / VRatio[k];
}
if (BytePerPixelDETC[k] > 0) {
- st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
- st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]);
- st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
+ LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
+ LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
+ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
/ VRatioChroma[k];
- st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC
+ ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
- (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
/ PixelClock[k];
if (NumberOfActiveSurfaces > 1) {
- st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC
+ ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
- (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
/ PixelClock[k] / VRatioChroma[k];
}
- st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY,
- st_vars->ActiveClockChangeLatencyHidingC);
+ ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
+ ActiveClockChangeLatencyHidingC);
} else {
- st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY;
+ ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
}
- ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
+ ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
- Watermark->DRAMClockChangeWatermark;
- st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
+ ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
- Watermark->FCLKChangeWatermark;
- st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
+ USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
if (WritebackEnable[k]) {
- st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
+ WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
/ (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
/ (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
if (WritebackPixelFormat[k] == dm_444_64)
- st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2;
+ WritebackLatencyHiding = WritebackLatencyHiding / 2;
- st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding
+ WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
- Watermark->WritebackDRAMClockChangeWatermark;
- st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding
+ WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
- Watermark->WritebackFCLKChangeWatermark;
ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
- st_vars->WritebackFCLKChangeLatencyMargin);
- st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k],
- st_vars->WritebackDRAMClockChangeLatencyMargin);
+ WritebackFCLKChangeLatencyMargin);
+ ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
+ WritebackDRAMClockChangeLatencyMargin);
}
MaxActiveDRAMClockChangeLatencySupported[k] =
(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
@@ -4409,41 +4477,41 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
(DRRDisplay[i] || DRRDisplay[j]))) {
- st_vars->SynchronizedSurfaces[i][j] = true;
+ SynchronizedSurfaces[i][j] = true;
} else {
- st_vars->SynchronizedSurfaces[i][j] = false;
+ SynchronizedSurfaces[i][j] = false;
}
}
}
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
- (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
- st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) {
- st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
- st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k];
- st_vars->SurfaceWithMinActiveFCLKChangeMargin = k;
+ (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
+ ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
+ FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
+ MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
+ SurfaceWithMinActiveFCLKChangeMargin = k;
}
}
- *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
+ *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
- st_vars->SameTimingForFCLKChange = true;
+ SameTimingForFCLKChange = true;
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) {
+ if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
- (st_vars->SameTimingForFCLKChange ||
- st_vars->ActiveFCLKChangeLatencyMargin[k] <
- st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
- st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k];
+ (SameTimingForFCLKChange ||
+ ActiveFCLKChangeLatencyMargin[k] <
+ SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
+ SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
}
- st_vars->SameTimingForFCLKChange = false;
+ SameTimingForFCLKChange = false;
}
}
- if (st_vars->MinActiveFCLKChangeMargin > 0) {
+ if (MinActiveFCLKChangeMargin > 0) {
*FCLKChangeSupport = dm_fclock_change_vactive;
- } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
+ } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
(PrefetchMode <= 1)) {
*FCLKChangeSupport = dm_fclock_change_vblank;
} else {
@@ -4453,7 +4521,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
*USRRetrainingSupport = true;
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
- (st_vars->USRRetrainingLatencyMargin[k] < 0)) {
+ (USRRetrainingLatencyMargin[k] < 0)) {
*USRRetrainingSupport = false;
}
}
@@ -4464,42 +4532,42 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
ActiveDRAMClockChangeLatencyMargin[k] < 0) {
if (PrefetchMode > 0) {
- st_vars->DRAMClockChangeSupportNumber = 2;
- } else if (st_vars->DRAMClockChangeSupportNumber == 0) {
- st_vars->DRAMClockChangeSupportNumber = 1;
- st_vars->LastSurfaceWithoutMargin = k;
- } else if (st_vars->DRAMClockChangeSupportNumber == 1 &&
- !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) {
- st_vars->DRAMClockChangeSupportNumber = 2;
+ DRAMClockChangeSupportNumber = 2;
+ } else if (DRAMClockChangeSupportNumber == 0) {
+ DRAMClockChangeSupportNumber = 1;
+ LastSurfaceWithoutMargin = k;
+ } else if (DRAMClockChangeSupportNumber == 1 &&
+ !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
+ DRAMClockChangeSupportNumber = 2;
}
}
}
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
- st_vars->DRAMClockChangeMethod = 1;
+ DRAMClockChangeMethod = 1;
else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
- st_vars->DRAMClockChangeMethod = 2;
+ DRAMClockChangeMethod = 2;
}
- if (st_vars->DRAMClockChangeMethod == 0) {
- if (st_vars->DRAMClockChangeSupportNumber == 0)
+ if (DRAMClockChangeMethod == 0) {
+ if (DRAMClockChangeSupportNumber == 0)
*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
- else if (st_vars->DRAMClockChangeSupportNumber == 1)
+ else if (DRAMClockChangeSupportNumber == 1)
*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
else
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
- } else if (st_vars->DRAMClockChangeMethod == 1) {
- if (st_vars->DRAMClockChangeSupportNumber == 0)
+ } else if (DRAMClockChangeMethod == 1) {
+ if (DRAMClockChangeSupportNumber == 0)
*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
- else if (st_vars->DRAMClockChangeSupportNumber == 1)
+ else if (DRAMClockChangeSupportNumber == 1)
*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
else
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
} else {
- if (st_vars->DRAMClockChangeSupportNumber == 0)
+ if (DRAMClockChangeSupportNumber == 0)
*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
- else if (st_vars->DRAMClockChangeSupportNumber == 1)
+ else if (DRAMClockChangeSupportNumber == 1)
*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
else
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
@@ -4513,7 +4581,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
- src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k];
+ src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
#ifdef __DML_VBA_DEBUG__
@@ -4521,7 +4589,7 @@ dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DET
dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
-dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]);
+dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
@@ -4532,7 +4600,7 @@ dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l
if (BytePerPixelDETC[k] > 0) {
src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
- src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k];
+ src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 37a314ce284b..d293856ba906 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -30,7 +30,6 @@
#include "os_types.h"
#include "../dc_features.h"
#include "../display_mode_structs.h"
-#include "dml/display_mode_vba.h"
unsigned int dml32_dscceComputeDelay(
unsigned int bpc,
@@ -82,7 +81,6 @@ void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
double *DPPCLKUsingSingleDPP);
void dml32_CalculateSwathAndDETConfiguration(
- struct dml32_CalculateSwathAndDETConfiguration *st_vars,
unsigned int DETSizeOverride[],
enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
unsigned int ConfigReturnBufferSizeInKByte,
@@ -362,7 +360,6 @@ void dml32_CalculateSurfaceSizeInMall(
bool *ExceededMALLSize);
void dml32_CalculateVMRowAndSwath(
- struct dml32_CalculateVMRowAndSwath *st_vars,
unsigned int NumberOfActiveSurfaces,
DmlPipe myPipe[],
unsigned int SurfaceSizeInMALL[],
@@ -715,7 +712,6 @@ double dml32_CalculateExtraLatency(
unsigned int HostVMMaxNonCachedPageTableLevels);
bool dml32_CalculatePrefetchSchedule(
- struct dml32_CalculatePrefetchSchedule *st_vars,
double HostVMInefficiencyFactor,
DmlPipe *myPipe,
unsigned int DSCDelay,
@@ -811,7 +807,6 @@ void dml32_CalculateFlipSchedule(
bool *ImmediateFlipSupportedForPipe);
void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
- struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
bool USRRetrainingRequiredFinal,
enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
unsigned int PrefetchMode,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
index 84b4b00f29cb..c87091683b5d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
@@ -498,6 +498,13 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
}
+ if ((int)(dcn3_21_soc.fclk_change_latency_us * 1000)
+ != dc->bb_overrides.fclk_clock_change_latency_ns
+ && dc->bb_overrides.fclk_clock_change_latency_ns) {
+ dcn3_21_soc.fclk_change_latency_us =
+ dc->bb_overrides.fclk_clock_change_latency_ns / 1000;
+ }
+
if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000)
!= dc->bb_overrides.dummy_clock_change_latency_ns
&& dc->bb_overrides.dummy_clock_change_latency_ns) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 8460aefe7b6d..492aec634b68 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -182,108 +182,6 @@ void Calculate256BBlockSizes(
unsigned int *BlockWidth256BytesY,
unsigned int *BlockWidth256BytesC);
-struct dml32_CalculateSwathAndDETConfiguration {
- unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
- unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
- unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
- unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
- unsigned int RoundedUpSwathSizeBytesY;
- unsigned int RoundedUpSwathSizeBytesC;
- double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
- double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
- unsigned int TotalActiveDPP;
- bool NoChromaSurfaces;
- unsigned int DETBufferSizeInKByteForSwathCalculation;
-};
-
-struct dml32_CalculateVMRowAndSwath {
- unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
- unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
- unsigned int PDEAndMetaPTEBytesFrameY;
- unsigned int PDEAndMetaPTEBytesFrameC;
- unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
- unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
- unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
- unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
- unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
- bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
-};
-
-struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport {
- unsigned int SurfaceWithMinActiveFCLKChangeMargin;
- unsigned int DRAMClockChangeSupportNumber;
- unsigned int LastSurfaceWithoutMargin;
- unsigned int DRAMClockChangeMethod;
- bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin;
- double MinActiveFCLKChangeMargin;
- double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank;
- double ActiveClockChangeLatencyHidingY;
- double ActiveClockChangeLatencyHidingC;
- double ActiveClockChangeLatencyHiding;
- double EffectiveDETBufferSizeY;
- double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
- double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
- double TotalPixelBW;
- bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
- double EffectiveLBLatencyHidingY;
- double EffectiveLBLatencyHidingC;
- double LinesInDETY[DC__NUM_DPP__MAX];
- double LinesInDETC[DC__NUM_DPP__MAX];
- unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
- unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
- double FullDETBufferingTimeY;
- double FullDETBufferingTimeC;
- double WritebackDRAMClockChangeLatencyMargin;
- double WritebackFCLKChangeLatencyMargin;
- double WritebackLatencyHiding;
- bool SameTimingForFCLKChange;
- unsigned int TotalActiveWriteback;
- unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
- unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
-};
-
-struct dml32_CalculatePrefetchSchedule {
- unsigned int DPPCycles, DISPCLKCycles;
- double DSTTotalPixelsAfterScaler;
- double LineTime;
- double dst_y_prefetch_equ;
- double prefetch_bw_oto;
- double Tvm_oto;
- double Tr0_oto;
- double Tvm_oto_lines;
- double Tr0_oto_lines;
- double dst_y_prefetch_oto;
- double TimeForFetchingMetaPTE;
- double TimeForFetchingRowInVBlank;
- double LinesToRequestPrefetchPixelData;
- unsigned int HostVMDynamicLevelsTrips;
- double trip_to_mem;
- double Tvm_trips;
- double Tr0_trips;
- double Tvm_trips_rounded;
- double Tr0_trips_rounded;
- double Lsw_oto;
- double Tpre_rounded;
- double prefetch_bw_equ;
- double Tvm_equ;
- double Tr0_equ;
- double Tdmbf;
- double Tdmec;
- double Tdmsks;
- double prefetch_sw_bytes;
- double bytes_pp;
- double dep_bytes;
- unsigned int max_vratio_pre;
- double min_Lsw;
- double Tsw_est1;
- double Tsw_est3;
-};
-
struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation {
unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX];
double dummy_single_array[2][DC__NUM_DPP__MAX];
@@ -355,10 +253,6 @@ struct dummy_vars {
struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation;
struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull;
- struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration;
- struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath;
- struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport;
- struct dml32_CalculatePrefetchSchedule dml32_CalculatePrefetchSchedule;
};
struct vba_vars_st {