diff options
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c')
-rw-r--r-- | drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c | 310 |
1 files changed, 310 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c new file mode 100644 index 000000000000..c4c52173ef22 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c @@ -0,0 +1,310 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dml2_policy.h" + +static void get_optimal_ntuple( + const struct soc_bounding_box_st *socbb, + struct soc_state_bounding_box_st *entry) +{ + if (entry->dcfclk_mhz > 0) { + float bw_on_sdp = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); + + entry->fabricclk_mhz = bw_on_sdp / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_sdp / (socbb->num_chans * + socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); + } else if (entry->fabricclk_mhz > 0) { + float bw_on_fabric = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); + + entry->dcfclk_mhz = bw_on_fabric / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_fabric / (socbb->num_chans * + socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); + } else if (entry->dram_speed_mts > 0) { + float bw_on_dram = (float)(entry->dram_speed_mts * socbb->num_chans * + socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); + + entry->fabricclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); + entry->dcfclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); + } +} + +static float calculate_net_bw_in_mbytes_sec(const struct soc_bounding_box_st *socbb, + struct soc_state_bounding_box_st *entry) +{ + float memory_bw_mbytes_sec = (float)(entry->dram_speed_mts * socbb->num_chans * + socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); + + float fabric_bw_mbytes_sec = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); + + float sdp_bw_mbytes_sec = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); + + float limiting_bw_mbytes_sec = memory_bw_mbytes_sec; + + if (fabric_bw_mbytes_sec < limiting_bw_mbytes_sec) + limiting_bw_mbytes_sec = fabric_bw_mbytes_sec; + + if (sdp_bw_mbytes_sec < limiting_bw_mbytes_sec) + limiting_bw_mbytes_sec = sdp_bw_mbytes_sec; + + return limiting_bw_mbytes_sec; +} + +static void insert_entry_into_table_sorted(const struct soc_bounding_box_st *socbb, + struct soc_states_st *table, + struct soc_state_bounding_box_st *entry) +{ + int index = 0; + int i = 0; + float net_bw_of_new_state = 0; + + get_optimal_ntuple(socbb, entry); + + if (table->num_states == 0) { + index = 0; + } else { + net_bw_of_new_state = calculate_net_bw_in_mbytes_sec(socbb, entry); + while (net_bw_of_new_state > calculate_net_bw_in_mbytes_sec(socbb, &table->state_array[index])) { + index++; + if (index >= (int) table->num_states) + break; + } + + for (i = table->num_states; i > index; i--) { + table->state_array[i] = table->state_array[i - 1]; + } + //ASSERT(index < MAX_CLK_TABLE_SIZE); + } + + table->state_array[index] = *entry; + table->state_array[index].dcfclk_mhz = (int)entry->dcfclk_mhz; + table->state_array[index].fabricclk_mhz = (int)entry->fabricclk_mhz; + table->state_array[index].dram_speed_mts = (int)entry->dram_speed_mts; + table->num_states++; +} + +static void remove_entry_from_table_at_index(struct soc_states_st *table, + unsigned int index) +{ + int i; + + if (table->num_states == 0) + return; + + for (i = index; i < (int) table->num_states - 1; i++) { + table->state_array[i] = table->state_array[i + 1]; + } + memset(&table->state_array[--table->num_states], 0, sizeof(struct soc_state_bounding_box_st)); +} + +int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s, + struct dml2_policy_build_synthetic_soc_states_params *p) +{ + int i, j; + unsigned int min_fclk_mhz = p->in_states->state_array[0].fabricclk_mhz; + unsigned int min_dcfclk_mhz = p->in_states->state_array[0].dcfclk_mhz; + unsigned int min_socclk_mhz = p->in_states->state_array[0].socclk_mhz; + + int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, + max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, + max_uclk_mhz = 0, max_socclk_mhz = 0; + + int num_uclk_dpms = 0, num_fclk_dpms = 0; + + for (i = 0; i < __DML_MAX_STATE_ARRAY_SIZE__; i++) { + if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = (int) p->in_states->state_array[i].dcfclk_mhz; + if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz) + max_fclk_mhz = (int) p->in_states->state_array[i].fabricclk_mhz; + if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz) + max_socclk_mhz = (int) p->in_states->state_array[i].socclk_mhz; + if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz) + max_uclk_mhz = (int) p->in_states->state_array[i].dram_speed_mts; + if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = (int) p->in_states->state_array[i].dispclk_mhz; + if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = (int) p->in_states->state_array[i].dppclk_mhz; + if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz; + if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz; + + if (p->in_states->state_array[i].fabricclk_mhz > 0) + num_fclk_dpms++; + if (p->in_states->state_array[i].dram_speed_mts > 0) + num_uclk_dpms++; + } + + if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dppclk_mhz || !max_phyclk_mhz || !max_dtbclk_mhz) + return -1; + + p->out_states->num_states = 0; + + s->entry = p->in_states->state_array[0]; + + s->entry.dispclk_mhz = max_dispclk_mhz; + s->entry.dppclk_mhz = max_dppclk_mhz; + s->entry.dtbclk_mhz = max_dtbclk_mhz; + s->entry.phyclk_mhz = max_phyclk_mhz; + + s->entry.dscclk_mhz = max_dispclk_mhz / 3; + s->entry.phyclk_mhz = max_phyclk_mhz; + s->entry.dtbclk_mhz = max_dtbclk_mhz; + + // Insert all the DCFCLK STAs first + for (i = 0; i < p->num_dcfclk_stas; i++) { + s->entry.dcfclk_mhz = p->dcfclk_stas_mhz[i]; + s->entry.fabricclk_mhz = 0; + s->entry.dram_speed_mts = 0; + if (i > 0) + s->entry.socclk_mhz = max_socclk_mhz; + + insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); + } + + // Insert the UCLK DPMS + for (i = 0; i < num_uclk_dpms; i++) { + s->entry.dcfclk_mhz = 0; + s->entry.fabricclk_mhz = 0; + s->entry.dram_speed_mts = p->in_states->state_array[i].dram_speed_mts; + if (i == 0) { + s->entry.socclk_mhz = min_socclk_mhz; + } else { + s->entry.socclk_mhz = max_socclk_mhz; + } + + insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); + } + + // Insert FCLK DPMs (if present) + if (num_fclk_dpms > 2) { + for (i = 0; i < num_fclk_dpms; i++) { + s->entry.dcfclk_mhz = 0; + s->entry.fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz; + s->entry.dram_speed_mts = 0; + + insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); + } + } + // Add max FCLK + else { + s->entry.dcfclk_mhz = 0; + s->entry.fabricclk_mhz = p->in_states->state_array[num_fclk_dpms - 1].fabricclk_mhz; + s->entry.dram_speed_mts = 0; + + insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); + } + + // Remove states that require higher clocks than are supported + for (i = p->out_states->num_states - 1; i >= 0; i--) { + if (p->out_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz || + p->out_states->state_array[i].fabricclk_mhz > max_fclk_mhz || + p->out_states->state_array[i].dram_speed_mts > max_uclk_mhz) + remove_entry_from_table_at_index(p->out_states, i); + } + + // At this point, the table contains all "points of interest" based on + // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock + // ratios (by derate, are exact). + + // Round up UCLK to DPMs + for (i = p->out_states->num_states - 1; i >= 0; i--) { + for (j = 0; j < num_uclk_dpms; j++) { + if (p->in_states->state_array[j].dram_speed_mts >= p->out_states->state_array[i].dram_speed_mts) { + p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[j].dram_speed_mts; + break; + } + } + } + + // If FCLK is coarse grained, round up to next DPMs + if (num_fclk_dpms > 2) { + for (i = p->out_states->num_states - 1; i >= 0; i--) { + for (j = 0; j < num_fclk_dpms; j++) { + if (p->in_states->state_array[j].fabricclk_mhz >= p->out_states->state_array[i].fabricclk_mhz) { + p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[j].fabricclk_mhz; + break; + } + } + } + } + + // Clamp to min FCLK/DCFCLK + for (i = p->out_states->num_states - 1; i >= 0; i--) { + if (p->out_states->state_array[i].fabricclk_mhz < min_fclk_mhz) { + p->out_states->state_array[i].fabricclk_mhz = min_fclk_mhz; + } + if (p->out_states->state_array[i].dcfclk_mhz < min_dcfclk_mhz) { + p->out_states->state_array[i].dcfclk_mhz = min_dcfclk_mhz; + } + } + + // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. + i = 0; + while (i < (int) p->out_states->num_states - 1) { + if (p->out_states->state_array[i].dcfclk_mhz == p->out_states->state_array[i + 1].dcfclk_mhz && + p->out_states->state_array[i].fabricclk_mhz == p->out_states->state_array[i + 1].fabricclk_mhz && + p->out_states->state_array[i].dram_speed_mts == p->out_states->state_array[i + 1].dram_speed_mts) + remove_entry_from_table_at_index(p->out_states, i); + else + i++; + } + + return 0; +} + +void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy) +{ + for (int i = 0; i < __DML_NUM_PLANES__; i++) { + policy->MPCCombineUse[i] = dml_mpc_as_needed_for_voltage; // TOREVIEW: Is this still needed? When is MPCC useful for pstate given CRB? + policy->ODMUse[i] = dml_odm_use_policy_combine_as_needed; + policy->ImmediateFlipRequirement[i] = dml_immediate_flip_required; + policy->AllowForPStateChangeOrStutterInVBlank[i] = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; + } + + /* Change the default policy initializations as per spreadsheet. We might need to + * review and change them later on as per Jun's earlier comments. + */ + policy->UseUnboundedRequesting = dml_unbounded_requesting_enable; + policy->UseMinimumRequiredDCFCLK = false; + policy->DRAMClockChangeRequirementFinal = true; // TOREVIEW: What does this mean? + policy->FCLKChangeRequirementFinal = true; // TOREVIEW: What does this mean? + policy->USRRetrainingRequiredFinal = true; + policy->EnhancedPrefetchScheduleAccelerationFinal = true; // TOREVIEW: What does this mean? + policy->NomDETInKByteOverrideEnable = false; + policy->NomDETInKByteOverrideValue = 0; + policy->DCCProgrammingAssumesScanDirectionUnknownFinal = true; + policy->SynchronizeTimingsFinal = true; + policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = true; + policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = true; // TOREVIEW: What does this mean? + policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = true; // TOREVIEW: What does this mean? + if (project == dml_project_dcn35 || + project == dml_project_dcn351) { + policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false; + policy->EnhancedPrefetchScheduleAccelerationFinal = 0; + policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/ + policy->UseOnlyMaxPrefetchModes = 1; + } +} |